2476 files changed, 107337 insertions, 24133 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6871e65..fffdf9b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -84,20 +84,16 @@ set(LLVM_ALL_TARGETS
   NVPTX
   PowerPC
   Sparc
+  SystemZ
   X86
   XCore
   )
 
 # List of targets with JIT support:
-set(LLVM_TARGETS_WITH_JIT X86 PowerPC ARM Mips)
+set(LLVM_TARGETS_WITH_JIT X86 PowerPC ARM Mips SystemZ)
 
-if( MSVC )
-  set(LLVM_TARGETS_TO_BUILD X86
-    CACHE STRING "Semicolon-separated list of targets to build, or \"all\".")
-else( MSVC )
-  set(LLVM_TARGETS_TO_BUILD "all"
+set(LLVM_TARGETS_TO_BUILD "all"
     CACHE STRING "Semicolon-separated list of targets to build, or \"all\".")
-endif( MSVC )
 
 set(LLVM_EXPERIMENTAL_TARGETS_TO_BUILD ""
   CACHE STRING "Semicolon-separated list of experimental targets to build.")
@@ -129,6 +125,8 @@ set(LLVM_TARGET_ARCH "host"
 
 option(LLVM_ENABLE_THREADS "Use threads if available." ON)
 
+option(LLVM_ENABLE_ZLIB "Use zlib for compression/decompression if available." ON)
+
 if( LLVM_TARGETS_TO_BUILD STREQUAL "all" )
   set( LLVM_TARGETS_TO_BUILD ${LLVM_ALL_TARGETS} )
 endif()
@@ -419,7 +417,6 @@ add_subdirectory(utils/count)
 add_subdirectory(utils/not)
 add_subdirectory(utils/llvm-lit)
 add_subdirectory(utils/yaml-bench)
-add_subdirectory(utils/yaml2obj)
 
 add_subdirectory(projects)
 
diff --git a/CODE_OWNERS.TXT b/CODE_OWNERS.TXT
index 10bf071..8e13e35 100644
--- a/CODE_OWNERS.TXT
+++ b/CODE_OWNERS.TXT
@@ -23,7 +23,7 @@ D: Gold plugin (tools/gold/*)
 N: Chandler Carruth
 E: chandlerc@gmail.com
 E: chandlerc@google.com
-D: Config, ADT, Support, inlining & related passse, SROA/mem2reg & related passes, CMake, library layering
+D: Config, ADT, Support, inlining & related passes, SROA/mem2reg & related passes, CMake, library layering
 
 N: Evan Cheng
 E: evan.cheng@apple.com
@@ -109,6 +109,10 @@ N: Nadav Rotem
 E: nrotem@apple.com
 D: X86 Backend, Loop Vectorizer
 
+N: Richard Sandiford
+E: rsandifo@linux.vnet.ibm.com
+D: SystemZ Backend
+
 N: Duncan Sands
 E: baldrick@free.fr
 D: DragonEgg
@@ -128,4 +132,4 @@ D: IndVar Simplify, Loop Strength Reduction, Instruction Scheduling
 
 N: Bill Wendling
 E: wendling@apple.com
-D: libLTO & IR Linker
+D: libLTO, IR Linker
diff --git a/CREDITS.TXT b/CREDITS.TXT
index e89f19e..844ad39 100644
--- a/CREDITS.TXT
+++ b/CREDITS.TXT
@@ -251,6 +251,12 @@ D: The initial llvm-ar tool, converted regression testsuite to dejagnu
 D: Modulo scheduling in the SparcV9 backend
 D: Release manager (1.7+)
 
+N: Sylvestre Ledru
+E: sylvestre@debian.org
+W: http://sylvestre.ledru.info/
+D: Debian and Ubuntu packaging
+D: Continuous integration with jenkins
+
 N: Andrew Lenharth
 E: alenhar2@cs.uiuc.edu
 W: http://www.lenharth.org/~andrewl/
@@ -423,6 +429,7 @@ D: Thread Local Storage implementation
 
 N: Bill Wendling
 E: wendling@apple.com
+D: Release manager
 D: Bunches of stuff
 
 N: Bob Wilson
diff --git a/Makefile.config.in b/Makefile.config.in
index 26e3709..fd4f6ef 100644
--- a/Makefile.config.in
+++ b/Makefile.config.in
@@ -302,6 +302,9 @@ ENABLE_DOXYGEN = @ENABLE_DOXYGEN@
 # Do we want to enable threads?
 ENABLE_THREADS := @LLVM_ENABLE_THREADS@
 
+# Do we want to enable zlib?
+ENABLE_ZLIB := @LLVM_ENABLE_ZLIB@
+
 # Do we want to build with position independent code?
 ENABLE_PIC := @ENABLE_PIC@
 
diff --git a/Makefile.rules b/Makefile.rules
index 2c834aa..f0c542b 100644
--- a/Makefile.rules
+++ b/Makefile.rules
@@ -1515,6 +1515,8 @@ $(ToolBuildPath): $(ToolDir)/.dir
 endif
 
 ifdef CODESIGN_TOOLS
+TOOL_CODESIGN_IDENTITY ?= -
+
 $(ToolBuildPath): $(ObjectsO) $(ProjLibsPaths) $(LLVMLibsPaths)
 	$(Echo) Linking $(BuildMode) executable $(TOOLNAME) $(StripWarnMsg)
 	$(Verb) $(Link) -o $@ $(TOOLLINKOPTS) $(ObjectsO) $(ProjLibsOptions) \
@@ -1522,7 +1524,7 @@ $(ToolBuildPath): $(ObjectsO) $(ProjLibsPaths) $(LLVMLibsPaths)
 	$(Echo) ======= Finished Linking $(BuildMode) Executable $(TOOLNAME) \
           $(StripWarnMsg)
 	$(Echo) ======= Code-Signing $(BuildMode) Executable $(TOOLNAME)
-	$(Verb) codesign -s - $@
+	$(Verb) codesign -s $(TOOL_CODESIGN_IDENTITY) $@
 else
 $(ToolBuildPath): $(ObjectsO) $(ProjLibsPaths) $(LLVMLibsPaths)
 	$(Echo) Linking $(BuildMode) executable $(TOOLNAME) $(StripWarnMsg)
@@ -1967,7 +1969,7 @@ endif
 # CHECK: Running the test suite
 ###############################################################################
 
-check::
+check:: all
 	$(Verb) if test -d "$(PROJ_OBJ_ROOT)/test" ; then \
 	  if test -f "$(PROJ_OBJ_ROOT)/test/Makefile" ; then \
 	    $(EchoCmd) Running test suite ; \
diff --git a/README.txt b/README.txt
index 193330f..e957a4d 100644
--- a/README.txt
+++ b/README.txt
@@ -15,3 +15,4 @@ documentation setup.
 
 If you're writing a package for LLVM, see docs/Packaging.rst for our
 suggestions.
+
diff --git a/autoconf/configure.ac b/autoconf/configure.ac
index 0097db3..a5caac9 100644
--- a/autoconf/configure.ac
+++ b/autoconf/configure.ac
@@ -31,7 +31,7 @@ dnl===
 dnl===-----------------------------------------------------------------------===
 dnl Initialize autoconf and define the package name, version number and
 dnl address for reporting bugs.
-AC_INIT([LLVM],[3.3svn],[http://llvm.org/bugs/])
+AC_INIT([LLVM],[3.3],[http://llvm.org/bugs/])
 AC_DEFINE([LLVM_VERSION_MAJOR], [3], [Major version of the LLVM API])
 AC_DEFINE([LLVM_VERSION_MINOR], [3], [Minor version of the LLVM API])
 
@@ -402,6 +402,7 @@ AC_CACHE_CHECK([target architecture],[llvm_cv_target_arch],
   hexagon-*)              llvm_cv_target_arch="Hexagon" ;;
   mblaze-*)               llvm_cv_target_arch="MBlaze" ;;
   nvptx-*)                llvm_cv_target_arch="NVPTX" ;;
+  s390x-*)                llvm_cv_target_arch="SystemZ" ;;
   *)                      llvm_cv_target_arch="Unknown" ;;
 esac])
 
@@ -435,6 +436,7 @@ case $host in
   msp430-*)               host_arch="MSP430" ;;
   hexagon-*)              host_arch="Hexagon" ;;
   mblaze-*)               host_arch="MBlaze" ;;
+  s390x-*)                host_arch="SystemZ" ;;
   *)                      host_arch="Unknown" ;;
 esac
 
@@ -654,6 +656,7 @@ else
     Hexagon)     AC_SUBST(TARGET_HAS_JIT,0) ;;
     MBlaze)      AC_SUBST(TARGET_HAS_JIT,0) ;;
     NVPTX)       AC_SUBST(TARGET_HAS_JIT,0) ;;
+    SystemZ)     AC_SUBST(TARGET_HAS_JIT,1) ;;
     *)           AC_SUBST(TARGET_HAS_JIT,0) ;;
   esac
 fi
@@ -708,6 +711,21 @@ case "$enableval" in
   *) AC_MSG_ERROR([Invalid setting for --enable-pthreads. Use "yes" or "no"]) ;;
 esac
 
+dnl Allow disablement of zlib
+AC_ARG_ENABLE(zlib,
+              AS_HELP_STRING([--enable-zlib],
+                             [Use zlib for compression/decompression if
+                              available (default is YES)]),,
+                              enableval=default)
+case "$enableval" in
+  yes) AC_SUBST(LLVM_ENABLE_ZLIB,[1]) ;;
+  no)  AC_SUBST(LLVM_ENABLE_ZLIB,[0]) ;;
+  default) AC_SUBST(LLVM_ENABLE_ZLIB,[1]) ;;
+  *) AC_MSG_ERROR([Invalid setting for --enable-zlib. Use "yes" or "no"]) ;;
+esac
+AC_DEFINE_UNQUOTED([LLVM_ENABLE_ZLIB],$LLVM_ENABLE_ZLIB,
+                   [Define if zlib is enabled])
+
 dnl Allow building without position independent code
 AC_ARG_ENABLE(pic,
   AS_HELP_STRING([--enable-pic],
@@ -780,13 +798,13 @@ TARGETS_TO_BUILD=""
 AC_ARG_ENABLE([targets],AS_HELP_STRING([--enable-targets],
     [Build specific host targets: all or target1,target2,... Valid targets are:
      host, x86, x86_64, sparc, powerpc, arm, aarch64, mips, hexagon,
-     xcore, msp430, nvptx, and cpp (default=all)]),,
+     xcore, msp430, nvptx, systemz, and cpp (default=all)]),,
     enableval=all)
 if test "$enableval" = host-only ; then
   enableval=host
 fi
 case "$enableval" in
-  all) TARGETS_TO_BUILD="X86 Sparc PowerPC AArch64 ARM Mips XCore MSP430 CppBackend MBlaze NVPTX Hexagon" ;;
+  all) TARGETS_TO_BUILD="X86 Sparc PowerPC AArch64 ARM Mips XCore MSP430 CppBackend MBlaze NVPTX Hexagon SystemZ" ;;
   *)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do
       case "$a_target" in
         x86)      TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
@@ -805,11 +823,13 @@ case "$enableval" in
         hexagon)  TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
         mblaze)   TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;;
         nvptx)    TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;;
+        systemz)  TARGETS_TO_BUILD="SystemZ $TARGETS_TO_BUILD" ;;
         host) case "$llvm_cv_target_arch" in
             x86)         TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
             x86_64)      TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
             Sparc)       TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;;
             PowerPC)     TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;;
+            AArch64)     TARGETS_TO_BUILD="AArch64 $TARGETS_TO_BUILD" ;;
             ARM)         TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
             Mips)        TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
             MBlaze)      TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;;
@@ -817,6 +837,7 @@ case "$enableval" in
             MSP430)      TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
             Hexagon)     TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
             NVPTX)       TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;;
+            SystemZ)     TARGETS_TO_BUILD="SystemZ $TARGETS_TO_BUILD" ;;
             *)       AC_MSG_ERROR([Can not set target to build]) ;;
           esac ;;
         *) AC_MSG_ERROR([Unrecognized target $a_target]) ;;
@@ -1367,6 +1388,11 @@ if test "$LLVM_ENABLE_THREADS" -eq 1 && test "$ENABLE_PTHREADS" -eq 1 ; then
                  [Have pthread_getspecific]))
 fi
 
+dnl zlib is optional; used for compression/decompression
+if test "$LLVM_ENABLE_ZLIB" -eq 1 ; then
+  AC_CHECK_LIB(z, compress2)
+fi
+
 dnl Allow extra x86-disassembler library
 AC_ARG_WITH(udis86,
   AS_HELP_STRING([--with-udis86=<path>],
@@ -1486,6 +1512,9 @@ AC_HEADER_STAT
 AC_HEADER_SYS_WAIT
 AC_HEADER_TIME
 
+AC_LANG_PUSH([C++])
+AC_CHECK_HEADERS([cxxabi.h])
+AC_LANG_POP([C++])
 AC_CHECK_HEADERS([dlfcn.h execinfo.h fcntl.h inttypes.h limits.h link.h])
 AC_CHECK_HEADERS([malloc.h setjmp.h signal.h stdint.h termios.h unistd.h])
 AC_CHECK_HEADERS([utime.h windows.h])
@@ -1501,6 +1530,13 @@ if test "$LLVM_ENABLE_THREADS" -eq 1 && test "$ENABLE_PTHREADS" -eq 1 ; then
 else
   AC_SUBST(HAVE_PTHREAD, 0)
 fi
+if test "$LLVM_ENABLE_ZLIB" -eq 1 ; then
+  AC_CHECK_HEADERS(zlib.h,
+                   AC_SUBST(HAVE_LIBZ, 1),
+                   AC_SUBST(HAVE_LIBZ, 0))
+else
+  AC_SUBST(HAVE_LIBZ, 0)
+fi
 
 dnl Try to find ffi.h.
 if test "$llvm_cv_enable_libffi" = "yes" ; then
@@ -1746,7 +1782,7 @@ AC_DEFINE_UNQUOTED(LLVM_MANDIR, "$LLVM_MANDIR",
                    [Installation directory for man pages])
 AC_DEFINE_UNQUOTED(LLVM_CONFIGTIME, "$LLVM_CONFIGTIME",
                    [Time at which LLVM was configured])
-AC_DEFINE_UNQUOTED(LLVM_HOSTTRIPLE, "$host",
+AC_DEFINE_UNQUOTED(LLVM_HOST_TRIPLE, "$host",
                    [Host triple LLVM will be executed on])
 AC_DEFINE_UNQUOTED(LLVM_DEFAULT_TARGET_TRIPLE, "$target",
                    [Target triple LLVM will generate code for by default])
diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake
index 7cad190..7f8c48d 100755
--- a/cmake/config-ix.cmake
+++ b/cmake/config-ix.cmake
@@ -4,6 +4,7 @@ if( WIN32 AND NOT CYGWIN )
 endif()
 
 include(CheckIncludeFile)
+include(CheckIncludeFileCXX)
 include(CheckLibraryExists)
 include(CheckSymbolExists)
 include(CheckFunctionExists)
@@ -37,6 +38,7 @@ endfunction()
 check_include_file(argz.h HAVE_ARGZ_H)
 check_include_file(assert.h HAVE_ASSERT_H)
 check_include_file(ctype.h HAVE_CTYPE_H)
+check_include_file_cxx(cxxabi.h HAVE_CXXABI_H)
 check_include_file(dirent.h HAVE_DIRENT_H)
 check_include_file(dl.h HAVE_DL_H)
 check_include_file(dld.h HAVE_DLD_H)
@@ -79,6 +81,7 @@ check_include_file(unistd.h HAVE_UNISTD_H)
 check_include_file(utime.h HAVE_UTIME_H)
 check_include_file(valgrind/valgrind.h HAVE_VALGRIND_VALGRIND_H)
 check_include_file(windows.h HAVE_WINDOWS_H)
+check_include_file(zlib.h HAVE_ZLIB_H)
 check_include_file(fenv.h HAVE_FENV_H)
 check_symbol_exists(FE_ALL_EXCEPT "fenv.h" HAVE_DECL_FE_ALL_EXCEPT)
 check_symbol_exists(FE_INEXACT "fenv.h" HAVE_DECL_FE_INEXACT)
@@ -104,6 +107,11 @@ if( NOT PURE_WINDOWS )
   endif()
   check_library_exists(dl dlopen "" HAVE_LIBDL)
   check_library_exists(rt clock_gettime "" HAVE_LIBRT)
+  if (LLVM_ENABLE_ZLIB)
+    check_library_exists(z compress2 "" HAVE_LIBZ)
+  else()
+    set(HAVE_LIBZ 0)
+  endif()
 endif()
 
 # function checks
@@ -332,12 +340,13 @@ if (CMAKE_COMPILER_IS_GNUCXX)
   endif()
 endif()
 
-include(GetHostTriple)
-get_host_triple(LLVM_HOST_TRIPLE)
-
 # By default, we target the host, but this can be overridden at CMake
 # invocation time.
-set(LLVM_HOSTTRIPLE "${LLVM_HOST_TRIPLE}")
+include(GetHostTriple)
+get_host_triple(LLVM_INFERRED_HOST_TRIPLE)
+
+set(LLVM_HOST_TRIPLE "${LLVM_INFERRED_HOST_TRIPLE}" CACHE STRING
+    "Host on which LLVM binaries will run")
 
 # Determine the native architecture.
 string(TOLOWER "${LLVM_TARGET_ARCH}" LLVM_NATIVE_ARCH)
@@ -357,6 +366,8 @@ elseif (LLVM_NATIVE_ARCH MATCHES "sparc")
   set(LLVM_NATIVE_ARCH Sparc)
 elseif (LLVM_NATIVE_ARCH MATCHES "powerpc")
   set(LLVM_NATIVE_ARCH PowerPC)
+elseif (LLVM_NATIVE_ARCH MATCHES "aarch64")
+  set(LLVM_NATIVE_ARCH AArch64)
 elseif (LLVM_NATIVE_ARCH MATCHES "arm")
   set(LLVM_NATIVE_ARCH ARM)
 elseif (LLVM_NATIVE_ARCH MATCHES "mips")
@@ -367,6 +378,8 @@ elseif (LLVM_NATIVE_ARCH MATCHES "msp430")
   set(LLVM_NATIVE_ARCH MSP430)
 elseif (LLVM_NATIVE_ARCH MATCHES "hexagon")
   set(LLVM_NATIVE_ARCH Hexagon)
+elseif (LLVM_NATIVE_ARCH MATCHES "s390x")
+  set(LLVM_NATIVE_ARCH SystemZ)
 else ()
   message(FATAL_ERROR "Unknown architecture ${LLVM_NATIVE_ARCH}")
 endif ()
@@ -450,4 +463,11 @@ else( LLVM_ENABLE_THREADS )
   message(STATUS "Threads disabled.")
 endif()
 
+if (LLVM_ENABLE_ZLIB )
+  # Check if zlib is available in the system.
+  if ( NOT HAVE_ZLIB_H OR NOT HAVE_LIBZ )
+    set(LLVM_ENABLE_ZLIB 0)
+  endif()
+endif()
+
 set(LLVM_PREFIX ${CMAKE_INSTALL_PREFIX})
diff --git a/cmake/modules/HandleLLVMOptions.cmake b/cmake/modules/HandleLLVMOptions.cmake
index 4e59a3e..f8a017d 100644
--- a/cmake/modules/HandleLLVMOptions.cmake
+++ b/cmake/modules/HandleLLVMOptions.cmake
@@ -21,6 +21,9 @@ if( LLVM_ENABLE_ASSERTIONS )
   # explicitly undefine it:
   if( uppercase_CMAKE_BUILD_TYPE STREQUAL "RELEASE" )
     add_definitions( -UNDEBUG )
+    # Also remove /D NDEBUG to avoid MSVC warnings about conflicting defines.
+    string (REGEX REPLACE "(^| )[/-]D *NDEBUG($| )" " "
+      CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}")
   endif()
 else()
   if( NOT uppercase_CMAKE_BUILD_TYPE STREQUAL "RELEASE" )
@@ -249,8 +252,6 @@ if(LLVM_USE_SANITIZER)
     elseif (LLVM_USE_SANITIZER MATCHES "Memory(WithOrigins)?")
       append_common_sanitizer_flags()
       add_flag_or_print_warning("-fsanitize=memory")
-      # -pie is required for MSan.
-      set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -pie")
       if(LLVM_USE_SANITIZER STREQUAL "MemoryWithOrigins")
         add_flag_or_print_warning("-fsanitize-memory-track-origins")
       endif()
diff --git a/cmake/modules/LLVM-Config.cmake b/cmake/modules/LLVM-Config.cmake
index 163401c..2ddc0b2 100755
--- a/cmake/modules/LLVM-Config.cmake
+++ b/cmake/modules/LLVM-Config.cmake
@@ -13,6 +13,9 @@ function(get_system_libs return_var)
       if( LLVM_ENABLE_THREADS AND HAVE_LIBPTHREAD )
         set(system_libs ${system_libs} pthread)
       endif()
+      if ( LLVM_ENABLE_ZLIB AND HAVE_LIBZ )
+        set(system_libs ${system_libs} z)
+      endif()
     endif( MINGW )
   endif( NOT MSVC )
   set(${return_var} ${system_libs} PARENT_SCOPE)
diff --git a/cmake/modules/LLVMConfig.cmake.in b/cmake/modules/LLVMConfig.cmake.in
index c43119e..f0b8c14 100644
--- a/cmake/modules/LLVMConfig.cmake.in
+++ b/cmake/modules/LLVMConfig.cmake.in
@@ -22,12 +22,15 @@ set(LLVM_TOOLS_BINARY_DIR @LLVM_TOOLS_BINARY_DIR@)
 
 set(LLVM_ENABLE_THREADS @LLVM_ENABLE_THREADS@)
 
+set(LLVM_ENABLE_ZLIB @LLVM_ENABLE_ZLIB@)
+
 set(LLVM_NATIVE_ARCH @LLVM_NATIVE_ARCH@)
 
 set(LLVM_ENABLE_PIC @LLVM_ENABLE_PIC@)
 
 set(HAVE_LIBDL @HAVE_LIBDL@)
 set(HAVE_LIBPTHREAD @HAVE_LIBPTHREAD@)
+set(HAVE_LIBZ @HAVE_LIBZ@)
 set(LLVM_ON_UNIX @LLVM_ON_UNIX@)
 set(LLVM_ON_WIN32 @LLVM_ON_WIN32@)
 
diff --git a/configure b/configure
index e70b131..decbc40 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.60 for LLVM 3.3svn.
+# Generated by GNU Autoconf 2.60 for LLVM 3.3.
 #
 # Report bugs to <http://llvm.org/bugs/>.
 #
@@ -561,8 +561,8 @@ SHELL=${CONFIG_SHELL-/bin/sh}
 # Identity of this package.
 PACKAGE_NAME='LLVM'
 PACKAGE_TARNAME='llvm'
-PACKAGE_VERSION='3.3svn'
-PACKAGE_STRING='LLVM 3.3svn'
+PACKAGE_VERSION='3.3'
+PACKAGE_STRING='LLVM 3.3'
 PACKAGE_BUGREPORT='http://llvm.org/bugs/'
 
 ac_unique_file="lib/IR/Module.cpp"
@@ -703,6 +703,7 @@ ENABLE_DOCS
 ENABLE_DOXYGEN
 LLVM_ENABLE_THREADS
 ENABLE_PTHREADS
+LLVM_ENABLE_ZLIB
 ENABLE_PIC
 ENABLE_SHARED
 ENABLE_EMBED_STDCXX
@@ -776,7 +777,9 @@ USE_INTEL_JITEVENTS
 XML2CONFIG
 LIBXML2_LIBS
 LIBXML2_INC
+CXXCPP
 HAVE_PTHREAD
+HAVE_LIBZ
 HUGE_VAL_SANITY
 MMAP_FILE
 SHLIBEXT
@@ -811,7 +814,8 @@ CPPFLAGS
 CXX
 CXXFLAGS
 CCC
-CPP'
+CPP
+CXXCPP'
 ac_subdirs_all='projects/llvm-gcc
 projects/test-suite
 projects/llvm-test
@@ -1327,7 +1331,7 @@ if test "$ac_init_help" = "long"; then
   # Omit some internal or obsolete options to make the list less imposing.
   # This message is too long to be a string in the A/UX 3.1 sh.
   cat <<_ACEOF
-\`configure' configures LLVM 3.3svn to adapt to many kinds of systems.
+\`configure' configures LLVM 3.3 to adapt to many kinds of systems.
 
 Usage: $0 [OPTION]... [VAR=VALUE]...
 
@@ -1393,7 +1397,7 @@ fi
 
 if test -n "$ac_init_help"; then
   case $ac_init_help in
-     short | recursive ) echo "Configuration of LLVM 3.3svn:";;
+     short | recursive ) echo "Configuration of LLVM 3.3:";;
    esac
   cat <<\_ACEOF
 
@@ -1426,6 +1430,8 @@ Optional Features:
   --enable-doxygen        Build doxygen documentation (default is NO)
   --enable-threads        Use threads if available (default is YES)
   --enable-pthreads       Use pthreads if available (default is YES)
+  --enable-zlib           Use zlib for compression/decompression if available
+                          (default is YES)
   --enable-pic            Build LLVM with Position Independent Code (default
                           is YES)
   --enable-shared         Build a shared library and link tools against it
@@ -1439,7 +1445,7 @@ Optional Features:
   --enable-targets        Build specific host targets: all or
                           target1,target2,... Valid targets are: host, x86,
                           x86_64, sparc, powerpc, arm, aarch64, mips, hexagon,
-                          xcore, msp430, nvptx, and cpp (default=all)
+                          xcore, msp430, nvptx, systemz, and cpp (default=all)
   --enable-experimental-targets
                           Build experimental host targets: disable or
                           target1,target2,... (default=disable)
@@ -1487,6 +1493,7 @@ Some influential environment variables:
   CXX         C++ compiler command
   CXXFLAGS    C++ compiler flags
   CPP         C preprocessor
+  CXXCPP      C++ preprocessor
 
 Use these variables to override the choices made by `configure' or to help
 it to find libraries and programs with nonstandard names/locations.
@@ -1552,7 +1559,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-LLVM configure 3.3svn
+LLVM configure 3.3
 generated by GNU Autoconf 2.60
 
 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
@@ -1568,7 +1575,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
-It was created by LLVM $as_me 3.3svn, which was
+It was created by LLVM $as_me 3.3, which was
 generated by GNU Autoconf 2.60.  Invocation command line was
 
   $ $0 $@
@@ -4019,6 +4026,7 @@ else
   hexagon-*)              llvm_cv_target_arch="Hexagon" ;;
   mblaze-*)               llvm_cv_target_arch="MBlaze" ;;
   nvptx-*)                llvm_cv_target_arch="NVPTX" ;;
+  s390x-*)                llvm_cv_target_arch="SystemZ" ;;
   *)                      llvm_cv_target_arch="Unknown" ;;
 esac
 fi
@@ -4052,6 +4060,7 @@ case $host in
   msp430-*)               host_arch="MSP430" ;;
   hexagon-*)              host_arch="Hexagon" ;;
   mblaze-*)               host_arch="MBlaze" ;;
+  s390x-*)                host_arch="SystemZ" ;;
   *)                      host_arch="Unknown" ;;
 esac
 
@@ -5392,6 +5401,8 @@ else
  ;;
     NVPTX)       TARGET_HAS_JIT=0
  ;;
+    SystemZ)     TARGET_HAS_JIT=1
+ ;;
     *)           TARGET_HAS_JIT=0
  ;;
   esac
@@ -5478,6 +5489,30 @@ echo "$as_me: error: Invalid setting for --enable-pthreads. Use \"yes\" or \"no\
    { (exit 1); exit 1; }; } ;;
 esac
 
+# Check whether --enable-zlib was given.
+if test "${enable_zlib+set}" = set; then
+  enableval=$enable_zlib;
+else
+  enableval=default
+fi
+
+case "$enableval" in
+  yes) LLVM_ENABLE_ZLIB=1
+ ;;
+  no)  LLVM_ENABLE_ZLIB=0
+ ;;
+  default) LLVM_ENABLE_ZLIB=1
+ ;;
+  *) { { echo "$as_me:$LINENO: error: Invalid setting for --enable-zlib. Use \"yes\" or \"no\"" >&5
+echo "$as_me: error: Invalid setting for --enable-zlib. Use \"yes\" or \"no\"" >&2;}
+   { (exit 1); exit 1; }; } ;;
+esac
+
+cat >>confdefs.h <<_ACEOF
+#define LLVM_ENABLE_ZLIB $LLVM_ENABLE_ZLIB
+_ACEOF
+
+
 # Check whether --enable-pic was given.
 if test "${enable_pic+set}" = set; then
   enableval=$enable_pic;
@@ -5601,7 +5636,7 @@ if test "$enableval" = host-only ; then
   enableval=host
 fi
 case "$enableval" in
-  all) TARGETS_TO_BUILD="X86 Sparc PowerPC AArch64 ARM Mips XCore MSP430 CppBackend MBlaze NVPTX Hexagon" ;;
+  all) TARGETS_TO_BUILD="X86 Sparc PowerPC AArch64 ARM Mips XCore MSP430 CppBackend MBlaze NVPTX Hexagon SystemZ" ;;
   *)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do
       case "$a_target" in
         x86)      TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
@@ -5620,11 +5655,13 @@ case "$enableval" in
         hexagon)  TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
         mblaze)   TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;;
         nvptx)    TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;;
+        systemz)  TARGETS_TO_BUILD="SystemZ $TARGETS_TO_BUILD" ;;
         host) case "$llvm_cv_target_arch" in
             x86)         TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
             x86_64)      TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
             Sparc)       TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;;
             PowerPC)     TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;;
+            AArch64)     TARGETS_TO_BUILD="AArch64 $TARGETS_TO_BUILD" ;;
             ARM)         TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
             Mips)        TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
             MBlaze)      TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;;
@@ -5632,6 +5669,7 @@ case "$enableval" in
             MSP430)      TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
             Hexagon)     TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
             NVPTX)       TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;;
+            SystemZ)     TARGETS_TO_BUILD="SystemZ $TARGETS_TO_BUILD" ;;
             *)       { { echo "$as_me:$LINENO: error: Can not set target to build" >&5
 echo "$as_me: error: Can not set target to build" >&2;}
    { (exit 1); exit 1; }; } ;;
@@ -10497,7 +10535,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<EOF
-#line 10500 "configure"
+#line 10538 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -13440,6 +13478,96 @@ fi
 
 fi
 
+if test "$LLVM_ENABLE_ZLIB" -eq 1 ; then
+
+{ echo "$as_me:$LINENO: checking for compress2 in -lz" >&5
+echo $ECHO_N "checking for compress2 in -lz... $ECHO_C" >&6; }
+if test "${ac_cv_lib_z_compress2+set}" = set; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-lz  $LIBS"
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char compress2 ();
+int
+main ()
+{
+return compress2 ();
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_link") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } &&
+	 { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; } &&
+	 { ac_try='test -s conftest$ac_exeext'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; }; then
+  ac_cv_lib_z_compress2=yes
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	ac_cv_lib_z_compress2=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+      conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_lib_z_compress2" >&5
+echo "${ECHO_T}$ac_cv_lib_z_compress2" >&6; }
+if test $ac_cv_lib_z_compress2 = yes; then
+  cat >>confdefs.h <<_ACEOF
+#define HAVE_LIBZ 1
+_ACEOF
+
+  LIBS="-lz $LIBS"
+
+fi
+
+fi
+
 
 # Check whether --with-udis86 was given.
 if test "${with_udis86+set}" = set; then
@@ -14743,6 +14871,442 @@ _ACEOF
 fi
 
 
+ac_ext=cpp
+ac_cpp='$CXXCPP $CPPFLAGS'
+ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
+
+ac_ext=cpp
+ac_cpp='$CXXCPP $CPPFLAGS'
+ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
+{ echo "$as_me:$LINENO: checking how to run the C++ preprocessor" >&5
+echo $ECHO_N "checking how to run the C++ preprocessor... $ECHO_C" >&6; }
+if test -z "$CXXCPP"; then
+  if test "${ac_cv_prog_CXXCPP+set}" = set; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+      # Double quotes because CXXCPP needs to be expanded
+    for CXXCPP in "$CXX -E" "/lib/cpp"
+    do
+      ac_preproc_ok=false
+for ac_cxx_preproc_warn_flag in '' yes
+do
+  # Use a header file that comes with gcc, so configuring glibc
+  # with a fresh cross-compiler works.
+  # Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+  # <limits.h> exists even on freestanding compilers.
+  # On the NeXT, cc -E runs the code through the compiler's parser,
+  # not just through cpp. "Syntax error" is here to catch this case.
+  cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+		     Syntax error
+_ACEOF
+if { (ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } >/dev/null; then
+  if test -s conftest.err; then
+    ac_cpp_err=$ac_cxx_preproc_warn_flag
+    ac_cpp_err=$ac_cpp_err$ac_cxx_werror_flag
+  else
+    ac_cpp_err=
+  fi
+else
+  ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
+  :
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+  # Broken: fails on valid input.
+continue
+fi
+
+rm -f conftest.err conftest.$ac_ext
+
+  # OK, works on sane cases.  Now check whether nonexistent headers
+  # can be detected and how.
+  cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+#include <ac_nonexistent.h>
+_ACEOF
+if { (ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } >/dev/null; then
+  if test -s conftest.err; then
+    ac_cpp_err=$ac_cxx_preproc_warn_flag
+    ac_cpp_err=$ac_cpp_err$ac_cxx_werror_flag
+  else
+    ac_cpp_err=
+  fi
+else
+  ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
+  # Broken: success on invalid input.
+continue
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+  # Passes both tests.
+ac_preproc_ok=:
+break
+fi
+
+rm -f conftest.err conftest.$ac_ext
+
+done
+# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
+rm -f conftest.err conftest.$ac_ext
+if $ac_preproc_ok; then
+  break
+fi
+
+    done
+    ac_cv_prog_CXXCPP=$CXXCPP
+
+fi
+  CXXCPP=$ac_cv_prog_CXXCPP
+else
+  ac_cv_prog_CXXCPP=$CXXCPP
+fi
+{ echo "$as_me:$LINENO: result: $CXXCPP" >&5
+echo "${ECHO_T}$CXXCPP" >&6; }
+ac_preproc_ok=false
+for ac_cxx_preproc_warn_flag in '' yes
+do
+  # Use a header file that comes with gcc, so configuring glibc
+  # with a fresh cross-compiler works.
+  # Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+  # <limits.h> exists even on freestanding compilers.
+  # On the NeXT, cc -E runs the code through the compiler's parser,
+  # not just through cpp. "Syntax error" is here to catch this case.
+  cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+		     Syntax error
+_ACEOF
+if { (ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } >/dev/null; then
+  if test -s conftest.err; then
+    ac_cpp_err=$ac_cxx_preproc_warn_flag
+    ac_cpp_err=$ac_cpp_err$ac_cxx_werror_flag
+  else
+    ac_cpp_err=
+  fi
+else
+  ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
+  :
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+  # Broken: fails on valid input.
+continue
+fi
+
+rm -f conftest.err conftest.$ac_ext
+
+  # OK, works on sane cases.  Now check whether nonexistent headers
+  # can be detected and how.
+  cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+#include <ac_nonexistent.h>
+_ACEOF
+if { (ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } >/dev/null; then
+  if test -s conftest.err; then
+    ac_cpp_err=$ac_cxx_preproc_warn_flag
+    ac_cpp_err=$ac_cpp_err$ac_cxx_werror_flag
+  else
+    ac_cpp_err=
+  fi
+else
+  ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
+  # Broken: success on invalid input.
+continue
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+  # Passes both tests.
+ac_preproc_ok=:
+break
+fi
+
+rm -f conftest.err conftest.$ac_ext
+
+done
+# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
+rm -f conftest.err conftest.$ac_ext
+if $ac_preproc_ok; then
+  :
+else
+  { { echo "$as_me:$LINENO: error: C++ preprocessor \"$CXXCPP\" fails sanity check
+See \`config.log' for more details." >&5
+echo "$as_me: error: C++ preprocessor \"$CXXCPP\" fails sanity check
+See \`config.log' for more details." >&2;}
+   { (exit 1); exit 1; }; }
+fi
+
+ac_ext=cpp
+ac_cpp='$CXXCPP $CPPFLAGS'
+ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
+
+
+
+for ac_header in cxxabi.h
+do
+as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh`
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+  { echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; }
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+fi
+ac_res=`eval echo '${'$as_ac_Header'}'`
+	       { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+else
+  # Is the header compilable?
+{ echo "$as_me:$LINENO: checking $ac_header usability" >&5
+echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+$ac_includes_default
+#include <$ac_header>
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_compile") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } &&
+	 { ac_try='test -z "$ac_cxx_werror_flag" || test ! -s conftest.err'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; } &&
+	 { ac_try='test -s conftest.$ac_objext'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; }; then
+  ac_header_compiler=yes
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	ac_header_compiler=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5
+echo "${ECHO_T}$ac_header_compiler" >&6; }
+
+# Is the header present?
+{ echo "$as_me:$LINENO: checking $ac_header presence" >&5
+echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+#include <$ac_header>
+_ACEOF
+if { (ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } >/dev/null; then
+  if test -s conftest.err; then
+    ac_cpp_err=$ac_cxx_preproc_warn_flag
+    ac_cpp_err=$ac_cpp_err$ac_cxx_werror_flag
+  else
+    ac_cpp_err=
+  fi
+else
+  ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
+  ac_header_preproc=yes
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+  ac_header_preproc=no
+fi
+
+rm -f conftest.err conftest.$ac_ext
+{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5
+echo "${ECHO_T}$ac_header_preproc" >&6; }
+
+# So?  What about this header?
+case $ac_header_compiler:$ac_header_preproc:$ac_cxx_preproc_warn_flag in
+  yes:no: )
+    { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5
+echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;}
+    { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5
+echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;}
+    ac_header_preproc=yes
+    ;;
+  no:yes:* )
+    { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5
+echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;}
+    { echo "$as_me:$LINENO: WARNING: $ac_header:     check for missing prerequisite headers?" >&5
+echo "$as_me: WARNING: $ac_header:     check for missing prerequisite headers?" >&2;}
+    { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5
+echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;}
+    { echo "$as_me:$LINENO: WARNING: $ac_header:     section \"Present But Cannot Be Compiled\"" >&5
+echo "$as_me: WARNING: $ac_header:     section \"Present But Cannot Be Compiled\"" >&2;}
+    { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5
+echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;}
+    { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5
+echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;}
+    ( cat <<\_ASBOX
+## ------------------------------------ ##
+## Report this to http://llvm.org/bugs/ ##
+## ------------------------------------ ##
+_ASBOX
+     ) | sed "s/^/$as_me: WARNING:     /" >&2
+    ;;
+esac
+{ echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; }
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+  eval "$as_ac_Header=\$ac_header_preproc"
+fi
+ac_res=`eval echo '${'$as_ac_Header'}'`
+	       { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+
+fi
+if test `eval echo '${'$as_ac_Header'}'` = yes; then
+  cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+
+done
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
 
 
 
@@ -16294,6 +16858,184 @@ else
   HAVE_PTHREAD=0
 
 fi
+if test "$LLVM_ENABLE_ZLIB" -eq 1 ; then
+
+for ac_header in zlib.h
+do
+as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh`
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+  { echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; }
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+fi
+ac_res=`eval echo '${'$as_ac_Header'}'`
+	       { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+else
+  # Is the header compilable?
+{ echo "$as_me:$LINENO: checking $ac_header usability" >&5
+echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+$ac_includes_default
+#include <$ac_header>
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_compile") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } &&
+	 { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; } &&
+	 { ac_try='test -s conftest.$ac_objext'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; }; then
+  ac_header_compiler=yes
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	ac_header_compiler=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5
+echo "${ECHO_T}$ac_header_compiler" >&6; }
+
+# Is the header present?
+{ echo "$as_me:$LINENO: checking $ac_header presence" >&5
+echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+#include <$ac_header>
+_ACEOF
+if { (ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } >/dev/null; then
+  if test -s conftest.err; then
+    ac_cpp_err=$ac_c_preproc_warn_flag
+    ac_cpp_err=$ac_cpp_err$ac_c_werror_flag
+  else
+    ac_cpp_err=
+  fi
+else
+  ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
+  ac_header_preproc=yes
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+  ac_header_preproc=no
+fi
+
+rm -f conftest.err conftest.$ac_ext
+{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5
+echo "${ECHO_T}$ac_header_preproc" >&6; }
+
+# So?  What about this header?
+case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in
+  yes:no: )
+    { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5
+echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;}
+    { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5
+echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;}
+    ac_header_preproc=yes
+    ;;
+  no:yes:* )
+    { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5
+echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;}
+    { echo "$as_me:$LINENO: WARNING: $ac_header:     check for missing prerequisite headers?" >&5
+echo "$as_me: WARNING: $ac_header:     check for missing prerequisite headers?" >&2;}
+    { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5
+echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;}
+    { echo "$as_me:$LINENO: WARNING: $ac_header:     section \"Present But Cannot Be Compiled\"" >&5
+echo "$as_me: WARNING: $ac_header:     section \"Present But Cannot Be Compiled\"" >&2;}
+    { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5
+echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;}
+    { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5
+echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;}
+    ( cat <<\_ASBOX
+## ------------------------------------ ##
+## Report this to http://llvm.org/bugs/ ##
+## ------------------------------------ ##
+_ASBOX
+     ) | sed "s/^/$as_me: WARNING:     /" >&2
+    ;;
+esac
+{ echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; }
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+  eval "$as_ac_Header=\$ac_header_preproc"
+fi
+ac_res=`eval echo '${'$as_ac_Header'}'`
+	       { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+
+fi
+if test `eval echo '${'$as_ac_Header'}'` = yes; then
+  cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1
+_ACEOF
+ HAVE_LIBZ=1
+
+else
+  HAVE_LIBZ=0
+
+fi
+
+done
+
+else
+  HAVE_LIBZ=0
+
+fi
 
 if test "$llvm_cv_enable_libffi" = "yes" ; then
 
@@ -21639,7 +22381,7 @@ _ACEOF
 
 
 cat >>confdefs.h <<_ACEOF
-#define LLVM_HOSTTRIPLE "$host"
+#define LLVM_HOST_TRIPLE "$host"
 _ACEOF
 
 
@@ -22294,7 +23036,7 @@ exec 6>&1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by LLVM $as_me 3.3svn, which was
+This file was extended by LLVM $as_me 3.3, which was
 generated by GNU Autoconf 2.60.  Invocation command line was
 
   CONFIG_FILES    = $CONFIG_FILES
@@ -22347,7 +23089,7 @@ Report bugs to <bug-autoconf@gnu.org>."
 _ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF
 ac_cs_version="\\
-LLVM config.status 3.3svn
+LLVM config.status 3.3
 configured by $0, generated by GNU Autoconf 2.60,
   with options \\"`echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\"
 
@@ -22691,6 +23433,7 @@ ENABLE_DOCS!$ENABLE_DOCS$ac_delim
 ENABLE_DOXYGEN!$ENABLE_DOXYGEN$ac_delim
 LLVM_ENABLE_THREADS!$LLVM_ENABLE_THREADS$ac_delim
 ENABLE_PTHREADS!$ENABLE_PTHREADS$ac_delim
+LLVM_ENABLE_ZLIB!$LLVM_ENABLE_ZLIB$ac_delim
 ENABLE_PIC!$ENABLE_PIC$ac_delim
 ENABLE_SHARED!$ENABLE_SHARED$ac_delim
 ENABLE_EMBED_STDCXX!$ENABLE_EMBED_STDCXX$ac_delim
@@ -22764,7 +23507,9 @@ USE_INTEL_JITEVENTS!$USE_INTEL_JITEVENTS$ac_delim
 XML2CONFIG!$XML2CONFIG$ac_delim
 LIBXML2_LIBS!$LIBXML2_LIBS$ac_delim
 LIBXML2_INC!$LIBXML2_INC$ac_delim
+CXXCPP!$CXXCPP$ac_delim
 HAVE_PTHREAD!$HAVE_PTHREAD$ac_delim
+HAVE_LIBZ!$HAVE_LIBZ$ac_delim
 HUGE_VAL_SANITY!$HUGE_VAL_SANITY$ac_delim
 MMAP_FILE!$MMAP_FILE$ac_delim
 SHLIBEXT!$SHLIBEXT$ac_delim
@@ -22781,9 +23526,6 @@ LLVM_MANDIR!$LLVM_MANDIR$ac_delim
 LLVM_CONFIGTIME!$LLVM_CONFIGTIME$ac_delim
 BINDINGS_TO_BUILD!$BINDINGS_TO_BUILD$ac_delim
 ALL_BINDINGS!$ALL_BINDINGS$ac_delim
-OCAML_LIBDIR!$OCAML_LIBDIR$ac_delim
-ENABLE_VISIBILITY_INLINES_HIDDEN!$ENABLE_VISIBILITY_INLINES_HIDDEN$ac_delim
-RPATH!$RPATH$ac_delim
 _ACEOF
 
   if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 97; then
@@ -22825,13 +23567,16 @@ _ACEOF
 ac_delim='%!_!# '
 for ac_last_try in false false false false false :; do
   cat >conf$$subs.sed <<_ACEOF
+OCAML_LIBDIR!$OCAML_LIBDIR$ac_delim
+ENABLE_VISIBILITY_INLINES_HIDDEN!$ENABLE_VISIBILITY_INLINES_HIDDEN$ac_delim
+RPATH!$RPATH$ac_delim
 RDYNAMIC!$RDYNAMIC$ac_delim
 program_prefix!$program_prefix$ac_delim
 LIBOBJS!$LIBOBJS$ac_delim
 LTLIBOBJS!$LTLIBOBJS$ac_delim
 _ACEOF
 
-  if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 4; then
+  if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 7; then
     break
   elif $ac_last_try; then
     { { echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5
diff --git a/docs/CMake.rst b/docs/CMake.rst
index fb081d7..8459081 100644
--- a/docs/CMake.rst
+++ b/docs/CMake.rst
@@ -168,8 +168,8 @@ LLVM-specific variables
 
 **LLVM_TARGETS_TO_BUILD**:STRING
   Semicolon-separated list of targets to build, or *all* for building all
-  targets. Case-sensitive. For Visual C++ defaults to *X86*. On the other cases
-  defaults to *all*. Example: ``-DLLVM_TARGETS_TO_BUILD="X86;PowerPC"``.
+  targets. Case-sensitive. Defaults to *all*. Example:
+  ``-DLLVM_TARGETS_TO_BUILD="X86;PowerPC"``.
 
 **LLVM_BUILD_TOOLS**:BOOL
   Build LLVM tools. Defaults to ON. Targets for building each tool are generated
@@ -271,6 +271,10 @@ LLVM-specific variables
 **LLVM_USE_INTEL_JITEVENTS**:BOOL
   Enable building support for Intel JIT Events API. Defaults to OFF
 
+**LLVM_ENABLE_ZLIB**:BOOL
+  Build with zlib to support compression/decompression in LLVM tools.
+  Defaults to ON.
+
 Executing the test suite
 ========================
 
diff --git a/docs/CodeGenerator.rst b/docs/CodeGenerator.rst
index 75415ab..10ca307 100644
--- a/docs/CodeGenerator.rst
+++ b/docs/CodeGenerator.rst
@@ -1794,6 +1794,7 @@ Here is the table:
 :raw-html:`<th>NVPTX</th>`
 :raw-html:`<th>PowerPC</th>`
 :raw-html:`<th>Sparc</th>`
+:raw-html:`<th>SystemZ</th>`
 :raw-html:`<th>X86</th>`
 :raw-html:`<th>XCore</th>`
 :raw-html:`</tr>`
@@ -1808,8 +1809,9 @@ Here is the table:
 :raw-html:`<td class="yes"></td> <!-- NVPTX -->`
 :raw-html:`<td class="yes"></td> <!-- PowerPC -->`
 :raw-html:`<td class="yes"></td> <!-- Sparc -->`
+:raw-html:`<td class="yes"></td> <!-- SystemZ -->`
 :raw-html:`<td class="yes"></td> <!-- X86 -->`
-:raw-html:`<td class="unknown"></td> <!-- XCore -->`
+:raw-html:`<td class="yes"></td> <!-- XCore -->`
 :raw-html:`</tr>`
 
 :raw-html:`<tr>`
@@ -1822,6 +1824,7 @@ Here is the table:
 :raw-html:`<td class="no"></td> <!-- NVPTX -->`
 :raw-html:`<td class="no"></td> <!-- PowerPC -->`
 :raw-html:`<td class="no"></td> <!-- Sparc -->`
+:raw-html:`<td class="yes"></td> <!-- SystemZ -->`
 :raw-html:`<td class="yes"></td> <!-- X86 -->`
 :raw-html:`<td class="no"></td> <!-- XCore -->`
 :raw-html:`</tr>`
@@ -1835,9 +1838,10 @@ Here is the table:
 :raw-html:`<td class="no"></td> <!-- Mips -->`
 :raw-html:`<td class="na"></td> <!-- NVPTX -->`
 :raw-html:`<td class="no"></td> <!-- PowerPC -->`
+:raw-html:`<td class="no"></td> <!-- SystemZ -->`
 :raw-html:`<td class="no"></td> <!-- Sparc -->`
 :raw-html:`<td class="yes"></td> <!-- X86 -->`
-:raw-html:`<td class="no"></td> <!-- XCore -->`
+:raw-html:`<td class="yes"></td> <!-- XCore -->`
 :raw-html:`</tr>`
 
 :raw-html:`<tr>`
@@ -1850,8 +1854,9 @@ Here is the table:
 :raw-html:`<td class="yes"></td> <!-- NVPTX -->`
 :raw-html:`<td class="yes"></td> <!-- PowerPC -->`
 :raw-html:`<td class="unknown"></td> <!-- Sparc -->`
+:raw-html:`<td class="yes"></td> <!-- SystemZ -->`
 :raw-html:`<td class="yes"></td> <!-- X86 -->`
-:raw-html:`<td class="unknown"></td> <!-- XCore -->`
+:raw-html:`<td class="yes"></td> <!-- XCore -->`
 :raw-html:`</tr>`
 
 :raw-html:`<tr>`
@@ -1864,8 +1869,9 @@ Here is the table:
 :raw-html:`<td class="na"></td> <!-- NVPTX -->`
 :raw-html:`<td class="yes"></td> <!-- PowerPC -->`
 :raw-html:`<td class="unknown"></td> <!-- Sparc -->`
+:raw-html:`<td class="yes"></td> <!-- SystemZ -->`
 :raw-html:`<td class="yes"></td> <!-- X86 -->`
-:raw-html:`<td class="unknown"></td> <!-- XCore -->`
+:raw-html:`<td class="no"></td> <!-- XCore -->`
 :raw-html:`</tr>`
 
 :raw-html:`<tr>`
@@ -1878,6 +1884,7 @@ Here is the table:
 :raw-html:`<td class="na"></td> <!-- NVPTX -->`
 :raw-html:`<td class="no"></td> <!-- PowerPC -->`
 :raw-html:`<td class="no"></td> <!-- Sparc -->`
+:raw-html:`<td class="yes"></td> <!-- SystemZ -->`
 :raw-html:`<td class="yes"></td> <!-- X86 -->`
 :raw-html:`<td class="no"></td> <!-- XCore -->`
 :raw-html:`</tr>`
@@ -1892,8 +1899,9 @@ Here is the table:
 :raw-html:`<td class="no"></td> <!-- NVPTX -->`
 :raw-html:`<td class="yes"></td> <!-- PowerPC -->`
 :raw-html:`<td class="unknown"></td> <!-- Sparc -->`
+:raw-html:`<td class="no"></td> <!-- SystemZ -->`
 :raw-html:`<td class="yes"></td> <!-- X86 -->`
-:raw-html:`<td class="unknown"></td> <!-- XCore -->`
+:raw-html:`<td class="no"></td> <!-- XCore -->`
 :raw-html:`</tr>`
 
 :raw-html:`<tr>`
@@ -1906,6 +1914,7 @@ Here is the table:
 :raw-html:`<td class="no"></td> <!-- NVPTX -->`
 :raw-html:`<td class="no"></td> <!-- PowerPC -->`
 :raw-html:`<td class="no"></td> <!-- Sparc -->`
+:raw-html:`<td class="no"></td> <!-- SystemZ -->`
 :raw-html:`<td class="partial"><a href="#feat_segstacks_x86">*</a></td> <!-- X86 -->`
 :raw-html:`<td class="no"></td> <!-- XCore -->`
 :raw-html:`</tr>`
diff --git a/docs/CommandGuide/index.rst b/docs/CommandGuide/index.rst
index ac8a944..b3b4bc3 100644
--- a/docs/CommandGuide/index.rst
+++ b/docs/CommandGuide/index.rst
@@ -50,3 +50,4 @@ Developer Tools
    tblgen
    lit
    llvm-build
+   llvm-readobj
diff --git a/docs/CommandGuide/llc.rst b/docs/CommandGuide/llc.rst
index 70354b0..e6a5976 100644
--- a/docs/CommandGuide/llc.rst
+++ b/docs/CommandGuide/llc.rst
@@ -69,6 +69,14 @@ End-user Options
 
    llvm-as < /dev/null | llc -march=xyz -mcpu=help
 
+.. option:: -filetype=<output file type>
+
+ Specify what kind of output ``llc`` should generate.  Options are: ``asm``
+ for textual assembly (``'.s'``), ``obj`` for native object files (``'.o'``)
+ and ``null`` for not emitting anything (for performance testing).
+
+ Note that not all targets support all options.
+
 .. option:: -mattr=a1,+a2,-a3,...
 
  Override or control specific attributes of the target, such as whether SIMD
diff --git a/docs/CommandGuide/llvm-readobj.rst b/docs/CommandGuide/llvm-readobj.rst
new file mode 100644
index 0000000..b1918b5
--- /dev/null
+++ b/docs/CommandGuide/llvm-readobj.rst
@@ -0,0 +1,86 @@
+llvm-readobj - LLVM Object Reader
+=================================
+
+SYNOPSIS
+--------
+
+:program:`llvm-readobj` [*options*] [*input...*]
+
+DESCRIPTION
+-----------
+
+The :program:`llvm-readobj` tool displays low-level format-specific information
+about one or more object files. The tool and its output are primarily designed
+for use in FileCheck-based tests.
+
+OPTIONS
+-------
+
+If ``input`` is "``-``" or omitted, :program:`llvm-readobj` reads from standard
+input. Otherwise, it reads from the specified input files.
+
+.. option:: -help
+
+ Print a summary of command line options.
+
+.. option:: -version
+
+ Display the version of this program.
+
+.. option:: -file-headers, -h
+
+ Display file headers.
+
+.. option:: -sections, -s
+
+ Display all sections.
+
+.. option:: -section-data, -sd
+
+ When used with ``-sections``, display section data for each section shown.
+
+.. option:: -section-relocations, -sr
+
+ When used with ``-sections``, display relocations for each section shown.
+
+.. option:: -section-symbols, -st
+
+ When used with ``-sections``, display symbols for each section shown.
+
+.. option:: -relocations, -r
+
+ Display the relocation entries in the file.
+
+.. option:: -symbols, -t
+
+ Display the symbol table.
+
+.. option:: -dyn-symbols
+
+ Display the dynamic symbol table (only for ELF object files).
+
+.. option:: -unwind, -u
+
+ Display unwind information.
+
+.. option:: -expand-relocs
+
+ When used with ``-relocations``, display each relocation in an expanded
+ multi-line format.
+
+.. option:: -dynamic-table
+
+ Display the ELF .dynamic section table (only for ELF object files).
+
+.. option:: -needed-libs
+
+ Display the needed libraries (only for ELF object files).
+
+.. option:: -program-headers
+
+ Display the ELF program headers (only for ELF object files).
+
+EXIT STATUS
+-----------
+
+:program:`llvm-readobj` returns 0.
diff --git a/docs/CommandGuide/tblgen.rst b/docs/CommandGuide/tblgen.rst
index 1858ee4..1c46828 100644
--- a/docs/CommandGuide/tblgen.rst
+++ b/docs/CommandGuide/tblgen.rst
@@ -23,6 +23,8 @@ file to read as input.
 OPTIONS
 -------
 
+.. program:: tblgen
+
 .. option:: -help
 
  Print a summary of command line options.
@@ -56,7 +58,7 @@ OPTIONS
 
 .. option:: -print-enums
 
- Print enumeration values for a class
+ Print enumeration values for a class.
 
 .. option:: -print-sets
 
diff --git a/docs/CommandLine.rst b/docs/CommandLine.rst
index 073958b..9b77a98 100644
--- a/docs/CommandLine.rst
+++ b/docs/CommandLine.rst
@@ -2,6 +2,9 @@
 CommandLine 2.0 Library Manual
 ==============================
 
+.. contents::
+   :local:
+
 Introduction
 ============
 
@@ -615,6 +618,55 @@ would yield the help output:
     -help             - display available options (-help-hidden for more)
     -o <filename>     - Specify output filename
 
+.. _grouping options into categories:
+
+Grouping options into categories
+--------------------------------
+
+If our program has a large number of options it may become difficult for users
+of our tool to navigate the output of ``-help``. To alleviate this problem we
+can put our options into categories. This can be done by declaring option
+categories (`cl::OptionCategory`_ objects) and then placing our options into
+these categories using the `cl::cat`_ option attribute. For example:
+
+.. code-block:: c++
+
+  cl::OptionCategory StageSelectionCat("Stage Selection Options",
+                                       "These control which stages are run.");
+
+  cl::opt<bool> Preprocessor("E",cl::desc("Run preprocessor stage."),
+                             cl::cat(StageSelectionCat));
+
+  cl::opt<bool> NoLink("c",cl::desc("Run all stages except linking."),
+                       cl::cat(StageSelectionCat));
+
+The output of ``-help`` will become categorized if an option category is
+declared. The output looks something like ::
+
+  OVERVIEW: This is a small program to demo the LLVM CommandLine API
+  USAGE: Sample [options]
+
+  OPTIONS:
+
+    General options:
+
+      -help              - Display available options (-help-hidden for more)
+      -help-list         - Display list of available options (-help-list-hidden for more)
+
+
+    Stage Selection Options:
+    These control which stages are run.
+
+      -E                 - Run preprocessor stage.
+      -c                 - Run all stages except linking.
+
+In addition to the behaviour of ``-help`` changing when an option category is
+declared, the command line option ``-help-list`` becomes visible, which will
+print the command line options as an uncategorized list.
+
+Note that options that are not explicitly categorized will be placed in the
+``cl::GeneralCategory`` category.
+
 .. _Reference Guide:
 
 Reference Guide
@@ -943,6 +995,11 @@ This section describes the basic attributes that you can specify on options.
   of the usual modifiers on multi-valued options (besides
   ``cl::ValueDisallowed``, obviously).
 
+.. _cl::cat:
+
+* The **cl::cat** attribute specifies the option category that the option
+  belongs to. The category should be a `cl::OptionCategory`_ object.
+
 Option Modifiers
 ----------------
 
@@ -1212,6 +1269,57 @@ only consists of one function `cl::ParseCommandLineOptions`_) and three main
 classes: `cl::opt`_, `cl::list`_, and `cl::alias`_.  This section describes
 these three classes in detail.
 
+.. _cl::getRegisteredOptions:
+
+The ``cl::getRegisteredOptions`` function
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The ``cl::getRegisteredOptions`` function is designed to give a programmer
+access to declared non-positional command line options so that how they appear
+in ``-help`` can be modified prior to calling `cl::ParseCommandLineOptions`_.
+Note that this function should not be called during static initialisation because
+it cannot be guaranteed that all options will have been initialised. Hence it
+should be called from ``main``.
+
+This function can be used to gain access to options declared in libraries that
+the tool writer may not have direct access to.
+
+The function retrieves a :ref:`StringMap <dss_stringmap>` that maps the option
+string (e.g. ``-help``) to an ``Option*``.
+
+Here is an example of how the function could be used:
+
+.. code-block:: c++
+
+  using namespace llvm;
+  int main(int argc, char **argv) {
+    cl::OptionCategory AnotherCategory("Some options");
+
+    StringMap<cl::Option*> Map;
+    cl::getRegisteredOptions(Map);
+
+    //Unhide useful option and put it in a different category
+    assert(Map.count("print-all-options") > 0);
+    Map["print-all-options"]->setHiddenFlag(cl::NotHidden);
+    Map["print-all-options"]->setCategory(AnotherCategory);
+
+    //Hide an option we don't want to see
+    assert(Map.count("enable-no-infs-fp-math") > 0);
+    Map["enable-no-infs-fp-math"]->setHiddenFlag(cl::Hidden);
+
+    //Change --version to --show-version
+    assert(Map.count("version") > 0);
+    Map["version"]->setArgStr("show-version");
+
+    //Change --help description
+    assert(Map.count("help") > 0);
+    Map["help"]->setDescription("Shows help");
+
+    cl::ParseCommandLineOptions(argc, argv, "This is a small program to demo the LLVM CommandLine API");
+    ...
+  }
+
+
 .. _cl::ParseCommandLineOptions:
 
 The ``cl::ParseCommandLineOptions`` function
@@ -1382,6 +1490,29 @@ For example:
 
   cl::extrahelp("\nADDITIONAL HELP:\n\n  This is the extra help\n");
 
+.. _cl::OptionCategory:
+
+The ``cl::OptionCategory`` class
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The ``cl::OptionCategory`` class is a simple class for declaring
+option categories.
+
+.. code-block:: c++
+
+  namespace cl {
+    class OptionCategory;
+  }
+
+An option category must have a name and optionally a description which are
+passed to the constructor as ``const char*``.
+
+Note that declaring an option category and associating it with an option before
+parsing options (e.g. statically) will change the output of ``-help`` from
+uncategorized to categorized. If an option category is declared but not
+associated with an option then it will be hidden from the output of ``-help``
+but will be shown in the output of ``-help-hidden``.
+
 .. _different parser:
 .. _discussed previously:
 
diff --git a/docs/CompilerWriterInfo.rst b/docs/CompilerWriterInfo.rst
index 681777c..e9a7bc8 100644
--- a/docs/CompilerWriterInfo.rst
+++ b/docs/CompilerWriterInfo.rst
@@ -20,11 +20,15 @@ ARM
 
 * `ABI <http://www.arm.com/products/DevTools/ABI.html>`_
 
+* `ARM C Language Extensions <http://infocenter.arm.com/help/topic/com.arm.doc.ihi0053a/IHI0053A_acle.pdf>`_
+
 AArch64
 -------
 
 * `ARMv8 Instruction Set Overview <http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.genc010197a/index.html>`_
 
+* `ARM C Language Extensions <http://infocenter.arm.com/help/topic/com.arm.doc.ihi0053a/IHI0053A_acle.pdf>`_
+
 Itanium (ia64)
 --------------
 
diff --git a/docs/DeveloperPolicy.rst b/docs/DeveloperPolicy.rst
index 43bdc85..0655559 100644
--- a/docs/DeveloperPolicy.rst
+++ b/docs/DeveloperPolicy.rst
@@ -260,7 +260,7 @@ quality patches.  If you would like commit access, please send an email to
    from, e.g. "J. Random Hacker <hacker@yoyodyne.com>".
 
 #. A "password hash" of the password you want to use, e.g. "``2ACR96qjUqsyM``".
-   Note that you don't ever tell us what your password is, you just give it to
+   Note that you don't ever tell us what your password is; you just give it to
    us in an encrypted form.  To get this, run "``htpasswd``" (a utility that
    comes with apache) in crypt mode (often enabled with "``-d``"), or find a web
    page that will do it for you.
@@ -269,17 +269,17 @@ Once you've been granted commit access, you should be able to check out an LLVM
 tree with an SVN URL of "https://username@llvm.org/..." instead of the normal
 anonymous URL of "http://llvm.org/...".  The first time you commit you'll have
 to type in your password.  Note that you may get a warning from SVN about an
-untrusted key, you can ignore this.  To verify that your commit access works,
+untrusted key; you can ignore this.  To verify that your commit access works,
 please do a test commit (e.g. change a comment or add a blank line).  Your first
 commit to a repository may require the autogenerated email to be approved by a
-mailing list.  This is normal, and will be done when the mailing list owner has
+mailing list.  This is normal and will be done when the mailing list owner has
 time.
 
 If you have recently been granted commit access, these policies apply:
 
 #. You are granted *commit-after-approval* to all parts of LLVM.  To get
    approval, submit a `patch`_ to `llvm-commits
-   <http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits>`_. When approved
+   <http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits>`_. When approved,
    you may commit it yourself.
 
 #. You are allowed to commit patches without approval which you think are
@@ -291,7 +291,7 @@ If you have recently been granted commit access, these policies apply:
 #. You are allowed to commit patches without approval to those portions of LLVM
    that you have contributed or maintain (i.e., have been assigned
    responsibility for), with the proviso that such commits must not break the
-   build.  This is a "trust but verify" policy and commits of this nature are
+   build.  This is a "trust but verify" policy, and commits of this nature are
    reviewed after they are committed.
 
 #. Multiple violations of these policies or a single egregious violation may
@@ -300,7 +300,7 @@ If you have recently been granted commit access, these policies apply:
 In any case, your changes are still subject to `code review`_ (either before or
 after they are committed, depending on the nature of the change).  You are
 encouraged to review other peoples' patches as well, but you aren't required
-to.
+to do so.
 
 .. _discuss the change/gather consensus:
 
diff --git a/docs/ExtendingLLVM.rst b/docs/ExtendingLLVM.rst
index 3d8e9ee..3ae676a 100644
--- a/docs/ExtendingLLVM.rst
+++ b/docs/ExtendingLLVM.rst
@@ -45,7 +45,7 @@ function and then be turned into an instruction if warranted.
    what the restrictions are.  Talk to other people about it so that you are
    sure it's a good idea.
 
-#. ``llvm/include/llvm/Intrinsics*.td``:
+#. ``llvm/include/llvm/IR/Intrinsics*.td``:
 
    Add an entry for your intrinsic.  Describe its memory access characteristics
    for optimization (this controls whether it will be DCE'd, CSE'd, etc). Note
diff --git a/docs/Extensions.rst b/docs/Extensions.rst
new file mode 100644
index 0000000..062804a
--- /dev/null
+++ b/docs/Extensions.rst
@@ -0,0 +1,39 @@
+===============
+LLVM Extensions
+===============
+
+.. contents::
+   :local:
+   :depth: 1
+
+.. toctree::
+   :hidden:
+
+Introduction
+============
+
+This document describes extensions to tools and formats LLVM seeks compatibility
+with.
+
+Machine-specific Assembly Syntax
+================================
+
+X86/COFF-Dependent
+------------------
+
+The following additional relocation type is supported:
+
+**@IMGREL** (AT&T syntax only) generates an image-relative relocation that
+corresponds to the COFF relocation types ``IMAGE_REL_I386_DIR32NB`` (32-bit) or
+``IMAGE_REL_AMD64_ADDR32NB`` (64-bit).
+
+.. code-block:: gas
+
+  .text
+  fun:
+    mov foo@IMGREL(%ebx, %ecx, 4), %eax
+
+  .section .pdata
+    .long fun@IMGREL
+    .long (fun@imgrel + 0x3F)
+    .long $unwind$fun@imgrel
diff --git a/docs/GettingStarted.rst b/docs/GettingStarted.rst
index 0bbbafc..6016b53 100644
--- a/docs/GettingStarted.rst
+++ b/docs/GettingStarted.rst
@@ -229,6 +229,8 @@ uses the package and provides other details.
 +--------------------------------------------------------------+-----------------+---------------------------------------------+
 | `libtool <http://savannah.gnu.org/projects/libtool>`_        | 1.5.22          | Shared library manager\ :sup:`4`            |
 +--------------------------------------------------------------+-----------------+---------------------------------------------+
+| `zlib <http://zlib.net>`_                                    | >=1.2.3.4       | Compression library\ :sup:`5`               |
++--------------------------------------------------------------+-----------------+---------------------------------------------+
 
 .. note::
 
@@ -243,6 +245,8 @@ uses the package and provides other details.
    #. If you want to make changes to the configure scripts, you will need GNU
       autoconf (2.60), and consequently, GNU M4 (version 1.4 or higher). You
       will also need automake (1.9.6). We only use aclocal from that package.
+   #. Optional, adds compression/decompression capabilities to selected LLVM
+      tools.
 
 Additionally, your compilation host is expected to have the usual plethora of
 Unix utilities. Specifically:
@@ -659,35 +663,20 @@ This leaves your working directories on their master branches, so you'll need to
 ``checkout`` each working branch individually and ``rebase`` it on top of its
 parent branch.
 
-For those who wish to be able to update an llvm repo in a simpler fashion,
-consider placing the following Git script in your path under the name
-``git-svnup``:
-
-.. code-block:: bash
-
-  #!/bin/bash
-
-  STATUS=$(git status -s | grep -v "??")
-
-  if [ ! -z "$STATUS" ]; then
-      STASH="yes"
-      git stash >/dev/null
-  fi
-
-  git fetch
-  OLD_BRANCH=$(git rev-parse --abbrev-ref HEAD)
-  git checkout master 2> /dev/null
-  git svn rebase -l
-  git checkout $OLD_BRANCH 2> /dev/null
+For those who wish to be able to update an llvm repo/revert patches easily using
+git-svn, please look in the directory for the scripts ``git-svnup`` and
+``git-svnrevert``.
 
-  if [ ! -z $STASH ]; then
-      git stash pop >/dev/null
-  fi
+To perform the aforementioned update steps go into your source directory and
+just type ``git-svnup`` or ``git svnup`` and everything will just work.
 
-Then to perform the aforementioned update steps go into your source directory
-and just type ``git-svnup`` or ``git svnup`` and everything will just work.
+If one wishes to revert a commit with git-svn, but does not want the git hash
+to escape into the commit message, one can use the script ``git-svnrevert`` or
+``git svnrevert`` which will take in the git hash for the commit you want to
+revert, look up the appropriate svn revision, and output a message where all
+references to the git hash have been replaced with the svn revision.
 
-To commit back changes via git-svn, use ``dcommit``:
+To commit back changes via git-svn, use ``git svn dcommit``:
 
 .. code-block:: console
 
@@ -770,7 +759,7 @@ The following options can be used to set or enable LLVM specific options:
   case. The current set of targets is:
 
     ``arm, cpp, hexagon, mblaze, mips, mipsel, msp430, powerpc, ptx, sparc, spu,
-    x86, x86_64, xcore``.
+    systemz, x86, x86_64, xcore``.
 
 ``--enable-doxygen``
 
diff --git a/docs/GettingStartedVS.rst b/docs/GettingStartedVS.rst
index 4c80f2c..a80a9e2 100644
--- a/docs/GettingStartedVS.rst
+++ b/docs/GettingStartedVS.rst
@@ -137,15 +137,18 @@ Here's the short story for getting up and running quickly with LLVM:
 
      .. code-block:: bat
 
-        C:\..\llvm> llvm-lit test
+        C:\..\llvm> python ..\build\bin\llvm-lit --param build_config=Win32 --param build_mode=Debug --param llvm_site_config=../build/test/lit.site.cfg test
 
-     Note that quite a few of these test will fail.
+     This example assumes that Python is in your PATH variable and that you
+     have built a Win32 Debug version of llvm with a standard out-of-source
+     build. You should not see any unexpected failures, but will
+     see many unsupported tests and expected failures.
 
      A specific test or test directory can be run with:
 
      .. code-block:: bat
 
-        C:\..\llvm> llvm-lit test/path/to/test
+        C:\..\llvm> python ..\build\bin\llvm-lit --param build_config=Win32 --param build_mode=Debug --param llvm_site_config=../build/test/lit.site.cfg test/path/to/test
 
 
 An Example Using the LLVM Tool Chain
diff --git a/docs/LLVMBuild.rst b/docs/LLVMBuild.rst
index d9215dd..040b044 100644
--- a/docs/LLVMBuild.rst
+++ b/docs/LLVMBuild.rst
@@ -123,8 +123,8 @@ the file format is below:
    boolean_property_name = 1 (or 0)
 
 LLVMBuild files are expected to define a strict set of sections and
-properties. An typical component description file for a library
-component would look typically look like the following example:
+properties. A typical component description file for a library
+component would look like the following example:
 
 .. code-block:: ini
 
diff --git a/docs/LangRef.rst b/docs/LangRef.rst
index 659f02a..7743ff0 100644
--- a/docs/LangRef.rst
+++ b/docs/LangRef.rst
@@ -719,12 +719,17 @@ Currently, only the following parameter attributes are defined:
 ``nest``
     This indicates that the pointer parameter can be excised using the
     :ref:`trampoline intrinsics <int_trampoline>`. This is not a valid
-    attribute for return values.
-``nobuiltin``
-    This indicates that the callee function at a call site is not
-    recognized as a built-in function. LLVM will retain the original call
-    and not replace it with equivalent code based on the semantics of the
-    built-in function.
+    attribute for return values and can only be applied to one parameter.
+
+``returned``
+    This indicates that the function always returns the value of this
+    parameter as its return value. This is an optimization hint to
+    the code generator when generating the caller, allowing tail call
+    optimization and omission of register saves and restores in some cases;
+    it is not checked or enforced when generating the callee. The parameter
+    and the function return type must be valid operands for the
+    :ref:`bitcast instruction <i_bitcast>`. This is not a valid attribute for
+    return values and can only be applied to one parameter.
 
 .. _gc:
 
@@ -764,10 +769,10 @@ inlined, has a stack alignment of 4, and which shouldn't use SSE instructions:
 .. code-block:: llvm
 
    ; Target-independent attributes:
-   #0 = attributes { alwaysinline alignstack=4 }
+   attributes #0 = { alwaysinline alignstack=4 }
 
    ; Target-dependent attributes:
-   #1 = attributes { "no-sse" }
+   attributes #1 = { "no-sse" }
 
    ; Function @f has attributes: alwaysinline, alignstack=4, and "no-sse".
    define void @f() #0 #1 { ... }
@@ -814,6 +819,12 @@ example:
 ``naked``
     This attribute disables prologue / epilogue emission for the
     function. This can have very system-specific consequences.
+``nobuiltin``
+    This indicates that the callee function at a call site is not
+    recognized as a built-in function. LLVM will retain the original call
+    and not replace it with equivalent code based on the semantics of the
+    built-in function. This is only valid at call sites, not on function
+    declarations or definitions.
 ``noduplicate``
     This attribute indicates that calls to the function cannot be
     duplicated. A call to a ``noduplicate`` function may be moved
@@ -1843,11 +1854,11 @@ double, and there are three forms of long double. The 80-bit format used
 by x86 is represented as ``0xK`` followed by 20 hexadecimal digits. The
 128-bit format used by PowerPC (two adjacent doubles) is represented by
 ``0xM`` followed by 32 hexadecimal digits. The IEEE 128-bit format is
-represented by ``0xL`` followed by 32 hexadecimal digits; no currently
-supported target uses this format. Long doubles will only work if they
-match the long double format on your target. The IEEE 16-bit format
-(half precision) is represented by ``0xH`` followed by 4 hexadecimal
-digits. All hexadecimal formats are big-endian (sign bit at the left).
+represented by ``0xL`` followed by 32 hexadecimal digits. Long doubles
+will only work if they match the long double format on your target.
+The IEEE 16-bit format (half precision) is represented by ``0xH``
+followed by 4 hexadecimal digits. All hexadecimal formats are big-endian
+(sign bit at the left).
 
 There are no constants of type x86mmx.
 
@@ -2857,11 +2868,10 @@ All globals of this sort should have a section specified as
 The '``llvm.used``' Global Variable
 -----------------------------------
 
-The ``@llvm.used`` global is an array with i8\* element type which has
-:ref:`appending linkage <linkage_appending>`. This array contains a list of
-pointers to global variables and functions which may optionally have a
-pointer cast formed of bitcast or getelementptr. For example, a legal
-use of it is:
+The ``@llvm.used`` global is an array which has :ref:`appending linkage
+<linkage_appending>`. This array contains a list of pointers to global
+variables, functions and aliases which may optionally have a pointer cast formed
+of bitcast or getelementptr. For example, a legal use of it is:
 
 .. code-block:: llvm
 
@@ -2873,13 +2883,13 @@ use of it is:
        i8* bitcast (i32* @Y to i8*)
     ], section "llvm.metadata"
 
-If a global variable appears in the ``@llvm.used`` list, then the
-compiler, assembler, and linker are required to treat the symbol as if
-there is a reference to the global that it cannot see. For example, if a
-variable has internal linkage and no references other than that from the
-``@llvm.used`` list, it cannot be deleted. This is commonly used to
-represent references from inline asms and other things the compiler
-cannot "see", and corresponds to "``attribute((used))``" in GNU C.
+If a symbol appears in the ``@llvm.used`` list, then the compiler, assembler,
+and linker are required to treat the symbol as if there is a reference to the
+symbol that it cannot see. For example, if a variable has internal linkage and
+no references other than that from the ``@llvm.used`` list, it cannot be
+deleted. This is commonly used to represent references from inline asms and
+other things the compiler cannot "see", and corresponds to
+"``attribute((used))``" in GNU C.
 
 On some targets, the code generator must emit a directive to the
 assembler or object file to prevent the assembler and linker from
@@ -4534,7 +4544,7 @@ The '``load``' instruction is used to read from memory.
 Arguments:
 """"""""""
 
-The argument to the '``load``' instruction specifies the memory address
+The argument to the ``load`` instruction specifies the memory address
 from which to load. The pointer must point to a :ref:`first
 class <t_firstclass>` type. If the ``load`` is marked as ``volatile``,
 then the optimizer is not allowed to modify the number or order of
@@ -4555,14 +4565,14 @@ any defined semantics for atomic loads.
 
 The optional constant ``align`` argument specifies the alignment of the
 operation (that is, the alignment of the memory address). A value of 0
-or an omitted ``align`` argument means that the operation has the abi
+or an omitted ``align`` argument means that the operation has the ABI
 alignment for the target. It is the responsibility of the code emitter
 to ensure that the alignment information is correct. Overestimating the
 alignment results in undefined behavior. Underestimating the alignment
 may produce less efficient code. An alignment of 1 is always safe.
 
 The optional ``!nontemporal`` metadata must reference a single
-metatadata name <index> corresponding to a metadata node with one
+metadata name ``<index>`` corresponding to a metadata node with one
 ``i32`` entry of value 1. The existence of the ``!nontemporal``
 metatadata on the instruction tells the optimizer and code generator
 that this load is not expected to be reused in the cache. The code
@@ -4570,7 +4580,7 @@ generator may select special instructions to save cache bandwidth, such
 as the ``MOVNT`` instruction on x86.
 
 The optional ``!invariant.load`` metadata must reference a single
-metatadata name <index> corresponding to a metadata node with no
+metadata name ``<index>`` corresponding to a metadata node with no
 entries. The existence of the ``!invariant.load`` metatadata on the
 instruction tells the optimizer and code generator that this load
 address points to memory which does not change value during program
@@ -4618,10 +4628,10 @@ The '``store``' instruction is used to write to memory.
 Arguments:
 """"""""""
 
-There are two arguments to the '``store``' instruction: a value to store
-and an address at which to store it. The type of the '``<pointer>``'
+There are two arguments to the ``store`` instruction: a value to store
+and an address at which to store it. The type of the ``<pointer>``
 operand must be a pointer to the :ref:`first class <t_firstclass>` type of
-the '``<value>``' operand. If the ``store`` is marked as ``volatile``,
+the ``<value>`` operand. If the ``store`` is marked as ``volatile``,
 then the optimizer is not allowed to modify the number or order of
 execution of this ``store`` with other :ref:`volatile
 operations <volatile>`.
@@ -4638,18 +4648,18 @@ has undefined behavior if the alignment is not set to a value which is
 at least the size in bytes of the pointee. ``!nontemporal`` does not
 have any defined semantics for atomic stores.
 
-The optional constant "align" argument specifies the alignment of the
+The optional constant ``align`` argument specifies the alignment of the
 operation (that is, the alignment of the memory address). A value of 0
-or an omitted "align" argument means that the operation has the abi
+or an omitted ``align`` argument means that the operation has the ABI
 alignment for the target. It is the responsibility of the code emitter
 to ensure that the alignment information is correct. Overestimating the
-alignment results in an undefined behavior. Underestimating the
+alignment results in undefined behavior. Underestimating the
 alignment may produce less efficient code. An alignment of 1 is always
 safe.
 
-The optional !nontemporal metadata must reference a single metatadata
-name <index> corresponding to a metadata node with one i32 entry of
-value 1. The existence of the !nontemporal metatadata on the instruction
+The optional ``!nontemporal`` metadata must reference a single metadata
+name ``<index>`` corresponding to a metadata node with one ``i32`` entry of
+value 1. The existence of the ``!nontemporal`` metadata on the instruction
 tells the optimizer and code generator that this load is not expected to
 be reused in the cache. The code generator may select special
 instructions to save cache bandwidth, such as the MOVNT instruction on
@@ -4658,8 +4668,8 @@ x86.
 Semantics:
 """"""""""
 
-The contents of memory are updated to contain '``<value>``' at the
-location specified by the '``<pointer>``' operand. If '``<value>``' is
+The contents of memory are updated to contain ``<value>`` at the
+location specified by the ``<pointer>`` operand. If ``<value>`` is
 of scalar type then the number of bytes written does not exceed the
 minimum number of bytes needed to hold all bits of the type. For
 example, storing an ``i24`` writes at most three bytes. When writing a
diff --git a/docs/ReleaseNotes.rst b/docs/ReleaseNotes.rst
index 3ca5560..f2afdc9 100644
--- a/docs/ReleaseNotes.rst
+++ b/docs/ReleaseNotes.rst
@@ -5,12 +5,6 @@ LLVM 3.3 Release Notes
 .. contents::
     :local:
 
-.. warning::
-   These are in-progress notes for the upcoming LLVM 3.3 release.  You may
-   prefer the `LLVM 3.2 Release Notes <http://llvm.org/releases/3.2/docs
-   /ReleaseNotes.html>`_.
-
-
 Introduction
 ============
 
@@ -34,13 +28,6 @@ page <http://llvm.org/releases/>`_.
 Non-comprehensive list of changes in this release
 =================================================
 
-.. NOTE
-   For small 1-3 sentence descriptions, just add an entry at the end of
-   this list. If your description won't fit comfortably in one bullet
-   point (e.g. maybe you would like to give an example of the
-   functionality, or simply have a lot to talk about), see the `NOTE` below
-   for adding a new subsection.
-
 * The CellSPU port has been removed.  It can still be found in older versions.
 
 * The IR-level extended linker APIs (for example, to link bitcode files out of
@@ -70,17 +57,15 @@ Non-comprehensive list of changes in this release
   examples of the new syntax. The old syntax using register classes still
   works, but it will be removed in a future LLVM release.
 
-* ... next change ...
+* MCJIT now supports exception handling. Support for it in the old jit will be
+  removed in the 3.4 release.
 
-.. NOTE
-   If you would like to document a larger change, then you can add a
-   subsection about it right here. You can copy the following boilerplate
-   and un-indent it (the indentation causes it to be inside this comment).
+* Command line options can now be grouped into categories which are shown in
+  the output of ``-help``. See :ref:`grouping options into categories`.
 
-   Special New Feature
-   -------------------
-
-   Makes programs 10x faster by doing Special New Thing.
+* The appearance of command line options in ``-help`` that are inherited by
+  linking with libraries that use the LLVM Command line support library can now
+  be modified at runtime. See :ref:`cl::getRegisteredOptions`.
 
 AArch64 target
 --------------
@@ -99,9 +84,59 @@ GNU-style thread local storage and inline assembly.
 Hexagon Target
 --------------
 
-- Removed support for legacy hexagonv2 and hexagonv3 processor
-  architectures which are no longer in use. Currently supported
-  architectures are hexagonv4 and hexagonv5.
+Removed support for legacy hexagonv2 and hexagonv3 processor architectures which
+are no longer in use. Currently supported architectures are hexagonv4 and
+hexagonv5.
+
+Mips target
+--------------
+
+New features and improvements:
+
+- Clang driver
+ - Support for Sourcery CodeBench Mips toolchain directories tree.
+ - Support for new command line options including:
+  - -mxgot/-mno-xgot
+  - -EL / -EB
+  - -mmicromips / -mno-micromips
+  - -msingle-float / -mdouble-float
+  - -mabi=32 (o32 abi) and -mabi=64 (n64 abi)
+ - Previously, options such as -mips16, -mmicromips, -mdsp and -mdspr2 were
+   not passed to the assembler. This issue has been fixed.
+
+- A number of changes have been made to improve the quality of DSP-ASE code
+  generation.
+ - Multiply and multiply-accumulate instructions can now use all four
+   accumulators.
+ - Instruction selection patterns have been added so that DSP instructions
+   are emitted without having to use builtins.
+
+- Delay slot filler pass can now search successor blocks for instructions to
+  fill delay slots (use option -disable-mips-df-succbb-search=false).
+
+PowerPC Target
+--------------
+
+New features and improvements:
+
+- PowerPC now supports an assembly parser.
+- Support added for thread-local storage.  64-bit ELF subtarget only.
+- Support added for medium and large code model (-mcmodel=medium,large).
+  Medium code model is now the default.  64-bit ELF subtarget only.
+- Improved register allocation (fewer reserved registers).
+- 64-bit atomic load and store are now supported.
+- Improved code generation for unaligned memory accesses of scalar types.
+- Improved performance of floating-point divide and square root
+  with -ffast-math.
+- Support for predicated returns.
+- Improved code generation for comparisons.
+- Support added for inline setjmp and longjmp.
+- Support added for many instructions introduced in PowerISA 2.04, 2.05,
+  and 2.06.
+- Improved spill code for vector registers.
+- Support added for -mno-altivec.
+- ABI compatibility fixes for complex parameters, 128-bit integer parameters,
+  and varargs functions.  64-bit ELF subtarget only.
 
 Loop Vectorizer
 ---------------
@@ -109,24 +144,164 @@ Loop Vectorizer
 We've continued the work on the loop vectorizer. The loop vectorizer now
 has the following features:
 
-- Loops with unknown trip count.
-- Runtime checks of pointers
-- Reductions, Inductions
-- If Conversion
-- Pointer induction variables
-- Reverse iterators
-- Vectorization of mixed types
-- Vectorization of function calls
-- Partial unrolling during vectorization
+- Loops with unknown trip counts.
+- Runtime checks of pointers.
+- Reductions, Inductions.
+- Min/Max reductions of integers.
+- If Conversion.
+- Pointer induction variables.
+- Reverse iterators.
+- Vectorization of mixed types.
+- Vectorization of function calls.
+- Partial unrolling during vectorization.
+
+The loop vectorizer is now enabled by default for -O3.
+
+SLP Vectorizer
+--------------
+
+LLVM now has a new SLP vectorizer. The new SLP vectorizer is not enabled by
+default but can be enabled using the clang flag ``-fslp-vectorize``. The
+BB-vectorizer can also be enabled using the command line flag
+``-fslp-vectorize-aggressive``.
 
 R600 Backend
 ------------
 
-The R600 backend was added in this release, it supports AMD GPUs
-(HD2XXX - HD7XXX).  This backend is used in AMD's Open Source
-graphics / compute drivers which are developed as part of the `Mesa3D
-<http://www.mesa3d.org>`_ project.
+The R600 backend was added in this release; it supports AMD GPUs (HD2XXX -
+HD7XXX).  This backend is used in AMD's Open Source graphics / compute drivers
+which are developed as part of the `Mesa3D <http://www.mesa3d.org>`_ project.
+
+SystemZ/s390x Backend
+---------------------
+
+LLVM and clang now support IBM's z/Architecture.  At present this support
+is restricted to GNU/Linux (GNU triplet s390x-linux-gnu) and requires
+z10 or greater.
+
+
+Sub-project Status Update
+=========================
+
+In addition to the core LLVM 3.3 distribution of production-quality compiler
+infrastructure, the LLVM project includes sub-projects that use the LLVM core
+and share the same distribution license.  This section provides updates on these
+sub-projects.
+
+
+DragonEgg: GCC front-ends, LLVM back-end
+----------------------------------------
+
+`DragonEgg <http://dragonegg.llvm.org/>`_ is a
+`GCC plugin <http://gcc.gnu.org/wiki/plugins>`_ that replaces GCC's optimizers
+and code generators with LLVM's.  It works with gcc-4.5, 4.6, 4.7 and 4.8, can
+target the x86-32/x86-64 and ARM processor families, and has been successfully
+used on the Darwin, FreeBSD, KFreeBSD, Linux and OpenBSD platforms.  It fully
+supports Ada, C, C++ and Fortran.  It has partial support for Go, Java, Obj-C
+and Obj-C++.  Note that gcc-4.6 is the best supported version, and that Ada in
+particular doesn't work well with gcc-4.7 and newer.
+
+The `3.3 release <http://llvm.org/apt/>`_ has the following notable changes.
+
+- supports gcc-4.8 (requires gcc-4.8.1 or newer)
+- object files can be written directly using LLVM's integrated assembler
+- produces saner debug info
+- bitfields can now contain arbitrary scalar types (useful for Ada)
+
+
+LLDB: Low Level Debugger
+------------------------
+
+`LLDB <http://lldb.llvm.org/>`_ is a ground-up implementation of a command-line
+debugger, as well as a debugger API that can be used from scripts and other
+applications. LLDB uses the following components of the LLVM core distribution
+to support the latest language features and target support:
+
+- the Clang parser for high-quality parsing of C, C++ and Objective C
+- the LLVM disassembler
+- the LLVM JIT compiler (MCJIT) for expression evaluation
+
+The `3.3 release <http://llvm.org/apt/>`_ has the following notable changes.
+
+Linux Features:
+
+- Support for watchpoints
+- vim integration for lldb commands and program status using a `vim plug-in
+  <http://llvm.org/svn/llvm-project/lldb/trunk/utils/vim-lldb/README>`_
+- Improved register support including vector registers
+- Builds with cmake/ninja/auto-tools/clang 3.3/gcc 4.6
+
+Linux Improvements:
+
+- Debugging multi-threaded programs
+- Debugging i386 programs
+- Process list, attach and fork
+- Expression evaluation
+
+
+External Open Source Projects Using LLVM 3.3
+============================================
+
+An exciting aspect of LLVM is that it is used as an enabling technology for a
+lot of other language and tools projects. This section lists some of the
+projects that have already been updated to work with LLVM 3.3.
+
+
+Portable Computing Language (pocl)
+----------------------------------
+
+In addition to producing an easily portable open source OpenCL implementation,
+another major goal of `pocl <http://pocl.sourceforge.net/>`_ is improving
+performance portability of OpenCL programs with compiler optimizations, reducing
+the need for target-dependent manual optimizations. An important part of pocl is
+a set of LLVM passes used to statically parallelize multiple work-items with the
+kernel compiler, even in the presence of work-group barriers. This enables
+static parallelization of the fine-grained static concurrency in the work groups
+in multiple ways.
+
+TTA-based Co-design Environment (TCE)
+-------------------------------------
+
+`TCE <http://tce.cs.tut.fi/>`_ is a toolset for designing new processors based
+on the Transport triggered architecture (TTA).  The toolset provides a complete
+co-design flow from C/C++ programs down to synthesizable VHDL/Verilog and
+parallel program binaries.  Processor customization points include the register
+files, function units, supported operations, and the interconnection network.
+
+TCE uses Clang and LLVM for C/C++/OpenCL C language support, target independent
+optimizations and also for parts of code generation. It generates new LLVM-based
+code generators "on the fly" for the designed TTA processors and loads them
+into the compiler backend as runtime libraries to avoid per-target recompilation
+of larger parts of the compiler chain.
+
+Just-in-time Adaptive Decoder Engine (Jade)
+-------------------------------------------
+
+`Jade <https://github.com/orcc/jade>`_ (Just-in-time Adaptive Decoder Engine) is
+a generic video decoder engine using LLVM for just-in-time compilation of video
+decoder configurations. Those configurations are designed by the MPEG
+Reconfigurable Video Coding (RVC) committee. The MPEG RVC standard is built on a
+stream-based dataflow representation of decoders. It is composed of a standard
+library of coding tools written in the RVC-CAL language and a dataflow
+configuration --- block diagram --- of a decoder.
+
+The Jade project is hosted as part of the Open RVC-CAL Compiler (`Orcc
+<http://orcc.sf.net>`_) and requires it to translate the RVC-CAL standard
+library of video coding tools into LLVM assembly code.
+
+LDC - the LLVM-based D compiler
+-------------------------------
+
+`D <http://dlang.org>`_ is a language with C-like syntax and static typing. It
+pragmatically combines efficiency, control, and modeling power, with safety and
+programmer productivity. D supports powerful concepts like Compile-Time Function
+Execution (CTFE) and Template Meta-Programming, provides an innovative approach
+to concurrency and offers many classical paradigms.
 
+`LDC <http://wiki.dlang.org/LDC>`_ uses the frontend from the reference compiler
+combined with LLVM as backend to produce efficient native code. LDC targets
+x86/x86_64 systems like Linux, OS X and Windows and also Linux/PPC64. Ports to
+other architectures like ARM are underway.
 
 
 Additional Information
diff --git a/docs/Vectorizers.rst b/docs/Vectorizers.rst
index e2d3667..d565c21 100644
--- a/docs/Vectorizers.rst
+++ b/docs/Vectorizers.rst
@@ -6,10 +6,10 @@ Auto-Vectorization in LLVM
    :local:
 
 LLVM has two vectorizers: The :ref:`Loop Vectorizer <loop-vectorizer>`,
-which operates on Loops, and the :ref:`Basic Block Vectorizer
-<bb-vectorizer>`, which optimizes straight-line code. These vectorizers
+which operates on Loops, and the :ref:`SLP Vectorizer
+<slp-vectorizer>`, which optimizes straight-line code. These vectorizers
 focus on different optimization opportunities and use different techniques.
-The BB vectorizer merges multiple scalars that are found in the code into
+The SLP vectorizer merges multiple scalars that are found in the code into
 vectors while the Loop Vectorizer widens instructions in the original loop
 to operate on multiple consecutive loop iterations.
 
@@ -21,19 +21,13 @@ The Loop Vectorizer
 Usage
 -----
 
-LLVM's Loop Vectorizer is now available and will be useful for many people.
-It is not enabled by default, but can be enabled through clang using the
-command line flag:
+LLVM's Loop Vectorizer is now enabled by default for -O3.
+We plan to enable parts of the Loop Vectorizer on -O2 and -Os in future releases.
+The vectorizer can be disabled using the command line:
 
 .. code-block:: console
 
-   $ clang -fvectorize -O3 file.c
-
-If the ``-fvectorize`` flag is used then the loop vectorizer will be enabled
-when running with ``-O3``, ``-O2``. When ``-Os`` is used, the loop vectorizer
-will only vectorize loops that do not require a major increase in code size.
-
-We plan to enable the Loop Vectorizer by default as part of the LLVM 3.3 release.
+   $ clang ... -fno-vectorize  file.c
 
 Command line flags
 ^^^^^^^^^^^^^^^^^^
@@ -299,25 +293,15 @@ And Linpack-pc with the same configuration. Result is Mflops, higher is better.
 
 .. image:: linpack-pc.png
 
-.. _bb-vectorizer:
+.. _slp-vectorizer:
 
-The Basic Block Vectorizer
-==========================
-
-Usage
-------
-
-The Basic Block Vectorizer is not enabled by default, but it can be enabled
-through clang using the command line flag:
-
-.. code-block:: console
-
-   $ clang -fslp-vectorize file.c
+The SLP Vectorizer
+==================
 
 Details
 -------
 
-The goal of basic-block vectorization (a.k.a. superword-level parallelism) is
+The goal of SLP vectorization (a.k.a. superword-level parallelism) is
 to combine similar independent instructions within simple control-flow regions
 into vector instructions. Memory accesses, arithemetic operations, comparison
 operations and some math functions can all be vectorized using this technique
@@ -329,10 +313,50 @@ into vector operations.
 
 .. code-block:: c++
 
-  int foo(int a1, int a2, int b1, int b2) {
-    int r1 = a1*(a1 + b1)/b1 + 50*b1/a1;
-    int r2 = a2*(a2 + b2)/b2 + 50*b2/a2;
-    return r1 + r2;
+  void foo(int a1, int a2, int b1, int b2, int *A) {
+    A[0] = a1*(a1 + b1)/b1 + 50*b1/a1;
+    A[1] = a2*(a2 + b2)/b2 + 50*b2/a2;
   }
 
+The SLP-vectorizer has two phases, bottom-up, and top-down. The top-down vectorization
+phase is more aggressive, but takes more time to run.
+
+Usage
+------
+
+The SLP Vectorizer is not enabled by default, but it can be enabled
+through clang using the command line flag:
+
+.. code-block:: console
+
+   $ clang -fslp-vectorize file.c
+
+LLVM has a second basic block vectorization phase
+which is more compile-time intensive (The BB vectorizer). This optimization
+can be enabled through clang using the command line flag:
+
+.. code-block:: console
+
+   $ clang -fslp-vectorize-aggressive file.c
+
+
+The SLP vectorizer is in early development stages but can already vectorize
+and accelerate many programs in the LLVM test suite.
+
+=======================   ============
+Benchmark Name              Gain
+=======================   ============
+Misc/flops-7               -32.70%
+Misc/matmul_f64_4x4        -23.23%
+Olden/power                -21.45%
+Misc/flops-4               -14.90%
+ASC_Sequoia/AMGmk          -13.85%
+TSVC/LoopRerolling-flt     -11.76%
+Misc/flops-6               -9.70%
+Misc/flops-5               -8.54%
+Misc/flops                 -8.12%
+TSVC/NodeSplitting-dbl     -6.96%
+Misc-C++/sphereflake       -6.74%
+Ptrdist/yacr2              -6.31%
+=======================   ============
 
diff --git a/docs/index.rst b/docs/index.rst
index c3bb808..6b182da 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -160,6 +160,7 @@ For developers of applications which use LLVM as a library.
    ExtendingLLVM
    HowToSetUpLLVMStyleRTTI
    ProgrammersManual
+   Extensions
 
 :doc:`LLVM Language Reference Manual <LangRef>`
   Defines the LLVM intermediate representation and the assembly form of the
@@ -172,6 +173,9 @@ For developers of applications which use LLVM as a library.
   Introduction to the general layout of the LLVM sourcebase, important classes
   and APIs, and some tips & tricks.
 
+:doc:`Extensions`
+  LLVM-specific extensions to tools and formats LLVM seeks compatibility with.
+
 :doc:`CommandLine`
   Provides information on using the command line parsing library.
 
diff --git a/docs/tutorial/LangImpl1.rst b/docs/tutorial/LangImpl1.rst
index aa619cf..a2c5eee 100644
--- a/docs/tutorial/LangImpl1.rst
+++ b/docs/tutorial/LangImpl1.rst
@@ -55,7 +55,7 @@ in the various pieces. The structure of the tutorial is:
    - Because a lot of people are interested in using LLVM as a JIT,
    we'll dive right into it and show you the 3 lines it takes to add JIT
    support. LLVM is also useful in many other ways, but this is one
-   simple and "sexy" way to shows off its power. :)
+   simple and "sexy" way to show off its power. :)
 -  `Chapter #5 <LangImpl5.html>`_: Extending the Language: Control
    Flow - With the language up and running, we show how to extend it
    with control flow operations (if/then/else and a 'for' loop). This
diff --git a/examples/ExceptionDemo/CMakeLists.txt b/examples/ExceptionDemo/CMakeLists.txt
index 88c9ab7..ea818fa 100644
--- a/examples/ExceptionDemo/CMakeLists.txt
+++ b/examples/ExceptionDemo/CMakeLists.txt
@@ -1,6 +1,8 @@
-set(LLVM_LINK_COMPONENTS jit nativecodegen)
+set(LLVM_LINK_COMPONENTS jit mcjit nativecodegen)
 set(LLVM_REQUIRES_EH 1)
 
 add_llvm_example(ExceptionDemo
   ExceptionDemo.cpp
   )
+
+set_target_properties(ExceptionDemo PROPERTIES ENABLE_EXPORTS 1)
diff --git a/examples/ExceptionDemo/ExceptionDemo.cpp b/examples/ExceptionDemo/ExceptionDemo.cpp
index 264ef54..f9498a5 100644
--- a/examples/ExceptionDemo/ExceptionDemo.cpp
+++ b/examples/ExceptionDemo/ExceptionDemo.cpp
@@ -49,8 +49,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/Verifier.h"
-#include "llvm/ExecutionEngine/ExecutionEngine.h"
-#include "llvm/ExecutionEngine/JIT.h"
+#include "llvm/ExecutionEngine/MCJIT.h"
+#include "llvm/ExecutionEngine/SectionMemoryManager.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/IRBuilder.h"
@@ -418,6 +418,30 @@ static uintptr_t readSLEB128(const uint8_t **data) {
   return result;
 }
 
+unsigned getEncodingSize(uint8_t Encoding) {
+  if (Encoding == llvm::dwarf::DW_EH_PE_omit)
+    return 0;
+
+  switch (Encoding & 0x0F) {
+  case llvm::dwarf::DW_EH_PE_absptr:
+    return sizeof(uintptr_t);
+  case llvm::dwarf::DW_EH_PE_udata2:
+    return sizeof(uint16_t);
+  case llvm::dwarf::DW_EH_PE_udata4:
+    return sizeof(uint32_t);
+  case llvm::dwarf::DW_EH_PE_udata8:
+    return sizeof(uint64_t);
+  case llvm::dwarf::DW_EH_PE_sdata2:
+    return sizeof(int16_t);
+  case llvm::dwarf::DW_EH_PE_sdata4:
+    return sizeof(int32_t);
+  case llvm::dwarf::DW_EH_PE_sdata8:
+    return sizeof(int64_t);
+  default:
+    // not supported
+    abort();
+  }
+}
 
 /// Read a pointer encoded value and advance pointer
 /// See Variable Length Data in:
@@ -523,7 +547,8 @@ static uintptr_t readEncodedPointer(const uint8_t **data, uint8_t encoding) {
 /// @returns whether or not a type info was found. False is returned if only
 ///          a cleanup was found
 static bool handleActionValue(int64_t *resultAction,
-                              struct OurExceptionType_t **classInfo,
+                              uint8_t TTypeEncoding,
+                              const uint8_t *ClassInfo,
                               uintptr_t actionEntry,
                               uint64_t exceptionClass,
                               struct _Unwind_Exception *exceptionObject) {
@@ -572,16 +597,22 @@ static bool handleActionValue(int64_t *resultAction,
 
     // Note: A typeOffset == 0 implies that a cleanup llvm.eh.selector
     //       argument has been matched.
-    if ((typeOffset > 0) &&
-        (type == (classInfo[-typeOffset])->type)) {
+    if (typeOffset > 0) {
 #ifdef DEBUG
       fprintf(stderr,
               "handleActionValue(...):actionValue <%d> found.\n",
               i);
 #endif
-      *resultAction = i + 1;
-      ret = true;
-      break;
+      unsigned EncSize = getEncodingSize(TTypeEncoding);
+      const uint8_t *EntryP = ClassInfo - typeOffset * EncSize;
+      uintptr_t P = readEncodedPointer(&EntryP, TTypeEncoding);
+      struct OurExceptionType_t *ThisClassInfo =
+        reinterpret_cast<struct OurExceptionType_t *>(P);
+      if (ThisClassInfo->type == type) {
+        *resultAction = i + 1;
+        ret = true;
+        break;
+      }
     }
 
 #ifdef DEBUG
@@ -633,7 +664,7 @@ static _Unwind_Reason_Code handleLsda(int version,
   // emitted dwarf code)
   uintptr_t funcStart = _Unwind_GetRegionStart(context);
   uintptr_t pcOffset = pc - funcStart;
-  struct OurExceptionType_t **classInfo = NULL;
+  const uint8_t *ClassInfo = NULL;
 
   // Note: See JITDwarfEmitter::EmitExceptionTable(...) for corresponding
   //       dwarf emission
@@ -653,7 +684,7 @@ static _Unwind_Reason_Code handleLsda(int version,
     // were flagged by type info arguments to llvm.eh.selector
     // intrinsic
     classInfoOffset = readULEB128(&lsda);
-    classInfo = (struct OurExceptionType_t**) (lsda + classInfoOffset);
+    ClassInfo = lsda + classInfoOffset;
   }
 
   // Walk call-site table looking for range that
@@ -714,7 +745,8 @@ static _Unwind_Reason_Code handleLsda(int version,
 
       if (actionEntry) {
         exceptionMatched = handleActionValue(&actionValue,
-                                             classInfo,
+                                             ttypeEncoding,
+                                             ClassInfo,
                                              actionEntry,
                                              exceptionClass,
                                              exceptionObject);
@@ -1921,17 +1953,22 @@ int main(int argc, char *argv[]) {
   Opts.JITExceptionHandling = true;
 
   llvm::InitializeNativeTarget();
+  llvm::InitializeNativeTargetAsmPrinter();
   llvm::LLVMContext &context = llvm::getGlobalContext();
   llvm::IRBuilder<> theBuilder(context);
 
   // Make the module, which holds all the code.
   llvm::Module *module = new llvm::Module("my cool jit", context);
 
+  llvm::JITMemoryManager *MemMgr = new llvm::SectionMemoryManager();
+
   // Build engine with JIT
   llvm::EngineBuilder factory(module);
   factory.setEngineKind(llvm::EngineKind::JIT);
   factory.setAllocateGVsWithCode(false);
   factory.setTargetOptions(Opts);
+  factory.setJITMemoryManager(MemMgr);
+  factory.setUseMCJIT(true);
   llvm::ExecutionEngine *executionEngine = factory.create();
 
   {
@@ -1975,6 +2012,8 @@ int main(int argc, char *argv[]) {
                               fpm,
                               "throwCppException");
 
+    executionEngine->finalizeObject();
+
     fprintf(stderr, "\nBegin module dump:\n\n");
 
     module->dump();
diff --git a/include/llvm-c/Core.h b/include/llvm-c/Core.h
index e85fb97..6b62f33 100644
--- a/include/llvm-c/Core.h
+++ b/include/llvm-c/Core.h
@@ -18,13 +18,6 @@
 #include "llvm/Support/DataTypes.h"
 
 #ifdef __cplusplus
-
-/* Need these includes to support the LLVM 'cast' template for the C++ 'wrap' 
-   and 'unwrap' conversion functions. */
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Module.h"
-#include "llvm/PassRegistry.h"
-
 extern "C" {
 #endif
 
@@ -60,11 +53,6 @@ extern "C" {
  * with C++ due to name mangling. So in addition to C, this interface enables
  * tools written in such languages.
  *
- * When included into a C++ source file, also declares 'wrap' and 'unwrap'
- * helpers to perform opaque reference<-->pointer conversions. These helpers
- * are shorter and more tightly typed than writing the casts by hand when
- * authoring bindings. In assert builds, they will do runtime type checking.
- *
  * @{
  */
 
@@ -352,6 +340,63 @@ typedef enum {
   LLVMLandingPadFilter    /**< A filter clause  */
 } LLVMLandingPadClauseTy;
 
+typedef enum {
+  LLVMNotThreadLocal = 0,
+  LLVMGeneralDynamicTLSModel,
+  LLVMLocalDynamicTLSModel,
+  LLVMInitialExecTLSModel,
+  LLVMLocalExecTLSModel
+} LLVMThreadLocalMode;
+
+typedef enum {
+  LLVMAtomicOrderingNotAtomic = 0, /**< A load or store which is not atomic */
+  LLVMAtomicOrderingUnordered = 1, /**< Lowest level of atomicity, guarantees
+                                     somewhat sane results, lock free. */
+  LLVMAtomicOrderingMonotonic = 2, /**< guarantees that if you take all the 
+                                     operations affecting a specific address, 
+                                     a consistent ordering exists */
+  LLVMAtomicOrderingAcquire = 4, /**< Acquire provides a barrier of the sort 
+                                   necessary to acquire a lock to access other 
+                                   memory with normal loads and stores. */
+  LLVMAtomicOrderingRelease = 5, /**< Release is similar to Acquire, but with 
+                                   a barrier of the sort necessary to release 
+                                   a lock. */
+  LLVMAtomicOrderingAcquireRelease = 6, /**< provides both an Acquire and a 
+                                          Release barrier (for fences and 
+                                          operations which both read and write
+                                           memory). */
+  LLVMAtomicOrderingSequentiallyConsistent = 7 /**< provides Acquire semantics 
+                                                 for loads and Release 
+                                                 semantics for stores. 
+                                                 Additionally, it guarantees 
+                                                 that a total ordering exists 
+                                                 between all 
+                                                 SequentiallyConsistent 
+                                                 operations. */
+} LLVMAtomicOrdering;
+
+typedef enum {
+    LLVMAtomicRMWBinOpXchg, /**< Set the new value and return the old one */
+    LLVMAtomicRMWBinOpAdd, /**< Add a value and return the old one */
+    LLVMAtomicRMWBinOpSub, /**< Subtract a value and return the old one */
+    LLVMAtomicRMWBinOpAnd, /**< And a value and return the old one */
+    LLVMAtomicRMWBinOpNand, /**< Not-And a value and return the old one */
+    LLVMAtomicRMWBinOpOr, /**< OR a value and return the old one */
+    LLVMAtomicRMWBinOpXor, /**< Xor a value and return the old one */
+    LLVMAtomicRMWBinOpMax, /**< Sets the value if it's greater than the
+                             original using a signed comparison and return 
+                             the old one */
+    LLVMAtomicRMWBinOpMin, /**< Sets the value if it's smaller than the
+                             original using a signed comparison and return 
+                             the old one */
+    LLVMAtomicRMWBinOpUMax, /**< Sets the value if it's greater than the
+                             original using an unsigned comparison and return 
+                             the old one */
+    LLVMAtomicRMWBinOpUMin /**< Sets the value if it's smaller than the
+                             original using an unsigned comparison and return
+                             the old one */
+} LLVMAtomicRMWBinOp;
+
 /**
  * @}
  */
@@ -1057,24 +1102,24 @@ LLVMTypeRef LLVMX86MMXType(void);
         macro(SwitchInst)                   \
         macro(UnreachableInst)              \
         macro(ResumeInst)                   \
-    macro(UnaryInstruction)                 \
-      macro(AllocaInst)                     \
-      macro(CastInst)                       \
-        macro(BitCastInst)                  \
-        macro(FPExtInst)                    \
-        macro(FPToSIInst)                   \
-        macro(FPToUIInst)                   \
-        macro(FPTruncInst)                  \
-        macro(IntToPtrInst)                 \
-        macro(PtrToIntInst)                 \
-        macro(SExtInst)                     \
-        macro(SIToFPInst)                   \
-        macro(TruncInst)                    \
-        macro(UIToFPInst)                   \
-        macro(ZExtInst)                     \
-      macro(ExtractValueInst)               \
-      macro(LoadInst)                       \
-      macro(VAArgInst)
+      macro(UnaryInstruction)               \
+        macro(AllocaInst)                   \
+        macro(CastInst)                     \
+          macro(BitCastInst)                \
+          macro(FPExtInst)                  \
+          macro(FPToSIInst)                 \
+          macro(FPToUIInst)                 \
+          macro(FPTruncInst)                \
+          macro(IntToPtrInst)               \
+          macro(PtrToIntInst)               \
+          macro(SExtInst)                   \
+          macro(SIToFPInst)                 \
+          macro(TruncInst)                  \
+          macro(UIToFPInst)                 \
+          macro(ZExtInst)                   \
+        macro(ExtractValueInst)             \
+        macro(LoadInst)                     \
+        macro(VAArgInst)
 
 /**
  * @defgroup LLVMCCoreValueGeneral General APIs
@@ -1606,6 +1651,10 @@ LLVMBool LLVMIsThreadLocal(LLVMValueRef GlobalVar);
 void LLVMSetThreadLocal(LLVMValueRef GlobalVar, LLVMBool IsThreadLocal);
 LLVMBool LLVMIsGlobalConstant(LLVMValueRef GlobalVar);
 void LLVMSetGlobalConstant(LLVMValueRef GlobalVar, LLVMBool IsConstant);
+LLVMThreadLocalMode LLVMGetThreadLocalMode(LLVMValueRef GlobalVar);
+void LLVMSetThreadLocalMode(LLVMValueRef GlobalVar, LLVMThreadLocalMode Mode);
+LLVMBool LLVMIsExternallyInitialized(LLVMValueRef GlobalVar);
+void LLVMSetExternallyInitialized(LLVMValueRef GlobalVar, LLVMBool IsExtInit);
 
 /**
  * @}
@@ -1694,6 +1743,13 @@ void LLVMSetGC(LLVMValueRef Fn, const char *Name);
 void LLVMAddFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA);
 
 /**
+ * Add a target-dependent attribute to a function
+ * @see llvm::AttrBuilder::addAttribute()
+ */
+void LLVMAddTargetDependentFunctionAttr(LLVMValueRef Fn, const char *A,
+                                        const char *V);
+
+/**
  * Obtain an attribute from a function.
  *
  * @see llvm::Function::getAttributes()
@@ -2515,6 +2571,10 @@ LLVMValueRef LLVMBuildIsNotNull(LLVMBuilderRef, LLVMValueRef Val,
                                 const char *Name);
 LLVMValueRef LLVMBuildPtrDiff(LLVMBuilderRef, LLVMValueRef LHS,
                               LLVMValueRef RHS, const char *Name);
+LLVMValueRef LLVMBuildAtomicRMW(LLVMBuilderRef B,LLVMAtomicRMWBinOp op,  
+                                LLVMValueRef PTR, LLVMValueRef Val, 
+                                LLVMAtomicOrdering ordering, 
+                                LLVMBool singleThread);
 
 /**
  * @}
@@ -2560,6 +2620,8 @@ LLVMMemoryBufferRef LLVMCreateMemoryBufferWithMemoryRange(const char *InputData,
 LLVMMemoryBufferRef LLVMCreateMemoryBufferWithMemoryRangeCopy(const char *InputData,
                                                               size_t InputDataLength,
                                                               const char *BufferName);
+const char *LLVMGetBufferStart(LLVMMemoryBufferRef MemBuf);
+size_t LLVMGetBufferSize(LLVMMemoryBufferRef MemBuf);
 void LLVMDisposeMemoryBuffer(LLVMMemoryBufferRef MemBuf);
 
 /**
@@ -2669,100 +2731,6 @@ LLVMBool LLVMIsMultithreaded();
 
 #ifdef __cplusplus
 }
-
-namespace llvm {
-  class MemoryBuffer;
-  class PassManagerBase;
-  
-  #define DEFINE_SIMPLE_CONVERSION_FUNCTIONS(ty, ref)   \
-    inline ty *unwrap(ref P) {                          \
-      return reinterpret_cast<ty*>(P);                  \
-    }                                                   \
-                                                        \
-    inline ref wrap(const ty *P) {                      \
-      return reinterpret_cast<ref>(const_cast<ty*>(P)); \
-    }
-  
-  #define DEFINE_ISA_CONVERSION_FUNCTIONS(ty, ref)  \
-    DEFINE_SIMPLE_CONVERSION_FUNCTIONS(ty, ref)         \
-                                                        \
-    template<typename T>                                \
-    inline T *unwrap(ref P) {                           \
-      return cast<T>(unwrap(P));                        \
-    }
-  
-  #define DEFINE_STDCXX_CONVERSION_FUNCTIONS(ty, ref)   \
-    DEFINE_SIMPLE_CONVERSION_FUNCTIONS(ty, ref)         \
-                                                        \
-    template<typename T>                                \
-    inline T *unwrap(ref P) {                           \
-      T *Q = (T*)unwrap(P);                             \
-      assert(Q && "Invalid cast!");                     \
-      return Q;                                         \
-    }
-  
-  DEFINE_ISA_CONVERSION_FUNCTIONS   (Type,               LLVMTypeRef          )
-  DEFINE_ISA_CONVERSION_FUNCTIONS   (Value,              LLVMValueRef         )
-  DEFINE_SIMPLE_CONVERSION_FUNCTIONS(Module,             LLVMModuleRef        )
-  DEFINE_SIMPLE_CONVERSION_FUNCTIONS(BasicBlock,         LLVMBasicBlockRef    )
-  DEFINE_SIMPLE_CONVERSION_FUNCTIONS(IRBuilder<>,        LLVMBuilderRef       )
-  DEFINE_SIMPLE_CONVERSION_FUNCTIONS(MemoryBuffer,       LLVMMemoryBufferRef  )
-  DEFINE_SIMPLE_CONVERSION_FUNCTIONS(LLVMContext,        LLVMContextRef       )
-  DEFINE_SIMPLE_CONVERSION_FUNCTIONS(Use,                LLVMUseRef           )
-  DEFINE_STDCXX_CONVERSION_FUNCTIONS(PassManagerBase,    LLVMPassManagerRef   )
-  DEFINE_STDCXX_CONVERSION_FUNCTIONS(PassRegistry,       LLVMPassRegistryRef  )
-  /* LLVMModuleProviderRef exists for historical reasons, but now just holds a
-   * Module.
-   */
-  inline Module *unwrap(LLVMModuleProviderRef MP) {
-    return reinterpret_cast<Module*>(MP);
-  }
-  
-  #undef DEFINE_STDCXX_CONVERSION_FUNCTIONS
-  #undef DEFINE_ISA_CONVERSION_FUNCTIONS
-  #undef DEFINE_SIMPLE_CONVERSION_FUNCTIONS
-
-  /* Specialized opaque context conversions.
-   */
-  inline LLVMContext **unwrap(LLVMContextRef* Tys) {
-    return reinterpret_cast<LLVMContext**>(Tys);
-  }
-  
-  inline LLVMContextRef *wrap(const LLVMContext **Tys) {
-    return reinterpret_cast<LLVMContextRef*>(const_cast<LLVMContext**>(Tys));
-  }
-  
-  /* Specialized opaque type conversions.
-   */
-  inline Type **unwrap(LLVMTypeRef* Tys) {
-    return reinterpret_cast<Type**>(Tys);
-  }
-  
-  inline LLVMTypeRef *wrap(Type **Tys) {
-    return reinterpret_cast<LLVMTypeRef*>(const_cast<Type**>(Tys));
-  }
-  
-  /* Specialized opaque value conversions.
-   */ 
-  inline Value **unwrap(LLVMValueRef *Vals) {
-    return reinterpret_cast<Value**>(Vals);
-  }
-  
-  template<typename T>
-  inline T **unwrap(LLVMValueRef *Vals, unsigned Length) {
-    #ifdef DEBUG
-    for (LLVMValueRef *I = Vals, *E = Vals + Length; I != E; ++I)
-      cast<T>(*I);
-    #endif
-    (void)Length;
-    return reinterpret_cast<T**>(Vals);
-  }
-  
-  inline LLVMValueRef *wrap(const Value **Vals) {
-    return reinterpret_cast<LLVMValueRef*>(const_cast<Value**>(Vals));
-  }
-}
-
 #endif /* !defined(__cplusplus) */
 
 #endif /* !defined(LLVM_C_CORE_H) */
diff --git a/include/llvm-c/ExecutionEngine.h b/include/llvm-c/ExecutionEngine.h
index cb77bb2..8fae77d 100644
--- a/include/llvm-c/ExecutionEngine.h
+++ b/include/llvm-c/ExecutionEngine.h
@@ -21,6 +21,7 @@
 
 #include "llvm-c/Core.h"
 #include "llvm-c/Target.h"
+#include "llvm-c/TargetMachine.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -34,11 +35,19 @@ extern "C" {
  */
 
 void LLVMLinkInJIT(void);
+void LLVMLinkInMCJIT(void);
 void LLVMLinkInInterpreter(void);
 
 typedef struct LLVMOpaqueGenericValue *LLVMGenericValueRef;
 typedef struct LLVMOpaqueExecutionEngine *LLVMExecutionEngineRef;
 
+struct LLVMMCJITCompilerOptions {
+  unsigned OptLevel;
+  LLVMCodeModel CodeModel;
+  LLVMBool NoFramePointerElim;
+  LLVMBool EnableFastISel;
+};
+
 /*===-- Operations on generic values --------------------------------------===*/
 
 LLVMGenericValueRef LLVMCreateGenericValueOfInt(LLVMTypeRef Ty,
@@ -75,6 +84,31 @@ LLVMBool LLVMCreateJITCompilerForModule(LLVMExecutionEngineRef *OutJIT,
                                         unsigned OptLevel,
                                         char **OutError);
 
+void LLVMInitializeMCJITCompilerOptions(
+  struct LLVMMCJITCompilerOptions *Options, size_t SizeOfOptions);
+
+/**
+ * Create an MCJIT execution engine for a module, with the given options. It is
+ * the responsibility of the caller to ensure that all fields in Options up to
+ * the given SizeOfOptions are initialized. It is correct to pass a smaller
+ * value of SizeOfOptions that omits some fields. The canonical way of using
+ * this is:
+ *
+ * struct LLVMMCJITCompilerOptions options;
+ * LLVMInitializeMCJITCompilerOptions(&options, sizeof(options));
+ * ... fill in those options you care about
+ * LLVMCreateMCJITCompilerForModule(&jit, mod, &options, sizeof(options),
+ *                                  &error);
+ *
+ * Note that this is also correct, though possibly suboptimal:
+ *
+ * LLVMCreateMCJITCompilerForModule(&jit, mod, 0, 0, &error);
+ */
+LLVMBool LLVMCreateMCJITCompilerForModule(
+  LLVMExecutionEngineRef *OutJIT, LLVMModuleRef M,
+  struct LLVMMCJITCompilerOptions *Options, size_t SizeOfOptions,
+  char **OutError);
+
 /** Deprecated: Use LLVMCreateExecutionEngineForModule instead. */
 LLVMBool LLVMCreateExecutionEngine(LLVMExecutionEngineRef *OutEE,
                                    LLVMModuleProviderRef MP,
@@ -123,7 +157,8 @@ LLVMBool LLVMRemoveModuleProvider(LLVMExecutionEngineRef EE,
 LLVMBool LLVMFindFunction(LLVMExecutionEngineRef EE, const char *Name,
                           LLVMValueRef *OutFn);
 
-void *LLVMRecompileAndRelinkFunction(LLVMExecutionEngineRef EE, LLVMValueRef Fn);
+void *LLVMRecompileAndRelinkFunction(LLVMExecutionEngineRef EE,
+                                     LLVMValueRef Fn);
 
 LLVMTargetDataRef LLVMGetExecutionEngineTargetData(LLVMExecutionEngineRef EE);
 
@@ -137,27 +172,7 @@ void *LLVMGetPointerToGlobal(LLVMExecutionEngineRef EE, LLVMValueRef Global);
  */
 
 #ifdef __cplusplus
-}
-
-namespace llvm {
-  struct GenericValue;
-  class ExecutionEngine;
-  
-  #define DEFINE_SIMPLE_CONVERSION_FUNCTIONS(ty, ref)   \
-    inline ty *unwrap(ref P) {                          \
-      return reinterpret_cast<ty*>(P);                  \
-    }                                                   \
-                                                        \
-    inline ref wrap(const ty *P) {                      \
-      return reinterpret_cast<ref>(const_cast<ty*>(P)); \
-    }
-  
-  DEFINE_SIMPLE_CONVERSION_FUNCTIONS(GenericValue,    LLVMGenericValueRef   )
-  DEFINE_SIMPLE_CONVERSION_FUNCTIONS(ExecutionEngine, LLVMExecutionEngineRef)
-  
-  #undef DEFINE_SIMPLE_CONVERSION_FUNCTIONS
-}
-  
+}  
 #endif /* defined(__cplusplus) */
 
 #endif
diff --git a/include/llvm-c/Object.h b/include/llvm-c/Object.h
index e2dad62..ecccfee 100644
--- a/include/llvm-c/Object.h
+++ b/include/llvm-c/Object.h
@@ -23,8 +23,6 @@
 #include "llvm/Config/llvm-config.h"
 
 #ifdef __cplusplus
-#include "llvm/Object/ObjectFile.h"
-
 extern "C" {
 #endif
 
@@ -99,50 +97,6 @@ const char *LLVMGetRelocationValueString(LLVMRelocationIteratorRef RI);
 
 #ifdef __cplusplus
 }
-
-namespace llvm {
-  namespace object {
-    inline ObjectFile *unwrap(LLVMObjectFileRef OF) {
-      return reinterpret_cast<ObjectFile*>(OF);
-    }
-
-    inline LLVMObjectFileRef wrap(const ObjectFile *OF) {
-      return reinterpret_cast<LLVMObjectFileRef>(const_cast<ObjectFile*>(OF));
-    }
-
-    inline section_iterator *unwrap(LLVMSectionIteratorRef SI) {
-      return reinterpret_cast<section_iterator*>(SI);
-    }
-
-    inline LLVMSectionIteratorRef
-    wrap(const section_iterator *SI) {
-      return reinterpret_cast<LLVMSectionIteratorRef>
-        (const_cast<section_iterator*>(SI));
-    }
-
-    inline symbol_iterator *unwrap(LLVMSymbolIteratorRef SI) {
-      return reinterpret_cast<symbol_iterator*>(SI);
-    }
-
-    inline LLVMSymbolIteratorRef
-    wrap(const symbol_iterator *SI) {
-      return reinterpret_cast<LLVMSymbolIteratorRef>
-        (const_cast<symbol_iterator*>(SI));
-    }
-
-    inline relocation_iterator *unwrap(LLVMRelocationIteratorRef SI) {
-      return reinterpret_cast<relocation_iterator*>(SI);
-    }
-
-    inline LLVMRelocationIteratorRef
-    wrap(const relocation_iterator *SI) {
-      return reinterpret_cast<LLVMRelocationIteratorRef>
-        (const_cast<relocation_iterator*>(SI));
-    }
-
-  }
-}
-
 #endif /* defined(__cplusplus) */
 
 #endif
diff --git a/include/llvm-c/Target.h b/include/llvm-c/Target.h
index 57abfa0..80fc3e5 100644
--- a/include/llvm-c/Target.h
+++ b/include/llvm-c/Target.h
@@ -235,29 +235,6 @@ void LLVMDisposeTargetData(LLVMTargetDataRef);
 
 #ifdef __cplusplus
 }
-
-namespace llvm {
-  class DataLayout;
-  class TargetLibraryInfo;
-
-  inline DataLayout *unwrap(LLVMTargetDataRef P) {
-    return reinterpret_cast<DataLayout*>(P);
-  }
-  
-  inline LLVMTargetDataRef wrap(const DataLayout *P) {
-    return reinterpret_cast<LLVMTargetDataRef>(const_cast<DataLayout*>(P));
-  }
-
-  inline TargetLibraryInfo *unwrap(LLVMTargetLibraryInfoRef P) {
-    return reinterpret_cast<TargetLibraryInfo*>(P);
-  }
-
-  inline LLVMTargetLibraryInfoRef wrap(const TargetLibraryInfo *P) {
-    TargetLibraryInfo *X = const_cast<TargetLibraryInfo*>(P);
-    return reinterpret_cast<LLVMTargetLibraryInfoRef>(X);
-  }
-}
-
 #endif /* defined(__cplusplus) */
 
 #endif
diff --git a/include/llvm-c/TargetMachine.h b/include/llvm-c/TargetMachine.h
index 691abdf..5e35595 100644
--- a/include/llvm-c/TargetMachine.h
+++ b/include/llvm-c/TargetMachine.h
@@ -25,7 +25,7 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
-typedef struct LLVMTargetMachine *LLVMTargetMachineRef;
+typedef struct LLVMOpaqueTargetMachine *LLVMTargetMachineRef;
 typedef struct LLVMTarget *LLVMTargetRef;
 
 typedef enum {
@@ -114,30 +114,11 @@ LLVMTargetDataRef LLVMGetTargetMachineData(LLVMTargetMachineRef T);
 LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M,
   char *Filename, LLVMCodeGenFileType codegen, char **ErrorMessage);
 
-
-
-
+/** Compile the LLVM IR stored in \p M and store the result in \p OutMemBuf. */
+LLVMBool LLVMTargetMachineEmitToMemoryBuffer(LLVMTargetMachineRef T, LLVMModuleRef M,
+  LLVMCodeGenFileType codegen, char** ErrorMessage, LLVMMemoryBufferRef *OutMemBuf);
 #ifdef __cplusplus
 }
-
-namespace llvm {
-  class TargetMachine;
-  class Target;
-
-  inline TargetMachine *unwrap(LLVMTargetMachineRef P) {
-    return reinterpret_cast<TargetMachine*>(P);
-  }
-  inline Target *unwrap(LLVMTargetRef P) {
-    return reinterpret_cast<Target*>(P);
-  }
-  inline LLVMTargetMachineRef wrap(const TargetMachine *P) {
-    return reinterpret_cast<LLVMTargetMachineRef>(
-      const_cast<TargetMachine*>(P));
-  }
-  inline LLVMTargetRef wrap(const Target * P) {
-    return reinterpret_cast<LLVMTargetRef>(const_cast<Target*>(P));
-  }
-}
 #endif
 
 #endif
diff --git a/include/llvm-c/Transforms/PassManagerBuilder.h b/include/llvm-c/Transforms/PassManagerBuilder.h
index 82e513d..545f8aa 100644
--- a/include/llvm-c/Transforms/PassManagerBuilder.h
+++ b/include/llvm-c/Transforms/PassManagerBuilder.h
@@ -86,16 +86,6 @@ void LLVMPassManagerBuilderPopulateLTOPassManager(LLVMPassManagerBuilderRef PMB,
 
 #ifdef __cplusplus
 }
-
-namespace llvm {
-  inline PassManagerBuilder *unwrap(LLVMPassManagerBuilderRef P) {
-    return reinterpret_cast<PassManagerBuilder*>(P);
-  }
-
-  inline LLVMPassManagerBuilderRef wrap(PassManagerBuilder *P) {
-    return reinterpret_cast<LLVMPassManagerBuilderRef>(P);
-  }
-}
 #endif
 
 #endif
diff --git a/include/llvm-c/Transforms/Vectorize.h b/include/llvm-c/Transforms/Vectorize.h
index 68a9bdd..c9102da 100644
--- a/include/llvm-c/Transforms/Vectorize.h
+++ b/include/llvm-c/Transforms/Vectorize.h
@@ -39,6 +39,9 @@ void LLVMAddBBVectorizePass(LLVMPassManagerRef PM);
 /** See llvm::createLoopVectorizePass function. */
 void LLVMAddLoopVectorizePass(LLVMPassManagerRef PM);
 
+/** See llvm::createSLPVectorizerPass function. */
+void LLVMAddSLPVectorizePass(LLVMPassManagerRef PM);
+
 /**
  * @}
  */
diff --git a/include/llvm/ADT/ArrayRef.h b/include/llvm/ADT/ArrayRef.h
index c555c1c..d4152ec 100644
--- a/include/llvm/ADT/ArrayRef.h
+++ b/include/llvm/ADT/ArrayRef.h
@@ -10,6 +10,7 @@
 #ifndef LLVM_ADT_ARRAYREF_H
 #define LLVM_ADT_ARRAYREF_H
 
+#include "llvm/ADT/None.h"
 #include "llvm/ADT/SmallVector.h"
 #include <vector>
 
@@ -49,6 +50,9 @@ namespace llvm {
     /// Construct an empty ArrayRef.
     /*implicit*/ ArrayRef() : Data(0), Length(0) {}
 
+    /// Construct an empty ArrayRef from None.
+    /*implicit*/ ArrayRef(NoneType) : Data(0), Length(0) {}
+
     /// Construct an ArrayRef from a single element.
     /*implicit*/ ArrayRef(const T &OneElt)
       : Data(&OneElt), Length(1) {}
@@ -174,9 +178,12 @@ namespace llvm {
   public:
     typedef T *iterator;
 
-    /// Construct an empty ArrayRef.
+    /// Construct an empty MutableArrayRef.
     /*implicit*/ MutableArrayRef() : ArrayRef<T>() {}
 
+    /// Construct an empty MutableArrayRef from None.
+    /*implicit*/ MutableArrayRef(NoneType) : ArrayRef<T>() {}
+
     /// Construct an MutableArrayRef from a single element.
     /*implicit*/ MutableArrayRef(T &OneElt) : ArrayRef<T>(OneElt) {}
 
diff --git a/include/llvm/ADT/DenseMap.h b/include/llvm/ADT/DenseMap.h
index d410619..31fd6d8 100644
--- a/include/llvm/ADT/DenseMap.h
+++ b/include/llvm/ADT/DenseMap.h
@@ -618,7 +618,7 @@ public:
     unsigned OldNumBuckets = NumBuckets;
     BucketT *OldBuckets = Buckets;
 
-    allocateBuckets(std::max<unsigned>(64, NextPowerOf2(AtLeast-1)));
+    allocateBuckets(std::max<unsigned>(64, static_cast<unsigned>(NextPowerOf2(AtLeast-1))));
     assert(Buckets);
     if (!OldBuckets) {
       this->BaseT::initEmpty();
diff --git a/include/llvm/ADT/Hashing.h b/include/llvm/ADT/Hashing.h
index cda31a2..e434417 100644
--- a/include/llvm/ADT/Hashing.h
+++ b/include/llvm/ADT/Hashing.h
@@ -151,7 +151,7 @@ namespace detail {
 inline uint64_t fetch64(const char *p) {
   uint64_t result;
   memcpy(&result, p, sizeof(result));
-  if (sys::isBigEndianHost())
+  if (sys::IsBigEndianHost)
     return sys::SwapByteOrder(result);
   return result;
 }
@@ -159,7 +159,7 @@ inline uint64_t fetch64(const char *p) {
 inline uint32_t fetch32(const char *p) {
   uint32_t result;
   memcpy(&result, p, sizeof(result));
-  if (sys::isBigEndianHost())
+  if (sys::IsBigEndianHost)
     return sys::SwapByteOrder(result);
   return result;
 }
diff --git a/include/llvm/ADT/InMemoryStruct.h b/include/llvm/ADT/InMemoryStruct.h
deleted file mode 100644
index a560845..0000000
--- a/include/llvm/ADT/InMemoryStruct.h
+++ /dev/null
@@ -1,77 +0,0 @@
-//===- InMemoryStruct.h - Indirect Struct Access Smart Pointer --*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_ADT_INMEMORYSTRUCT_H
-#define LLVM_ADT_INMEMORYSTRUCT_H
-
-#include <cassert>
-
-namespace llvm {
-
-/// \brief Helper object for abstracting access to an in-memory structure which
-/// may require some kind of temporary storage.
-///
-/// This class is designed to be used for accessing file data structures which
-/// in the common case can be accessed from a direct pointer to a memory mapped
-/// object, but which in some cases may require indirect access to a temporary
-/// structure (which, for example, may have undergone endianness translation).
-template<typename T>
-class InMemoryStruct {
-  typedef T value_type;
-  typedef value_type &reference;
-  typedef value_type *pointer;
-  typedef const value_type &const_reference;
-  typedef const value_type *const_pointer;
-
-  /// \brief The smart pointer target.
-  value_type *Target;
-
-  /// \brief A temporary object which can be used as a target of the smart
-  /// pointer.
-  value_type Contents;
-
-private:
-
-public:
-  InMemoryStruct() : Target(0) {}
-  InMemoryStruct(reference Value) : Target(&Contents), Contents(Value) {}
-  InMemoryStruct(pointer Value) : Target(Value) {}
-  InMemoryStruct(const InMemoryStruct<T> &Value) { *this = Value; }
-  
-  void operator=(const InMemoryStruct<T> &Value) {
-    if (Value.Target != &Value.Contents) {
-      Target = Value.Target;
-    } else {
-      Target = &Contents;
-      Contents = Value.Contents;
-    }
-  }
-  
-  const_reference operator*() const {
-    assert(Target && "Cannot dereference null pointer");
-    return *Target;
-  }
-  reference operator*() {
-    assert(Target && "Cannot dereference null pointer");
-    return *Target;
-  }
-
-  const_pointer operator->() const {
-    return Target;
-  }
-  pointer operator->() {
-    return Target;
-  }
-
-  operator bool() const { return Target != 0; }
-};
-
-}
-
-#endif
diff --git a/include/llvm/ADT/PointerIntPair.h b/include/llvm/ADT/PointerIntPair.h
index cce2efb..0299a83 100644
--- a/include/llvm/ADT/PointerIntPair.h
+++ b/include/llvm/ADT/PointerIntPair.h
@@ -29,7 +29,7 @@ struct DenseMapInfo;
 /// on the number of bits available according to PointerLikeTypeTraits for the
 /// type.
 ///
-/// Note that PointerIntPair always puts the Int part in the highest bits
+/// Note that PointerIntPair always puts the IntVal part in the highest bits
 /// possible.  For example, PointerIntPair<void*, 1, bool> will put the bit for
 /// the bool into bit #2, not bit #0, which allows the low two bits to be used
 /// for something else.  For example, this allows:
@@ -57,13 +57,13 @@ class PointerIntPair {
   };
 public:
   PointerIntPair() : Value(0) {}
-  PointerIntPair(PointerTy Ptr, IntType Int) {
+  PointerIntPair(PointerTy PtrVal, IntType IntVal) {
     assert(IntBits <= PtrTraits::NumLowBitsAvailable &&
            "PointerIntPair formed with integer size too large for pointer");
-    setPointerAndInt(Ptr, Int);
+    setPointerAndInt(PtrVal, IntVal);
   }
-  explicit PointerIntPair(PointerTy Ptr) {
-    initWithPointer(Ptr);
+  explicit PointerIntPair(PointerTy PtrVal) {
+    initWithPointer(PtrVal);
   }
 
   PointerTy getPointer() const {
@@ -75,41 +75,41 @@ public:
     return (IntType)((Value >> IntShift) & IntMask);
   }
 
-  void setPointer(PointerTy Ptr) {
-    intptr_t PtrVal
-      = reinterpret_cast<intptr_t>(PtrTraits::getAsVoidPointer(Ptr));
-    assert((PtrVal & ((1 << PtrTraits::NumLowBitsAvailable)-1)) == 0 &&
+  void setPointer(PointerTy PtrVal) {
+    intptr_t PtrWord
+      = reinterpret_cast<intptr_t>(PtrTraits::getAsVoidPointer(PtrVal));
+    assert((PtrWord & ((1 << PtrTraits::NumLowBitsAvailable)-1)) == 0 &&
            "Pointer is not sufficiently aligned");
     // Preserve all low bits, just update the pointer.
-    Value = PtrVal | (Value & ~PointerBitMask);
+    Value = PtrWord | (Value & ~PointerBitMask);
   }
 
-  void setInt(IntType Int) {
-    intptr_t IntVal = Int;
-    assert(IntVal < (1 << IntBits) && "Integer too large for field");
+  void setInt(IntType IntVal) {
+    intptr_t IntWord = static_cast<intptr_t>(IntVal);
+    assert(IntWord < (1 << IntBits) && "Integer too large for field");
     
     // Preserve all bits other than the ones we are updating.
     Value &= ~ShiftedIntMask;     // Remove integer field.
-    Value |= IntVal << IntShift;  // Set new integer.
+    Value |= IntWord << IntShift;  // Set new integer.
   }
 
-  void initWithPointer(PointerTy Ptr) {
-    intptr_t PtrVal
-      = reinterpret_cast<intptr_t>(PtrTraits::getAsVoidPointer(Ptr));
-    assert((PtrVal & ((1 << PtrTraits::NumLowBitsAvailable)-1)) == 0 &&
+  void initWithPointer(PointerTy PtrVal) {
+    intptr_t PtrWord
+      = reinterpret_cast<intptr_t>(PtrTraits::getAsVoidPointer(PtrVal));
+    assert((PtrWord & ((1 << PtrTraits::NumLowBitsAvailable)-1)) == 0 &&
            "Pointer is not sufficiently aligned");
-    Value = PtrVal;
+    Value = PtrWord;
   }
 
-  void setPointerAndInt(PointerTy Ptr, IntType Int) {
-    intptr_t PtrVal
-      = reinterpret_cast<intptr_t>(PtrTraits::getAsVoidPointer(Ptr));
-    assert((PtrVal & ((1 << PtrTraits::NumLowBitsAvailable)-1)) == 0 &&
+  void setPointerAndInt(PointerTy PtrVal, IntType IntVal) {
+    intptr_t PtrWord
+      = reinterpret_cast<intptr_t>(PtrTraits::getAsVoidPointer(PtrVal));
+    assert((PtrWord & ((1 << PtrTraits::NumLowBitsAvailable)-1)) == 0 &&
            "Pointer is not sufficiently aligned");
-    intptr_t IntVal = Int;
-    assert(IntVal < (1 << IntBits) && "Integer too large for field");
+    intptr_t IntWord = static_cast<intptr_t>(IntVal);
+    assert(IntWord < (1 << IntBits) && "Integer too large for field");
 
-    Value = PtrVal | (IntVal << IntShift);
+    Value = PtrWord | (IntWord << IntShift);
   }
 
   PointerTy const *getAddrOfPointer() const {
diff --git a/include/llvm/ADT/StringRef.h b/include/llvm/ADT/StringRef.h
index 224855e..d013d05 100644
--- a/include/llvm/ADT/StringRef.h
+++ b/include/llvm/ADT/StringRef.h
@@ -260,7 +260,7 @@ namespace llvm {
 
     /// Find the first character in the string that is \p C, or npos if not
     /// found. Same as find.
-    size_type find_first_of(char C, size_t From = 0) const {
+    size_t find_first_of(char C, size_t From = 0) const {
       return find(C, From);
     }
 
@@ -268,21 +268,21 @@ namespace llvm {
     /// not found.
     ///
     /// Complexity: O(size() + Chars.size())
-    size_type find_first_of(StringRef Chars, size_t From = 0) const;
+    size_t find_first_of(StringRef Chars, size_t From = 0) const;
 
     /// Find the first character in the string that is not \p C or npos if not
     /// found.
-    size_type find_first_not_of(char C, size_t From = 0) const;
+    size_t find_first_not_of(char C, size_t From = 0) const;
 
     /// Find the first character in the string that is not in the string
     /// \p Chars, or npos if not found.
     ///
     /// Complexity: O(size() + Chars.size())
-    size_type find_first_not_of(StringRef Chars, size_t From = 0) const;
+    size_t find_first_not_of(StringRef Chars, size_t From = 0) const;
 
     /// Find the last character in the string that is \p C, or npos if not
     /// found.
-    size_type find_last_of(char C, size_t From = npos) const {
+    size_t find_last_of(char C, size_t From = npos) const {
       return rfind(C, From);
     }
 
@@ -290,17 +290,17 @@ namespace llvm {
     /// found.
     ///
     /// Complexity: O(size() + Chars.size())
-    size_type find_last_of(StringRef Chars, size_t From = npos) const;
+    size_t find_last_of(StringRef Chars, size_t From = npos) const;
 
     /// Find the last character in the string that is not \p C, or npos if not
     /// found.
-    size_type find_last_not_of(char C, size_t From = npos) const;
+    size_t find_last_not_of(char C, size_t From = npos) const;
 
     /// Find the last character in the string that is not in \p Chars, or
     /// npos if not found.
     ///
     /// Complexity: O(size() + Chars.size())
-    size_type find_last_not_of(StringRef Chars, size_t From = npos) const;
+    size_t find_last_not_of(StringRef Chars, size_t From = npos) const;
 
     /// @}
     /// @name Helpful Algorithms
@@ -390,14 +390,14 @@ namespace llvm {
 
     /// Return a StringRef equal to 'this' but with the first \p N elements
     /// dropped.
-    StringRef drop_front(unsigned N = 1) const {
+    StringRef drop_front(size_t N = 1) const {
       assert(size() >= N && "Dropping more elements than exist");
       return substr(N);
     }
 
     /// Return a StringRef equal to 'this' but with the last \p N elements
     /// dropped.
-    StringRef drop_back(unsigned N = 1) const {
+    StringRef drop_back(size_t N = 1) const {
       assert(size() >= N && "Dropping more elements than exist");
       return substr(0, size()-N);
     }
diff --git a/include/llvm/ADT/Triple.h b/include/llvm/ADT/Triple.h
index 8fac222..3a72e87 100644
--- a/include/llvm/ADT/Triple.h
+++ b/include/llvm/ADT/Triple.h
@@ -43,7 +43,7 @@ public:
   enum ArchType {
     UnknownArch,
 
-    arm,     // ARM; arm, armv.*, xscale
+    arm,     // ARM: arm, armv.*, xscale
     aarch64, // AArch64: aarch64
     hexagon, // Hexagon: hexagon
     mips,    // MIPS: mips, mipsallegrex
@@ -56,6 +56,7 @@ public:
     r600,    // R600: AMD GPUs HD2XXX - HD6XXX
     sparc,   // Sparc: sparc
     sparcv9, // Sparcv9: Sparcv9
+    systemz, // SystemZ: s390x
     tce,     // TCE (http://tce.cs.tut.fi/): tce
     thumb,   // Thumb: thumb, thumbv.*
     x86,     // X86: i[3-9]86
diff --git a/include/llvm/ADT/Twine.h b/include/llvm/ADT/Twine.h
index cc290d5..e16c6b4 100644
--- a/include/llvm/ADT/Twine.h
+++ b/include/llvm/ADT/Twine.h
@@ -236,7 +236,7 @@ namespace llvm {
     /// getLHSKind - Get the NodeKind of the left-hand side.
     NodeKind getLHSKind() const { return (NodeKind) LHSKind; }
 
-    /// getRHSKind - Get the NodeKind of the left-hand side.
+    /// getRHSKind - Get the NodeKind of the right-hand side.
     NodeKind getRHSKind() const { return (NodeKind) RHSKind; }
 
     /// printOneChild - Print one child from a twine.
diff --git a/include/llvm/Analysis/MemoryBuiltins.h b/include/llvm/Analysis/MemoryBuiltins.h
index 63262eb..4883383 100644
--- a/include/llvm/Analysis/MemoryBuiltins.h
+++ b/include/llvm/Analysis/MemoryBuiltins.h
@@ -146,14 +146,6 @@ static inline CallInst *isFreeCall(Value *I, const TargetLibraryInfo *TLI) {
 bool getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout *TD,
                    const TargetLibraryInfo *TLI, bool RoundToAlign = false);
 
-/// \brief Compute the size of the underlying object pointed by Ptr. Returns
-/// true and the object size in Size if successful, and false otherwise.
-/// If RoundToAlign is true, then Size is rounded up to the aligment of allocas,
-/// byval arguments, and global variables.
-bool getUnderlyingObjectSize(const Value *Ptr, uint64_t &Size,
-                             const DataLayout *TD, const TargetLibraryInfo *TLI,
-                             bool RoundToAlign = false);
-
 
 
 typedef std::pair<APInt, APInt> SizeOffsetType;
@@ -163,14 +155,12 @@ typedef std::pair<APInt, APInt> SizeOffsetType;
 class ObjectSizeOffsetVisitor
   : public InstVisitor<ObjectSizeOffsetVisitor, SizeOffsetType> {
 
-  typedef DenseMap<const Value*, SizeOffsetType> CacheMapTy;
-
   const DataLayout *TD;
   const TargetLibraryInfo *TLI;
   bool RoundToAlign;
   unsigned IntTyBits;
   APInt Zero;
-  CacheMapTy CacheMap;
+  SmallPtrSet<Instruction *, 8> SeenInsts;
 
   APInt align(APInt Size, uint64_t Align);
 
diff --git a/include/llvm/Analysis/RegionInfo.h b/include/llvm/Analysis/RegionInfo.h
index 69cc293..e873195 100644
--- a/include/llvm/Analysis/RegionInfo.h
+++ b/include/llvm/Analysis/RegionInfo.h
@@ -266,6 +266,24 @@ public:
   /// @param BB  The new exit basic block of the region.
   void replaceExit(BasicBlock *BB);
 
+  /// @brief Recursively replace the entry basic block of the region.
+  ///
+  /// This function replaces the entry basic block with a new basic block. It
+  /// also updates all child regions that have the same entry basic block as
+  /// this region.
+  ///
+  /// @param NewEntry The new entry basic block.
+  void replaceEntryRecursive(BasicBlock *NewEntry);
+
+  /// @brief Recursively replace the exit basic block of the region.
+  ///
+  /// This function replaces the exit basic block with a new basic block. It
+  /// also updates all child regions that have the same exit basic block as
+  /// this region.
+  ///
+  /// @param NewExit The new exit basic block.
+  void replaceExitRecursive(BasicBlock *NewExit);
+
   /// @brief Get the exit BasicBlock of the Region.
   /// @return The exit BasicBlock of the Region, NULL if this is the TopLevel
   ///         Region.
diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h
index 306549f..349447f 100644
--- a/include/llvm/Analysis/ScalarEvolution.h
+++ b/include/llvm/Analysis/ScalarEvolution.h
@@ -453,7 +453,8 @@ namespace llvm {
     ExitLimit ComputeExitLimitFromCond(const Loop *L,
                                        Value *ExitCond,
                                        BasicBlock *TBB,
-                                       BasicBlock *FBB);
+                                       BasicBlock *FBB,
+                                       bool IsSubExpr);
 
     /// ComputeExitLimitFromICmp - Compute the number of times the backedge of
     /// the specified loop will execute if its exit condition were a conditional
@@ -461,7 +462,8 @@ namespace llvm {
     ExitLimit ComputeExitLimitFromICmp(const Loop *L,
                                        ICmpInst *ExitCond,
                                        BasicBlock *TBB,
-                                       BasicBlock *FBB);
+                                       BasicBlock *FBB,
+                                       bool IsSubExpr);
 
     /// ComputeLoadConstantCompareExitLimit - Given an exit condition
     /// of 'icmp op load X, cst', try to see if we can compute the
@@ -483,7 +485,7 @@ namespace llvm {
     /// HowFarToZero - Return the number of times an exit condition comparing
     /// the specified value to zero will execute.  If not computable, return
     /// CouldNotCompute.
-    ExitLimit HowFarToZero(const SCEV *V, const Loop *L);
+    ExitLimit HowFarToZero(const SCEV *V, const Loop *L, bool IsSubExpr);
 
     /// HowFarToNonZero - Return the number of times an exit condition checking
     /// the specified value for nonzero will execute.  If not computable, return
@@ -495,7 +497,7 @@ namespace llvm {
     /// computable, return CouldNotCompute. isSigned specifies whether the
     /// less-than is signed.
     ExitLimit HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
-                               const Loop *L, bool isSigned);
+                               const Loop *L, bool isSigned, bool IsSubExpr);
 
     /// getPredecessorWithUniqueSuccessorForBB - Return a predecessor of BB
     /// (which may not be an immediate predecessor) which has exactly one
diff --git a/include/llvm/CodeGen/AsmPrinter.h b/include/llvm/CodeGen/AsmPrinter.h
index e0a6e3f..c2fd6ce 100644
--- a/include/llvm/CodeGen/AsmPrinter.h
+++ b/include/llvm/CodeGen/AsmPrinter.h
@@ -25,6 +25,7 @@ namespace llvm {
   class BlockAddress;
   class GCStrategy;
   class Constant;
+  class ConstantArray;
   class GCMetadataPrinter;
   class GlobalValue;
   class GlobalVariable;
@@ -134,6 +135,9 @@ namespace llvm {
     /// getDataLayout - Return information about data layout.
     const DataLayout &getDataLayout() const;
 
+    /// getTargetTriple - Return the target triple string.
+    StringRef getTargetTriple() const;
+
     /// getCurrentSection() - Return the current section we are emitting to.
     const MCSection *getCurrentSection() const;
 
@@ -480,7 +484,7 @@ namespace llvm {
     void EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
                             const MachineBasicBlock *MBB,
                             unsigned uid) const;
-    void EmitLLVMUsedList(const Constant *List);
+    void EmitLLVMUsedList(const ConstantArray *InitList);
     void EmitXXStructorList(const Constant *List, bool isCtor);
     GCMetadataPrinter *GetOrCreateGCPrinter(GCStrategy *C);
   };
diff --git a/include/llvm/CodeGen/CallingConvLower.h b/include/llvm/CodeGen/CallingConvLower.h
index c035e07..fa9d60f 100644
--- a/include/llvm/CodeGen/CallingConvLower.h
+++ b/include/llvm/CodeGen/CallingConvLower.h
@@ -163,8 +163,56 @@ private:
 
   unsigned StackOffset;
   SmallVector<uint32_t, 16> UsedRegs;
-  unsigned FirstByValReg;
-  bool FirstByValRegValid;
+
+  // ByValInfo and SmallVector<ByValInfo, 4> ByValRegs:
+  //
+  // Vector of ByValInfo instances (ByValRegs) is introduced for byval registers
+  // tracking.
+  // Or, in other words, it tracks byval parameters that are stored in
+  // general purpose registers.
+  //
+  // For 4 byte stack alignment,
+  // instance index means byval parameter number in formal
+  // arguments set. Assume, we have some "struct_type" with size = 4 bytes,
+  // then, for function "foo":
+  //
+  // i32 foo(i32 %p, %struct_type* %r, i32 %s, %struct_type* %t)
+  //
+  // ByValRegs[0] describes how "%r" is stored (Begin == r1, End == r2)
+  // ByValRegs[1] describes how "%t" is stored (Begin == r3, End == r4).
+  //
+  // In case of 8 bytes stack alignment,
+  // ByValRegs may also contain information about wasted registers.
+  // In function shown above, r3 would be wasted according to AAPCS rules.
+  // And in that case ByValRegs[1].Waste would be "true".
+  // ByValRegs vector size still would be 2,
+  // while "%t" goes to the stack: it wouldn't be described in ByValRegs.
+  //
+  // Supposed use-case for this collection:
+  // 1. Initially ByValRegs is empty, InRegsParamsProceed is 0.
+  // 2. HandleByVal fills up ByValRegs.
+  // 3. Argument analysis (LowerFormalArguments, for example). After
+  // some byval argument was analyzed, InRegsParamsProceed is increased.
+  struct ByValInfo {
+    ByValInfo(unsigned B, unsigned E, bool IsWaste = false) :
+      Begin(B), End(E), Waste(IsWaste) {}
+    // First register allocated for current parameter.
+    unsigned Begin;
+
+    // One past the last register allocated for current parameter.
+    unsigned End;
+
+    // Means that current range of registers doesn't belong to any
+    // parameters. It was wasted due to stack alignment rules.
+    // For more information see:
+    // AAPCS, 5.5 Parameter Passing, Stage C, C.3.
+    bool Waste;
+  };
+  SmallVector<ByValInfo, 4 > ByValRegs;
+
+  // InRegsParamsProceed - shows how many entries of ByValRegs were processed
+  // during argument analysis.
+  unsigned InRegsParamsProceed;
 
 protected:
   ParmContext CallOrPrologue;
@@ -306,12 +354,45 @@ public:
                    MVT LocVT, CCValAssign::LocInfo LocInfo,
                    int MinSize, int MinAlign, ISD::ArgFlagsTy ArgFlags);
 
-  // First GPR that carries part of a byval aggregate that's split
-  // between registers and memory.
-  unsigned getFirstByValReg() const { return FirstByValRegValid ? FirstByValReg : 0; }
-  void setFirstByValReg(unsigned r) { FirstByValReg = r; FirstByValRegValid = true; }
-  void clearFirstByValReg() { FirstByValReg = 0; FirstByValRegValid = false; }
-  bool isFirstByValRegValid() const { return FirstByValRegValid; }
+  // Returns count of byval arguments that are to be stored (even partly)
+  // in registers.
+  unsigned getInRegsParamsCount() const { return ByValRegs.size(); }
+
+  // Returns count of byval in-regs arguments processed so far.
+  unsigned getInRegsParamsProceed() const { return InRegsParamsProceed; }
+
+  // Get information about N-th byval parameter that is stored in registers.
+  // Here "InRegsParamRecordIndex" is N.
+  void getInRegsParamInfo(unsigned InRegsParamRecordIndex,
+                          unsigned& BeginReg, unsigned& EndReg) const {
+    assert(InRegsParamRecordIndex < ByValRegs.size() &&
+           "Wrong ByVal parameter index");
+
+    const ByValInfo& info = ByValRegs[InRegsParamRecordIndex];
+    BeginReg = info.Begin;
+    EndReg = info.End;
+  }
+
+  // Add information about parameter that is kept in registers.
+  void addInRegsParamInfo(unsigned RegBegin, unsigned RegEnd) {
+    ByValRegs.push_back(ByValInfo(RegBegin, RegEnd));
+  }
+
+  // Goes either to next byval parameter (excluding "waste" record), or
+  // to the end of collection.
+  // Returns false, if end is reached.
+  bool nextInRegsParam() {
+    unsigned e = ByValRegs.size();
+    if (InRegsParamsProceed < e)
+      ++InRegsParamsProceed;
+    return InRegsParamsProceed < e;
+  }
+
+  // Clear byval registers tracking info.
+  void clearByValRegsInfo() {
+    InRegsParamsProceed = 0;
+    ByValRegs.clear();
+  }
 
   ParmContext getCallOrPrologue() const { return CallOrPrologue; }
 
diff --git a/include/llvm/CodeGen/FastISel.h b/include/llvm/CodeGen/FastISel.h
index 705db7e..471e9bf 100644
--- a/include/llvm/CodeGen/FastISel.h
+++ b/include/llvm/CodeGen/FastISel.h
@@ -123,12 +123,28 @@ public:
   /// index value.
   std::pair<unsigned, bool> getRegForGEPIndex(const Value *V);
 
-  /// TryToFoldLoad - The specified machine instr operand is a vreg, and that
+  /// \brief We're checking to see if we can fold \p LI into \p FoldInst.
+  /// Note that we could have a sequence where multiple LLVM IR instructions
+  /// are folded into the same MachineInstr.  For example we could have:
+  ///   A: x = load i32 *P
+  ///   B: y = icmp A, 42
+  ///   C: br y, ...
+  ///
+  /// In this scenario, \p LI is "A", and \p FoldInst is "C".  We know
+  /// about "B" (and any other folded instructions) because it is between
+  /// A and C.
+  ///
+  /// If we succeed folding, return true.
+  ///
+  bool tryToFoldLoad(const LoadInst *LI, const Instruction *FoldInst);
+
+  /// \brief The specified machine instr operand is a vreg, and that
   /// vreg is being provided by the specified load instruction.  If possible,
   /// try to fold the load as an operand to the instruction, returning true if
   /// possible.
-  virtual bool TryToFoldLoad(MachineInstr * /*MI*/, unsigned /*OpNo*/,
-                             const LoadInst * /*LI*/) {
+  /// This method should be implemented by targets.
+  virtual bool tryToFoldLoadIntoMI(MachineInstr * /*MI*/, unsigned /*OpNo*/,
+                                   const LoadInst * /*LI*/) {
     return false;
   }
 
diff --git a/include/llvm/CodeGen/ISDOpcodes.h b/include/llvm/CodeGen/ISDOpcodes.h
index 442729b..0fd211b 100644
--- a/include/llvm/CodeGen/ISDOpcodes.h
+++ b/include/llvm/CodeGen/ISDOpcodes.h
@@ -602,14 +602,6 @@ namespace ISD {
     /// specifier.
     PREFETCH,
 
-    /// OUTCHAIN = MEMBARRIER(INCHAIN, load-load, load-store, store-load,
-    ///                       store-store, device)
-    /// This corresponds to the memory.barrier intrinsic.
-    /// it takes an input chain, 4 operands to specify the type of barrier, an
-    /// operand specifying if the barrier applies to device and uncached memory
-    /// and produces an output chain.
-    MEMBARRIER,
-
     /// OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope)
     /// This corresponds to the fence instruction. It takes an input chain, and
     /// two integer constants: an AtomicOrdering and a SynchronizationScope.
diff --git a/include/llvm/CodeGen/LiveInterval.h b/include/llvm/CodeGen/LiveInterval.h
index 244be9c..cb09a49 100644
--- a/include/llvm/CodeGen/LiveInterval.h
+++ b/include/llvm/CodeGen/LiveInterval.h
@@ -399,6 +399,15 @@ namespace llvm {
       return r != end() && r->containsRange(Start, End);
     }
 
+    /// True iff this live range is a single segment that lies between the
+    /// specified boundaries, exclusively. Vregs live across a backedge are not
+    /// considered local. The boundaries are expected to lie within an extended
+    /// basic block, so vregs that are not live out should contain no holes.
+    bool isLocal(SlotIndex Start, SlotIndex End) const {
+      return beginIndex() > Start.getBaseIndex() &&
+        endIndex() < End.getBoundaryIndex();
+    }
+
     /// removeRange - Remove the specified range from this interval.  Note that
     /// the range must be a single LiveRange in its entirety.
     void removeRange(SlotIndex Start, SlotIndex End,
diff --git a/include/llvm/CodeGen/LiveRangeEdit.h b/include/llvm/CodeGen/LiveRangeEdit.h
index 8a32a3c..e59276f 100644
--- a/include/llvm/CodeGen/LiveRangeEdit.h
+++ b/include/llvm/CodeGen/LiveRangeEdit.h
@@ -196,8 +196,7 @@ public:
   /// allocator.  These registers should not be split into new intervals
   /// as currently those new intervals are not guaranteed to spill.
   void eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
-                         ArrayRef<unsigned> RegsBeingSpilled 
-                          = ArrayRef<unsigned>());
+                         ArrayRef<unsigned> RegsBeingSpilled = None);
 
   /// calculateRegClassAndHint - Recompute register class and hint for each new
   /// register.
diff --git a/include/llvm/CodeGen/MachineBasicBlock.h b/include/llvm/CodeGen/MachineBasicBlock.h
index 492a3ff..0f2f874 100644
--- a/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/include/llvm/CodeGen/MachineBasicBlock.h
@@ -71,7 +71,6 @@ class MachineBasicBlock : public ilist_node<MachineBasicBlock> {
   std::vector<MachineBasicBlock *> Predecessors;
   std::vector<MachineBasicBlock *> Successors;
 
-
   /// Weights - Keep track of the weights to the successors. This vector
   /// has the same order as Successors, or it is empty if we don't use it
   /// (disable optimization).
@@ -96,6 +95,10 @@ class MachineBasicBlock : public ilist_node<MachineBasicBlock> {
   /// target of an indirect branch.
   bool AddressTaken;
 
+  /// \brief Since getSymbol is a relatively heavy-weight operation, the symbol
+  /// is only computed once and is cached.
+  mutable MCSymbol *CachedMCSymbol;
+
   // Intrusive list support
   MachineBasicBlock() {}
 
diff --git a/include/llvm/CodeGen/MachineFunction.h b/include/llvm/CodeGen/MachineFunction.h
index 82c4cd6..c886e25 100644
--- a/include/llvm/CodeGen/MachineFunction.h
+++ b/include/llvm/CodeGen/MachineFunction.h
@@ -352,8 +352,8 @@ public:
   // Internal functions used to automatically number MachineBasicBlocks
   //
 
-  /// getNextMBBNumber - Returns the next unique number to be assigned
-  /// to a MachineBasicBlock in this MachineFunction.
+  /// \brief Adds the MBB to the internal numbering. Returns the unique number
+  /// assigned to the MBB.
   ///
   unsigned addToMBBNumbering(MachineBasicBlock *MBB) {
     MBBNumbering.push_back(MBB);
diff --git a/include/llvm/CodeGen/MachineMemOperand.h b/include/llvm/CodeGen/MachineMemOperand.h
index ddb1271..00a55b5 100644
--- a/include/llvm/CodeGen/MachineMemOperand.h
+++ b/include/llvm/CodeGen/MachineMemOperand.h
@@ -34,22 +34,22 @@ struct MachinePointerInfo {
   /// If this is null, then the access is to a pointer in the default address
   /// space.
   const Value *V;
-  
+
   /// Offset - This is an offset from the base Value*.
   int64_t Offset;
-  
+
   explicit MachinePointerInfo(const Value *v = 0, int64_t offset = 0)
     : V(v), Offset(offset) {}
-  
+
   MachinePointerInfo getWithOffset(int64_t O) const {
     if (V == 0) return MachinePointerInfo(0, 0);
     return MachinePointerInfo(V, Offset+O);
   }
-  
+
   /// getAddrSpace - Return the LLVM IR address space number that this pointer
   /// points into.
   unsigned getAddrSpace() const;
-  
+
   /// getConstantPool - Return a MachinePointerInfo record that refers to the
   /// constant pool.
   static MachinePointerInfo getConstantPool();
@@ -57,20 +57,20 @@ struct MachinePointerInfo {
   /// getFixedStack - Return a MachinePointerInfo record that refers to the
   /// the specified FrameIndex.
   static MachinePointerInfo getFixedStack(int FI, int64_t offset = 0);
-  
+
   /// getJumpTable - Return a MachinePointerInfo record that refers to a
   /// jump table entry.
   static MachinePointerInfo getJumpTable();
-  
+
   /// getGOT - Return a MachinePointerInfo record that refers to a
   /// GOT entry.
   static MachinePointerInfo getGOT();
-  
+
   /// getStack - stack pointer relative access.
   static MachinePointerInfo getStack(int64_t Offset);
 };
-  
-  
+
+
 //===----------------------------------------------------------------------===//
 /// MachineMemOperand - A description of a memory reference used in the backend.
 /// Instead of holding a StoreInst or LoadInst, this class holds the address
@@ -99,8 +99,11 @@ public:
     MONonTemporal = 8,
     /// The memory access is invariant.
     MOInvariant = 16,
+    // Target hints allow target passes to annotate memory operations.
+    MOTargetStartBit = 5,
+    MOTargetNumBits = 3,
     // This is the number of bits we need to represent flags.
-    MOMaxBits = 5
+    MOMaxBits = 8
   };
 
   /// MachineMemOperand - Construct an MachineMemOperand object with the
@@ -110,7 +113,7 @@ public:
                     const MDNode *Ranges = 0);
 
   const MachinePointerInfo &getPointerInfo() const { return PtrInfo; }
-  
+
   /// getValue - Return the base address of the memory access. This may either
   /// be a normal LLVM IR Value, or one of the special values used in CodeGen.
   /// Special values are those obtained via
@@ -123,6 +126,9 @@ public:
   /// getFlags - Return the raw flags of the source value, \see MemOperandFlags.
   unsigned int getFlags() const { return Flags & ((1 << MOMaxBits) - 1); }
 
+  /// Bitwise OR the current flags with the given flags.
+  void setFlags(unsigned f) { Flags |= (f & ((1 << MOMaxBits) - 1)); }
+
   /// getOffset - For normal values, this is a byte offset added to the base
   /// address. For PseudoSourceValue::FPRel values, this is the FrameIndex
   /// number.
diff --git a/include/llvm/CodeGen/MachineRegisterInfo.h b/include/llvm/CodeGen/MachineRegisterInfo.h
index 4b43cc1..24ba7bb 100644
--- a/include/llvm/CodeGen/MachineRegisterInfo.h
+++ b/include/llvm/CodeGen/MachineRegisterInfo.h
@@ -157,6 +157,12 @@ public:
   // Strictly for use by MachineInstr.cpp.
   void moveOperands(MachineOperand *Dst, MachineOperand *Src, unsigned NumOps);
 
+  /// Verify the sanity of the use list for Reg.
+  void verifyUseList(unsigned Reg) const;
+
+  /// Verify the use list of all registers.
+  void verifyUseLists() const;
+
   /// reg_begin/reg_end - Provide iteration support to walk over all definitions
   /// and uses of a register within the MachineFunction that corresponds to this
   /// MachineRegisterInfo object.
diff --git a/include/llvm/CodeGen/MachineScheduler.h b/include/llvm/CodeGen/MachineScheduler.h
index 57febe7..769e4b4 100644
--- a/include/llvm/CodeGen/MachineScheduler.h
+++ b/include/llvm/CodeGen/MachineScheduler.h
@@ -274,6 +274,10 @@ public:
     Mutations.push_back(Mutation);
   }
 
+  /// \brief True if an edge can be added from PredSU to SuccSU without creating
+  /// a cycle.
+  bool canAddEdge(SUnit *SuccSU, SUnit *PredSU);
+
   /// \brief Add a DAG edge to the given SU with the given predecessor
   /// dependence data.
   ///
@@ -297,6 +301,10 @@ public:
   /// reorderable instructions.
   virtual void schedule();
 
+  /// Change the position of an instruction within the basic block and update
+  /// live ranges and region boundary iterators.
+  void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos);
+
   /// Get current register pressure for the top scheduled instructions.
   const IntervalPressure &getTopPressure() const { return TopPressure; }
   const RegPressureTracker &getTopRPTracker() const { return TopRPTracker; }
@@ -362,7 +370,6 @@ protected:
 
   void updateScheduledPressure(const std::vector<unsigned> &NewMaxPressure);
 
-  void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos);
   bool checkSchedLimit();
 
   void findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
diff --git a/include/llvm/CodeGen/MachineTraceMetrics.h b/include/llvm/CodeGen/MachineTraceMetrics.h
index 2775a04..9794707 100644
--- a/include/llvm/CodeGen/MachineTraceMetrics.h
+++ b/include/llvm/CodeGen/MachineTraceMetrics.h
@@ -260,9 +260,12 @@ public:
     /// independent, exposing the maximum instruction-level parallelism.
     ///
     /// Any blocks in Extrablocks are included as if they were part of the
-    /// trace.
-    unsigned getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks =
-                               ArrayRef<const MachineBasicBlock*>()) const;
+    /// trace. Likewise, extra resources required by the specified scheduling
+    /// classes are included. For the caller to account for extra machine
+    /// instructions, it must first resolve each instruction's scheduling class.
+    unsigned getResourceLength(
+                ArrayRef<const MachineBasicBlock*> Extrablocks = None,
+                ArrayRef<const MCSchedClassDesc*> ExtraInstrs = None) const;
 
     /// Return the length of the (data dependency) critical path through the
     /// trace.
diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h
index fc8aa75..b02f63e 100644
--- a/include/llvm/CodeGen/Passes.h
+++ b/include/llvm/CodeGen/Passes.h
@@ -35,6 +35,48 @@ namespace llvm {
 
 class PassConfigImpl;
 
+/// Discriminated union of Pass ID types.
+///
+/// The PassConfig API prefers dealing with IDs because they are safer and more
+/// efficient. IDs decouple configuration from instantiation. This way, when a
+/// pass is overridden, it isn't unnecessarily instantiated. It is also unsafe
+/// to refer to a Pass pointer after adding it to a pass manager, which deletes
+/// redundant pass instances.
+///
+/// However, it is convenient to directly instantiate target passes with
+/// non-default ctors. These often don't have a registered PassInfo. Rather than
+/// force all target passes to implement the pass registry boilerplate, allow
+/// the PassConfig API to handle either type.
+///
+/// AnalysisID is sadly char*, so PointerIntPair won't work.
+class IdentifyingPassPtr {
+  union {
+    AnalysisID ID;
+    Pass *P;
+  };
+  bool IsInstance;
+public:
+  IdentifyingPassPtr() : P(0), IsInstance(false) {}
+  IdentifyingPassPtr(AnalysisID IDPtr) : ID(IDPtr), IsInstance(false) {}
+  IdentifyingPassPtr(Pass *InstancePtr) : P(InstancePtr), IsInstance(true) {}
+
+  bool isValid() const { return P; }
+  bool isInstance() const { return IsInstance; }
+
+  AnalysisID getID() const {
+    assert(!IsInstance && "Not a Pass ID");
+    return ID;
+  }
+  Pass *getInstance() const {
+    assert(IsInstance && "Not a Pass Instance");
+    return P;
+  }
+};
+
+template <> struct isPodLike<IdentifyingPassPtr> {
+  static const bool value = true;
+};
+
 /// Target-Independent Code Generator Pass Configuration Options.
 ///
 /// This is an ImmutablePass solely for the purpose of exposing CodeGen options
@@ -117,20 +159,22 @@ public:
   /// Allow the target to override a specific pass without overriding the pass
   /// pipeline. When passes are added to the standard pipeline at the
   /// point where StandardID is expected, add TargetID in its place.
-  void substitutePass(AnalysisID StandardID, AnalysisID TargetID);
+  void substitutePass(AnalysisID StandardID, IdentifyingPassPtr TargetID);
 
   /// Insert InsertedPassID pass after TargetPassID pass.
-  void insertPass(AnalysisID TargetPassID, AnalysisID InsertedPassID);
+  void insertPass(AnalysisID TargetPassID, IdentifyingPassPtr InsertedPassID);
 
   /// Allow the target to enable a specific standard pass by default.
   void enablePass(AnalysisID PassID) { substitutePass(PassID, PassID); }
 
   /// Allow the target to disable a specific standard pass by default.
-  void disablePass(AnalysisID PassID) { substitutePass(PassID, 0); }
+  void disablePass(AnalysisID PassID) {
+    substitutePass(PassID, IdentifyingPassPtr());
+  }
 
   /// Return the pass substituted for StandardID by the target.
   /// If no substitution exists, return StandardID.
-  AnalysisID getPassSubstitution(AnalysisID StandardID) const;
+  IdentifyingPassPtr getPassSubstitution(AnalysisID StandardID) const;
 
   /// Return true if the optimized regalloc pipeline is enabled.
   bool getOptimizeRegAlloc() const;
@@ -222,17 +266,6 @@ protected:
     return false;
   }
 
-  /// addFinalizeRegAlloc - This method may be implemented by targets that want
-  /// to run passes within the regalloc pipeline, immediately after the register
-  /// allocation pass itself. These passes run as soon as virtual regisiters
-  /// have been rewritten to physical registers but before and other postRA
-  /// optimization happens. Targets that have marked instructions for bundling
-  /// must have finalized those bundles by the time these passes have run,
-  /// because subsequent passes are not guaranteed to be bundle-aware.
-  virtual bool addFinalizeRegAlloc() {
-    return false;
-  }
-
   /// addPostRegAlloc - This method may be implemented by targets that want to
   /// run passes after register allocation pass pipeline but before
   /// prolog-epilog insertion.  This should return true if -print-machineinstrs
diff --git a/include/llvm/CodeGen/RegAllocPBQP.h b/include/llvm/CodeGen/RegAllocPBQP.h
index b617c14..8b8e3d9 100644
--- a/include/llvm/CodeGen/RegAllocPBQP.h
+++ b/include/llvm/CodeGen/RegAllocPBQP.h
@@ -29,6 +29,7 @@ namespace llvm {
   class MachineFunction;
   class MachineLoopInfo;
   class TargetRegisterInfo;
+  template<class T> class OwningPtr;
 
   /// This class wraps up a PBQP instance representing a register allocation
   /// problem, plus the structures necessary to map back from the PBQP solution
@@ -123,11 +124,9 @@ namespace llvm {
 
     /// Build a PBQP instance to represent the register allocation problem for
     /// the given MachineFunction.
-    virtual std::auto_ptr<PBQPRAProblem> build(
-                                              MachineFunction *mf,
-                                              const LiveIntervals *lis,
-                                              const MachineLoopInfo *loopInfo,
-                                              const RegSet &vregs);
+    virtual PBQPRAProblem *build(MachineFunction *mf, const LiveIntervals *lis,
+                                 const MachineLoopInfo *loopInfo,
+                                 const RegSet &vregs);
   private:
 
     void addSpillCosts(PBQP::Vector &costVec, PBQP::PBQPNum spillCost);
@@ -144,11 +143,9 @@ namespace llvm {
  
     /// Build a PBQP instance to represent the register allocation problem for
     /// the given MachineFunction.
-    virtual std::auto_ptr<PBQPRAProblem> build(
-                                              MachineFunction *mf,
-                                              const LiveIntervals *lis,
-                                              const MachineLoopInfo *loopInfo,
-                                              const RegSet &vregs);   
+    virtual PBQPRAProblem *build(MachineFunction *mf, const LiveIntervals *lis,
+                                 const MachineLoopInfo *loopInfo,
+                                 const RegSet &vregs);   
 
   private:
 
@@ -161,7 +158,7 @@ namespace llvm {
                             PBQP::PBQPNum benefit);
   };
 
-  FunctionPass* createPBQPRegisterAllocator(std::auto_ptr<PBQPBuilder> builder,
+  FunctionPass* createPBQPRegisterAllocator(OwningPtr<PBQPBuilder> &builder,
                                             char *customPassID=0);
 }
 
diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h
index 8c959da..7cff27e 100644
--- a/include/llvm/CodeGen/ScheduleDAG.h
+++ b/include/llvm/CodeGen/ScheduleDAG.h
@@ -302,6 +302,7 @@ namespace llvm {
     bool isCallOp         : 1;          // Is a function call operand.
     bool isTwoAddress     : 1;          // Is a two-address instruction.
     bool isCommutable     : 1;          // Is a commutable instruction.
+    bool hasPhysRegUses   : 1;          // Has physreg uses.
     bool hasPhysRegDefs   : 1;          // Has physreg defs that are being used.
     bool hasPhysRegClobbers : 1;        // Has any physreg defs, used or not.
     bool isPending        : 1;          // True once pending.
@@ -331,10 +332,10 @@ namespace llvm {
         NodeQueueId(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0),
         NumSuccsLeft(0), WeakPredsLeft(0), WeakSuccsLeft(0), NumRegDefsLeft(0),
         Latency(0), isVRegCycle(false), isCall(false), isCallOp(false),
-        isTwoAddress(false), isCommutable(false), hasPhysRegDefs(false),
-        hasPhysRegClobbers(false), isPending(false), isAvailable(false),
-        isScheduled(false), isScheduleHigh(false), isScheduleLow(false),
-        isCloned(false), SchedulingPref(Sched::None),
+        isTwoAddress(false), isCommutable(false), hasPhysRegUses(false),
+        hasPhysRegDefs(false), hasPhysRegClobbers(false), isPending(false),
+        isAvailable(false), isScheduled(false), isScheduleHigh(false),
+        isScheduleLow(false), isCloned(false), SchedulingPref(Sched::None),
         isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0),
         TopReadyCycle(0), BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {}
 
@@ -345,10 +346,10 @@ namespace llvm {
         NodeQueueId(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0),
         NumSuccsLeft(0), WeakPredsLeft(0), WeakSuccsLeft(0), NumRegDefsLeft(0),
         Latency(0), isVRegCycle(false), isCall(false), isCallOp(false),
-        isTwoAddress(false), isCommutable(false), hasPhysRegDefs(false),
-        hasPhysRegClobbers(false), isPending(false), isAvailable(false),
-        isScheduled(false), isScheduleHigh(false), isScheduleLow(false),
-        isCloned(false), SchedulingPref(Sched::None),
+        isTwoAddress(false), isCommutable(false), hasPhysRegUses(false),
+        hasPhysRegDefs(false), hasPhysRegClobbers(false), isPending(false),
+        isAvailable(false), isScheduled(false), isScheduleHigh(false),
+        isScheduleLow(false), isCloned(false), SchedulingPref(Sched::None),
         isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0),
         TopReadyCycle(0), BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {}
 
@@ -358,10 +359,10 @@ namespace llvm {
         NodeQueueId(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0),
         NumSuccsLeft(0), WeakPredsLeft(0), WeakSuccsLeft(0), NumRegDefsLeft(0),
         Latency(0), isVRegCycle(false), isCall(false), isCallOp(false),
-        isTwoAddress(false), isCommutable(false), hasPhysRegDefs(false),
-        hasPhysRegClobbers(false), isPending(false), isAvailable(false),
-        isScheduled(false), isScheduleHigh(false), isScheduleLow(false),
-        isCloned(false), SchedulingPref(Sched::None),
+        isTwoAddress(false), isCommutable(false), hasPhysRegUses(false),
+        hasPhysRegDefs(false), hasPhysRegClobbers(false), isPending(false),
+        isAvailable(false), isScheduled(false), isScheduleHigh(false),
+        isScheduleLow(false), isCloned(false), SchedulingPref(Sched::None),
         isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0),
         TopReadyCycle(0), BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {}
 
@@ -726,9 +727,8 @@ namespace llvm {
     /// IsReachable - Checks if SU is reachable from TargetSU.
     bool IsReachable(const SUnit *SU, const SUnit *TargetSU);
 
-    /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU
-    /// will create a cycle.
-    bool WillCreateCycle(SUnit *SU, SUnit *TargetSU);
+    /// WillCreateCycle - Return true if addPred(TargetSU, SU) creates a cycle.
+    bool WillCreateCycle(SUnit *TargetSU, SUnit *SU);
 
     /// AddPred - Updates the topological ordering to accommodate an edge
     /// to be added from SUnit X to SUnit Y.
diff --git a/include/llvm/CodeGen/ScheduleDAGInstrs.h b/include/llvm/CodeGen/ScheduleDAGInstrs.h
index 2219520..990cac6 100644
--- a/include/llvm/CodeGen/ScheduleDAGInstrs.h
+++ b/include/llvm/CodeGen/ScheduleDAGInstrs.h
@@ -105,6 +105,10 @@ namespace llvm {
     MachineBasicBlock::iterator RegionEnd;
 
     /// The index in BB of RegionEnd.
+    ///
+    /// This is the instruction number from the top of the current block, not
+    /// the SlotIndex. It is only used by the AntiDepBreaker and should be
+    /// removed once that client is obsolete.
     unsigned EndIndex;
 
     /// After calling BuildSchedGraph, each machine instruction in the current
@@ -146,6 +150,9 @@ namespace llvm {
 
     virtual ~ScheduleDAGInstrs() {}
 
+    /// \brief Expose LiveIntervals for use in DAG mutators and such.
+    LiveIntervals *getLIS() const { return LIS; }
+
     /// \brief Get the machine model for instruction scheduling.
     const TargetSchedModel *getSchedModel() const { return &SchedModel; }
 
diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h
index e5adf67..8c064bf 100644
--- a/include/llvm/CodeGen/SelectionDAG.h
+++ b/include/llvm/CodeGen/SelectionDAG.h
@@ -810,31 +810,32 @@ public:
   MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT,
                                 SDValue Op1, SDValue Op2);
   MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT,
-                         SDValue Op1, SDValue Op2, SDValue Op3);
+                                SDValue Op1, SDValue Op2, SDValue Op3);
   MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT,
-                         const SDValue *Ops, unsigned NumOps);
+                                ArrayRef<SDValue> Ops);
   MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2);
   MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2,
-                         SDValue Op1);
-  MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1,
-                         EVT VT2, SDValue Op1, SDValue Op2);
-  MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1,
-                         EVT VT2, SDValue Op1, SDValue Op2, SDValue Op3);
+                                SDValue Op1);
+  MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2,
+                                SDValue Op1, SDValue Op2);
+  MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2,
+                                SDValue Op1, SDValue Op2, SDValue Op3);
   MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2,
-                         const SDValue *Ops, unsigned NumOps);
+                                ArrayRef<SDValue> Ops);
   MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2,
-                         EVT VT3, SDValue Op1, SDValue Op2);
+                                EVT VT3, SDValue Op1, SDValue Op2);
   MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2,
-                         EVT VT3, SDValue Op1, SDValue Op2, SDValue Op3);
+                                EVT VT3, SDValue Op1, SDValue Op2,
+                                SDValue Op3);
   MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2,
-                         EVT VT3, const SDValue *Ops, unsigned NumOps);
+                                EVT VT3, ArrayRef<SDValue> Ops);
   MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2,
-                         EVT VT3, EVT VT4, const SDValue *Ops, unsigned NumOps);
+                                EVT VT3, EVT VT4, ArrayRef<SDValue> Ops);
   MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl,
-                         ArrayRef<EVT> ResultTys, const SDValue *Ops,
-                         unsigned NumOps);
+                                ArrayRef<EVT> ResultTys,
+                                ArrayRef<SDValue> Ops);
   MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, SDVTList VTs,
-                         const SDValue *Ops, unsigned NumOps);
+                                ArrayRef<SDValue> Ops);
 
   /// getTargetExtractSubreg - A convenience function for creating
   /// TargetInstrInfo::EXTRACT_SUBREG nodes.
diff --git a/include/llvm/CodeGen/SelectionDAGISel.h b/include/llvm/CodeGen/SelectionDAGISel.h
index 5f503de..a4721db 100644
--- a/include/llvm/CodeGen/SelectionDAGISel.h
+++ b/include/llvm/CodeGen/SelectionDAGISel.h
@@ -259,9 +259,6 @@ private:
   void SelectBasicBlock(BasicBlock::const_iterator Begin,
                         BasicBlock::const_iterator End,
                         bool &HadTailCall);
-
-  bool TryToFoldFastISelLoad(const LoadInst *LI, const Instruction *FoldInst,
-                             FastISel *FastIS);
   void FinishBasicBlock();
 
   void CodeGenAndEmitDAG();
diff --git a/include/llvm/CodeGen/SlotIndexes.h b/include/llvm/CodeGen/SlotIndexes.h
index a277080..26d0433 100644
--- a/include/llvm/CodeGen/SlotIndexes.h
+++ b/include/llvm/CodeGen/SlotIndexes.h
@@ -53,6 +53,20 @@ namespace llvm {
       this->index = index;
     }
 
+#ifdef EXPENSIVE_CHECKS
+    // When EXPENSIVE_CHECKS is defined, "erased" index list entries will
+    // actually be moved to a "graveyard" list, and have their pointers
+    // poisoned, so that dangling SlotIndex access can be reliably detected.
+    void setPoison() {
+      intptr_t tmp = reinterpret_cast<intptr_t>(mi);
+      assert(((tmp & 0x1) == 0x0) && "Pointer already poisoned?");  
+      tmp |= 0x1;
+      mi = reinterpret_cast<MachineInstr*>(tmp);
+    }
+
+    bool isPoisoned() const { return (reinterpret_cast<intptr_t>(mi) & 0x1) == 0x1; }
+#endif // EXPENSIVE_CHECKS
+
   };
 
   template <>
@@ -109,6 +123,10 @@ namespace llvm {
 
     IndexListEntry* listEntry() const {
       assert(isValid() && "Attempt to compare reserved index.");
+#ifdef EXPENSIVE_CHECKS
+      assert(!lie.getPointer()->isPoisoned() &&
+             "Attempt to access deleted list-entry.");
+#endif // EXPENSIVE_CHECKS
       return lie.getPointer();
     }
 
@@ -282,7 +300,6 @@ namespace llvm {
 
   template <> struct isPodLike<SlotIndex> { static const bool value = true; };
 
-
   inline raw_ostream& operator<<(raw_ostream &os, SlotIndex li) {
     li.print(os);
     return os;
@@ -313,6 +330,10 @@ namespace llvm {
     typedef ilist<IndexListEntry> IndexList;
     IndexList indexList;
 
+#ifdef EXPENSIVE_CHECKS
+    IndexList graveyardList;
+#endif // EXPENSIVE_CHECKS
+
     MachineFunction *mf;
 
     typedef DenseMap<const MachineInstr*, SlotIndex> Mi2IndexMap;
@@ -643,6 +664,32 @@ namespace llvm {
       std::sort(idx2MBBMap.begin(), idx2MBBMap.end(), Idx2MBBCompare());
     }
 
+    /// \brief Free the resources that were required to maintain a SlotIndex.
+    ///
+    /// Once an index is no longer needed (for instance because the instruction
+    /// at that index has been moved), the resources required to maintain the
+    /// index can be relinquished to reduce memory use and improve renumbering
+    /// performance. Any remaining SlotIndex objects that point to the same
+    /// index are left 'dangling' (much the same as a dangling pointer to a
+    /// freed object) and should not be accessed, except to destruct them.
+    /// 
+    /// Like dangling pointers, access to dangling SlotIndexes can cause
+    /// painful-to-track-down bugs, especially if the memory for the index
+    /// previously pointed to has been re-used. To detect dangling SlotIndex
+    /// bugs, build with EXPENSIVE_CHECKS=1. This will cause "erased" indexes to
+    /// be retained in a graveyard instead of being freed. Operations on indexes
+    /// in the graveyard will trigger an assertion.
+    void eraseIndex(SlotIndex index) {
+      IndexListEntry *entry = index.listEntry();
+#ifdef EXPENSIVE_CHECKS
+      indexList.remove(entry);
+      graveyardList.push_back(entry);
+      entry->setPoison();
+#else
+      indexList.erase(entry);
+#endif
+    }
+
   };
 
 
diff --git a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
index e7098e4..5b22c9c 100644
--- a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
+++ b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
@@ -128,6 +128,12 @@ public:
   virtual const MCSection *
   SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
                          Mangler *Mang, const TargetMachine &TM) const;
+
+  /// emitModuleFlags - Emit Obj-C garbage collection and linker options.  Only
+  /// linker option emission is implemented for COFF.
+  virtual void emitModuleFlags(MCStreamer &Streamer,
+                               ArrayRef<Module::ModuleFlagEntry> ModuleFlags,
+                               Mangler *Mang, const TargetMachine &TM) const;
 };
 
 } // end namespace llvm
diff --git a/include/llvm/CodeGen/ValueTypes.td b/include/llvm/CodeGen/ValueTypes.td
index 76df6ac..da26985 100644
--- a/include/llvm/CodeGen/ValueTypes.td
+++ b/include/llvm/CodeGen/ValueTypes.td
@@ -44,13 +44,13 @@ def v4i8   : ValueType<32 , 20>;   //  4 x i8  vector value
 def v8i8   : ValueType<64 , 21>;   //  8 x i8  vector value
 def v16i8  : ValueType<128, 22>;   // 16 x i8  vector value
 def v32i8  : ValueType<256, 23>;   // 32 x i8 vector value
-def v64i8  : ValueType<256, 24>;   // 64 x i8 vector value
+def v64i8  : ValueType<512, 24>;   // 64 x i8 vector value
 def v1i16  : ValueType<16 , 25>;   //  1 x i16 vector value
 def v2i16  : ValueType<32 , 26>;   //  2 x i16 vector value
 def v4i16  : ValueType<64 , 27>;   //  4 x i16 vector value
 def v8i16  : ValueType<128, 28>;   //  8 x i16 vector value
 def v16i16 : ValueType<256, 29>;   // 16 x i16 vector value
-def v32i16 : ValueType<256, 30>;   // 32 x i16 vector value
+def v32i16 : ValueType<512, 30>;   // 32 x i16 vector value
 def v1i32  : ValueType<32 , 31>;   //  1 x i32 vector value
 def v2i32  : ValueType<64 , 32>;   //  2 x i32 vector value
 def v4i32  : ValueType<128, 33>;   //  4 x i32 vector value
diff --git a/include/llvm/Config/config.h.cmake b/include/llvm/Config/config.h.cmake
index 0a26857..32d192e 100644
--- a/include/llvm/Config/config.h.cmake
+++ b/include/llvm/Config/config.h.cmake
@@ -69,6 +69,9 @@
 /* Define to 1 if you have the `closedir' function. */
 #cmakedefine HAVE_CLOSEDIR ${HAVE_CLOSEDIR}
 
+/* Define to 1 if you have the <cxxabi.h> header file. */
+#cmakedefine HAVE_CXXABI_H ${HAVE_CXXABI_H}
+
 /* Define to 1 if you have the <CrashReporterClient.h> header file. */
 #undef HAVE_CRASHREPORTERCLIENT_H
 
@@ -230,6 +233,9 @@
 /* Define to 1 if you have the `udis86' library (-ludis86). */
 #undef HAVE_LIBUDIS86
 
+/* Define to 1 if you have the `z' library (-lz). */
+#cmakedefine HAVE_LIBZ ${HAVE_LIBZ}
+
 /* Define to 1 if you have the <limits.h> header file. */
 #cmakedefine HAVE_LIMITS_H ${HAVE_LIMITS_H}
 
@@ -498,6 +504,9 @@
 /* Define if the xdot.py program is available */
 #cmakedefine HAVE_XDOT_PY ${HAVE_XDOT_PY}
 
+/* Define to 1 if you have the <zlib.h> header file. */
+#cmakedefine HAVE_ZLIB_H ${HAVE_ZLIB_H}
+
 /* Have host's _alloca */
 #cmakedefine HAVE__ALLOCA ${HAVE__ALLOCA}
 
@@ -570,6 +579,9 @@
 /* Define if threads enabled */
 #cmakedefine01 LLVM_ENABLE_THREADS
 
+/* Define if zlib compression is available */
+#cmakedefine01 LLVM_ENABLE_ZLIB
+
 /* Installation directory for config files */
 #cmakedefine LLVM_ETCDIR "${LLVM_ETCDIR}"
 
@@ -577,7 +589,7 @@
 #cmakedefine01 LLVM_HAS_ATOMICS
 
 /* Host triple LLVM will be executed on */
-#cmakedefine LLVM_HOSTTRIPLE "${LLVM_HOSTTRIPLE}"
+#cmakedefine LLVM_HOST_TRIPLE "${LLVM_HOST_TRIPLE}"
 
 /* Installation directory for include files */
 #cmakedefine LLVM_INCLUDEDIR "${LLVM_INCLUDEDIR}"
diff --git a/include/llvm/Config/config.h.in b/include/llvm/Config/config.h.in
index 5a3d02c..950e66f 100644
--- a/include/llvm/Config/config.h.in
+++ b/include/llvm/Config/config.h.in
@@ -78,6 +78,9 @@
 /* Define to 1 if you have the <ctype.h> header file. */
 #undef HAVE_CTYPE_H
 
+/* Define to 1 if you have the <cxxabi.h> header file. */
+#undef HAVE_CXXABI_H
+
 /* Define to 1 if you have the declaration of `FE_ALL_EXCEPT', and to 0 if you
    don't. */
 #undef HAVE_DECL_FE_ALL_EXCEPT
@@ -226,6 +229,9 @@
 /* Define to 1 if you have the `udis86' library (-ludis86). */
 #undef HAVE_LIBUDIS86
 
+/* Define to 1 if you have the `z' library (-lz). */
+#undef HAVE_LIBZ
+
 /* Define to 1 if you have the <limits.h> header file. */
 #undef HAVE_LIMITS_H
 
@@ -503,6 +509,9 @@
 /* Define if the xdot.py program is available */
 #undef HAVE_XDOT_PY
 
+/* Define to 1 if you have the <zlib.h> header file. */
+#undef HAVE_ZLIB_H
+
 /* Have host's _alloca */
 #undef HAVE__ALLOCA
 
@@ -575,6 +584,9 @@
 /* Define if threads enabled */
 #undef LLVM_ENABLE_THREADS
 
+/* Define if zlib is enabled */
+#undef LLVM_ENABLE_ZLIB
+
 /* Installation directory for config files */
 #undef LLVM_ETCDIR
 
@@ -582,7 +594,7 @@
 #undef LLVM_HAS_ATOMICS
 
 /* Host triple LLVM will be executed on */
-#undef LLVM_HOSTTRIPLE
+#undef LLVM_HOST_TRIPLE
 
 /* Installation directory for include files */
 #undef LLVM_INCLUDEDIR
diff --git a/include/llvm/Config/llvm-config.h.cmake b/include/llvm/Config/llvm-config.h.cmake
index eda17ee..c6f2bef 100644
--- a/include/llvm/Config/llvm-config.h.cmake
+++ b/include/llvm/Config/llvm-config.h.cmake
@@ -41,7 +41,7 @@
 #cmakedefine01 LLVM_HAS_ATOMICS
 
 /* Host triple LLVM will be executed on */
-#cmakedefine LLVM_HOSTTRIPLE "${LLVM_HOSTTRIPLE}"
+#cmakedefine LLVM_HOST_TRIPLE "${LLVM_HOST_TRIPLE}"
 
 /* Installation directory for include files */
 #cmakedefine LLVM_INCLUDEDIR "${LLVM_INCLUDEDIR}"
diff --git a/include/llvm/Config/llvm-config.h.in b/include/llvm/Config/llvm-config.h.in
index af3a324..a5209fa 100644
--- a/include/llvm/Config/llvm-config.h.in
+++ b/include/llvm/Config/llvm-config.h.in
@@ -41,7 +41,7 @@
 #undef LLVM_HAS_ATOMICS
 
 /* Host triple LLVM will be executed on */
-#undef LLVM_HOSTTRIPLE
+#undef LLVM_HOST_TRIPLE
 
 /* Installation directory for include files */
 #undef LLVM_INCLUDEDIR
diff --git a/include/llvm/DIBuilder.h b/include/llvm/DIBuilder.h
index 4f0aa07..2c0f712 100644
--- a/include/llvm/DIBuilder.h
+++ b/include/llvm/DIBuilder.h
@@ -7,7 +7,7 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file defines a DIBuilder that is useful for creating debugging 
+// This file defines a DIBuilder that is useful for creating debugging
 // information entries in LLVM IR form.
 //
 //===----------------------------------------------------------------------===//
@@ -37,11 +37,13 @@ namespace llvm {
   class DIType;
   class DIArray;
   class DIGlobalVariable;
+  class DIImportedModule;
   class DINameSpace;
   class DIVariable;
   class DISubrange;
   class DILexicalBlockFile;
   class DILexicalBlock;
+  class DIScope;
   class DISubprogram;
   class DITemplateTypeParameter;
   class DITemplateValueParameter;
@@ -57,6 +59,7 @@ namespace llvm {
     MDNode *TempRetainTypes;
     MDNode *TempSubprograms;
     MDNode *TempGVs;
+    MDNode *TempImportedModules;
 
     Function *DeclareFn;     // llvm.dbg.declare
     Function *ValueFn;       // llvm.dbg.value
@@ -65,6 +68,7 @@ namespace llvm {
     SmallVector<Value *, 4> AllRetainTypes;
     SmallVector<Value *, 4> AllSubprograms;
     SmallVector<Value *, 4> AllGVs;
+    SmallVector<Value *, 4> AllImportedModules;
 
     DIBuilder(const DIBuilder &) LLVM_DELETED_FUNCTION;
     void operator=(const DIBuilder &) LLVM_DELETED_FUNCTION;
@@ -82,18 +86,18 @@ namespace llvm {
     /// @param Lang     Source programming language, eg. dwarf::DW_LANG_C99
     /// @param File     File name
     /// @param Dir      Directory
-    /// @param Producer String identify producer of debugging information. 
+    /// @param Producer String identify producer of debugging information.
     ///                 Usuall this is a compiler version string.
     /// @param isOptimized A boolean flag which indicates whether optimization
     ///                    is ON or not.
-    /// @param Flags    This string lists command line options. This string is 
+    /// @param Flags    This string lists command line options. This string is
     ///                 directly embedded in debug info output which may be used
     ///                 by a tool analyzing generated debugging information.
-    /// @param RV       This indicates runtime version for languages like 
+    /// @param RV       This indicates runtime version for languages like
     ///                 Objective-C.
     /// @param SplitName The name of the file that we'll split debug info out
     ///                  into.
-    void createCompileUnit(unsigned Lang, StringRef File, StringRef Dir, 
+    void createCompileUnit(unsigned Lang, StringRef File, StringRef Dir,
                            StringRef Producer, bool isOptimized,
                            StringRef Flags, unsigned RV,
                            StringRef SplitName = StringRef());
@@ -101,14 +105,14 @@ namespace llvm {
     /// createFile - Create a file descriptor to hold debugging information
     /// for a file.
     DIFile createFile(StringRef Filename, StringRef Directory);
-                           
+
     /// createEnumerator - Create a single enumerator value.
     DIEnumerator createEnumerator(StringRef Name, uint64_t Val);
 
     /// createNullPtrType - Create C++0x nullptr type.
     DIType createNullPtrType(StringRef Name);
 
-    /// createBasicType - Create debugging information entry for a basic 
+    /// createBasicType - Create debugging information entry for a basic
     /// type.
     /// @param Name        Type name.
     /// @param SizeInBits  Size of the type.
@@ -158,7 +162,7 @@ namespace llvm {
     /// @param Ty           Original type.
     /// @param BaseTy       Base type. Ty is inherits from base.
     /// @param BaseOffset   Base offset.
-    /// @param Flags        Flags to describe inheritance attribute, 
+    /// @param Flags        Flags to describe inheritance attribute,
     ///                     e.g. private
     DIDerivedType createInheritance(DIType Ty, DIType BaseTy,
                                     uint64_t BaseOffset, unsigned Flags);
@@ -209,8 +213,8 @@ namespace llvm {
     ///                           selector.
     /// @param PropertyAttributes Objective C property attributes.
     DIType createObjCIVar(StringRef Name, DIFile File,
-                          unsigned LineNo, uint64_t SizeInBits, 
-                          uint64_t AlignInBits, uint64_t OffsetInBits, 
+                          unsigned LineNo, uint64_t SizeInBits,
+                          uint64_t AlignInBits, uint64_t OffsetInBits,
                           unsigned Flags, DIType Ty,
                           StringRef PropertyName = StringRef(),
                           StringRef PropertyGetterName = StringRef(),
@@ -229,8 +233,8 @@ namespace llvm {
     /// @param Ty           Parent type.
     /// @param PropertyNode Property associated with this ivar.
     DIType createObjCIVar(StringRef Name, DIFile File,
-                          unsigned LineNo, uint64_t SizeInBits, 
-                          uint64_t AlignInBits, uint64_t OffsetInBits, 
+                          unsigned LineNo, uint64_t SizeInBits,
+                          uint64_t AlignInBits, uint64_t OffsetInBits,
                           unsigned Flags, DIType Ty,
                           MDNode *PropertyNode);
 
@@ -249,7 +253,7 @@ namespace llvm {
                                       StringRef SetterName,
                                       unsigned PropertyAttributes,
                                       DIType Ty);
-      
+
     /// createClassType - Create debugging information entry for a class.
     /// @param Scope        Scope in which this class is defined.
     /// @param Name         class name.
@@ -261,7 +265,7 @@ namespace llvm {
     /// @param Flags        Flags to encode member attribute, e.g. private
     /// @param Elements     class members.
     /// @param VTableHolder Debug info of the base class that contains vtable
-    ///                     for this type. This is used in 
+    ///                     for this type. This is used in
     ///                     DW_AT_containing_type. See DWARF documentation
     ///                     for more info.
     /// @param TemplateParms Template type parameters.
@@ -346,22 +350,25 @@ namespace llvm {
     /// @param AlignInBits  Alignment.
     /// @param Ty           Element type.
     /// @param Subscripts   Subscripts.
-    DIType createVectorType(uint64_t Size, uint64_t AlignInBits, 
+    DIType createVectorType(uint64_t Size, uint64_t AlignInBits,
                             DIType Ty, DIArray Subscripts);
 
-    /// createEnumerationType - Create debugging information entry for an 
+    /// createEnumerationType - Create debugging information entry for an
     /// enumeration.
-    /// @param Scope        Scope in which this enumeration is defined.
-    /// @param Name         Union name.
-    /// @param File         File where this member is defined.
-    /// @param LineNumber   Line number.
-    /// @param SizeInBits   Member size.
-    /// @param AlignInBits  Member alignment.
-    /// @param Elements     Enumeration elements.
-    DICompositeType createEnumerationType(
-        DIDescriptor Scope, StringRef Name, DIFile File, unsigned LineNumber,
-        uint64_t SizeInBits, uint64_t AlignInBits, DIArray Elements,
-        DIType ClassType);
+    /// @param Scope          Scope in which this enumeration is defined.
+    /// @param Name           Enumeration name.
+    /// @param File           File where this enumeration is defined.
+    /// @param LineNumber     Line number.
+    /// @param SizeInBits     Enumeration size.
+    /// @param AlignInBits    Enumeration alignment.
+    /// @param Elements       Enumeration elements.
+    /// @param UnderlyingType Underlying type of a C++11/ObjC fixed enum.
+    DICompositeType createEnumerationType(DIDescriptor Scope, StringRef Name,
+                                          DIFile File, unsigned LineNumber,
+                                          uint64_t SizeInBits,
+                                          uint64_t AlignInBits,
+                                          DIArray Elements,
+                                          DIType UnderlyingType);
 
     /// createSubroutineType - Create subroutine type.
     /// @param File           File in which this subroutine is defined.
@@ -381,7 +388,7 @@ namespace llvm {
                              DIFile F, unsigned Line, unsigned RuntimeLang = 0,
                              uint64_t SizeInBits = 0, uint64_t AlignInBits = 0);
 
-    /// retainType - Retain DIType in a module even if it is not referenced 
+    /// retainType - Retain DIType in a module even if it is not referenced
     /// through debug info anchors.
     void retainType(DIType T);
 
@@ -422,7 +429,7 @@ namespace llvm {
                          unsigned LineNo, DIType Ty, bool isLocalToUnit,
                          llvm::Value *Val);
 
-    /// createStaticVariable - Create a new descriptor for the specified 
+    /// createStaticVariable - Create a new descriptor for the specified
     /// variable.
     /// @param Context     Variable scope.
     /// @param Name        Name of the variable.
@@ -435,13 +442,13 @@ namespace llvm {
     /// @param Val         llvm::Value of the variable.
     /// @param Decl        Reference to the corresponding declaration.
     DIGlobalVariable
-    createStaticVariable(DIDescriptor Context, StringRef Name, 
-                         StringRef LinkageName, DIFile File, unsigned LineNo, 
+    createStaticVariable(DIDescriptor Context, StringRef Name,
+                         StringRef LinkageName, DIFile File, unsigned LineNo,
                          DIType Ty, bool isLocalToUnit, llvm::Value *Val,
                          MDNode *Decl = NULL);
 
 
-    /// createLocalVariable - Create a new descriptor for the specified 
+    /// createLocalVariable - Create a new descriptor for the specified
     /// local variable.
     /// @param Tag         Dwarf TAG. Usually DW_TAG_auto_variable or
     ///                    DW_TAG_arg_variable.
@@ -518,7 +525,7 @@ namespace llvm {
     /// @param Ty            Function type.
     /// @param isLocalToUnit True if this function is not externally visible..
     /// @param isDefinition  True if this is a function definition.
-    /// @param Virtuality    Attributes describing virtualness. e.g. pure 
+    /// @param Virtuality    Attributes describing virtualness. e.g. pure
     ///                      virtual function.
     /// @param VTableIndex   Index no of this method in virtual table.
     /// @param VTableHolder  Type that holds vtable.
@@ -556,7 +563,7 @@ namespace llvm {
     /// @param File        Source file.
     DILexicalBlockFile createLexicalBlockFile(DIDescriptor Scope,
                                               DIFile File);
-    
+
     /// createLexicalBlock - This creates a descriptor for a lexical block
     /// with the specified parent context.
     /// @param Scope       Parent lexical scope.
@@ -566,6 +573,13 @@ namespace llvm {
     DILexicalBlock createLexicalBlock(DIDescriptor Scope, DIFile File,
                                       unsigned Line, unsigned Col);
 
+    /// \brief Create a descriptor for an imported module.
+    /// @param Context The scope this module is imported into
+    /// @param NS The namespace being imported here
+    /// @param Line Line number
+    DIImportedModule createImportedModule(DIScope Context, DINameSpace NS,
+                                          unsigned Line);
+
     /// insertDeclare - Insert a new llvm.dbg.declare intrinsic call.
     /// @param Storage     llvm::Value of the variable
     /// @param VarInfo     Variable's debug info descriptor.
@@ -587,16 +601,16 @@ namespace llvm {
     /// @param VarInfo      Variable's debug info descriptor.
     /// @param InsertAtEnd Location for the new intrinsic.
     Instruction *insertDbgValueIntrinsic(llvm::Value *Val, uint64_t Offset,
-                                         DIVariable VarInfo, 
+                                         DIVariable VarInfo,
                                          BasicBlock *InsertAtEnd);
-    
+
     /// insertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call.
     /// @param Val          llvm::Value of the variable
     /// @param Offset       Offset
     /// @param VarInfo      Variable's debug info descriptor.
     /// @param InsertBefore Location for the new intrinsic.
     Instruction *insertDbgValueIntrinsic(llvm::Value *Val, uint64_t Offset,
-                                         DIVariable VarInfo, 
+                                         DIVariable VarInfo,
                                          Instruction *InsertBefore);
 
   };
diff --git a/include/llvm/DebugInfo.h b/include/llvm/DebugInfo.h
index 15f9187..f9b58f4 100644
--- a/include/llvm/DebugInfo.h
+++ b/include/llvm/DebugInfo.h
@@ -125,6 +125,7 @@ namespace llvm {
     bool isTemplateTypeParameter() const;
     bool isTemplateValueParameter() const;
     bool isObjCProperty() const;
+    bool isImportedModule() const;
 
     /// print - print descriptor.
     void print(raw_ostream &OS) const;
@@ -199,8 +200,9 @@ namespace llvm {
     DIArray getRetainedTypes() const;
     DIArray getSubprograms() const;
     DIArray getGlobalVariables() const;
+    DIArray getImportedModules() const;
 
-    StringRef getSplitDebugFilename() const { return getStringField(11); }
+    StringRef getSplitDebugFilename() const { return getStringField(12); }
 
     /// Verify - Verify that a compile unit is well formed.
     bool Verify() const;
@@ -342,7 +344,10 @@ namespace llvm {
 
   /// DICompositeType - This descriptor holds a type that can refer to multiple
   /// other types, like a function or struct.
-  /// FIXME: Why is this a DIDerivedType??
+  /// DICompositeType is derived from DIDerivedType because some
+  /// composite types (such as enums) can be derived from basic types.
+  // FIXME: Make this derive from DIType directly & just store the
+  // base type in a single DIType field.
   class DICompositeType : public DIDerivedType {
     friend class DIDescriptor;
     void printInternal(raw_ostream &OS) const;
@@ -678,6 +683,18 @@ namespace llvm {
     bool Verify() const;
   };
 
+  /// \brief An imported module (C++ using directive or similar).
+  class DIImportedModule : public DIDescriptor {
+    friend class DIDescriptor;
+    void printInternal(raw_ostream &OS) const;
+  public:
+    explicit DIImportedModule(const MDNode *N) : DIDescriptor(N) { }
+    DIScope getContext() const { return getFieldAs<DIScope>(1); }
+    DINameSpace getNameSpace() const { return getFieldAs<DINameSpace>(2); }
+    unsigned getLineNumber() const { return getUnsignedField(3); }
+    bool Verify() const;
+  };
+
   /// getDISubprogram - Find subprogram that is enclosing this scope.
   DISubprogram getDISubprogram(const MDNode *Scope);
 
diff --git a/lib/DebugInfo/DWARFFormValue.h b/include/llvm/DebugInfo/DWARFFormValue.h
index b863001..eaaccfb 100644
--- a/lib/DebugInfo/DWARFFormValue.h
+++ b/include/llvm/DebugInfo/DWARFFormValue.h
@@ -74,7 +74,7 @@ public:
                         uint32_t *offset_ptr, const DWARFCompileUnit *cu);
   static bool isBlockForm(uint16_t form);
   static bool isDataForm(uint16_t form);
-  static const uint8_t *getFixedFormSizesForAddressSize(uint8_t addr_size);
+  static const uint8_t *getFixedFormSizes(uint8_t AddrSize, uint16_t Version);
 };
 
 }
diff --git a/include/llvm/ExecutionEngine/ExecutionEngine.h b/include/llvm/ExecutionEngine/ExecutionEngine.h
index 3fd69e2..bbaebc6 100644
--- a/include/llvm/ExecutionEngine/ExecutionEngine.h
+++ b/include/llvm/ExecutionEngine/ExecutionEngine.h
@@ -15,6 +15,7 @@
 #ifndef LLVM_EXECUTIONENGINE_EXECUTIONENGINE_H
 #define LLVM_EXECUTIONENGINE_EXECUTIONENGINE_H
 
+#include "llvm-c/ExecutionEngine.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
@@ -42,6 +43,7 @@ class JITMemoryManager;
 class MachineCodeInfo;
 class Module;
 class MutexGuard;
+class ObjectCache;
 class DataLayout;
 class Triple;
 class Type;
@@ -371,6 +373,12 @@ public:
   virtual void RegisterJITEventListener(JITEventListener *) {}
   virtual void UnregisterJITEventListener(JITEventListener *) {}
 
+  /// Sets the pre-compiled object cache.  The ownership of the ObjectCache is
+  /// not changed.  Supported by MCJIT but not JIT.
+  virtual void setObjectCache(ObjectCache *) {
+    llvm_unreachable("No support for an object cache");
+  }
+
   /// DisableLazyCompilation - When lazy compilation is off (the default), the
   /// JIT will eagerly compile every function reachable from the argument to
   /// getPointerToFunction.  If lazy compilation is turned on, the JIT will only
@@ -625,6 +633,9 @@ public:
   ExecutionEngine *create(TargetMachine *TM);
 };
 
+// Create wrappers for C Binding types (see CBindingWrapping.h).
+DEFINE_SIMPLE_CONVERSION_FUNCTIONS(ExecutionEngine, LLVMExecutionEngineRef)
+
 } // End llvm namespace
 
 #endif
diff --git a/include/llvm/ExecutionEngine/ObjectCache.h b/include/llvm/ExecutionEngine/ObjectCache.h
new file mode 100644
index 0000000..0bee861
--- /dev/null
+++ b/include/llvm/ExecutionEngine/ObjectCache.h
@@ -0,0 +1,54 @@
+//===-- ObjectCache.h - Class definition for the ObjectCache ---*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_OBJECTCACHE_H
+#define LLVM_EXECUTIONENGINE_OBJECTCACHE_H
+
+#include "llvm/Support/MemoryBuffer.h"
+
+namespace llvm {
+
+class Module;
+
+/// This is the base ObjectCache type which can be provided to an
+/// ExecutionEngine for the purpose of avoiding compilation for Modules that
+/// have already been compiled and an object file is available.
+class ObjectCache {
+public:
+  ObjectCache() { }
+
+  virtual ~ObjectCache() { }
+
+  /// notifyObjectCompiled - Provides a pointer to compiled code for Module M.
+  virtual void notifyObjectCompiled(const Module *M,
+                                    const MemoryBuffer *Obj) = 0;
+
+  /// getObjectCopy - Returns a pointer to a newly allocated MemoryBuffer that
+  /// contains the object which corresponds with Module M, or 0 if an object is
+  /// not available. The caller owns the MemoryBuffer returned by this function.
+  MemoryBuffer* getObjectCopy(const Module* M) {
+    const MemoryBuffer* Obj = getObject(M);
+    if (!Obj)
+      return 0;
+    return MemoryBuffer::getMemBufferCopy(Obj->getBuffer());
+  }
+
+protected:
+  /// getObject - Returns a pointer to a MemoryBuffer that contains an object
+  /// that corresponds with Module M, or 0 if an object is not available.
+  /// The pointer returned by this function is not suitable for loading because
+  /// the memory is read-only and owned by the ObjectCache. To retrieve an
+  /// owning pointer to a MemoryBuffer (which is suitable for calling
+  /// RuntimeDyld::loadObject() with) use getObjectCopy() instead.
+  virtual const MemoryBuffer* getObject(const Module* M) = 0;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/include/llvm/ExecutionEngine/RuntimeDyld.h b/include/llvm/ExecutionEngine/RuntimeDyld.h
index 4222d53..c6c126c 100644
--- a/include/llvm/ExecutionEngine/RuntimeDyld.h
+++ b/include/llvm/ExecutionEngine/RuntimeDyld.h
@@ -66,6 +66,11 @@ public:
   ///
   /// Returns true if an error occurred, false otherwise.
   virtual bool applyPermissions(std::string *ErrMsg = 0) = 0;
+
+  /// Register the EH frames with the runtime so that c++ exceptions work. The
+  /// default implementation does nothing. Look at SectionMemoryManager for one
+  /// that uses __register_frame.
+  virtual void registerEHFrames(StringRef SectionData);
 };
 
 class RuntimeDyld {
@@ -109,6 +114,8 @@ public:
   void mapSectionAddress(const void *LocalAddress, uint64_t TargetAddress);
 
   StringRef getErrorString();
+
+  StringRef getEHFrameSection();
 };
 
 } // end namespace llvm
diff --git a/include/llvm/ExecutionEngine/SectionMemoryManager.h b/include/llvm/ExecutionEngine/SectionMemoryManager.h
index ae5004e1..84a4e08 100644
--- a/include/llvm/ExecutionEngine/SectionMemoryManager.h
+++ b/include/llvm/ExecutionEngine/SectionMemoryManager.h
@@ -72,6 +72,8 @@ public:
   /// \returns true if an error occurred, false otherwise.
   virtual bool applyPermissions(std::string *ErrMsg = 0);
 
+  void registerEHFrames(StringRef SectionData);
+
   /// This method returns the address of the specified function. As such it is
   /// only useful for resolving library symbols, not code generated symbols.
   ///
@@ -87,9 +89,7 @@ public:
   /// explicit cache flush, otherwise JIT code manipulations (like resolved
   /// relocations) will get to the data cache but not to the instruction cache.
   ///
-  /// This method is not called by RuntimeDyld or MCJIT during the load
-  /// process.  Clients may call this function when needed.  See the lli
-  /// tool for example use.
+  /// This method is called from applyPermissions.
   virtual void invalidateInstructionCache();
 
 private:
diff --git a/include/llvm/IR/Argument.h b/include/llvm/IR/Argument.h
index ef4e4fc..40d61ff 100644
--- a/include/llvm/IR/Argument.h
+++ b/include/llvm/IR/Argument.h
@@ -78,6 +78,10 @@ public:
   /// containing function.
   bool hasStructRetAttr() const;
 
+  /// \brief Return true if this argument has the returned attribute on it in
+  /// its containing function.
+  bool hasReturnedAttr() const;
+
   /// \brief Add a Attribute to an argument.
   void addAttr(AttributeSet AS);
 
diff --git a/include/llvm/IR/Attributes.h b/include/llvm/IR/Attributes.h
index 074b387..2c7da64 100644
--- a/include/llvm/IR/Attributes.h
+++ b/include/llvm/IR/Attributes.h
@@ -87,6 +87,7 @@ public:
     OptimizeForSize,       ///< opt_size
     ReadNone,              ///< Function does not access memory
     ReadOnly,              ///< Function only reads from memory
+    Returned,              ///< Return value is always equal to this argument
     ReturnsTwice,          ///< Function can return twice
     SExt,                  ///< Sign extended before/after call
     StackAlignment,        ///< Alignment of stack for function (3 bits)
@@ -209,7 +210,7 @@ private:
   AttributeSetImpl *pImpl;
 
   /// \brief The attributes for the specified index are returned.
-  AttributeSetNode *getAttributes(unsigned Idx) const;
+  AttributeSetNode *getAttributes(unsigned Index) const;
 
   /// \brief Create an AttributeSet with the specified parameters in it.
   static AttributeSet get(LLVMContext &C,
@@ -233,35 +234,35 @@ public:
 
   /// \brief Return an AttributeSet with the specified parameters in it.
   static AttributeSet get(LLVMContext &C, ArrayRef<AttributeSet> Attrs);
-  static AttributeSet get(LLVMContext &C, unsigned Idx,
+  static AttributeSet get(LLVMContext &C, unsigned Index,
                           ArrayRef<Attribute::AttrKind> Kind);
-  static AttributeSet get(LLVMContext &C, unsigned Idx, AttrBuilder &B);
+  static AttributeSet get(LLVMContext &C, unsigned Index, AttrBuilder &B);
 
   /// \brief Add an attribute to the attribute set at the given index. Since
   /// attribute sets are immutable, this returns a new set.
-  AttributeSet addAttribute(LLVMContext &C, unsigned Idx,
+  AttributeSet addAttribute(LLVMContext &C, unsigned Index,
                             Attribute::AttrKind Attr) const;
 
   /// \brief Add an attribute to the attribute set at the given index. Since
   /// attribute sets are immutable, this returns a new set.
-  AttributeSet addAttribute(LLVMContext &C, unsigned Idx,
+  AttributeSet addAttribute(LLVMContext &C, unsigned Index,
                             StringRef Kind) const;
 
   /// \brief Add attributes to the attribute set at the given index. Since
   /// attribute sets are immutable, this returns a new set.
-  AttributeSet addAttributes(LLVMContext &C, unsigned Idx,
+  AttributeSet addAttributes(LLVMContext &C, unsigned Index,
                              AttributeSet Attrs) const;
 
   /// \brief Remove the specified attribute at the specified index from this
   /// attribute list. Since attribute lists are immutable, this returns the new
   /// list.
-  AttributeSet removeAttribute(LLVMContext &C, unsigned Idx, 
+  AttributeSet removeAttribute(LLVMContext &C, unsigned Index, 
                                Attribute::AttrKind Attr) const;
 
   /// \brief Remove the specified attributes at the specified index from this
   /// attribute list. Since attribute lists are immutable, this returns the new
   /// list.
-  AttributeSet removeAttributes(LLVMContext &C, unsigned Idx, 
+  AttributeSet removeAttributes(LLVMContext &C, unsigned Index, 
                                 AttributeSet Attrs) const;
 
   //===--------------------------------------------------------------------===//
@@ -272,7 +273,7 @@ public:
   LLVMContext &getContext() const;
 
   /// \brief The attributes for the specified index are returned.
-  AttributeSet getParamAttributes(unsigned Idx) const;
+  AttributeSet getParamAttributes(unsigned Index) const;
 
   /// \brief The attributes for the ret value are returned.
   AttributeSet getRetAttributes() const;
@@ -300,7 +301,7 @@ public:
   Attribute getAttribute(unsigned Index, StringRef Kind) const;
 
   /// \brief Return the alignment for the specified function parameter.
-  unsigned getParamAlignment(unsigned Idx) const;
+  unsigned getParamAlignment(unsigned Index) const;
 
   /// \brief Get the stack alignment.
   unsigned getStackAlignment(unsigned Index) const;
@@ -310,8 +311,8 @@ public:
 
   typedef ArrayRef<Attribute>::iterator iterator;
 
-  iterator begin(unsigned Idx) const;
-  iterator end(unsigned Idx) const;
+  iterator begin(unsigned Slot) const;
+  iterator end(unsigned Slot) const;
 
   /// operator==/!= - Provide equality predicates.
   bool operator==(const AttributeSet &RHS) const {
@@ -344,7 +345,7 @@ public:
   unsigned getNumSlots() const;
 
   /// \brief Return the index for the given slot.
-  uint64_t getSlotIndex(unsigned Slot) const;
+  unsigned getSlotIndex(unsigned Slot) const;
 
   /// \brief Return the attributes at the given slot.
   AttributeSet getSlotAttributes(unsigned Slot) const;
@@ -473,9 +474,6 @@ public:
 
   bool td_empty() const              { return TargetDepAttrs.empty(); }
 
-  /// \brief Remove attributes that are used on functions only.
-  void removeFunctionOnlyAttrs();
-
   bool operator==(const AttrBuilder &B);
   bool operator!=(const AttrBuilder &B) {
     return !(*this == B);
diff --git a/include/llvm/IR/BasicBlock.h b/include/llvm/IR/BasicBlock.h
index ea5695a..3bdc95d 100644
--- a/include/llvm/IR/BasicBlock.h
+++ b/include/llvm/IR/BasicBlock.h
@@ -18,6 +18,7 @@
 #include "llvm/ADT/ilist.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/SymbolTableListTraits.h"
+#include "llvm/Support/CBindingWrapping.h"
 #include "llvm/Support/DataTypes.h"
 
 namespace llvm {
@@ -298,6 +299,9 @@ private:
   }
 };
 
+// Create wrappers for C Binding types (see CBindingWrapping.h).
+DEFINE_SIMPLE_CONVERSION_FUNCTIONS(BasicBlock, LLVMBasicBlockRef)
+
 } // End llvm namespace
 
 #endif
diff --git a/include/llvm/IR/Constants.h b/include/llvm/IR/Constants.h
index ad258f9..2f29f54 100644
--- a/include/llvm/IR/Constants.h
+++ b/include/llvm/IR/Constants.h
@@ -26,6 +26,7 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/OperandTraits.h"
+#include "llvm/IR/DerivedTypes.h"
 
 namespace llvm {
 
@@ -138,7 +139,7 @@ public:
   /// which reduces the amount of casting needed in parts of the compiler.
   ///
   inline IntegerType *getType() const {
-    return reinterpret_cast<IntegerType*>(Value::getType());
+    return cast<IntegerType>(Value::getType());
   }
 
   /// This static method returns true if the type Ty is big enough to
@@ -354,7 +355,7 @@ public:
   /// which reduces the amount of casting needed in parts of the compiler.
   ///
   inline ArrayType *getType() const {
-    return reinterpret_cast<ArrayType*>(Value::getType());
+    return cast<ArrayType>(Value::getType());
   }
 
   virtual void destroyConstant();
@@ -412,7 +413,7 @@ public:
   /// getType() specialization - Reduce amount of casting...
   ///
   inline StructType *getType() const {
-    return reinterpret_cast<StructType*>(Value::getType());
+    return cast<StructType>(Value::getType());
   }
 
   virtual void destroyConstant();
@@ -455,7 +456,7 @@ public:
   /// which reduces the amount of casting needed in parts of the compiler.
   ///
   inline VectorType *getType() const {
-    return reinterpret_cast<VectorType*>(Value::getType());
+    return cast<VectorType>(Value::getType());
   }
 
   /// getSplatValue - If this is a splat constant, meaning that all of the
@@ -486,7 +487,7 @@ class ConstantPointerNull : public Constant {
   ConstantPointerNull(const ConstantPointerNull &) LLVM_DELETED_FUNCTION;
 protected:
   explicit ConstantPointerNull(PointerType *T)
-    : Constant(reinterpret_cast<Type*>(T),
+    : Constant(T,
                Value::ConstantPointerNullVal, 0, 0) {}
 
 protected:
@@ -504,7 +505,7 @@ public:
   /// which reduces the amount of casting needed in parts of the compiler.
   ///
   inline PointerType *getType() const {
-    return reinterpret_cast<PointerType*>(Value::getType());
+    return cast<PointerType>(Value::getType());
   }
 
   /// Methods for support type inquiry through isa, cast, and dyn_cast:
@@ -580,7 +581,7 @@ public:
   /// SequentialType, which reduces the amount of casting needed in parts of the
   /// compiler.
   inline SequentialType *getType() const {
-    return reinterpret_cast<SequentialType*>(Value::getType());
+    return cast<SequentialType>(Value::getType());
   }
 
   /// getElementType - Return the element type of the array/vector.
@@ -679,7 +680,7 @@ public:
   /// which reduces the amount of casting needed in parts of the compiler.
   ///
   inline ArrayType *getType() const {
-    return reinterpret_cast<ArrayType*>(Value::getType());
+    return cast<ArrayType>(Value::getType());
   }
 
   /// Methods for support type inquiry through isa, cast, and dyn_cast:
@@ -732,7 +733,7 @@ public:
   /// which reduces the amount of casting needed in parts of the compiler.
   ///
   inline VectorType *getType() const {
-    return reinterpret_cast<VectorType*>(Value::getType());
+    return cast<VectorType>(Value::getType());
   }
 
   /// Methods for support type inquiry through isa, cast, and dyn_cast:
diff --git a/include/llvm/IR/DataLayout.h b/include/llvm/IR/DataLayout.h
index 547d857..b0def6b 100644
--- a/include/llvm/IR/DataLayout.h
+++ b/include/llvm/IR/DataLayout.h
@@ -171,13 +171,13 @@ public:
   /// Initialize target data from properties stored in the module.
   explicit DataLayout(const Module *M);
 
-  DataLayout(const DataLayout &TD) :
+  DataLayout(const DataLayout &DL) :
     ImmutablePass(ID),
-    LittleEndian(TD.isLittleEndian()),
-    StackNaturalAlign(TD.StackNaturalAlign),
-    LegalIntWidths(TD.LegalIntWidths),
-    Alignments(TD.Alignments),
-    Pointers(TD.Pointers),
+    LittleEndian(DL.isLittleEndian()),
+    StackNaturalAlign(DL.StackNaturalAlign),
+    LegalIntWidths(DL.LegalIntWidths),
+    Alignments(DL.Alignments),
+    Pointers(DL.Pointers),
     LayoutMap(0)
   { }
 
@@ -426,7 +426,7 @@ public:
 
 private:
   friend class DataLayout;   // Only DataLayout can create this class
-  StructLayout(StructType *ST, const DataLayout &TD);
+  StructLayout(StructType *ST, const DataLayout &DL);
 };
 
 
diff --git a/include/llvm/IR/DerivedTypes.h b/include/llvm/IR/DerivedTypes.h
index 6c00f596..e279e60 100644
--- a/include/llvm/IR/DerivedTypes.h
+++ b/include/llvm/IR/DerivedTypes.h
@@ -117,7 +117,7 @@ public:
   /// argument type.
   static bool isValidArgumentType(Type *ArgTy);
 
-  bool isVarArg() const { return getSubclassData(); }
+  bool isVarArg() const { return getSubclassData()!=0; }
   Type *getReturnType() const { return ContainedTys[0]; }
 
   typedef Type::subtype_iterator param_iterator;
diff --git a/include/llvm/IR/GlobalValue.h b/include/llvm/IR/GlobalValue.h
index f398bc1..260302a 100644
--- a/include/llvm/IR/GlobalValue.h
+++ b/include/llvm/IR/GlobalValue.h
@@ -19,6 +19,7 @@
 #define LLVM_IR_GLOBALVALUE_H
 
 #include "llvm/IR/Constant.h"
+#include "llvm/IR/DerivedTypes.h"
 
 namespace llvm {
 
@@ -105,7 +106,7 @@ public:
 
   /// getType - Global values are always pointers.
   inline PointerType *getType() const {
-    return reinterpret_cast<PointerType*>(User::getType());
+    return cast<PointerType>(User::getType());
   }
 
   static LinkageTypes getLinkOnceLinkage(bool ODR) {
diff --git a/include/llvm/IR/IRBuilder.h b/include/llvm/IR/IRBuilder.h
index 1c71d0a..f11d3b4 100644
--- a/include/llvm/IR/IRBuilder.h
+++ b/include/llvm/IR/IRBuilder.h
@@ -23,6 +23,7 @@
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Operator.h"
+#include "llvm/Support/CBindingWrapping.h"
 #include "llvm/Support/ConstantFolder.h"
 
 namespace llvm {
@@ -48,6 +49,10 @@ protected:
 class IRBuilderBase {
   DebugLoc CurDbgLocation;
 protected:
+  /// Save the current debug location here while we are suppressing
+  /// line table entries.
+  llvm::DebugLoc SavedDbgLocation;
+
   BasicBlock *BB;
   BasicBlock::iterator InsertPt;
   LLVMContext &Context;
@@ -112,6 +117,23 @@ public:
     CurDbgLocation = L;
   }
 
+  /// \brief Temporarily suppress DebugLocations from being attached
+  /// to emitted instructions, until the next call to
+  /// SetCurrentDebugLocation() or EnableDebugLocations().  Use this
+  /// if you want an instruction to be counted towards the prologue or
+  /// if there is no useful source location.
+  void DisableDebugLocations() {
+    // Remember the active location so EnableDebugLocations() can restore it.
+    SavedDbgLocation = getCurrentDebugLocation();
+    SetCurrentDebugLocation(llvm::DebugLoc());
+  }
+
+  /// \brief Restore the previously saved DebugLocation.
+  void EnableDebugLocations() {
+    assert(CurDbgLocation.isUnknown());
+    SetCurrentDebugLocation(SavedDbgLocation);
+  }
+
   /// \brief Get location information used by debugging information.
   DebugLoc getCurrentDebugLocation() const { return CurDbgLocation; }
 
@@ -1396,6 +1418,9 @@ public:
   }
 };
 
+// Create wrappers for C Binding types (see CBindingWrapping.h).
+DEFINE_SIMPLE_CONVERSION_FUNCTIONS(IRBuilder<>, LLVMBuilderRef)
+
 }
 
 #endif
diff --git a/include/llvm/IR/Intrinsics.h b/include/llvm/IR/Intrinsics.h
index c97cd91..c81d110 100644
--- a/include/llvm/IR/Intrinsics.h
+++ b/include/llvm/IR/Intrinsics.h
@@ -45,12 +45,12 @@ namespace Intrinsic {
   
   /// Intrinsic::getName(ID) - Return the LLVM name for an intrinsic, such as
   /// "llvm.ppc.altivec.lvx".
-  std::string getName(ID id, ArrayRef<Type*> Tys = ArrayRef<Type*>());
-  
+  std::string getName(ID id, ArrayRef<Type*> Tys = None);
+
   /// Intrinsic::getType(ID) - Return the function type for an intrinsic.
   ///
   FunctionType *getType(LLVMContext &Context, ID id,
-                        ArrayRef<Type*> Tys = ArrayRef<Type*>());
+                        ArrayRef<Type*> Tys = None);
 
   /// Intrinsic::isOverloaded(ID) - Returns true if the intrinsic can be
   /// overloaded.
@@ -63,14 +63,12 @@ namespace Intrinsic {
   /// Intrinsic::getDeclaration(M, ID) - Create or insert an LLVM Function
   /// declaration for an intrinsic, and return it.
   ///
-  /// The Tys and numTys parameters are for intrinsics with overloaded types
-  /// (e.g., those using iAny, fAny, vAny, or iPTRAny). For a declaration for an
-  /// overloaded intrinsic, Tys should point to an array of numTys pointers to
-  /// Type, and must provide exactly one type for each overloaded type in the
-  /// intrinsic.
-  Function *getDeclaration(Module *M, ID id,
-                           ArrayRef<Type*> Tys = ArrayRef<Type*>());
-                           
+  /// The Tys parameter is for intrinsics with overloaded types (e.g., those
+  /// using iAny, fAny, vAny, or iPTRAny).  For a declaration of an overloaded
+  /// intrinsic, Tys must provide exactly one type for each overloaded type in
+  /// the intrinsic.
+  Function *getDeclaration(Module *M, ID id, ArrayRef<Type*> Tys = None);
+
   /// Map a GCC builtin name to an intrinsic ID.
   ID getIntrinsicForGCCBuiltin(const char *Prefix, const char *BuiltinName);
   
diff --git a/include/llvm/IR/IntrinsicsMips.td b/include/llvm/IR/IntrinsicsMips.td
index e40e162..a0987c8 100644
--- a/include/llvm/IR/IntrinsicsMips.td
+++ b/include/llvm/IR/IntrinsicsMips.td
@@ -195,21 +195,21 @@ def int_mips_dpsq_sa_l_w: GCCBuiltin<"__builtin_mips_dpsq_sa_l_w">,
 def int_mips_cmpu_eq_qb: GCCBuiltin<"__builtin_mips_cmpu_eq_qb">,
   Intrinsic<[], [llvm_v4i8_ty, llvm_v4i8_ty], [Commutative]>;
 def int_mips_cmpu_lt_qb: GCCBuiltin<"__builtin_mips_cmpu_lt_qb">,
-  Intrinsic<[], [llvm_v4i8_ty, llvm_v4i8_ty], [Commutative]>;
+  Intrinsic<[], [llvm_v4i8_ty, llvm_v4i8_ty], []>;
 def int_mips_cmpu_le_qb: GCCBuiltin<"__builtin_mips_cmpu_le_qb">,
-  Intrinsic<[], [llvm_v4i8_ty, llvm_v4i8_ty], [Commutative]>;
+  Intrinsic<[], [llvm_v4i8_ty, llvm_v4i8_ty], []>;
 def int_mips_cmpgu_eq_qb: GCCBuiltin<"__builtin_mips_cmpgu_eq_qb">,
   Intrinsic<[llvm_i32_ty], [llvm_v4i8_ty, llvm_v4i8_ty], [Commutative]>;
 def int_mips_cmpgu_lt_qb: GCCBuiltin<"__builtin_mips_cmpgu_lt_qb">,
-  Intrinsic<[llvm_i32_ty], [llvm_v4i8_ty, llvm_v4i8_ty], [Commutative]>;
+  Intrinsic<[llvm_i32_ty], [llvm_v4i8_ty, llvm_v4i8_ty], []>;
 def int_mips_cmpgu_le_qb: GCCBuiltin<"__builtin_mips_cmpgu_le_qb">,
-  Intrinsic<[llvm_i32_ty], [llvm_v4i8_ty, llvm_v4i8_ty], [Commutative]>;
+  Intrinsic<[llvm_i32_ty], [llvm_v4i8_ty, llvm_v4i8_ty], []>;
 def int_mips_cmp_eq_ph: GCCBuiltin<"__builtin_mips_cmp_eq_ph">,
   Intrinsic<[], [mips_v2q15_ty, mips_v2q15_ty], [Commutative]>;
 def int_mips_cmp_lt_ph: GCCBuiltin<"__builtin_mips_cmp_lt_ph">,
-  Intrinsic<[], [mips_v2q15_ty, mips_v2q15_ty], [Commutative]>;
+  Intrinsic<[], [mips_v2q15_ty, mips_v2q15_ty], []>;
 def int_mips_cmp_le_ph: GCCBuiltin<"__builtin_mips_cmp_le_ph">,
-  Intrinsic<[], [mips_v2q15_ty, mips_v2q15_ty], [Commutative]>;
+  Intrinsic<[], [mips_v2q15_ty, mips_v2q15_ty], []>;
 
 //===----------------------------------------------------------------------===//
 // Extracting
@@ -307,9 +307,9 @@ def int_mips_balign: GCCBuiltin<"__builtin_mips_balign">,
 def int_mips_cmpgdu_eq_qb: GCCBuiltin<"__builtin_mips_cmpgdu_eq_qb">,
   Intrinsic<[llvm_i32_ty], [llvm_v4i8_ty, llvm_v4i8_ty], [Commutative]>;
 def int_mips_cmpgdu_lt_qb: GCCBuiltin<"__builtin_mips_cmpgdu_lt_qb">,
-  Intrinsic<[llvm_i32_ty], [llvm_v4i8_ty, llvm_v4i8_ty], [Commutative]>;
+  Intrinsic<[llvm_i32_ty], [llvm_v4i8_ty, llvm_v4i8_ty], []>;
 def int_mips_cmpgdu_le_qb: GCCBuiltin<"__builtin_mips_cmpgdu_le_qb">,
-  Intrinsic<[llvm_i32_ty], [llvm_v4i8_ty, llvm_v4i8_ty], [Commutative]>;
+  Intrinsic<[llvm_i32_ty], [llvm_v4i8_ty, llvm_v4i8_ty], []>;
 
 def int_mips_dpa_w_ph: GCCBuiltin<"__builtin_mips_dpa_w_ph">,
   Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_v2i16_ty, llvm_v2i16_ty],
diff --git a/include/llvm/IR/IntrinsicsNVVM.td b/include/llvm/IR/IntrinsicsNVVM.td
index ebfd03e..c248517 100644
--- a/include/llvm/IR/IntrinsicsNVVM.td
+++ b/include/llvm/IR/IntrinsicsNVVM.td
@@ -405,6 +405,8 @@ def llvm_anyi64ptr_ty     : LLVMAnyPointerType<llvm_i64_ty>;     // (space)i64*
 // Sqrt
 //
 
+  def int_nvvm_sqrt_f : GCCBuiltin<"__nvvm_sqrt_f">,
+      Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
   def int_nvvm_sqrt_rn_ftz_f : GCCBuiltin<"__nvvm_sqrt_rn_ftz_f">,
       Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
   def int_nvvm_sqrt_rn_f : GCCBuiltin<"__nvvm_sqrt_rn_f">,
diff --git a/include/llvm/IR/IntrinsicsPowerPC.td b/include/llvm/IR/IntrinsicsPowerPC.td
index cde39cc..5664f79 100644
--- a/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/include/llvm/IR/IntrinsicsPowerPC.td
@@ -117,28 +117,33 @@ let TargetPrefix = "ppc" in {  // All intrinsics start with "llvm.ppc.".
   // Loads.  These don't map directly to GCC builtins because they represent the
   // source address with a single pointer.
   def int_ppc_altivec_lvx :
-              Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], [IntrReadMem]>;
+              Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], [IntrReadArgMem]>;
   def int_ppc_altivec_lvxl :
-              Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], [IntrReadMem]>;
+              Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], [IntrReadArgMem]>;
   def int_ppc_altivec_lvebx :
-              Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty], [IntrReadMem]>;
+              Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty], [IntrReadArgMem]>;
   def int_ppc_altivec_lvehx :
-              Intrinsic<[llvm_v8i16_ty], [llvm_ptr_ty], [IntrReadMem]>;
+              Intrinsic<[llvm_v8i16_ty], [llvm_ptr_ty], [IntrReadArgMem]>;
   def int_ppc_altivec_lvewx :
-              Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], [IntrReadMem]>;
+              Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], [IntrReadArgMem]>;
 
   // Stores.  These don't map directly to GCC builtins because they represent the
   // source address with a single pointer.
   def int_ppc_altivec_stvx :
-              Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty], []>;
+              Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty],
+                        [IntrReadWriteArgMem]>;
   def int_ppc_altivec_stvxl :
-              Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty], []>;
+              Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty],
+                        [IntrReadWriteArgMem]>;
   def int_ppc_altivec_stvebx :
-              Intrinsic<[], [llvm_v16i8_ty, llvm_ptr_ty], []>;
+              Intrinsic<[], [llvm_v16i8_ty, llvm_ptr_ty],
+                        [IntrReadWriteArgMem]>;
   def int_ppc_altivec_stvehx :
-              Intrinsic<[], [llvm_v8i16_ty, llvm_ptr_ty], []>;
+              Intrinsic<[], [llvm_v8i16_ty, llvm_ptr_ty],
+                        [IntrReadWriteArgMem]>;
   def int_ppc_altivec_stvewx :
-              Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty], []>;
+              Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty],
+                        [IntrReadWriteArgMem]>;
 
   // Comparisons setting a vector.
   def int_ppc_altivec_vcmpbfp : GCCBuiltin<"__builtin_altivec_vcmpbfp">,
diff --git a/include/llvm/IR/LLVMContext.h b/include/llvm/IR/LLVMContext.h
index ae81e5b..f25d820 100644
--- a/include/llvm/IR/LLVMContext.h
+++ b/include/llvm/IR/LLVMContext.h
@@ -15,7 +15,9 @@
 #ifndef LLVM_IR_LLVMCONTEXT_H
 #define LLVM_IR_LLVMCONTEXT_H
 
+#include "llvm/Support/CBindingWrapping.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm-c/Core.h"
 
 namespace llvm {
 
@@ -109,6 +111,19 @@ private:
 /// only care about operating on a single thread.
 extern LLVMContext &getGlobalContext();
 
+// Create wrappers for C Binding types (see CBindingWrapping.h).
+DEFINE_SIMPLE_CONVERSION_FUNCTIONS(LLVMContext, LLVMContextRef)
+
+/* Specialized opaque context conversions.
+ */
+inline LLVMContext **unwrap(LLVMContextRef* Tys) {
+  return reinterpret_cast<LLVMContext**>(Tys);
+}
+
+inline LLVMContextRef *wrap(const LLVMContext **Tys) {
+  return reinterpret_cast<LLVMContextRef*>(const_cast<LLVMContext**>(Tys));
+}
+
 }
 
 #endif
diff --git a/include/llvm/IR/MDBuilder.h b/include/llvm/IR/MDBuilder.h
index a1e3fb1..ce81b54 100644
--- a/include/llvm/IR/MDBuilder.h
+++ b/include/llvm/IR/MDBuilder.h
@@ -157,19 +157,31 @@ public:
   }
 
   /// \brief Return metadata for a TBAA struct node in the type DAG
-  /// with the given name, parents in the TBAA DAG.
+  /// with the given name, a list of pairs (field type in the type DAG, offset).
   MDNode *createTBAAStructTypeNode(StringRef Name,
-             ArrayRef<std::pair<uint64_t, MDNode*> > Fields) {
+             ArrayRef<std::pair<MDNode*, uint64_t> > Fields) {
     SmallVector<Value *, 4> Ops(Fields.size() * 2 + 1);
     Type *Int64 = IntegerType::get(Context, 64);
     Ops[0] = createString(Name);
     for (unsigned i = 0, e = Fields.size(); i != e; ++i) {
-      Ops[i * 2 + 1] = ConstantInt::get(Int64, Fields[i].first);
-      Ops[i * 2 + 2] = Fields[i].second;
+      Ops[i * 2 + 1] = Fields[i].first;
+      Ops[i * 2 + 2] = ConstantInt::get(Int64, Fields[i].second);
     }
     return MDNode::get(Context, Ops);
   }
 
+  /// \brief Return metadata for a TBAA scalar type node with the
+  /// given name, an offset and a parent in the TBAA type DAG.
+  MDNode *createTBAAScalarTypeNode(StringRef Name, MDNode *Parent,
+                                   uint64_t Offset = 0) {
+    SmallVector<Value *, 4> Ops(3);
+    Type *Int64 = IntegerType::get(Context, 64);
+    Ops[0] = createString(Name);
+    Ops[1] = Parent;
+    Ops[2] = ConstantInt::get(Int64, Offset);
+    return MDNode::get(Context, Ops);
+  }
+
   /// \brief Return metadata for a TBAA tag node with the given
   /// base type, access type and offset relative to the base type.
   MDNode *createTBAAStructTagNode(MDNode *BaseType, MDNode *AccessType,
diff --git a/include/llvm/IR/Module.h b/include/llvm/IR/Module.h
index 4460aa4..cb500ff 100644
--- a/include/llvm/IR/Module.h
+++ b/include/llvm/IR/Module.h
@@ -20,6 +20,7 @@
 #include "llvm/IR/GlobalAlias.h"
 #include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/Metadata.h"
+#include "llvm/Support/CBindingWrapping.h"
 #include "llvm/Support/DataTypes.h"
 
 namespace llvm {
@@ -584,6 +585,16 @@ inline raw_ostream &operator<<(raw_ostream &O, const Module &M) {
   return O;
 }
 
+// Create wrappers for C Binding types (see CBindingWrapping.h).
+DEFINE_SIMPLE_CONVERSION_FUNCTIONS(Module, LLVMModuleRef)
+
+/* LLVMModuleProviderRef exists for historical reasons, but now just holds a
+ * Module.
+ */
+inline Module *unwrap(LLVMModuleProviderRef MP) {
+  return reinterpret_cast<Module*>(MP);
+}
+  
 } // End llvm namespace
 
 #endif
diff --git a/include/llvm/IR/Type.h b/include/llvm/IR/Type.h
index d89ae24..1bf8789 100644
--- a/include/llvm/IR/Type.h
+++ b/include/llvm/IR/Type.h
@@ -17,8 +17,10 @@
 
 #include "llvm/ADT/APFloat.h"
 #include "llvm/Support/Casting.h"
+#include "llvm/Support/CBindingWrapping.h"
 #include "llvm/Support/DataTypes.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm-c/Core.h"
 
 namespace llvm {
 
@@ -467,6 +469,19 @@ template <> struct GraphTraits<const Type*> {
   }
 };
 
+// Create wrappers for C Binding types (see CBindingWrapping.h).
+DEFINE_ISA_CONVERSION_FUNCTIONS(Type, LLVMTypeRef)
+
+/* Specialized opaque type conversions.
+ */
+inline Type **unwrap(LLVMTypeRef* Tys) {
+  return reinterpret_cast<Type**>(Tys);
+}
+
+inline LLVMTypeRef *wrap(Type **Tys) {
+  return reinterpret_cast<LLVMTypeRef*>(Tys); // Tys is already non-const.
+}
+  
 } // End llvm namespace
 
 #endif
diff --git a/include/llvm/IR/Use.h b/include/llvm/IR/Use.h
index 4bc7ce5..efd8b48 100644
--- a/include/llvm/IR/Use.h
+++ b/include/llvm/IR/Use.h
@@ -26,7 +26,9 @@
 #define LLVM_IR_USE_H
 
 #include "llvm/ADT/PointerIntPair.h"
+#include "llvm/Support/CBindingWrapping.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm-c/Core.h"
 #include <cstddef>
 #include <iterator>
 
@@ -214,6 +216,9 @@ public:
   unsigned getOperandNo() const;
 };
 
+// Create wrappers for C Binding types (see CBindingWrapping.h).
+DEFINE_SIMPLE_CONVERSION_FUNCTIONS(Use, LLVMUseRef)
+
 } // End llvm namespace
 
 #endif
diff --git a/include/llvm/IR/Value.h b/include/llvm/IR/Value.h
index a4f7862..5fba3d5 100644
--- a/include/llvm/IR/Value.h
+++ b/include/llvm/IR/Value.h
@@ -16,7 +16,9 @@
 
 #include "llvm/IR/Use.h"
 #include "llvm/Support/Casting.h"
+#include "llvm/Support/CBindingWrapping.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm-c/Core.h"
 
 namespace llvm {
 
@@ -258,14 +260,24 @@ public:
   /// this value.
   bool hasValueHandle() const { return HasValueHandle; }
 
-  /// stripPointerCasts - This method strips off any unneeded pointer casts and
-  /// all-zero GEPs from the specified value, returning the original uncasted
-  /// value. If this is called on a non-pointer value, it returns 'this'.
+  /// \brief This method strips off any unneeded pointer casts,
+  /// all-zero GEPs and aliases from the specified value, returning the original
+  /// uncasted value. If this is called on a non-pointer value, it returns
+  /// 'this'.
   Value *stripPointerCasts();
   const Value *stripPointerCasts() const {
     return const_cast<Value*>(this)->stripPointerCasts();
   }
 
+  /// \brief This method strips off any unneeded pointer casts and
+  /// all-zero GEPs from the specified value, returning the original
+  /// uncasted value. If this is called on a non-pointer value, it returns
+  /// 'this'.
+  Value *stripPointerCastsNoFollowAliases();
+  const Value *stripPointerCastsNoFollowAliases() const {
+    return const_cast<Value*>(this)->stripPointerCastsNoFollowAliases();
+  }
+
   /// stripInBoundsConstantOffsets - This method strips off unneeded pointer casts and
   /// all-constant GEPs from the specified value, returning the original
   /// pointer value. If this is called on a non-pointer value, it returns
@@ -406,6 +418,29 @@ public:
   enum { NumLowBitsAvailable = 2 };
 };
 
+// Create wrappers for C Binding types (see CBindingWrapping.h).
+DEFINE_ISA_CONVERSION_FUNCTIONS(Value, LLVMValueRef)
+
+/* Specialized opaque value conversions.
+ */ 
+inline Value **unwrap(LLVMValueRef *Vals) {
+  return reinterpret_cast<Value**>(Vals);
+}
+
+template<typename T>
+inline T **unwrap(LLVMValueRef *Vals, unsigned Length) {
+#ifndef NDEBUG // Assertion builds only: verify every element really is a T.
+  for (LLVMValueRef *I = Vals, *E = Vals + Length; I != E; ++I)
+    unwrap<T>(*I); // For side effect of calling assert on invalid usage.
+#endif
+  (void)Length; // Unused when assertions are compiled out.
+  return reinterpret_cast<T**>(Vals);
+}
+
+inline LLVMValueRef *wrap(const Value **Vals) {
+  return reinterpret_cast<LLVMValueRef*>(const_cast<Value**>(Vals));
+}
+
 } // End llvm namespace
 
 #endif
diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h
index 9cc194b..5b2cd60 100644
--- a/include/llvm/InitializePasses.h
+++ b/include/llvm/InitializePasses.h
@@ -271,6 +271,7 @@ void initializeInstSimplifierPass(PassRegistry&);
 void initializeUnpackMachineBundlesPass(PassRegistry&);
 void initializeFinalizeMachineBundlesPass(PassRegistry&);
 void initializeLoopVectorizePass(PassRegistry&);
+void initializeSLPVectorizerPass(PassRegistry&);
 void initializeBBVectorizePass(PassRegistry&);
 void initializeMachineFunctionPrinterPassPass(PassRegistry&);
 }
diff --git a/include/llvm/LinkAllPasses.h b/include/llvm/LinkAllPasses.h
index 1f017e4..ca1c139 100644
--- a/include/llvm/LinkAllPasses.h
+++ b/include/llvm/LinkAllPasses.h
@@ -161,6 +161,7 @@ namespace {
       (void) llvm::createMemDepPrinter();
       (void) llvm::createInstructionSimplifierPass();
       (void) llvm::createLoopVectorizePass();
+      (void) llvm::createSLPVectorizerPass();
       (void) llvm::createBBVectorizePass();
 
       (void)new llvm::IntervalPartition();
diff --git a/include/llvm/Linker.h b/include/llvm/Linker.h
index 6796384..3667b85 100644
--- a/include/llvm/Linker.h
+++ b/include/llvm/Linker.h
@@ -10,149 +10,46 @@
 #ifndef LLVM_LINKER_H
 #define LLVM_LINKER_H
 
-#include <memory>
+#include "llvm/ADT/SmallPtrSet.h"
 #include <string>
-#include <vector>
 
 namespace llvm {
 
 class Module;
-class LLVMContext;
 class StringRef;
+class StructType;
 
-/// This class provides the core functionality of linking in LLVM. It retains a
-/// Module object which is the composite of the modules and libraries linked
-/// into it. The composite Module can be retrieved via the getModule() method.
-/// In this case the Linker still retains ownership of the Module. If the
-/// releaseModule() method is used, the ownership of the Module is transferred
-/// to the caller and the Linker object is only suitable for destruction.
-/// The Linker can link Modules from memory. By default, the linker
-/// will generate error and warning messages to stderr but this capability can
-/// be turned off with the QuietWarnings and QuietErrors flags. It can also be
-/// instructed to verbosely print out the linking actions it is taking with
-/// the Verbose flag.
-/// @brief The LLVM Linker.
+/// This class provides the core functionality of linking in LLVM. It keeps a
+/// pointer to the merged module so far. It doesn't take ownership of the
+/// module since it is assumed that the user of this class will want to do
+/// something with it after the linking.
 class Linker {
-
-  /// @name Types
-  /// @{
   public:
-    /// This enumeration is used to control various optional features of the
-    /// linker.
-    enum ControlFlags {
-      Verbose       = 1, ///< Print to stderr what steps the linker is taking
-      QuietWarnings = 2, ///< Don't print warnings to stderr.
-      QuietErrors   = 4  ///< Don't print errors to stderr.
-    };
-
     enum LinkerMode {
       DestroySource = 0, // Allow source module to be destroyed.
       PreserveSource = 1 // Preserve the source module.
     };
 
-  /// @}
-  /// @name Constructors
-  /// @{
-  public:
-    /// Construct the Linker with an empty module which will be given the
-    /// name \p progname. \p progname will also be used for error messages.
-    /// @brief Construct with empty module
-    Linker(StringRef progname, ///< name of tool running linker
-           StringRef modulename, ///< name of linker's end-result module
-           LLVMContext &C, ///< Context for global info
-           unsigned Flags = 0  ///< ControlFlags (one or more |'d together)
-    );
-
-    /// Construct the Linker with a previously defined module, \p aModule. Use
-    /// \p progname for the name of the program in error messages.
-    /// @brief Construct with existing module
-    Linker(StringRef progname, Module* aModule, unsigned Flags = 0);
-
-    /// Destruct the Linker.
-    /// @brief Destructor
+    Linker(Module *M);
     ~Linker();
-
-  /// @}
-  /// @name Accessors
-  /// @{
-  public:
-    /// This method gets the composite module into which linking is being
-    /// done. The Composite module starts out empty and accumulates modules
-    /// linked into it via the various LinkIn* methods. This method does not
-    /// release the Module to the caller. The Linker retains ownership and will
-    /// destruct the Module when the Linker is destructed.
-    /// @see releaseModule
-    /// @brief Get the linked/composite module.
-    Module* getModule() const { return Composite; }
-
-    /// This method releases the composite Module into which linking is being
-    /// done. Ownership of the composite Module is transferred to the caller who
-    /// must arrange for its destruct. After this method is called, the Linker
-    /// terminates the linking session for the returned Module. It will no
-    /// longer utilize the returned Module but instead resets itself for
-    /// subsequent linking as if the constructor had been called.
-    /// @brief Release the linked/composite module.
-    Module* releaseModule();
-
-    /// This method returns an error string suitable for printing to the user.
-    /// The return value will be empty unless an error occurred in one of the
-    /// LinkIn* methods. In those cases, the LinkIn* methods will have returned
-    /// true, indicating an error occurred. At most one error is retained so
-    /// this function always returns the last error that occurred. Note that if
-    /// the Quiet control flag is not set, the error string will have already
-    /// been printed to stderr.
-    /// @brief Get the text of the last error that occurred.
-    const std::string &getLastError() const { return Error; }
-
-  /// @}
-  /// @name Mutators
-  /// @{
-  public:
-    /// This method links the \p Src module into the Linker's Composite module
-    /// by calling LinkModules.
-    /// @see LinkModules
-    /// @returns True if an error occurs, false otherwise.
-    /// @brief Link in a module.
-    bool LinkInModule(
-      Module* Src,              ///< Module linked into \p Dest
-      std::string* ErrorMsg = 0 /// Error/diagnostic string
-    ) {
-      return LinkModules(Composite, Src, Linker::DestroySource, ErrorMsg);
+    Module *getModule() const { return Composite; }
+
+    /// \brief Link \p Src into the composite. The source is destroyed if
+    /// \p Mode is DestroySource and preserved if it is PreserveSource.
+    /// If \p ErrorMsg is not null, information about any error is written
+    /// to it.
+    /// Returns true on error.
+    bool linkInModule(Module *Src, unsigned Mode, std::string *ErrorMsg);
+    bool linkInModule(Module *Src, std::string *ErrorMsg) {
+      return linkInModule(Src, Linker::DestroySource, ErrorMsg);
     }
 
-    /// This is the heart of the linker. This method will take unconditional
-    /// control of the \p Src module and link it into the \p Dest module. The
-    /// \p Src module will be destructed or subsumed by this method. In either
-    /// case it is not usable by the caller after this method is invoked. Only
-    /// the \p Dest module will remain. The \p Src module is linked into the
-    /// Linker's composite module such that types, global variables, functions,
-    /// and etc. are matched and resolved.  If an error occurs, this function
-    /// returns true and ErrorMsg is set to a descriptive message about the
-    /// error.
-    /// @returns True if an error occurs, false otherwise.
-    /// @brief Generically link two modules together.
-    static bool LinkModules(Module* Dest, Module* Src, unsigned Mode,
-                            std::string* ErrorMsg);
+    static bool LinkModules(Module *Dest, Module *Src, unsigned Mode,
+                            std::string *ErrorMsg);
 
-  /// @}
-  /// @name Implementation
-  /// @{
   private:
-    bool warning(StringRef message);
-    bool error(StringRef message);
-    void verbose(StringRef message);
-
-  /// @}
-  /// @name Data
-  /// @{
-  private:
-    LLVMContext& Context; ///< The context for global information
-    Module* Composite; ///< The composite module linked together
-    unsigned Flags;    ///< Flags to control optional behavior.
-    std::string Error; ///< Text of error that occurred.
-    std::string ProgramName; ///< Name of the program being linked
-  /// @}
-
+    Module *Composite;
+    SmallPtrSet<StructType*, 32> IdentifiedStructTypes;
 };
 
 } // End llvm namespace
diff --git a/include/llvm/MC/MCAsmInfo.h b/include/llvm/MC/MCAsmInfo.h
index 28256b3..d020de3 100644
--- a/include/llvm/MC/MCAsmInfo.h
+++ b/include/llvm/MC/MCAsmInfo.h
@@ -217,6 +217,8 @@ namespace llvm {
     /// convention.
     bool HasMicrosoftFastStdCallMangling;    // Defaults to false.
 
+    bool NeedsDwarfSectionOffsetDirective;
+
     //===--- Alignment Information ----------------------------------------===//
 
     /// AlignDirective - The directive used to emit round up to an alignment
@@ -320,9 +322,6 @@ namespace llvm {
     /// encode inline subroutine information.
     bool DwarfUsesInlineInfoSection;         // Defaults to false.
 
-    /// DwarfSectionOffsetDirective - Special section offset directive.
-    const char* DwarfSectionOffsetDirective; // Defaults to NULL
-
     /// DwarfUsesRelocationsAcrossSections - True if Dwarf2 output generally
     /// uses relocations for references to other .debug_* sections.
     bool DwarfUsesRelocationsAcrossSections;
@@ -412,6 +411,10 @@ namespace llvm {
       return HasMicrosoftFastStdCallMangling;
     }
 
+    bool needsDwarfSectionOffsetDirective() const {
+      return NeedsDwarfSectionOffsetDirective;
+    }
+
     // Accessors.
     //
     bool hasMachoZeroFillDirective() const { return HasMachoZeroFillDirective; }
@@ -557,9 +560,6 @@ namespace llvm {
     bool doesDwarfUseInlineInfoSection() const {
       return DwarfUsesInlineInfoSection;
     }
-    const char *getDwarfSectionOffsetDirective() const {
-      return DwarfSectionOffsetDirective;
-    }
     bool doesDwarfUseRelocationsAcrossSections() const {
       return DwarfUsesRelocationsAcrossSections;
     }
diff --git a/include/llvm/MC/MCAssembler.h b/include/llvm/MC/MCAssembler.h
index 43fbdc9..38a70f0 100644
--- a/include/llvm/MC/MCAssembler.h
+++ b/include/llvm/MC/MCAssembler.h
@@ -451,7 +451,7 @@ class MCLEBFragment : public MCFragment {
 
   SmallString<8> Contents;
 public:
-  MCLEBFragment(const MCExpr &Value_, bool IsSigned_, MCSectionData *SD)
+  MCLEBFragment(const MCExpr &Value_, bool IsSigned_, MCSectionData *SD = 0)
     : MCFragment(FT_LEB, SD),
       Value(&Value_), IsSigned(IsSigned_) { Contents.push_back(0); }
 
@@ -487,7 +487,7 @@ class MCDwarfLineAddrFragment : public MCFragment {
 
 public:
   MCDwarfLineAddrFragment(int64_t _LineDelta, const MCExpr &_AddrDelta,
-                      MCSectionData *SD)
+                      MCSectionData *SD = 0)
     : MCFragment(FT_Dwarf, SD),
       LineDelta(_LineDelta), AddrDelta(&_AddrDelta) { Contents.push_back(0); }
 
@@ -518,7 +518,7 @@ class MCDwarfCallFrameFragment : public MCFragment {
   SmallString<8> Contents;
 
 public:
-  MCDwarfCallFrameFragment(const MCExpr &_AddrDelta,  MCSectionData *SD)
+  MCDwarfCallFrameFragment(const MCExpr &_AddrDelta,  MCSectionData *SD = 0)
     : MCFragment(FT_DwarfFrame, SD),
       AddrDelta(&_AddrDelta) { Contents.push_back(0); }
 
@@ -590,6 +590,10 @@ private:
   /// it.
   unsigned HasInstructions : 1;
 
+  /// Mapping from subsection number to insertion point for subsection numbers
+  /// below that number.
+  SmallVector<std::pair<unsigned, MCFragment *>, 1> SubsectionFragmentMap;
+
   /// @}
 
 public:
@@ -633,6 +637,8 @@ public:
 
   bool empty() const { return Fragments.empty(); }
 
+  iterator getSubsectionInsertionPoint(unsigned Subsection);
+
   bool isBundleLocked() const {
     return BundleLockState != NotBundleLocked;
   }
diff --git a/include/llvm/MC/MCELFObjectWriter.h b/include/llvm/MC/MCELFObjectWriter.h
index a59776d..65dd1e8 100644
--- a/include/llvm/MC/MCELFObjectWriter.h
+++ b/include/llvm/MC/MCELFObjectWriter.h
@@ -45,7 +45,14 @@ struct ELFRelocationEntry {
 
   // Support lexicographic sorting.
   bool operator<(const ELFRelocationEntry &RE) const {
-    return RE.r_offset < r_offset;
+    if (RE.r_offset != r_offset)
+      return RE.r_offset < r_offset;
+    if (Type != RE.Type)
+      return Type < RE.Type;
+    if (Index != RE.Index)
+      return Index < RE.Index;
+    llvm_unreachable("ELFRelocs might be unstable!");
+    return 0;
   }
 };
 
diff --git a/include/llvm/MC/MCELFStreamer.h b/include/llvm/MC/MCELFStreamer.h
index 6fb2d22..55c05b0 100644
--- a/include/llvm/MC/MCELFStreamer.h
+++ b/include/llvm/MC/MCELFStreamer.h
@@ -50,7 +50,8 @@ public:
 
   virtual void InitSections();
   virtual void InitToTextSection();
-  virtual void ChangeSection(const MCSection *Section);
+  virtual void ChangeSection(const MCSection *Section,
+                             const MCExpr *Subsection);
   virtual void EmitLabel(MCSymbol *Symbol);
   virtual void EmitDebugLabel(MCSymbol *Symbol);
   virtual void EmitAssemblerFlag(MCAssemblerFlag Flag);
diff --git a/include/llvm/MC/MCExpr.h b/include/llvm/MC/MCExpr.h
index b5bfed1..a2c5bd3 100644
--- a/include/llvm/MC/MCExpr.h
+++ b/include/llvm/MC/MCExpr.h
@@ -216,7 +216,9 @@ public:
     VK_Mips_GOT_HI16,
     VK_Mips_GOT_LO16,
     VK_Mips_CALL_HI16,
-    VK_Mips_CALL_LO16
+    VK_Mips_CALL_LO16,
+
+    VK_COFF_IMGREL32 // symbol@imgrel (image-relative)
   };
 
 private:
diff --git a/include/llvm/MC/MCInst.h b/include/llvm/MC/MCInst.h
index e91c6a2..4766815 100644
--- a/include/llvm/MC/MCInst.h
+++ b/include/llvm/MC/MCInst.h
@@ -171,7 +171,7 @@ public:
   void clear() { Operands.clear(); }
   size_t size() { return Operands.size(); }
 
-  typedef SmallVector<MCOperand, 8>::iterator iterator;
+  typedef SmallVectorImpl<MCOperand>::iterator iterator;
   iterator begin() { return Operands.begin(); }
   iterator end()   { return Operands.end();   }
   iterator insert(iterator I, const MCOperand &Op) {
diff --git a/include/llvm/MC/MCObjectFileInfo.h b/include/llvm/MC/MCObjectFileInfo.h
index c8d7484..a5853b6 100644
--- a/include/llvm/MC/MCObjectFileInfo.h
+++ b/include/llvm/MC/MCObjectFileInfo.h
@@ -46,10 +46,15 @@ protected:
   unsigned FDEEncoding;
   unsigned FDECFIEncoding;
   unsigned TTypeEncoding;
-  // Section flags for eh_frame
+
+  /// Section flags for eh_frame
   unsigned EHSectionType;
   unsigned EHSectionFlags;
 
+  /// CompactUnwindDwarfEHFrameOnly - Compact unwind encoding indicating that we
+  /// should emit only an EH frame.
+  unsigned CompactUnwindDwarfEHFrameOnly;
+
   /// TextSection - Section directive for standard text.
   ///
   const MCSection *TextSection;
@@ -201,6 +206,10 @@ public:
   }
   unsigned getTTypeEncoding() const { return TTypeEncoding; }
 
+  unsigned getCompactUnwindDwarfEHFrameOnly() const {
+    return CompactUnwindDwarfEHFrameOnly;
+  }
+
   const MCSection *getTextSection() const { return TextSection; }
   const MCSection *getDataSection() const { return DataSection; }
   const MCSection *getBSSSection() const { return BSSSection; }
diff --git a/include/llvm/MC/MCObjectStreamer.h b/include/llvm/MC/MCObjectStreamer.h
index f06c49f..22a2839 100644
--- a/include/llvm/MC/MCObjectStreamer.h
+++ b/include/llvm/MC/MCObjectStreamer.h
@@ -10,6 +10,7 @@
 #ifndef LLVM_MC_MCOBJECTSTREAMER_H
 #define LLVM_MC_MCOBJECTSTREAMER_H
 
+#include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCStreamer.h"
 
 namespace llvm {
@@ -32,6 +33,7 @@ class raw_ostream;
 class MCObjectStreamer : public MCStreamer {
   MCAssembler *Assembler;
   MCSectionData *CurSectionData;
+  MCSectionData::iterator CurInsertionPoint;
 
   virtual void EmitInstToData(const MCInst &Inst) = 0;
   virtual void EmitCFIStartProcImpl(MCDwarfFrameInfo &Frame);
@@ -56,6 +58,11 @@ protected:
 
   MCFragment *getCurrentFragment() const;
 
+  void insert(MCFragment *F) const {
+    CurSectionData->getFragmentList().insert(CurInsertionPoint, F);
+    F->setParent(CurSectionData);
+  }
+
   /// Get a data fragment to write into, creating a new one if the current
   /// fragment is not a data fragment.
   MCDataFragment *getOrCreateDataFragment() const;
@@ -76,7 +83,8 @@ public:
   virtual void EmitULEB128Value(const MCExpr *Value);
   virtual void EmitSLEB128Value(const MCExpr *Value);
   virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol);
-  virtual void ChangeSection(const MCSection *Section);
+  virtual void ChangeSection(const MCSection *Section,
+                             const MCExpr *Subsection);
   virtual void EmitInstruction(const MCInst &Inst);
 
   /// \brief Emit an instruction to a special fragment, because this instruction
diff --git a/include/llvm/MC/MCParser/MCAsmParser.h b/include/llvm/MC/MCParser/MCAsmParser.h
index d7e3902..dcc9886 100644
--- a/include/llvm/MC/MCParser/MCAsmParser.h
+++ b/include/llvm/MC/MCParser/MCAsmParser.h
@@ -33,15 +33,31 @@ class Twine;
 /// MCAsmParserSemaCallback - Generic Sema callback for assembly parser.
 class MCAsmParserSemaCallback {
 public:
+  struct InlineAsmIdentifierInfo { // Passed to LookupInlineAsmIdentifier.
+    void *OpDecl;
+    bool IsVarDecl;
+    unsigned Length, Size, Type;
+    /// Reset every field to its default; note that Length resets to 1.
+    void clear() {
+      OpDecl = 0;
+      IsVarDecl = false;
+      Length = 1;
+      Size = 0;
+      Type = 0;
+    }
+  };
+
   virtual ~MCAsmParserSemaCallback(); 
-  virtual void *LookupInlineAsmIdentifier(StringRef Name, void *Loc,
-                                          unsigned &Length, unsigned &Size, 
-                                          unsigned &Type, bool &IsVarDecl) = 0;
+  virtual void *LookupInlineAsmIdentifier(StringRef &LineBuf,
+                                          InlineAsmIdentifierInfo &Info,
+                                          bool IsUnevaluatedContext) = 0;
 
   virtual bool LookupInlineAsmField(StringRef Base, StringRef Member,
                                     unsigned &Offset) = 0;
 };
 
+typedef MCAsmParserSemaCallback::InlineAsmIdentifierInfo
+  InlineAsmIdentifierInfo;
 
 /// MCAsmParser - Generic assembler parser interface, for use by target specific
 /// assembly parsers.
@@ -106,14 +122,14 @@ public:
   ///
   /// \return The return value is true, if warnings are fatal.
   virtual bool Warning(SMLoc L, const Twine &Msg,
-                       ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) = 0;
+                       ArrayRef<SMRange> Ranges = None) = 0;
 
   /// Error - Emit an error at the location \p L, with the message \p Msg.
   ///
   /// \return The return value is always true, as an idiomatic convenience to
   /// clients.
   virtual bool Error(SMLoc L, const Twine &Msg,
-                     ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) = 0;
+                     ArrayRef<SMRange> Ranges = None) = 0;
 
   /// Lex - Get the next AsmToken in the stream, possibly handling file
   /// inclusion first.
@@ -123,8 +139,7 @@ public:
   const AsmToken &getTok();
 
   /// \brief Report an error at the current lexer location.
-  bool TokError(const Twine &Msg,
-                ArrayRef<SMRange> Ranges = ArrayRef<SMRange>());
+  bool TokError(const Twine &Msg, ArrayRef<SMRange> Ranges = None);
 
   /// parseIdentifier - Parse an identifier or string (as a quoted identifier)
   /// and set \p Res to the identifier contents.
@@ -151,6 +166,13 @@ public:
   virtual bool parseExpression(const MCExpr *&Res, SMLoc &EndLoc) = 0;
   bool parseExpression(const MCExpr *&Res);
 
+  /// parsePrimaryExpr - Parse a primary expression.
+  ///
+  /// @param Res - The value of the expression. The result is undefined
+  /// on error.
+  /// @result - False on success.
+  virtual bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) = 0;
+
   /// parseParenExpression - Parse an arbitrary expression, assuming that an
   /// initial '(' has already been consumed.
   ///
diff --git a/include/llvm/MC/MCParser/MCParsedAsmOperand.h b/include/llvm/MC/MCParser/MCParsedAsmOperand.h
index 4650bf2..818fbbd 100644
--- a/include/llvm/MC/MCParser/MCParsedAsmOperand.h
+++ b/include/llvm/MC/MCParser/MCParsedAsmOperand.h
@@ -37,15 +37,8 @@ public:
   void setMCOperandNum (unsigned OpNum) { MCOperandNum = OpNum; }
   unsigned getMCOperandNum() { return MCOperandNum; }
 
-  unsigned getNameLen() {
-    assert (getStartLoc().isValid() && "Invalid StartLoc!");
-    assert (getEndLoc().isValid() && "Invalid EndLoc!");
-    return getEndLoc().getPointer() - getStartLoc().getPointer();
-  }
-
-  StringRef getName() {
-    return StringRef(getStartLoc().getPointer(), getNameLen());
-  }
+  virtual StringRef getSymName() { return StringRef(); }
+  virtual void *getOpDecl() { return 0; }
 
   /// isToken - Is this a token operand?
   virtual bool isToken() const = 0;
diff --git a/include/llvm/MC/MCSection.h b/include/llvm/MC/MCSection.h
index e575424..de2678a 100644
--- a/include/llvm/MC/MCSection.h
+++ b/include/llvm/MC/MCSection.h
@@ -20,6 +20,7 @@
 
 namespace llvm {
   class MCAsmInfo;
+  class MCExpr;
   class raw_ostream;
 
   /// MCSection - Instances of this class represent a uniqued identifier for a
@@ -48,7 +49,8 @@ namespace llvm {
     SectionVariant getVariant() const { return Variant; }
 
     virtual void PrintSwitchToSection(const MCAsmInfo &MAI,
-                                      raw_ostream &OS) const = 0;
+                                      raw_ostream &OS,
+                                      const MCExpr *Subsection) const = 0;
 
     // Convenience routines to get label names for the beginning/end of a
     // section.
diff --git a/include/llvm/MC/MCSectionCOFF.h b/include/llvm/MC/MCSectionCOFF.h
index 07c4714..50e33a5 100644
--- a/include/llvm/MC/MCSectionCOFF.h
+++ b/include/llvm/MC/MCSectionCOFF.h
@@ -60,7 +60,8 @@ namespace llvm {
     int getSelection () const { return Selection; }
 
     virtual void PrintSwitchToSection(const MCAsmInfo &MAI,
-                                      raw_ostream &OS) const;
+                                      raw_ostream &OS,
+                                      const MCExpr *Subsection) const;
     virtual bool UseCodeAlign() const;
     virtual bool isVirtualSection() const;
 
diff --git a/include/llvm/MC/MCSectionELF.h b/include/llvm/MC/MCSectionELF.h
index 4b8b849..5979915 100644
--- a/include/llvm/MC/MCSectionELF.h
+++ b/include/llvm/MC/MCSectionELF.h
@@ -70,7 +70,8 @@ public:
   const MCSymbol *getGroup() const { return Group; }
 
   void PrintSwitchToSection(const MCAsmInfo &MAI,
-                            raw_ostream &OS) const;
+                            raw_ostream &OS,
+                            const MCExpr *Subsection) const;
   virtual bool UseCodeAlign() const;
   virtual bool isVirtualSection() const;
 
diff --git a/include/llvm/MC/MCSectionMachO.h b/include/llvm/MC/MCSectionMachO.h
index 898f571..b68bd85 100644
--- a/include/llvm/MC/MCSectionMachO.h
+++ b/include/llvm/MC/MCSectionMachO.h
@@ -175,7 +175,8 @@ public:
                                            unsigned  &StubSize); // Out.
 
   virtual void PrintSwitchToSection(const MCAsmInfo &MAI,
-                                    raw_ostream &OS) const;
+                                    raw_ostream &OS,
+                                    const MCExpr *Subsection) const;
   virtual bool UseCodeAlign() const;
   virtual bool isVirtualSection() const;
 
diff --git a/include/llvm/MC/MCStreamer.h b/include/llvm/MC/MCStreamer.h
index a069a2b..2cab481 100644
--- a/include/llvm/MC/MCStreamer.h
+++ b/include/llvm/MC/MCStreamer.h
@@ -37,6 +37,8 @@ namespace llvm {
   class raw_ostream;
   class formatted_raw_ostream;
 
+  typedef std::pair<const MCSection *, const MCExpr *> MCSectionSubPair;
+
   /// MCStreamer - Streaming machine code generation interface.  This interface
   /// is intended to provide a programatic interface that is very similar to the
   /// level that an assembler .s file provides.  It has callbacks to emit bytes,
@@ -86,8 +88,7 @@ namespace llvm {
 
     /// SectionStack - This is stack of current and previous section
     /// values saved by PushSection.
-    SmallVector<std::pair<const MCSection *,
-                const MCSection *>, 4> SectionStack;
+    SmallVector<std::pair<MCSectionSubPair, MCSectionSubPair>, 4> SectionStack;
 
     bool AutoInitSections;
 
@@ -174,25 +175,25 @@ namespace llvm {
 
     /// getCurrentSection - Return the current section that the streamer is
     /// emitting code to.
-    const MCSection *getCurrentSection() const {
+    MCSectionSubPair getCurrentSection() const {
       if (!SectionStack.empty())
         return SectionStack.back().first;
-      return NULL;
+      return MCSectionSubPair();
     }
 
     /// getPreviousSection - Return the previous section that the streamer is
     /// emitting code to.
-    const MCSection *getPreviousSection() const {
+    MCSectionSubPair getPreviousSection() const {
       if (!SectionStack.empty())
         return SectionStack.back().second;
-      return NULL;
+      return MCSectionSubPair();
     }
 
     /// ChangeSection - Update streamer for a new active section.
     ///
     /// This is called by PopSection and SwitchSection, if the current
     /// section changes.
-    virtual void ChangeSection(const MCSection *) = 0;
+    virtual void ChangeSection(const MCSection *, const MCExpr *) = 0;
 
     /// pushSection - Save the current and previous section on the
     /// section stack.
@@ -208,11 +209,19 @@ namespace llvm {
     bool PopSection() {
       if (SectionStack.size() <= 1)
         return false;
-      const MCSection *oldSection = SectionStack.pop_back_val().first;
-      const MCSection *curSection = SectionStack.back().first;
+      MCSectionSubPair oldSection = SectionStack.pop_back_val().first;
+      MCSectionSubPair curSection = SectionStack.back().first;
 
       if (oldSection != curSection)
-        ChangeSection(curSection);
+        ChangeSection(curSection.first, curSection.second);
+      return true;
+    }
+
+    bool SubSection(const MCExpr *Subsection) {
+      if (SectionStack.empty())
+        return false;
+
+      SwitchSection(SectionStack.back().first.first, Subsection);
       return true;
     }
 
@@ -220,25 +229,26 @@ namespace llvm {
     /// @p Section.  This is required to update CurSection.
     ///
     /// This corresponds to assembler directives like .section, .text, etc.
-    void SwitchSection(const MCSection *Section) {
+    void SwitchSection(const MCSection *Section, const MCExpr *Subsection = 0) {
       assert(Section && "Cannot switch to a null section!");
-      const MCSection *curSection = SectionStack.back().first;
+      MCSectionSubPair curSection = SectionStack.back().first;
       SectionStack.back().second = curSection;
-      if (Section != curSection) {
-        SectionStack.back().first = Section;
-        ChangeSection(Section);
+      if (MCSectionSubPair(Section, Subsection) != curSection) {
+        SectionStack.back().first = MCSectionSubPair(Section, Subsection);
+        ChangeSection(Section, Subsection);
       }
     }
 
     /// SwitchSectionNoChange - Set the current section where code is being
     /// emitted to @p Section.  This is required to update CurSection. This
     /// version does not call ChangeSection.
-    void SwitchSectionNoChange(const MCSection *Section) {
+    void SwitchSectionNoChange(const MCSection *Section,
+                               const MCExpr *Subsection = 0) {
       assert(Section && "Cannot switch to a null section!");
-      const MCSection *curSection = SectionStack.back().first;
+      MCSectionSubPair curSection = SectionStack.back().first;
       SectionStack.back().second = curSection;
-      if (Section != curSection)
-        SectionStack.back().first = Section;
+      if (MCSectionSubPair(Section, Subsection) != curSection)
+        SectionStack.back().first = MCSectionSubPair(Section, Subsection);
     }
 
     /// Initialize the streamer.
diff --git a/include/llvm/MC/MCTargetAsmParser.h b/include/llvm/MC/MCTargetAsmParser.h
index 4c5b176..6e878df 100644
--- a/include/llvm/MC/MCTargetAsmParser.h
+++ b/include/llvm/MC/MCTargetAsmParser.h
@@ -22,6 +22,7 @@ class MCInst;
 template <typename T> class SmallVectorImpl;
 
 enum AsmRewriteKind {
+  AOK_Delete = 0,     // Rewrite should be ignored.
   AOK_Align,          // Rewrite align as .align.
   AOK_DotOperator,    // Rewrite a dot operator expression as an immediate.
                       // E.g., [eax].foo.bar -> [eax].8
@@ -34,6 +35,19 @@ enum AsmRewriteKind {
   AOK_Skip            // Skip emission (e.g., offset/type operators).
 };
 
+const char AsmRewritePrecedence [] = {
+  0, // AOK_Delete
+  1, // AOK_Align
+  1, // AOK_DotOperator
+  1, // AOK_Emit
+  3, // AOK_Imm
+  3, // AOK_ImmPrefix
+  2, // AOK_Input
+  2, // AOK_Output
+  4, // AOK_SizeDirective
+  1  // AOK_Skip
+};
+
 struct AsmRewrite {
   AsmRewriteKind Kind;
   SMLoc Loc;
diff --git a/include/llvm/MC/MCWinCOFFObjectWriter.h b/include/llvm/MC/MCWinCOFFObjectWriter.h
index 11df574..f13e7d5 100644
--- a/include/llvm/MC/MCWinCOFFObjectWriter.h
+++ b/include/llvm/MC/MCWinCOFFObjectWriter.h
@@ -11,7 +11,9 @@
 #define LLVM_MC_MCWINCOFFOBJECTWRITER_H
 
 namespace llvm {
+  class MCFixup;
   class MCObjectWriter;
+  class MCValue;
   class raw_ostream;
 
   class MCWinCOFFObjectTargetWriter {
@@ -24,7 +26,9 @@ namespace llvm {
     virtual ~MCWinCOFFObjectTargetWriter() {}
 
     unsigned getMachine() const { return Machine; }
-    virtual unsigned getRelocType(unsigned FixupKind) const = 0;
+    virtual unsigned getRelocType(const MCValue &Target,
+                                  const MCFixup &Fixup,
+                                  bool IsCrossSection) const = 0;
   };
 
   /// \brief Construct a new Win COFF writer instance.
diff --git a/include/llvm/MC/MachineLocation.h b/include/llvm/MC/MachineLocation.h
index 5caad33..83c8b72 100644
--- a/include/llvm/MC/MachineLocation.h
+++ b/include/llvm/MC/MachineLocation.h
@@ -9,7 +9,7 @@
 // The MachineLocation class is used to represent a simple location in a machine
 // frame.  Locations will be one of two forms; a register or an address formed
 // from a base address plus an offset.  Register indirection can be specified by
-// using an offset of zero.
+// explicitly passing an offset to the constructor.
 //
 // The MachineMove class is used to represent abstract move operations in the
 // prolog/epilog of a compiled function.  A collection of these objects can be
@@ -37,8 +37,10 @@ public:
   };
   MachineLocation()
     : IsRegister(false), Register(0), Offset(0) {}
+  /// Create a direct register location.
   explicit MachineLocation(unsigned R)
     : IsRegister(true), Register(R), Offset(0) {}
+  /// Create a register-indirect location with an offset.
   MachineLocation(unsigned R, int O)
     : IsRegister(false), Register(R), Offset(O) {}
 
@@ -48,17 +50,20 @@ public:
   }
 
   // Accessors
+  bool isIndirect()      const { return !IsRegister; }
   bool isReg()           const { return IsRegister; }
   unsigned getReg()      const { return Register; }
   int getOffset()        const { return Offset; }
   void setIsRegister(bool Is)  { IsRegister = Is; }
   void setRegister(unsigned R) { Register = R; }
   void setOffset(int O)        { Offset = O; }
+  /// Make this location a direct register location.
   void set(unsigned R) {
     IsRegister = true;
     Register = R;
     Offset = 0;
   }
+  /// Make this location a register-indirect+offset location.
   void set(unsigned R, int O) {
     IsRegister = false;
     Register = R;
diff --git a/include/llvm/MC/SubtargetFeature.h b/include/llvm/MC/SubtargetFeature.h
index 37ae03b..8862c8b 100644
--- a/include/llvm/MC/SubtargetFeature.h
+++ b/include/llvm/MC/SubtargetFeature.h
@@ -62,10 +62,8 @@ struct SubtargetInfoKV {
 ///
 /// SubtargetFeatures - Manages the enabling and disabling of subtarget
 /// specific features.  Features are encoded as a string of the form
-///   "cpu,+attr1,+attr2,-attr3,...,+attrN"
+///   "+attr1,+attr2,-attr3,...,+attrN"
 /// A comma separates each feature from the next (all lowercase.)
-/// The first feature is always the CPU subtype (eg. pentiumm).  If the CPU
-/// value is "generic" then the CPU subtype should be generic for the target.
 /// Each of the remaining features is prefixed with + or - indicating whether
 /// that feature should be enabled or disabled contrary to the cpu
 /// specification.
diff --git a/include/llvm/Object/Binary.h b/include/llvm/Object/Binary.h
index 8bbcd8b..78fcf6f 100644
--- a/include/llvm/Object/Binary.h
+++ b/include/llvm/Object/Binary.h
@@ -41,11 +41,17 @@ protected:
     // Object and children.
     ID_StartObjects,
     ID_COFF,
+
     ID_ELF32L, // ELF 32-bit, little endian
     ID_ELF32B, // ELF 32-bit, big endian
     ID_ELF64L, // ELF 64-bit, little endian
     ID_ELF64B, // ELF 64-bit, big endian
-    ID_MachO,
+
+    ID_MachO32L, // MachO 32-bit, little endian
+    ID_MachO32B, // MachO 32-bit, big endian
+    ID_MachO64L, // MachO 64-bit, little endian
+    ID_MachO64B, // MachO 64-bit, big endian
+
     ID_EndObjects
   };
 
@@ -56,6 +62,13 @@ protected:
       return is64Bits ? ID_ELF64B : ID_ELF32B;
   }
 
+  static unsigned int getMachOType(bool isLE, bool is64Bits) {
+    // Choose the 64-bit or 32-bit ID pair, then select by endianness.
+    if (is64Bits)
+      return isLE ? ID_MachO64L : ID_MachO64B;
+    return isLE ? ID_MachO32L : ID_MachO32B;
+  }
+
 public:
   virtual ~Binary();
 
@@ -79,7 +92,7 @@ public:
   }
 
   bool isMachO() const {
-    return TypeID == ID_MachO;
+    return TypeID >= ID_MachO32L && TypeID <= ID_MachO64B;
   }
 
   bool isCOFF() const {
@@ -87,7 +100,8 @@ public:
   }
 
   bool isLittleEndian() const {
-    return !(TypeID == ID_ELF32B || TypeID == ID_ELF64B);
+    return !(TypeID == ID_ELF32B || TypeID == ID_ELF64B ||
+             TypeID == ID_MachO32B || TypeID == ID_MachO64B);
   }
 };
 
diff --git a/include/llvm/Object/ELF.h b/include/llvm/Object/ELF.h
index 8ea5e46..eb2390a 100644
--- a/include/llvm/Object/ELF.h
+++ b/include/llvm/Object/ELF.h
@@ -81,9 +81,8 @@ template<class ELFT>
 struct ELFDataTypeTypedefHelper;
 
 /// ELF 32bit types.
-template<template<endianness, std::size_t, bool> class ELFT,
-         endianness TargetEndianness, std::size_t MaxAlign>
-struct ELFDataTypeTypedefHelper<ELFT<TargetEndianness, MaxAlign, false> >
+template<endianness TargetEndianness, std::size_t MaxAlign>
+struct ELFDataTypeTypedefHelper<ELFType<TargetEndianness, MaxAlign, false> >
   : ELFDataTypeTypedefHelperCommon<TargetEndianness, MaxAlign> {
   typedef uint32_t value_type;
   typedef support::detail::packed_endian_specific_integral
@@ -95,9 +94,8 @@ struct ELFDataTypeTypedefHelper<ELFT<TargetEndianness, MaxAlign, false> >
 };
 
 /// ELF 64bit types.
-template<template<endianness, std::size_t, bool> class ELFT,
-         endianness TargetEndianness, std::size_t MaxAlign>
-struct ELFDataTypeTypedefHelper<ELFT<TargetEndianness, MaxAlign, true> >
+template<endianness TargetEndianness, std::size_t MaxAlign>
+struct ELFDataTypeTypedefHelper<ELFType<TargetEndianness, MaxAlign, true> >
   : ELFDataTypeTypedefHelperCommon<TargetEndianness, MaxAlign> {
   typedef uint64_t value_type;
   typedef support::detail::packed_endian_specific_integral
@@ -109,27 +107,29 @@ struct ELFDataTypeTypedefHelper<ELFT<TargetEndianness, MaxAlign, true> >
 };
 
 // I really don't like doing this, but the alternative is copypasta.
-#define LLVM_ELF_IMPORT_TYPES(ELFT) \
-typedef typename ELFDataTypeTypedefHelper <ELFT>::Elf_Addr Elf_Addr; \
-typedef typename ELFDataTypeTypedefHelper <ELFT>::Elf_Off Elf_Off; \
-typedef typename ELFDataTypeTypedefHelper <ELFT>::Elf_Half Elf_Half; \
-typedef typename ELFDataTypeTypedefHelper <ELFT>::Elf_Word Elf_Word; \
-typedef typename ELFDataTypeTypedefHelper <ELFT>::Elf_Sword Elf_Sword; \
-typedef typename ELFDataTypeTypedefHelper <ELFT>::Elf_Xword Elf_Xword; \
-typedef typename ELFDataTypeTypedefHelper <ELFT>::Elf_Sxword Elf_Sxword;
-
-// This is required to get template types into a macro :(
-#define LLVM_ELF_COMMA ,
-
-  // Section header.
+#define LLVM_ELF_IMPORT_TYPES(E, M, W)                                         \
+typedef typename ELFDataTypeTypedefHelper<ELFType<E,M,W> >::Elf_Addr Elf_Addr; \
+typedef typename ELFDataTypeTypedefHelper<ELFType<E,M,W> >::Elf_Off Elf_Off;   \
+typedef typename ELFDataTypeTypedefHelper<ELFType<E,M,W> >::Elf_Half Elf_Half; \
+typedef typename ELFDataTypeTypedefHelper<ELFType<E,M,W> >::Elf_Word Elf_Word; \
+typedef typename                                                               \
+  ELFDataTypeTypedefHelper<ELFType<E,M,W> >::Elf_Sword Elf_Sword;              \
+typedef typename                                                               \
+  ELFDataTypeTypedefHelper<ELFType<E,M,W> >::Elf_Xword Elf_Xword;              \
+typedef typename                                                               \
+  ELFDataTypeTypedefHelper<ELFType<E,M,W> >::Elf_Sxword Elf_Sxword;
+
+#define LLVM_ELF_IMPORT_TYPES_ELFT(ELFT)                                       \
+  LLVM_ELF_IMPORT_TYPES(ELFT::TargetEndianness, ELFT::MaxAlignment,            \
+  ELFT::Is64Bits)
+
+// Section header.
 template<class ELFT>
 struct Elf_Shdr_Base;
 
-template<template<endianness, std::size_t, bool> class ELFT,
-         endianness TargetEndianness, std::size_t MaxAlign>
-struct Elf_Shdr_Base<ELFT<TargetEndianness, MaxAlign, false> > {
-  LLVM_ELF_IMPORT_TYPES(ELFT<TargetEndianness LLVM_ELF_COMMA
-                             MaxAlign LLVM_ELF_COMMA false>)
+template<endianness TargetEndianness, std::size_t MaxAlign>
+struct Elf_Shdr_Base<ELFType<TargetEndianness, MaxAlign, false> > {
+  LLVM_ELF_IMPORT_TYPES(TargetEndianness, MaxAlign, false)
   Elf_Word sh_name;     // Section name (index into string table)
   Elf_Word sh_type;     // Section type (SHT_*)
   Elf_Word sh_flags;    // Section flags (SHF_*)
@@ -142,11 +142,9 @@ struct Elf_Shdr_Base<ELFT<TargetEndianness, MaxAlign, false> > {
   Elf_Word sh_entsize;  // Size of records contained within the section
 };
 
-template<template<endianness, std::size_t, bool> class ELFT,
-         endianness TargetEndianness, std::size_t MaxAlign>
-struct Elf_Shdr_Base<ELFT<TargetEndianness, MaxAlign, true> > {
-  LLVM_ELF_IMPORT_TYPES(ELFT<TargetEndianness LLVM_ELF_COMMA
-                             MaxAlign LLVM_ELF_COMMA true>)
+template<endianness TargetEndianness, std::size_t MaxAlign>
+struct Elf_Shdr_Base<ELFType<TargetEndianness, MaxAlign, true> > {
+  LLVM_ELF_IMPORT_TYPES(TargetEndianness, MaxAlign, true)
   Elf_Word  sh_name;     // Section name (index into string table)
   Elf_Word  sh_type;     // Section type (SHT_*)
   Elf_Xword sh_flags;    // Section flags (SHF_*)
@@ -175,11 +173,9 @@ struct Elf_Shdr_Impl : Elf_Shdr_Base<ELFT> {
 template<class ELFT>
 struct Elf_Sym_Base;
 
-template<template<endianness, std::size_t, bool> class ELFT,
-         endianness TargetEndianness, std::size_t MaxAlign>
-struct Elf_Sym_Base<ELFT<TargetEndianness, MaxAlign, false> > {
-  LLVM_ELF_IMPORT_TYPES(ELFT<TargetEndianness LLVM_ELF_COMMA
-                             MaxAlign LLVM_ELF_COMMA false>)
+template<endianness TargetEndianness, std::size_t MaxAlign>
+struct Elf_Sym_Base<ELFType<TargetEndianness, MaxAlign, false> > {
+  LLVM_ELF_IMPORT_TYPES(TargetEndianness, MaxAlign, false)
   Elf_Word      st_name;  // Symbol name (index into string table)
   Elf_Addr      st_value; // Value or address associated with the symbol
   Elf_Word      st_size;  // Size of the symbol
@@ -188,11 +184,9 @@ struct Elf_Sym_Base<ELFT<TargetEndianness, MaxAlign, false> > {
   Elf_Half      st_shndx; // Which section (header table index) it's defined in
 };
 
-template<template<endianness, std::size_t, bool> class ELFT,
-         endianness TargetEndianness, std::size_t MaxAlign>
-struct Elf_Sym_Base<ELFT<TargetEndianness, MaxAlign, true> > {
-  LLVM_ELF_IMPORT_TYPES(ELFT<TargetEndianness LLVM_ELF_COMMA
-                             MaxAlign LLVM_ELF_COMMA true>)
+template<endianness TargetEndianness, std::size_t MaxAlign>
+struct Elf_Sym_Base<ELFType<TargetEndianness, MaxAlign, true> > {
+  LLVM_ELF_IMPORT_TYPES(TargetEndianness, MaxAlign, true)
   Elf_Word      st_name;  // Symbol name (index into string table)
   unsigned char st_info;  // Symbol's type and binding attributes
   unsigned char st_other; // Must be zero; reserved
@@ -220,7 +214,7 @@ struct Elf_Sym_Impl : Elf_Sym_Base<ELFT> {
 /// (.gnu.version). This structure is identical for ELF32 and ELF64.
 template<class ELFT>
 struct Elf_Versym_Impl {
-  LLVM_ELF_IMPORT_TYPES(ELFT)
+  LLVM_ELF_IMPORT_TYPES_ELFT(ELFT)
   Elf_Half vs_index;   // Version index with flags (e.g. VERSYM_HIDDEN)
 };
 
@@ -231,7 +225,7 @@ struct Elf_Verdaux_Impl;
 /// (.gnu.version_d). This structure is identical for ELF32 and ELF64.
 template<class ELFT>
 struct Elf_Verdef_Impl {
-  LLVM_ELF_IMPORT_TYPES(ELFT)
+  LLVM_ELF_IMPORT_TYPES_ELFT(ELFT)
   typedef Elf_Verdaux_Impl<ELFT> Elf_Verdaux;
   Elf_Half vd_version; // Version of this structure (e.g. VER_DEF_CURRENT)
   Elf_Half vd_flags;   // Bitwise flags (VER_DEF_*)
@@ -251,7 +245,7 @@ struct Elf_Verdef_Impl {
 /// section (.gnu.version_d). This structure is identical for ELF32 and ELF64.
 template<class ELFT>
 struct Elf_Verdaux_Impl {
-  LLVM_ELF_IMPORT_TYPES(ELFT)
+  LLVM_ELF_IMPORT_TYPES_ELFT(ELFT)
   Elf_Word vda_name; // Version name (offset in string table)
   Elf_Word vda_next; // Offset to next Verdaux entry (in bytes)
 };
@@ -260,7 +254,7 @@ struct Elf_Verdaux_Impl {
 /// section (.gnu.version_r). This structure is identical for ELF32 and ELF64.
 template<class ELFT>
 struct Elf_Verneed_Impl {
-  LLVM_ELF_IMPORT_TYPES(ELFT)
+  LLVM_ELF_IMPORT_TYPES_ELFT(ELFT)
   Elf_Half vn_version; // Version of this structure (e.g. VER_NEED_CURRENT)
   Elf_Half vn_cnt;     // Number of associated Vernaux entries
   Elf_Word vn_file;    // Library name (string table offset)
@@ -272,7 +266,7 @@ struct Elf_Verneed_Impl {
 /// section (.gnu.version_r). This structure is identical for ELF32 and ELF64.
 template<class ELFT>
 struct Elf_Vernaux_Impl {
-  LLVM_ELF_IMPORT_TYPES(ELFT)
+  LLVM_ELF_IMPORT_TYPES_ELFT(ELFT)
   Elf_Word vna_hash;  // Hash of dependency name
   Elf_Half vna_flags; // Bitwise Flags (VER_FLAG_*)
   Elf_Half vna_other; // Version index, used in .gnu.version entries
@@ -285,11 +279,9 @@ struct Elf_Vernaux_Impl {
 template<class ELFT>
 struct Elf_Dyn_Base;
 
-template<template<endianness, std::size_t, bool> class ELFT,
-         endianness TargetEndianness, std::size_t MaxAlign>
-struct Elf_Dyn_Base<ELFT<TargetEndianness, MaxAlign, false> > {
-  LLVM_ELF_IMPORT_TYPES(ELFT<TargetEndianness LLVM_ELF_COMMA
-                             MaxAlign LLVM_ELF_COMMA false>)
+template<endianness TargetEndianness, std::size_t MaxAlign>
+struct Elf_Dyn_Base<ELFType<TargetEndianness, MaxAlign, false> > {
+  LLVM_ELF_IMPORT_TYPES(TargetEndianness, MaxAlign, false)
   Elf_Sword d_tag;
   union {
     Elf_Word d_val;
@@ -297,11 +289,9 @@ struct Elf_Dyn_Base<ELFT<TargetEndianness, MaxAlign, false> > {
   } d_un;
 };
 
-template<template<endianness, std::size_t, bool> class ELFT,
-         endianness TargetEndianness, std::size_t MaxAlign>
-struct Elf_Dyn_Base<ELFT<TargetEndianness, MaxAlign, true> > {
-  LLVM_ELF_IMPORT_TYPES(ELFT<TargetEndianness LLVM_ELF_COMMA
-                             MaxAlign LLVM_ELF_COMMA true>)
+template<endianness TargetEndianness, std::size_t MaxAlign>
+struct Elf_Dyn_Base<ELFType<TargetEndianness, MaxAlign, true> > {
+  LLVM_ELF_IMPORT_TYPES(TargetEndianness, MaxAlign, true)
   Elf_Sxword d_tag;
   union {
     Elf_Xword d_val;
@@ -323,11 +313,9 @@ struct Elf_Dyn_Impl : Elf_Dyn_Base<ELFT> {
 template<class ELFT, bool isRela>
 struct Elf_Rel_Base;
 
-template<template<endianness, std::size_t, bool> class ELFT,
-         endianness TargetEndianness, std::size_t MaxAlign>
-struct Elf_Rel_Base<ELFT<TargetEndianness, MaxAlign, false>, false> {
-  LLVM_ELF_IMPORT_TYPES(ELFT<TargetEndianness LLVM_ELF_COMMA
-                             MaxAlign LLVM_ELF_COMMA false>)
+template<endianness TargetEndianness, std::size_t MaxAlign>
+struct Elf_Rel_Base<ELFType<TargetEndianness, MaxAlign, false>, false> {
+  LLVM_ELF_IMPORT_TYPES(TargetEndianness, MaxAlign, false)
   Elf_Addr      r_offset; // Location (file byte offset, or program virtual addr)
   Elf_Word      r_info;  // Symbol table index and type of relocation to apply
 
@@ -340,11 +328,9 @@ struct Elf_Rel_Base<ELFT<TargetEndianness, MaxAlign, false>, false> {
   }
 };
 
-template<template<endianness, std::size_t, bool> class ELFT,
-         endianness TargetEndianness, std::size_t MaxAlign>
-struct Elf_Rel_Base<ELFT<TargetEndianness, MaxAlign, true>, false> {
-  LLVM_ELF_IMPORT_TYPES(ELFT<TargetEndianness LLVM_ELF_COMMA
-                             MaxAlign LLVM_ELF_COMMA true>)
+template<endianness TargetEndianness, std::size_t MaxAlign>
+struct Elf_Rel_Base<ELFType<TargetEndianness, MaxAlign, true>, false> {
+  LLVM_ELF_IMPORT_TYPES(TargetEndianness, MaxAlign, true)
   Elf_Addr      r_offset; // Location (file byte offset, or program virtual addr)
   Elf_Xword     r_info;   // Symbol table index and type of relocation to apply
 
@@ -365,11 +351,9 @@ struct Elf_Rel_Base<ELFT<TargetEndianness, MaxAlign, true>, false> {
   }
 };
 
-template<template<endianness, std::size_t, bool> class ELFT,
-         endianness TargetEndianness, std::size_t MaxAlign>
-struct Elf_Rel_Base<ELFT<TargetEndianness, MaxAlign, false>, true> {
-  LLVM_ELF_IMPORT_TYPES(ELFT<TargetEndianness LLVM_ELF_COMMA
-                             MaxAlign LLVM_ELF_COMMA false>)
+template<endianness TargetEndianness, std::size_t MaxAlign>
+struct Elf_Rel_Base<ELFType<TargetEndianness, MaxAlign, false>, true> {
+  LLVM_ELF_IMPORT_TYPES(TargetEndianness, MaxAlign, false)
   Elf_Addr      r_offset; // Location (file byte offset, or program virtual addr)
   Elf_Word      r_info;   // Symbol table index and type of relocation to apply
   Elf_Sword     r_addend; // Compute value for relocatable field by adding this
@@ -383,11 +367,9 @@ struct Elf_Rel_Base<ELFT<TargetEndianness, MaxAlign, false>, true> {
   }
 };
 
-template<template<endianness, std::size_t, bool> class ELFT,
-         endianness TargetEndianness, std::size_t MaxAlign>
-struct Elf_Rel_Base<ELFT<TargetEndianness, MaxAlign, true>, true> {
-  LLVM_ELF_IMPORT_TYPES(ELFT<TargetEndianness LLVM_ELF_COMMA
-                             MaxAlign LLVM_ELF_COMMA true>)
+template<endianness TargetEndianness, std::size_t MaxAlign>
+struct Elf_Rel_Base<ELFType<TargetEndianness, MaxAlign, true>, true> {
+  LLVM_ELF_IMPORT_TYPES(TargetEndianness, MaxAlign, true)
   Elf_Addr      r_offset; // Location (file byte offset, or program virtual addr)
   Elf_Xword     r_info;   // Symbol table index and type of relocation to apply
   Elf_Sxword    r_addend; // Compute value for relocatable field by adding this.
@@ -411,12 +393,10 @@ struct Elf_Rel_Base<ELFT<TargetEndianness, MaxAlign, true>, true> {
 template<class ELFT, bool isRela>
 struct Elf_Rel_Impl;
 
-template<template<endianness, std::size_t, bool> class ELFT,
-         endianness TargetEndianness, std::size_t MaxAlign, bool isRela>
-struct Elf_Rel_Impl<ELFT<TargetEndianness, MaxAlign, true>, isRela>
-       : Elf_Rel_Base<ELFT<TargetEndianness, MaxAlign, true>, isRela> {
-  LLVM_ELF_IMPORT_TYPES(ELFT<TargetEndianness LLVM_ELF_COMMA
-                             MaxAlign LLVM_ELF_COMMA true>)
+template<endianness TargetEndianness, std::size_t MaxAlign, bool isRela>
+struct Elf_Rel_Impl<ELFType<TargetEndianness, MaxAlign, true>, isRela>
+       : Elf_Rel_Base<ELFType<TargetEndianness, MaxAlign, true>, isRela> {
+  LLVM_ELF_IMPORT_TYPES(TargetEndianness, MaxAlign, true)
 
   // These accessors and mutators correspond to the ELF64_R_SYM, ELF64_R_TYPE,
   // and ELF64_R_INFO macros defined in the ELF specification:
@@ -433,12 +413,10 @@ struct Elf_Rel_Impl<ELFT<TargetEndianness, MaxAlign, true>, isRela>
   }
 };
 
-template<template<endianness, std::size_t, bool> class ELFT,
-         endianness TargetEndianness, std::size_t MaxAlign, bool isRela>
-struct Elf_Rel_Impl<ELFT<TargetEndianness, MaxAlign, false>, isRela>
-       : Elf_Rel_Base<ELFT<TargetEndianness, MaxAlign, false>, isRela> {
-  LLVM_ELF_IMPORT_TYPES(ELFT<TargetEndianness LLVM_ELF_COMMA
-                             MaxAlign LLVM_ELF_COMMA false>)
+template<endianness TargetEndianness, std::size_t MaxAlign, bool isRela>
+struct Elf_Rel_Impl<ELFType<TargetEndianness, MaxAlign, false>, isRela>
+       : Elf_Rel_Base<ELFType<TargetEndianness, MaxAlign, false>, isRela> {
+  LLVM_ELF_IMPORT_TYPES(TargetEndianness, MaxAlign, false)
 
   // These accessors and mutators correspond to the ELF32_R_SYM, ELF32_R_TYPE,
   // and ELF32_R_INFO macros defined in the ELF specification:
@@ -457,7 +435,7 @@ struct Elf_Rel_Impl<ELFT<TargetEndianness, MaxAlign, false>, isRela>
 
 template<class ELFT>
 struct Elf_Ehdr_Impl {
-  LLVM_ELF_IMPORT_TYPES(ELFT)
+  LLVM_ELF_IMPORT_TYPES_ELFT(ELFT)
   unsigned char e_ident[ELF::EI_NIDENT]; // ELF Identification bytes
   Elf_Half e_type;     // Type of file (see ET_*)
   Elf_Half e_machine;  // Required architecture for this file (see EM_*)
@@ -483,11 +461,9 @@ struct Elf_Ehdr_Impl {
 template<class ELFT>
 struct Elf_Phdr_Impl;
 
-template<template<endianness, std::size_t, bool> class ELFT,
-         endianness TargetEndianness, std::size_t MaxAlign>
-struct Elf_Phdr_Impl<ELFT<TargetEndianness, MaxAlign, false> > {
-  LLVM_ELF_IMPORT_TYPES(ELFT<TargetEndianness LLVM_ELF_COMMA
-                             MaxAlign LLVM_ELF_COMMA false>)
+template<endianness TargetEndianness, std::size_t MaxAlign>
+struct Elf_Phdr_Impl<ELFType<TargetEndianness, MaxAlign, false> > {
+  LLVM_ELF_IMPORT_TYPES(TargetEndianness, MaxAlign, false)
   Elf_Word p_type;   // Type of segment
   Elf_Off  p_offset; // FileOffset where segment is located, in bytes
   Elf_Addr p_vaddr;  // Virtual Address of beginning of segment
@@ -498,11 +474,9 @@ struct Elf_Phdr_Impl<ELFT<TargetEndianness, MaxAlign, false> > {
   Elf_Word p_align;  // Segment alignment constraint
 };
 
-template<template<endianness, std::size_t, bool> class ELFT,
-         endianness TargetEndianness, std::size_t MaxAlign>
-struct Elf_Phdr_Impl<ELFT<TargetEndianness, MaxAlign, true> > {
-  LLVM_ELF_IMPORT_TYPES(ELFT<TargetEndianness LLVM_ELF_COMMA
-                             MaxAlign LLVM_ELF_COMMA true>)
+template<endianness TargetEndianness, std::size_t MaxAlign>
+struct Elf_Phdr_Impl<ELFType<TargetEndianness, MaxAlign, true> > {
+  LLVM_ELF_IMPORT_TYPES(TargetEndianness, MaxAlign, true)
   Elf_Word p_type;   // Type of segment
   Elf_Word p_flags;  // Segment flags
   Elf_Off  p_offset; // FileOffset where segment is located, in bytes
@@ -515,7 +489,7 @@ struct Elf_Phdr_Impl<ELFT<TargetEndianness, MaxAlign, true> > {
 
 template<class ELFT>
 class ELFObjectFile : public ObjectFile {
-  LLVM_ELF_IMPORT_TYPES(ELFT)
+  LLVM_ELF_IMPORT_TYPES_ELFT(ELFT)
 
 public:
   /// \brief Iterate over constant sized entities.
@@ -633,6 +607,8 @@ private:
   mutable const char *dt_soname;
 
 private:
+  uint64_t getROffset(DataRefImpl Rel) const;
+
   // Records for each version index the corresponding Verdef or Vernaux entry.
   // This is filled the first time LoadVersionMap() is called.
   class VersionMapEntry : public PointerIntPair<const void*, 1> {
@@ -689,6 +665,7 @@ public:
 protected:
   const Elf_Sym  *getSymbol(DataRefImpl Symb) const; // FIXME: Should be private?
   void            validateSymbol(DataRefImpl Symb) const;
+  StringRef       getRelocationTypeName(uint32_t Type) const;
 
 public:
   error_code      getSymbolName(const Elf_Shdr *section,
@@ -705,6 +682,7 @@ protected:
   virtual error_code getSymbolName(DataRefImpl Symb, StringRef &Res) const;
   virtual error_code getSymbolFileOffset(DataRefImpl Symb, uint64_t &Res) const;
   virtual error_code getSymbolAddress(DataRefImpl Symb, uint64_t &Res) const;
+  virtual error_code getSymbolAlignment(DataRefImpl Symb, uint32_t &Res) const;
   virtual error_code getSymbolSize(DataRefImpl Symb, uint64_t &Res) const;
   virtual error_code getSymbolNMTypeChar(DataRefImpl Symb, char &Res) const;
   virtual error_code getSymbolFlags(DataRefImpl Symb, uint32_t &Res) const;
@@ -1138,6 +1116,21 @@ error_code ELFObjectFile<ELFT>::getSymbolAddress(DataRefImpl Symb,
 }
 
 template<class ELFT>
+error_code ELFObjectFile<ELFT>::getSymbolAlignment(DataRefImpl Symb,
+                                                   uint32_t &Res) const {
+  uint32_t flags;
+  getSymbolFlags(Symb, flags); // NOTE(review): returned error_code is ignored.
+  if (flags & SymbolRef::SF_Common) { // Common symbols report their value.
+    uint64_t Value; // Presumably the alignment for common symbols - confirm.
+    getSymbolValue(Symb, Value); // NOTE(review): result is unchecked as well.
+    Res = Value; // Implicitly narrows uint64_t to uint32_t.
+  } else {
+    Res = 0; // Every other symbol reports 0.
+  }
+  return object_error::success; // Unconditional success as written.
+}
+
+template<class ELFT>
 error_code ELFObjectFile<ELFT>::getSymbolSize(DataRefImpl Symb,
                                               uint64_t &Result) const {
   validateSymbol(Symb);
@@ -1546,45 +1539,32 @@ error_code ELFObjectFile<ELFT>::getRelocationSymbol(DataRefImpl Rel,
 template<class ELFT>
 error_code ELFObjectFile<ELFT>::getRelocationAddress(DataRefImpl Rel,
                                                      uint64_t &Result) const {
-  uint64_t offset;
-  const Elf_Shdr *sec = getSection(Rel.w.b);
-  switch (sec->sh_type) {
-    default :
-      report_fatal_error("Invalid section type in Rel!");
-    case ELF::SHT_REL : {
-      offset = getRel(Rel)->r_offset;
-      break;
-    }
-    case ELF::SHT_RELA : {
-      offset = getRela(Rel)->r_offset;
-      break;
-    }
-  }
-
-  Result = offset;
+  assert((Header->e_type == ELF::ET_EXEC || Header->e_type == ELF::ET_DYN) &&
+         "Only executable and shared objects files have addresses");
+  Result = getROffset(Rel);
   return object_error::success;
 }
 
 template<class ELFT>
 error_code ELFObjectFile<ELFT>::getRelocationOffset(DataRefImpl Rel,
                                                     uint64_t &Result) const {
-  uint64_t offset;
+  assert(Header->e_type == ELF::ET_REL &&
+         "Only relocatable object files have relocation offsets");
+  Result = getROffset(Rel);
+  return object_error::success;
+}
+
+template<class ELFT>
+uint64_t ELFObjectFile<ELFT>::getROffset(DataRefImpl Rel) const {
   const Elf_Shdr *sec = getSection(Rel.w.b);
   switch (sec->sh_type) {
-    default :
-      report_fatal_error("Invalid section type in Rel!");
-    case ELF::SHT_REL : {
-      offset = getRel(Rel)->r_offset;
-      break;
-    }
-    case ELF::SHT_RELA : {
-      offset = getRela(Rel)->r_offset;
-      break;
-    }
+  default:
+    report_fatal_error("Invalid section type in Rel!");
+  case ELF::SHT_REL:
+    return getRel(Rel)->r_offset;
+  case ELF::SHT_RELA:
+    return getRela(Rel)->r_offset;
   }
-
-  Result = offset - sec->sh_addr;
-  return object_error::success;
 }
 
 template<class ELFT>
@@ -1607,29 +1587,14 @@ error_code ELFObjectFile<ELFT>::getRelocationType(DataRefImpl Rel,
 }
 
 #define LLVM_ELF_SWITCH_RELOC_TYPE_NAME(enum) \
-  case ELF::enum: res = #enum; break;
+  case ELF::enum: Res = #enum; break;
 
 template<class ELFT>
-error_code ELFObjectFile<ELFT>::getRelocationTypeName(
-    DataRefImpl Rel, SmallVectorImpl<char> &Result) const {
-  const Elf_Shdr *sec = getSection(Rel.w.b);
-  uint32_t type;
-  StringRef res;
-  switch (sec->sh_type) {
-    default :
-      return object_error::parse_failed;
-    case ELF::SHT_REL : {
-      type = getRel(Rel)->getType(isMips64EL());
-      break;
-    }
-    case ELF::SHT_RELA : {
-      type = getRela(Rel)->getType(isMips64EL());
-      break;
-    }
-  }
+StringRef ELFObjectFile<ELFT>::getRelocationTypeName(uint32_t Type) const {
+  StringRef Res = "Unknown";
   switch (Header->e_machine) {
   case ELF::EM_X86_64:
-    switch (type) {
+    switch (Type) {
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_NONE);
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_64);
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_PC32);
@@ -1657,17 +1622,22 @@ error_code ELFObjectFile<ELFT>::getRelocationTypeName(
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_PC64);
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOTOFF64);
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOTPC32);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOT64);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOTPCREL64);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOTPC64);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOTPLT64);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_PLTOFF64);
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_SIZE32);
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_SIZE64);
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOTPC32_TLSDESC);
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_TLSDESC_CALL);
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_TLSDESC);
-    default:
-      res = "Unknown";
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_IRELATIVE);
+    default: break;
     }
     break;
   case ELF::EM_386:
-    switch (type) {
+    switch (Type) {
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_NONE);
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_32);
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_PC32);
@@ -1708,12 +1678,11 @@ error_code ELFObjectFile<ELFT>::getRelocationTypeName(
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_DESC_CALL);
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_DESC);
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_IRELATIVE);
-    default:
-      res = "Unknown";
+    default: break;
     }
     break;
   case ELF::EM_MIPS:
-    switch (type) {
+    switch (Type) {
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_NONE);
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_16);
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_32);
@@ -1765,12 +1734,12 @@ error_code ELFObjectFile<ELFT>::getRelocationTypeName(
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_GLOB_DAT);
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_COPY);
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_JUMP_SLOT);
-    default:
-      res = "Unknown";
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_NUM);
+    default: break;
     }
     break;
   case ELF::EM_AARCH64:
-    switch (type) {
+    switch (Type) {
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_NONE);
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_ABS64);
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_ABS32);
@@ -1844,13 +1813,11 @@ error_code ELFObjectFile<ELFT>::getRelocationTypeName(
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSDESC_LD64_LO12_NC);
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSDESC_ADD_LO12_NC);
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSDESC_CALL);
-
-    default:
-      res = "Unknown";
+    default: break;
     }
     break;
   case ELF::EM_ARM:
-    switch (type) {
+    switch (Type) {
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_NONE);
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_PC24);
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ABS32);
@@ -1982,12 +1949,11 @@ error_code ELFObjectFile<ELFT>::getRelocationTypeName(
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ME_TOO);
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_TLS_DESCSEQ16);
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_TLS_DESCSEQ32);
-    default:
-      res = "Unknown";
+    default: break;
     }
     break;
   case ELF::EM_HEXAGON:
-    switch (type) {
+    switch (Type) {
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_NONE);
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_B22_PCREL);
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_B15_PCREL);
@@ -2074,20 +2040,185 @@ error_code ELFObjectFile<ELFT>::getRelocationTypeName(
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_TPREL_32_6_X);
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_TPREL_16_X);
       LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_TPREL_11_X);
-    default:
-      res = "Unknown";
+    default: break;
     }
     break;
-  default:
-    res = "Unknown";
+  case ELF::EM_PPC:
+    switch (Type) {
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_NONE);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_ADDR32);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_ADDR24);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_ADDR16);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_ADDR16_LO);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_ADDR16_HI);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_ADDR16_HA);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_ADDR14);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_ADDR14_BRTAKEN);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_ADDR14_BRNTAKEN);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_REL24);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_REL14);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_REL14_BRTAKEN);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_REL14_BRNTAKEN);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_REL32);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_TPREL16_LO);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_TPREL16_HA);
+    default: break;
+    }
+    break;
+  case ELF::EM_PPC64:
+    switch (Type) {
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_NONE);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_ADDR32);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_ADDR16_LO);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_ADDR16_HI);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_ADDR14);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_REL24);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_REL32);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_ADDR64);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_ADDR16_HIGHER);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_ADDR16_HIGHEST);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_REL64);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TOC16);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TOC16_LO);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TOC16_HA);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TOC);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_ADDR16_DS);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_ADDR16_LO_DS);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TOC16_DS);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TOC16_LO_DS);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TLS);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TPREL16_LO);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TPREL16_HA);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_DTPREL16_LO);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_DTPREL16_HA);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_GOT_TLSGD16_LO);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_GOT_TLSGD16_HA);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_GOT_TLSLD16_LO);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_GOT_TLSLD16_HA);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_GOT_TPREL16_LO_DS);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_GOT_TPREL16_HA);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TLSGD);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TLSLD);
+    default: break;
+    }
+    break;
+  case ELF::EM_S390:
+    switch (Type) {
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_NONE);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_8);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_12);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_16);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_32);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_PC32);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_GOT12);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_GOT32);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_PLT32);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_COPY);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_GLOB_DAT);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_JMP_SLOT);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_RELATIVE);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_GOTOFF);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_GOTPC);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_GOT16);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_PC16);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_PC16DBL);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_PLT16DBL);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_PC32DBL);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_PLT32DBL);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_GOTPCDBL);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_64);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_PC64);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_GOT64);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_PLT64);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_GOTENT);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_GOTOFF16);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_GOTOFF64);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_GOTPLT12);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_GOTPLT16);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_GOTPLT32);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_GOTPLT64);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_GOTPLTENT);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_PLTOFF16);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_PLTOFF32);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_PLTOFF64);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_TLS_LOAD);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_TLS_GDCALL);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_TLS_LDCALL);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_TLS_GD32);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_TLS_GD64);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_TLS_GOTIE12);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_TLS_GOTIE32);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_TLS_GOTIE64);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_TLS_LDM32);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_TLS_LDM64);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_TLS_IE32);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_TLS_IE64);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_TLS_IEENT);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_TLS_LE32);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_TLS_LE64);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_TLS_LDO32);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_TLS_LDO64);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_TLS_DTPMOD);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_TLS_DTPOFF);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_TLS_TPOFF);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_20);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_GOT20);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_GOTPLT20);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_TLS_GOTIE20);
+      LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_IRELATIVE);
+    default: break;
+    }
+    break;
+  default: break;
   }
-  Result.append(res.begin(), res.end());
-  return object_error::success;
+  return Res;
 }
 
 #undef LLVM_ELF_SWITCH_RELOC_TYPE_NAME
 
 template<class ELFT>
+error_code ELFObjectFile<ELFT>::getRelocationTypeName(
+    DataRefImpl Rel, SmallVectorImpl<char> &Result) const {
+  const Elf_Shdr *sec = getSection(Rel.w.b); // Header looked up via Rel.w.b.
+  uint32_t type; // Raw relocation type read from the REL/RELA entry.
+  switch (sec->sh_type) {
+    default :
+      return object_error::parse_failed; // Neither SHT_REL nor SHT_RELA.
+    case ELF::SHT_REL : {
+      type = getRel(Rel)->getType(isMips64EL());
+      break;
+    }
+    case ELF::SHT_RELA : {
+      type = getRela(Rel)->getType(isMips64EL());
+      break;
+    }
+  }
+
+  if (!isMips64EL()) {
+    StringRef Name = getRelocationTypeName(type); // One name for the type.
+    Result.append(Name.begin(), Name.end()); // Appends; Result is not cleared.
+  } else {
+    uint8_t Type1 = (type >>  0) & 0xFF; // Bits 0-7 of the type word.
+    uint8_t Type2 = (type >>  8) & 0xFF; // Bits 8-15 of the type word.
+    uint8_t Type3 = (type >> 16) & 0xFF; // Bits 16-23 of the type word.
+
+    // Concat all three relocation type names.
+    StringRef Name = getRelocationTypeName(Type1);
+    Result.append(Name.begin(), Name.end());
+
+    Name = getRelocationTypeName(Type2);
+    Result.append(1, '/'); // '/' separates consecutive names.
+    Result.append(Name.begin(), Name.end());
+
+    Name = getRelocationTypeName(Type3);
+    Result.append(1, '/');
+    Result.append(Name.begin(), Name.end());
+  }
+
+  return object_error::success;
+}
+
+template<class ELFT>
 error_code ELFObjectFile<ELFT>::getRelocationAdditionalInfo(
     DataRefImpl Rel, int64_t &Result) const {
   const Elf_Shdr *sec = getSection(Rel.w.b);
@@ -2189,8 +2320,7 @@ ELFObjectFile<ELFT>::ELFObjectFile(MemoryBuffer *Object, error_code &ec)
   : ObjectFile(getELFType(
       static_cast<endianness>(ELFT::TargetEndianness) == support::little,
       ELFT::Is64Bits),
-      Object,
-      ec)
+      Object)
   , isDyldELFObject(false)
   , SectionHeaderTable(0)
   , dot_shstrtab_sec(0)
@@ -2566,6 +2696,8 @@ StringRef ELFObjectFile<ELFT>::getFileFormatName() const {
       return "ELF64-aarch64";
     case ELF::EM_PPC64:
       return "ELF64-ppc64";
+    case ELF::EM_S390:
+      return "ELF64-s390";
     default:
       return "ELF64-unknown";
     }
@@ -2593,6 +2725,8 @@ unsigned ELFObjectFile<ELFT>::getArch() const {
            Triple::mipsel : Triple::mips;
   case ELF::EM_PPC64:
     return Triple::ppc64;
+  case ELF::EM_S390:
+    return Triple::systemz;
   default:
     return Triple::UnknownArch;
   }
diff --git a/include/llvm/Object/MachO.h b/include/llvm/Object/MachO.h
index ed7aabd..14cd4d7 100644
--- a/include/llvm/Object/MachO.h
+++ b/include/llvm/Object/MachO.h
@@ -7,16 +7,17 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file declares the MachOObjectFile class, which binds the MachOObject
-// class to the generic ObjectFile wrapper.
+// This file declares the MachOObjectFile class, which implements the ObjectFile
+// interface for MachO files.
 //
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_OBJECT_MACHO_H
 #define LLVM_OBJECT_MACHO_H
 
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/Object/MachOObject.h"
+#include "llvm/Object/MachOFormat.h"
 #include "llvm/Object/ObjectFile.h"
 #include "llvm/Support/MachO.h"
 #include "llvm/Support/raw_ostream.h"
@@ -24,46 +25,26 @@
 namespace llvm {
 namespace object {
 
-typedef MachOObject::LoadCommandInfo LoadCommandInfo;
-
 class MachOObjectFile : public ObjectFile {
 public:
-  MachOObjectFile(MemoryBuffer *Object, MachOObject *MOO, error_code &ec);
-
-  virtual symbol_iterator begin_symbols() const;
-  virtual symbol_iterator end_symbols() const;
-  virtual symbol_iterator begin_dynamic_symbols() const;
-  virtual symbol_iterator end_dynamic_symbols() const;
-  virtual library_iterator begin_libraries_needed() const;
-  virtual library_iterator end_libraries_needed() const;
-  virtual section_iterator begin_sections() const;
-  virtual section_iterator end_sections() const;
+  struct LoadCommandInfo {
+    const char *Ptr;      // Where in memory the load command is.
+    macho::LoadCommand C; // The command itself.
+  };
 
-  virtual uint8_t getBytesInAddress() const;
-  virtual StringRef getFileFormatName() const;
-  virtual unsigned getArch() const;
-  virtual StringRef getLoadName() const;
+  MachOObjectFile(MemoryBuffer *Object, bool IsLittleEndian, bool Is64Bits,
+                  error_code &ec);
 
-  // In a MachO file, sections have a segment name. This is used in the .o
-  // files. They have a single segment, but this field specifies which segment
-  // a section should be put in in the final object.
-  error_code getSectionFinalSegmentName(DataRefImpl Sec, StringRef &Res) const;
-
-  MachOObject *getObject() { return MachOObj.get(); }
-
-  static inline bool classof(const Binary *v) {
-    return v->isMachO();
-  }
-
-protected:
   virtual error_code getSymbolNext(DataRefImpl Symb, SymbolRef &Res) const;
   virtual error_code getSymbolName(DataRefImpl Symb, StringRef &Res) const;
-  virtual error_code getSymbolFileOffset(DataRefImpl Symb, uint64_t &Res) const;
   virtual error_code getSymbolAddress(DataRefImpl Symb, uint64_t &Res) const;
+  virtual error_code getSymbolFileOffset(DataRefImpl Symb, uint64_t &Res) const;
+  virtual error_code getSymbolAlignment(DataRefImpl Symb, uint32_t &Res) const;
   virtual error_code getSymbolSize(DataRefImpl Symb, uint64_t &Res) const;
+  virtual error_code getSymbolType(DataRefImpl Symb,
+                                   SymbolRef::Type &Res) const;
   virtual error_code getSymbolNMTypeChar(DataRefImpl Symb, char &Res) const;
   virtual error_code getSymbolFlags(DataRefImpl Symb, uint32_t &Res) const;
-  virtual error_code getSymbolType(DataRefImpl Symb, SymbolRef::Type &Res) const;
   virtual error_code getSymbolSection(DataRefImpl Symb,
                                       section_iterator &Res) const;
   virtual error_code getSymbolValue(DataRefImpl Symb, uint64_t &Val) const;
@@ -82,21 +63,17 @@ protected:
   virtual error_code isSectionVirtual(DataRefImpl Sec, bool &Res) const;
   virtual error_code isSectionZeroInit(DataRefImpl Sec, bool &Res) const;
   virtual error_code isSectionReadOnlyData(DataRefImpl Sec, bool &Res) const;
-  virtual error_code sectionContainsSymbol(DataRefImpl DRI, DataRefImpl S,
+  virtual error_code sectionContainsSymbol(DataRefImpl Sec, DataRefImpl Symb,
                                            bool &Result) const;
   virtual relocation_iterator getSectionRelBegin(DataRefImpl Sec) const;
   virtual relocation_iterator getSectionRelEnd(DataRefImpl Sec) const;
 
   virtual error_code getRelocationNext(DataRefImpl Rel,
                                        RelocationRef &Res) const;
-  virtual error_code getRelocationAddress(DataRefImpl Rel,
-                                          uint64_t &Res) const;
-  virtual error_code getRelocationOffset(DataRefImpl Rel,
-                                         uint64_t &Res) const;
-  virtual error_code getRelocationSymbol(DataRefImpl Rel,
-                                         SymbolRef &Res) const;
-  virtual error_code getRelocationType(DataRefImpl Rel,
-                                       uint64_t &Res) const;
+  virtual error_code getRelocationAddress(DataRefImpl Rel, uint64_t &Res) const;
+  virtual error_code getRelocationOffset(DataRefImpl Rel, uint64_t &Res) const;
+  virtual error_code getRelocationSymbol(DataRefImpl Rel, SymbolRef &Res) const;
+  virtual error_code getRelocationType(DataRefImpl Rel, uint64_t &Res) const;
   virtual error_code getRelocationTypeName(DataRefImpl Rel,
                                            SmallVectorImpl<char> &Result) const;
   virtual error_code getRelocationAdditionalInfo(DataRefImpl Rel,
@@ -108,28 +85,98 @@ protected:
   virtual error_code getLibraryNext(DataRefImpl LibData, LibraryRef &Res) const;
   virtual error_code getLibraryPath(DataRefImpl LibData, StringRef &Res) const;
 
-private:
-  OwningPtr<MachOObject> MachOObj;
-  mutable uint32_t RegisteredStringTable;
-  typedef SmallVector<DataRefImpl, 1> SectionList;
-  SectionList Sections;
+  // TODO: Would be useful to have an iterator based version
+  // of the load command interface too.
 
+  virtual symbol_iterator begin_symbols() const;
+  virtual symbol_iterator end_symbols() const;
+
+  virtual symbol_iterator begin_dynamic_symbols() const;
+  virtual symbol_iterator end_dynamic_symbols() const;
+
+  virtual section_iterator begin_sections() const;
+  virtual section_iterator end_sections() const;
+
+  virtual library_iterator begin_libraries_needed() const;
+  virtual library_iterator end_libraries_needed() const;
+
+  virtual uint8_t getBytesInAddress() const;
+
+  virtual StringRef getFileFormatName() const;
+  virtual unsigned getArch() const;
+
+  virtual StringRef getLoadName() const;
 
-  void moveToNextSection(DataRefImpl &DRI) const;
-  void getSymbolTableEntry(DataRefImpl DRI,
-                           InMemoryStruct<macho::SymbolTableEntry> &Res) const;
-  void getSymbol64TableEntry(DataRefImpl DRI,
-                          InMemoryStruct<macho::Symbol64TableEntry> &Res) const;
-  void moveToNextSymbol(DataRefImpl &DRI) const;
-  void getSection(DataRefImpl DRI, InMemoryStruct<macho::Section> &Res) const;
-  void getSection64(DataRefImpl DRI,
-                    InMemoryStruct<macho::Section64> &Res) const;
-  void getRelocation(DataRefImpl Rel,
-                     InMemoryStruct<macho::RelocationEntry> &Res) const;
-  std::size_t getSectionIndex(DataRefImpl Sec) const;
-
-  void printRelocationTargetName(InMemoryStruct<macho::RelocationEntry>& RE,
-                                 raw_string_ostream &fmt) const;
+  relocation_iterator getSectionRelBegin(unsigned Index) const;
+  relocation_iterator getSectionRelEnd(unsigned Index) const;
+
+  // In a MachO file, sections have a segment name. This is used in the .o
+  // files. They have a single segment, but this field specifies which segment
+  // a section should be put in in the final object.
+  StringRef getSectionFinalSegmentName(DataRefImpl Sec) const;
+
+  // Names are stored as 16 bytes. These return the raw 16 bytes without
+  // interpreting them as a C string.
+  ArrayRef<char> getSectionRawName(DataRefImpl Sec) const;
+  ArrayRef<char> getSectionRawFinalSegmentName(DataRefImpl Sec) const;
+
+  // MachO specific Info about relocations.
+  bool isRelocationScattered(const macho::RelocationEntry &RE) const;
+  unsigned getPlainRelocationSymbolNum(const macho::RelocationEntry &RE) const;
+  bool getPlainRelocationExternal(const macho::RelocationEntry &RE) const;
+  bool getScatteredRelocationScattered(const macho::RelocationEntry &RE) const;
+  uint32_t getScatteredRelocationValue(const macho::RelocationEntry &RE) const;
+  unsigned getAnyRelocationAddress(const macho::RelocationEntry &RE) const;
+  unsigned getAnyRelocationPCRel(const macho::RelocationEntry &RE) const;
+  unsigned getAnyRelocationLength(const macho::RelocationEntry &RE) const;
+  unsigned getAnyRelocationType(const macho::RelocationEntry &RE) const;
+  SectionRef getRelocationSection(const macho::RelocationEntry &RE) const;
+
+  // Walk load commands.
+  LoadCommandInfo getFirstLoadCommandInfo() const;
+  LoadCommandInfo getNextLoadCommandInfo(const LoadCommandInfo &L) const;
+
+  // MachO specific structures.
+  macho::Section getSection(DataRefImpl DRI) const;
+  macho::Section64 getSection64(DataRefImpl DRI) const;
+  macho::Section getSection(const LoadCommandInfo &L, unsigned Index) const;
+  macho::Section64 getSection64(const LoadCommandInfo &L, unsigned Index) const;
+  macho::SymbolTableEntry getSymbolTableEntry(DataRefImpl DRI) const;
+  macho::Symbol64TableEntry getSymbol64TableEntry(DataRefImpl DRI) const;
+
+  macho::LinkeditDataLoadCommand
+  getLinkeditDataLoadCommand(const LoadCommandInfo &L) const;
+  macho::SegmentLoadCommand
+  getSegmentLoadCommand(const LoadCommandInfo &L) const;
+  macho::Segment64LoadCommand
+  getSegment64LoadCommand(const LoadCommandInfo &L) const;
+  macho::LinkerOptionsLoadCommand
+  getLinkerOptionsLoadCommand(const LoadCommandInfo &L) const;
+
+  macho::RelocationEntry getRelocation(DataRefImpl Rel) const;
+  macho::Header getHeader() const;
+  macho::Header64Ext getHeader64Ext() const;
+  macho::IndirectSymbolTableEntry
+  getIndirectSymbolTableEntry(const macho::DysymtabLoadCommand &DLC,
+                              unsigned Index) const;
+  macho::DataInCodeTableEntry getDataInCodeTableEntry(uint32_t DataOffset,
+                                                      unsigned Index) const;
+  macho::SymtabLoadCommand getSymtabLoadCommand() const;
+  macho::DysymtabLoadCommand getDysymtabLoadCommand() const;
+
+  StringRef getStringTableData() const;
+  bool is64Bit() const;
+  void ReadULEB128s(uint64_t Index, SmallVectorImpl<uint64_t> &Out) const;
+
+  static bool classof(const Binary *v) {
+    return v->isMachO();
+  }
+
+private:
+  typedef SmallVector<const char*, 1> SectionList;
+  SectionList Sections;
+  const char *SymtabLoadCmd;
+  const char *DysymtabLoadCmd;
 };
 
 }
diff --git a/include/llvm/Object/MachOObject.h b/include/llvm/Object/MachOObject.h
deleted file mode 100644
index 9e4ab19..0000000
--- a/include/llvm/Object/MachOObject.h
+++ /dev/null
@@ -1,210 +0,0 @@
-//===- MachOObject.h - Mach-O Object File Wrapper ---------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_OBJECT_MACHOOBJECT_H
-#define LLVM_OBJECT_MACHOOBJECT_H
-
-#include "llvm/ADT/InMemoryStruct.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/Object/MachOFormat.h"
-#include <string>
-
-namespace llvm {
-
-class MemoryBuffer;
-class raw_ostream;
-
-namespace object {
-
-/// \brief Wrapper object for manipulating Mach-O object files.
-///
-/// This class is designed to implement a full-featured, efficient, portable,
-/// and robust Mach-O interface to Mach-O object files. It does not attempt to
-/// smooth over rough edges in the Mach-O format or generalize access to object
-/// independent features.
-///
-/// The class is designed around accessing the Mach-O object which is expected
-/// to be fully loaded into memory.
-///
-/// This class is *not* suitable for concurrent use. For efficient operation,
-/// the class uses APIs which rely on the ability to cache the results of
-/// certain calls in internal objects which are not safe for concurrent
-/// access. This allows the API to be zero-copy on the common paths.
-//
-// FIXME: It would be cool if we supported a "paged" MemoryBuffer
-// implementation. This would allow us to implement a more sensible version of
-// MemoryObject which can work like a MemoryBuffer, but be more efficient for
-// objects which are in the current address space.
-class MachOObject {
-public:
-  struct LoadCommandInfo {
-    /// The load command information.
-    macho::LoadCommand Command;
-
-    /// The offset to the start of the load command in memory.
-    uint64_t Offset;
-  };
-
-private:
-  OwningPtr<MemoryBuffer> Buffer;
-
-  /// Whether the object is little endian.
-  bool IsLittleEndian;
-  /// Whether the object is 64-bit.
-  bool Is64Bit;
-  /// Whether the object is swapped endianness from the host.
-  bool IsSwappedEndian;
-  /// Whether the string table has been registered.
-  bool HasStringTable;
-
-  /// The cached information on the load commands.
-  LoadCommandInfo *LoadCommands;
-  mutable unsigned NumLoadedCommands;
-
-  /// The cached copy of the header.
-  macho::Header Header;
-  macho::Header64Ext Header64Ext;
-
-  /// Cache string table information.
-  StringRef StringTable;
-
-private:
-  MachOObject(MemoryBuffer *Buffer, bool IsLittleEndian, bool Is64Bit);
-
-public:
-  ~MachOObject();
-
-  /// \brief Load a Mach-O object from a MemoryBuffer object.
-  ///
-  /// \param Buffer - The buffer to load the object from. This routine takes
-  /// exclusive ownership of the buffer (which is passed to the returned object
-  /// on success).
-  /// \param ErrorStr [out] - If given, will be set to a user readable error
-  /// message on failure.
-  /// \returns The loaded object, or null on error.
-  static MachOObject *LoadFromBuffer(MemoryBuffer *Buffer,
-                                     std::string *ErrorStr = 0);
-
-  /// @name File Information
-  /// @{
-
-  bool isLittleEndian() const { return IsLittleEndian; }
-  bool isSwappedEndian() const { return IsSwappedEndian; }
-  bool is64Bit() const { return Is64Bit; }
-
-  unsigned getHeaderSize() const {
-    return Is64Bit ? macho::Header64Size : macho::Header32Size;
-  }
-
-  StringRef getData(size_t Offset, size_t Size) const;
-
-  /// @}
-  /// @name String Table Data
-  /// @{
-
-  StringRef getStringTableData() const {
-    assert(HasStringTable && "String table has not been registered!");
-    return StringTable;
-  }
-
-  StringRef getStringAtIndex(unsigned Index) const {
-    size_t End = getStringTableData().find('\0', Index);
-    return getStringTableData().slice(Index, End);
-  }
-
-  void RegisterStringTable(macho::SymtabLoadCommand &SLC);
-
-  /// @}
-  /// @name Object Header Access
-  /// @{
-
-  const macho::Header &getHeader() const { return Header; }
-  const macho::Header64Ext &getHeader64Ext() const {
-    assert(is64Bit() && "Invalid access!");
-    return Header64Ext;
-  }
-
-  /// @}
-  /// @name Object Structure Access
-  /// @{
-
-  /// \brief Retrieve the information for the given load command.
-  const LoadCommandInfo &getLoadCommandInfo(unsigned Index) const;
-
-  void ReadSegmentLoadCommand(
-    const LoadCommandInfo &LCI,
-    InMemoryStruct<macho::SegmentLoadCommand> &Res) const;
-  void ReadSegment64LoadCommand(
-    const LoadCommandInfo &LCI,
-    InMemoryStruct<macho::Segment64LoadCommand> &Res) const;
-  void ReadSymtabLoadCommand(
-    const LoadCommandInfo &LCI,
-    InMemoryStruct<macho::SymtabLoadCommand> &Res) const;
-  void ReadDysymtabLoadCommand(
-    const LoadCommandInfo &LCI,
-    InMemoryStruct<macho::DysymtabLoadCommand> &Res) const;
-  void ReadLinkeditDataLoadCommand(
-    const LoadCommandInfo &LCI,
-    InMemoryStruct<macho::LinkeditDataLoadCommand> &Res) const;
-  void ReadLinkerOptionsLoadCommand(
-    const LoadCommandInfo &LCI,
-    InMemoryStruct<macho::LinkerOptionsLoadCommand> &Res) const;
-  void ReadIndirectSymbolTableEntry(
-    const macho::DysymtabLoadCommand &DLC,
-    unsigned Index,
-    InMemoryStruct<macho::IndirectSymbolTableEntry> &Res) const;
-  void ReadSection(
-    const LoadCommandInfo &LCI,
-    unsigned Index,
-    InMemoryStruct<macho::Section> &Res) const;
-  void ReadSection64(
-    const LoadCommandInfo &LCI,
-    unsigned Index,
-    InMemoryStruct<macho::Section64> &Res) const;
-  void ReadRelocationEntry(
-    uint64_t RelocationTableOffset, unsigned Index,
-    InMemoryStruct<macho::RelocationEntry> &Res) const;
-  void ReadSymbolTableEntry(
-    uint64_t SymbolTableOffset, unsigned Index,
-    InMemoryStruct<macho::SymbolTableEntry> &Res) const;
-  void ReadSymbol64TableEntry(
-    uint64_t SymbolTableOffset, unsigned Index,
-    InMemoryStruct<macho::Symbol64TableEntry> &Res) const;
-  void ReadDataInCodeTableEntry(
-    uint64_t TableOffset, unsigned Index,
-    InMemoryStruct<macho::DataInCodeTableEntry> &Res) const;
-  void ReadULEB128s(uint64_t Index, SmallVectorImpl<uint64_t> &Out) const;
-
-  /// @}
-
-  /// @name Object Dump Facilities
-  /// @{
-  /// dump - Support for debugging, callable in GDB: V->dump()
-  //
-  void dump() const;
-  void dumpHeader() const;
-
-  /// print - Implement operator<< on Value.
-  ///
-  void print(raw_ostream &O) const;
-  void printHeader(raw_ostream &O) const;
-
-  /// @}
-};
-
-inline raw_ostream &operator<<(raw_ostream &OS, const MachOObject &V) {
-  V.print(OS);
-  return OS;
-}
-
-} // end namespace object
-} // end namespace llvm
-
-#endif
diff --git a/include/llvm/Object/ObjectFile.h b/include/llvm/Object/ObjectFile.h
index 6a66653..eb53cc0 100644
--- a/include/llvm/Object/ObjectFile.h
+++ b/include/llvm/Object/ObjectFile.h
@@ -217,6 +217,8 @@ public:
   /// mapped).
   error_code getAddress(uint64_t &Result) const;
   error_code getFileOffset(uint64_t &Result) const;
+  /// @brief Get the alignment of this symbol as the actual value (not log 2).
+  error_code getAlignment(uint32_t &Result) const;
   error_code getSize(uint64_t &Result) const;
   error_code getType(SymbolRef::Type &Result) const;
 
@@ -227,9 +229,6 @@ public:
   /// Get symbol flags (bitwise OR of SymbolRef::Flags)
   error_code getFlags(uint32_t &Result) const;
 
-  /// @brief Return true for common symbols such as uninitialized globals
-  error_code isCommon(bool &Result) const;
-
   /// @brief Get section this symbol is defined in reference to. Result is
   /// end_sections() if it is undefined or is an absolute symbol.
   error_code getSection(section_iterator &Result) const;
@@ -276,7 +275,7 @@ class ObjectFile : public Binary {
   ObjectFile(const ObjectFile &other) LLVM_DELETED_FUNCTION;
 
 protected:
-  ObjectFile(unsigned int Type, MemoryBuffer *source, error_code &ec);
+  ObjectFile(unsigned int Type, MemoryBuffer *source);
 
   const uint8_t *base() const {
     return reinterpret_cast<const uint8_t *>(Data->getBufferStart());
@@ -295,6 +294,7 @@ protected:
   virtual error_code getSymbolName(DataRefImpl Symb, StringRef &Res) const = 0;
   virtual error_code getSymbolAddress(DataRefImpl Symb, uint64_t &Res) const = 0;
   virtual error_code getSymbolFileOffset(DataRefImpl Symb, uint64_t &Res)const=0;
+  virtual error_code getSymbolAlignment(DataRefImpl Symb, uint32_t &Res) const;
   virtual error_code getSymbolSize(DataRefImpl Symb, uint64_t &Res) const = 0;
   virtual error_code getSymbolType(DataRefImpl Symb,
                                    SymbolRef::Type &Res) const = 0;
@@ -428,6 +428,10 @@ inline error_code SymbolRef::getFileOffset(uint64_t &Result) const {
   return OwningObject->getSymbolFileOffset(SymbolPimpl, Result);
 }
 
+inline error_code SymbolRef::getAlignment(uint32_t &Result) const {
+  return OwningObject->getSymbolAlignment(SymbolPimpl, Result);
+}
+
 inline error_code SymbolRef::getSize(uint64_t &Result) const {
   return OwningObject->getSymbolSize(SymbolPimpl, Result);
 }
diff --git a/include/llvm/Object/RelocVisitor.h b/include/llvm/Object/RelocVisitor.h
index 2dcbdf9..6239ec1 100644
--- a/include/llvm/Object/RelocVisitor.h
+++ b/include/llvm/Object/RelocVisitor.h
@@ -102,6 +102,16 @@ public:
         HasError = true;
         return RelocToApply();
       }
+    } else if (FileFormat == "ELF64-s390") {
+      switch (RelocType) {
+      case llvm::ELF::R_390_32:
+        return visitELF_390_32(R, Value);
+      case llvm::ELF::R_390_64:
+        return visitELF_390_64(R, Value);
+      default:
+        HasError = true;
+        return RelocToApply();
+      }
     }
     HasError = true;
     return RelocToApply();
@@ -133,7 +143,7 @@ private:
     int64_t Addend;
     R.getAdditionalInfo(Addend);
     uint64_t Address;
-    R.getAddress(Address);
+    R.getOffset(Address);
     return RelocToApply(Value + Addend - Address, 4);
   }
 
@@ -151,7 +161,7 @@ private:
     int64_t Addend;
     R.getAdditionalInfo(Addend);
     uint64_t Address;
-    R.getAddress(Address);
+    R.getOffset(Address);
     return RelocToApply(Value + Addend - Address, 4);
   }
   RelocToApply visitELF_X86_64_32(RelocationRef R, uint64_t Value) {
@@ -202,6 +212,24 @@ private:
     return RelocToApply(Value + Addend, 8);
   }
 
+  // SystemZ ELF
+  RelocToApply visitELF_390_32(RelocationRef R, uint64_t Value) {
+    int64_t Addend;
+    R.getAdditionalInfo(Addend);
+    int64_t Res = Value + Addend;
+
+    // Overflow check allows for both signed and unsigned interpretation.
+    if (Res < INT32_MIN || Res > UINT32_MAX)
+      HasError = true;
+
+    return RelocToApply(static_cast<uint32_t>(Res), 4);
+  }
+
+  RelocToApply visitELF_390_64(RelocationRef R, uint64_t Value) {
+    int64_t Addend;
+    R.getAdditionalInfo(Addend);
+    return RelocToApply(Value + Addend, 8);
+  }
 };
 
 }
diff --git a/include/llvm/PassManager.h b/include/llvm/PassManager.h
index ce5fda7..b6a8186 100644
--- a/include/llvm/PassManager.h
+++ b/include/llvm/PassManager.h
@@ -18,6 +18,7 @@
 #define LLVM_PASSMANAGER_H
 
 #include "llvm/Pass.h"
+#include "llvm/Support/CBindingWrapping.h"
 
 namespace llvm {
 
@@ -98,6 +99,9 @@ private:
   Module *M;
 };
 
+// Create wrappers for C Binding types (see CBindingWrapping.h).
+DEFINE_STDCXX_CONVERSION_FUNCTIONS(PassManagerBase, LLVMPassManagerRef)
+
 } // End llvm namespace
 
 #endif
diff --git a/include/llvm/PassRegistry.h b/include/llvm/PassRegistry.h
index 5d89c49..f49c953 100644
--- a/include/llvm/PassRegistry.h
+++ b/include/llvm/PassRegistry.h
@@ -18,6 +18,8 @@
 #define LLVM_PASSREGISTRY_H
 
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Support/CBindingWrapping.h"
+#include "llvm-c/Core.h"
 
 namespace llvm {
 
@@ -79,6 +81,9 @@ public:
   void removeRegistrationListener(PassRegistrationListener *L);
 };
 
+// Create wrappers for C Binding types (see CBindingWrapping.h).
+DEFINE_STDCXX_CONVERSION_FUNCTIONS(PassRegistry, LLVMPassRegistryRef)
+
 }
 
 #endif
diff --git a/include/llvm/Support/CBindingWrapping.h b/include/llvm/Support/CBindingWrapping.h
new file mode 100644
index 0000000..51097b8
--- /dev/null
+++ b/include/llvm/Support/CBindingWrapping.h
@@ -0,0 +1,46 @@
+//===- llvm/Support/CBindingWrapping.h - C Interface Wrapping ---*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the wrapping macros for the C interface.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_C_BINDING_WRAPPING_H
+#define LLVM_C_BINDING_WRAPPING_H
+
+#include "llvm/Support/Casting.h"
+
+#define DEFINE_SIMPLE_CONVERSION_FUNCTIONS(ty, ref)     \
+  inline ty *unwrap(ref P) {                            \
+    return reinterpret_cast<ty*>(P);                    \
+  }                                                     \
+                                                        \
+  inline ref wrap(const ty *P) {                        \
+    return reinterpret_cast<ref>(const_cast<ty*>(P));   \
+  }
+
+#define DEFINE_ISA_CONVERSION_FUNCTIONS(ty, ref)        \
+  DEFINE_SIMPLE_CONVERSION_FUNCTIONS(ty, ref)           \
+                                                        \
+  template<typename T>                                  \
+  inline T *unwrap(ref P) {                             \
+    return cast<T>(unwrap(P));                          \
+  }
+
+#define DEFINE_STDCXX_CONVERSION_FUNCTIONS(ty, ref)     \
+  DEFINE_SIMPLE_CONVERSION_FUNCTIONS(ty, ref)           \
+                                                        \
+  template<typename T>                                  \
+  inline T *unwrap(ref P) {                             \
+    T *Q = (T*)unwrap(P);                               \
+    assert(Q && "Invalid cast!");                       \
+    return Q;                                           \
+  }
+
+#endif
diff --git a/include/llvm/Support/CodeGen.h b/include/llvm/Support/CodeGen.h
index 1b66c94..240eba6 100644
--- a/include/llvm/Support/CodeGen.h
+++ b/include/llvm/Support/CodeGen.h
@@ -15,6 +15,9 @@
 #ifndef LLVM_SUPPORT_CODEGEN_H
 #define LLVM_SUPPORT_CODEGEN_H
 
+#include "llvm-c/TargetMachine.h"
+#include "llvm/Support/ErrorHandling.h"
+
 namespace llvm {
 
   // Relocation model types.
@@ -47,6 +50,42 @@ namespace llvm {
     };
   }
 
+  // Create wrappers for C Binding types (see CBindingWrapping.h).
+  inline CodeModel::Model unwrap(LLVMCodeModel Model) {
+    switch (Model) {
+      case LLVMCodeModelDefault:
+        return CodeModel::Default;
+      case LLVMCodeModelJITDefault:
+        return CodeModel::JITDefault;
+      case LLVMCodeModelSmall:
+        return CodeModel::Small;
+      case LLVMCodeModelKernel:
+        return CodeModel::Kernel;
+      case LLVMCodeModelMedium:
+        return CodeModel::Medium;
+      case LLVMCodeModelLarge:
+        return CodeModel::Large;
+    }
+    return CodeModel::Default;
+  }
+
+  inline LLVMCodeModel wrap(CodeModel::Model Model) {
+    switch (Model) {
+      case CodeModel::Default:
+        return LLVMCodeModelDefault;
+      case CodeModel::JITDefault:
+        return LLVMCodeModelJITDefault;
+      case CodeModel::Small:
+        return LLVMCodeModelSmall;
+      case CodeModel::Kernel:
+        return LLVMCodeModelKernel;
+      case CodeModel::Medium:
+        return LLVMCodeModelMedium;
+      case CodeModel::Large:
+        return LLVMCodeModelLarge;
+    }
+    llvm_unreachable("Bad CodeModel!");
+  }
 }  // end llvm namespace
 
 #endif
diff --git a/include/llvm/Support/CommandLine.h b/include/llvm/Support/CommandLine.h
index 2e84d7b..bfaafda 100644
--- a/include/llvm/Support/CommandLine.h
+++ b/include/llvm/Support/CommandLine.h
@@ -22,6 +22,7 @@
 
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/ADT/StringMap.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/type_traits.h"
 #include <cassert>
@@ -137,7 +138,23 @@ enum MiscFlags {               // Miscellaneous flags to adjust argument
   Sink               = 0x04   // Should this cl::list eat all unknown options?
 };
 
+//===----------------------------------------------------------------------===//
+// Option Category class
+//
+class OptionCategory {
+private:
+  const char *const Name;
+  const char *const Description;
+  void registerCategory();
+public:
+  OptionCategory(const char *const Name, const char *const Description = 0)
+      : Name(Name), Description(Description) { registerCategory(); }
+  const char *getName() { return Name; }
+  const char *getDescription() { return Description; }
+};
 
+// The general Option Category (used as default category).
+extern OptionCategory GeneralCategory;
 
 //===----------------------------------------------------------------------===//
 // Option Base class
@@ -173,10 +190,12 @@ class Option {
   unsigned Position;      // Position of last occurrence of the option
   unsigned AdditionalVals;// Greater than 0 for multi-valued option.
   Option *NextRegistered; // Singly linked list of registered options.
+
 public:
-  const char *ArgStr;     // The argument string itself (ex: "help", "o")
-  const char *HelpStr;    // The descriptive text message for -help
-  const char *ValueStr;   // String describing what the value of this option is
+  const char *ArgStr;   // The argument string itself (ex: "help", "o")
+  const char *HelpStr;  // The descriptive text message for -help
+  const char *ValueStr; // String describing what the value of this option is
+  OptionCategory *Category; // The Category this option belongs to
 
   inline enum NumOccurrencesFlag getNumOccurrencesFlag() const {
     return (enum NumOccurrencesFlag)Occurrences;
@@ -214,13 +233,14 @@ public:
   void setFormattingFlag(enum FormattingFlags V) { Formatting = V; }
   void setMiscFlag(enum MiscFlags M) { Misc |= M; }
   void setPosition(unsigned pos) { Position = pos; }
+  void setCategory(OptionCategory &C) { Category = &C; }
 protected:
   explicit Option(enum NumOccurrencesFlag OccurrencesFlag,
                   enum OptionHidden Hidden)
     : NumOccurrences(0), Occurrences(OccurrencesFlag), Value(0),
       HiddenFlag(Hidden), Formatting(NormalFormatting), Misc(0),
       Position(0), AdditionalVals(0), NextRegistered(0),
-      ArgStr(""), HelpStr(""), ValueStr("") {
+      ArgStr(""), HelpStr(""), ValueStr(""), Category(&GeneralCategory) {
   }
 
   inline void setNumAdditionalVals(unsigned n) { AdditionalVals = n; }
@@ -312,6 +332,16 @@ struct LocationClass {
 template<class Ty>
 LocationClass<Ty> location(Ty &L) { return LocationClass<Ty>(L); }
 
+// cat - Specify the Option category for the command line argument to belong
+// to.
+struct cat {
+  OptionCategory &Category;
+  cat(OptionCategory &c) : Category(c) {}
+
+  template<class Opt>
+  void apply(Opt &O) const { O.setCategory(Category); }
+};
+
 
 //===----------------------------------------------------------------------===//
 // OptionValue class
@@ -1674,10 +1704,48 @@ struct extrahelp {
 };
 
 void PrintVersionMessage();
-// This function just prints the help message, exactly the same way as if the
-// -help option had been given on the command line.
-// NOTE: THIS FUNCTION TERMINATES THE PROGRAM!
-void PrintHelpMessage();
+
+/// This function just prints the help message, exactly the same way as if the
+/// -help or -help-hidden option had been given on the command line.
+///
+/// NOTE: THIS FUNCTION TERMINATES THE PROGRAM!
+///
+/// \param Hidden if true, hidden options are printed as well
+/// \param Categorized if true, options are printed grouped by category
+void PrintHelpMessage(bool Hidden=false, bool Categorized=false);
+
+
+//===----------------------------------------------------------------------===//
+// Public interface for accessing registered options.
+//
+
+/// \brief Use this to get a StringMap to all registered named options
+/// (e.g. -help). Note that \p Map should be an empty StringMap.
+///
+/// \param [out] Map will be filled with mappings where the key is the
+/// Option argument string (e.g. "help") and value is the corresponding
+/// Option*.
+///
+/// Access to unnamed arguments (i.e. positional) are not provided because
+/// it is expected that the client already has access to these.
+///
+/// Typical usage:
+/// \code
+/// main(int argc,char* argv[]) {
+/// StringMap<llvm::cl::Option*> opts;
+/// llvm::cl::getRegisteredOptions(opts);
+/// assert(opts.count("help") == 1);
+/// opts["help"]->setDescription("Show alphabetical help information");
+/// // More code
+/// llvm::cl::ParseCommandLineOptions(argc,argv);
+/// //More code
+/// }
+/// \endcode
+///
+/// This interface is useful for modifying options in libraries that are out of
+/// the control of the client. The options should be modified before calling
+/// llvm::cl::ParseCommandLineOptions().
+void getRegisteredOptions(StringMap<Option*> &Map);
 
 } // End namespace cl
 
diff --git a/include/llvm/Support/Compression.h b/include/llvm/Support/Compression.h
new file mode 100644
index 0000000..9b1142d
--- /dev/null
+++ b/include/llvm/Support/Compression.h
@@ -0,0 +1,58 @@
+//===-- llvm/Support/Compression.h ---Compression----------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains basic functions for compression/uncompression.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_COMPRESSION_H
+#define LLVM_SUPPORT_COMPRESSION_H
+
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+
+class MemoryBuffer;
+template<typename T> class OwningPtr;
+class StringRef;
+
+namespace zlib {
+
+enum CompressionLevel {
+  NoCompression,
+  DefaultCompression,
+  BestSpeedCompression,
+  BestSizeCompression
+};
+
+enum Status {
+  StatusOK,
+  StatusUnsupported,  // zlib is unavailable
+  StatusOutOfMemory,  // there was not enough memory
+  StatusBufferTooShort,  // there was not enough room in the output buffer
+  StatusInvalidArg,  // invalid input parameter
+  StatusInvalidData  // data was corrupted or incomplete
+};
+
+bool isAvailable();
+
+Status compress(StringRef InputBuffer,
+                OwningPtr<MemoryBuffer> &CompressedBuffer,
+                CompressionLevel Level = DefaultCompression);
+
+Status uncompress(StringRef InputBuffer,
+                  OwningPtr<MemoryBuffer> &UncompressedBuffer,
+                  size_t UncompressedSize);
+
+}  // End of namespace zlib
+
+} // End of namespace llvm
+
+#endif
+
diff --git a/include/llvm/Support/ELF.h b/include/llvm/Support/ELF.h
index ea597fc..c46dfeb 100644
--- a/include/llvm/Support/ELF.h
+++ b/include/llvm/Support/ELF.h
@@ -466,6 +466,7 @@ enum {
 
 // ELF Relocation types for PPC64
 enum {
+  R_PPC64_NONE                = 0,
   R_PPC64_ADDR32              = 1,
   R_PPC64_ADDR16_LO           = 4,
   R_PPC64_ADDR16_HI           = 5,
@@ -486,6 +487,7 @@ enum {
   R_PPC64_TOC16_LO_DS         = 64,
   R_PPC64_TLS                 = 67,
   R_PPC64_TPREL16_LO          = 70,
+  R_PPC64_TPREL16_HA          = 72,
   R_PPC64_DTPREL16_LO         = 75,
   R_PPC64_DTPREL16_HA         = 77,
   R_PPC64_GOT_TLSGD16_LO      = 80,
@@ -944,6 +946,72 @@ enum {
   R_HEX_TPREL_11_X        =  85
 };
 
+// ELF Relocation types for S390/zSeries
+enum {
+  R_390_NONE        =  0,
+  R_390_8           =  1,
+  R_390_12          =  2,
+  R_390_16          =  3,
+  R_390_32          =  4,
+  R_390_PC32        =  5,
+  R_390_GOT12       =  6,
+  R_390_GOT32       =  7,
+  R_390_PLT32       =  8,
+  R_390_COPY        =  9,
+  R_390_GLOB_DAT    = 10,
+  R_390_JMP_SLOT    = 11,
+  R_390_RELATIVE    = 12,
+  R_390_GOTOFF      = 13,
+  R_390_GOTPC       = 14,
+  R_390_GOT16       = 15,
+  R_390_PC16        = 16,
+  R_390_PC16DBL     = 17,
+  R_390_PLT16DBL    = 18,
+  R_390_PC32DBL     = 19,
+  R_390_PLT32DBL    = 20,
+  R_390_GOTPCDBL    = 21,
+  R_390_64          = 22,
+  R_390_PC64        = 23,
+  R_390_GOT64       = 24,
+  R_390_PLT64       = 25,
+  R_390_GOTENT      = 26,
+  R_390_GOTOFF16    = 27,
+  R_390_GOTOFF64    = 28,
+  R_390_GOTPLT12    = 29,
+  R_390_GOTPLT16    = 30,
+  R_390_GOTPLT32    = 31,
+  R_390_GOTPLT64    = 32,
+  R_390_GOTPLTENT   = 33,
+  R_390_PLTOFF16    = 34,
+  R_390_PLTOFF32    = 35,
+  R_390_PLTOFF64    = 36,
+  R_390_TLS_LOAD    = 37,
+  R_390_TLS_GDCALL  = 38,
+  R_390_TLS_LDCALL  = 39,
+  R_390_TLS_GD32    = 40,
+  R_390_TLS_GD64    = 41,
+  R_390_TLS_GOTIE12 = 42,
+  R_390_TLS_GOTIE32 = 43,
+  R_390_TLS_GOTIE64 = 44,
+  R_390_TLS_LDM32   = 45,
+  R_390_TLS_LDM64   = 46,
+  R_390_TLS_IE32    = 47,
+  R_390_TLS_IE64    = 48,
+  R_390_TLS_IEENT   = 49,
+  R_390_TLS_LE32    = 50,
+  R_390_TLS_LE64    = 51,
+  R_390_TLS_LDO32   = 52,
+  R_390_TLS_LDO64   = 53,
+  R_390_TLS_DTPMOD  = 54,
+  R_390_TLS_DTPOFF  = 55,
+  R_390_TLS_TPOFF   = 56,
+  R_390_20          = 57,
+  R_390_GOT20       = 58,
+  R_390_GOTPLT20    = 59,
+  R_390_TLS_GOTIE20 = 60,
+  R_390_IRELATIVE   = 61
+};
+
 // Section header.
 struct Elf32_Shdr {
   Elf32_Word sh_name;      // Section name (index into string table)
diff --git a/include/llvm/Support/Endian.h b/include/llvm/Support/Endian.h
index d438fac..0d35849 100644
--- a/include/llvm/Support/Endian.h
+++ b/include/llvm/Support/Endian.h
@@ -37,7 +37,7 @@ namespace detail {
 namespace endian {
 template<typename value_type, endianness endian>
 inline value_type byte_swap(value_type value) {
-  if (endian != native && sys::isBigEndianHost() != (endian == big))
+  if (endian != native && sys::IsBigEndianHost != (endian == big))
     return sys::SwapByteOrder(value);
   return value;
 }
diff --git a/include/llvm/Support/Host.h b/include/llvm/Support/Host.h
index 3a44405..9a4036a 100644
--- a/include/llvm/Support/Host.h
+++ b/include/llvm/Support/Host.h
@@ -15,23 +15,27 @@
 #define LLVM_SUPPORT_HOST_H
 
 #include "llvm/ADT/StringMap.h"
+
+#if defined(__linux__)
+#include <endian.h>
+#else
+#ifndef LLVM_ON_WIN32
+#include <machine/endian.h>
+#endif
+#endif
+
 #include <string>
 
 namespace llvm {
 namespace sys {
 
-  inline bool isLittleEndianHost() {
-    union {
-      int i;
-      char c;
-    };
-    i = 1;
-    return c;
-  }
+#if defined(BYTE_ORDER) && defined(BIG_ENDIAN) && BYTE_ORDER == BIG_ENDIAN
+  static const bool IsBigEndianHost = true;
+#else
+  static const bool IsBigEndianHost = false;
+#endif
 
-  inline bool isBigEndianHost() {
-    return !isLittleEndianHost();
-  }
+  static const bool IsLittleEndianHost = !IsBigEndianHost;
 
   /// getDefaultTargetTriple() - Return the default target triple the compiler
   /// has been configured to produce code for.
diff --git a/include/llvm/Support/MemoryBuffer.h b/include/llvm/Support/MemoryBuffer.h
index 1f02907..0cce726 100644
--- a/include/llvm/Support/MemoryBuffer.h
+++ b/include/llvm/Support/MemoryBuffer.h
@@ -15,8 +15,10 @@
 #define LLVM_SUPPORT_MEMORYBUFFER_H
 
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Support/CBindingWrapping.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/DataTypes.h"
+#include "llvm-c/Core.h"
 
 namespace llvm {
 
@@ -137,6 +139,9 @@ public:
   virtual BufferKind getBufferKind() const = 0;  
 };
 
+// Create wrappers for C Binding types (see CBindingWrapping.h).
+DEFINE_SIMPLE_CONVERSION_FUNCTIONS(MemoryBuffer, LLVMMemoryBufferRef)
+
 } // end namespace llvm
 
 #endif
diff --git a/include/llvm/Support/PatternMatch.h b/include/llvm/Support/PatternMatch.h
index 9fbe434..95d9d78 100644
--- a/include/llvm/Support/PatternMatch.h
+++ b/include/llvm/Support/PatternMatch.h
@@ -693,6 +693,12 @@ m_ZExt(const OpTy &Op) {
   return CastClass_match<OpTy, Instruction::ZExt>(Op);
 }
 
+/// m_UIToFp - Matches a UIToFP cast instruction.
+template<typename OpTy>
+inline CastClass_match<OpTy, Instruction::UIToFP>
+m_UIToFp(const OpTy &Op) { 
+  return CastClass_match<OpTy, Instruction::UIToFP>(Op);
+}
 
 //===----------------------------------------------------------------------===//
 // Matchers for unary operators
@@ -830,7 +836,7 @@ inline brc_match<Cond_t> m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F) {
 // Matchers for max/min idioms, eg: "select (sgt x, y), x, y" -> smax(x,y).
 //
 
-template<typename LHS_t, typename RHS_t, typename Pred_t>
+template<typename CmpInst_t, typename LHS_t, typename RHS_t, typename Pred_t>
 struct MaxMin_match {
   LHS_t L;
   RHS_t R;
@@ -844,7 +850,7 @@ struct MaxMin_match {
     SelectInst *SI = dyn_cast<SelectInst>(V);
     if (!SI)
       return false;
-    ICmpInst *Cmp = dyn_cast<ICmpInst>(SI->getCondition());
+    CmpInst_t *Cmp = dyn_cast<CmpInst_t>(SI->getCondition());
     if (!Cmp)
       return false;
     // At this point we have a select conditioned on a comparison.  Check that
@@ -856,7 +862,7 @@ struct MaxMin_match {
     if ((TrueVal != LHS || FalseVal != RHS) &&
         (TrueVal != RHS || FalseVal != LHS))
       return false;
-    ICmpInst::Predicate Pred = LHS == TrueVal ?
+    typename CmpInst_t::Predicate Pred = LHS == TrueVal ?
       Cmp->getPredicate() : Cmp->getSwappedPredicate();
     // Does "(x pred y) ? x : y" represent the desired max/min operation?
     if (!Pred_t::match(Pred))
@@ -894,28 +900,116 @@ struct umin_pred_ty {
   }
 };
 
+/// ofmax_pred_ty - Helper class for identifying ordered max predicates.
+struct ofmax_pred_ty {
+  static bool match(FCmpInst::Predicate Pred) {
+    return Pred == CmpInst::FCMP_OGT || Pred == CmpInst::FCMP_OGE;
+  }
+};
+
+/// ofmin_pred_ty - Helper class for identifying ordered min predicates.
+struct ofmin_pred_ty {
+  static bool match(FCmpInst::Predicate Pred) {
+    return Pred == CmpInst::FCMP_OLT || Pred == CmpInst::FCMP_OLE;
+  }
+};
+
+/// ufmax_pred_ty - Helper class for identifying unordered max predicates.
+struct ufmax_pred_ty {
+  static bool match(FCmpInst::Predicate Pred) {
+    return Pred == CmpInst::FCMP_UGT || Pred == CmpInst::FCMP_UGE;
+  }
+};
+
+/// ufmin_pred_ty - Helper class for identifying unordered min predicates.
+struct ufmin_pred_ty {
+  static bool match(FCmpInst::Predicate Pred) {
+    return Pred == CmpInst::FCMP_ULT || Pred == CmpInst::FCMP_ULE;
+  }
+};
+
 template<typename LHS, typename RHS>
-inline MaxMin_match<LHS, RHS, smax_pred_ty>
+inline MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty>
 m_SMax(const LHS &L, const RHS &R) {
-  return MaxMin_match<LHS, RHS, smax_pred_ty>(L, R);
+  return MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty>(L, R);
 }
 
 template<typename LHS, typename RHS>
-inline MaxMin_match<LHS, RHS, smin_pred_ty>
+inline MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty>
 m_SMin(const LHS &L, const RHS &R) {
-  return MaxMin_match<LHS, RHS, smin_pred_ty>(L, R);
+  return MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty>(L, R);
 }
 
 template<typename LHS, typename RHS>
-inline MaxMin_match<LHS, RHS, umax_pred_ty>
+inline MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty>
 m_UMax(const LHS &L, const RHS &R) {
-  return MaxMin_match<LHS, RHS, umax_pred_ty>(L, R);
+  return MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty>(L, R);
 }
 
 template<typename LHS, typename RHS>
-inline MaxMin_match<LHS, RHS, umin_pred_ty>
+inline MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty>
 m_UMin(const LHS &L, const RHS &R) {
-  return MaxMin_match<LHS, RHS, umin_pred_ty>(L, R);
+  return MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty>(L, R);
+}
+
+/// \brief Match an 'ordered' floating point maximum function.
+/// Floating point has one special value 'NaN'. Therefore, there is no total
+/// order. However, if we can ignore the 'NaN' value (for example, because of a
+/// 'no-nans-float-math' flag) a combination of a fcmp and select has 'maximum'
+/// semantics. In the presence of 'NaN' we have to preserve the original
+/// select(fcmp(ogt/ge, L, R), L, R) semantics matched by this predicate.
+///
+///                         max(L, R)  iff L and R are not NaN
+///  m_OrdFMax(L, R) =      R          iff L or R are NaN
+template<typename LHS, typename RHS>
+inline MaxMin_match<FCmpInst, LHS, RHS, ofmax_pred_ty>
+m_OrdFMax(const LHS &L, const RHS &R) {
+  return MaxMin_match<FCmpInst, LHS, RHS, ofmax_pred_ty>(L, R);
+}
+
+/// \brief Match an 'ordered' floating point minimum function.
+/// Floating point has one special value 'NaN'. Therefore, there is no total
+/// order. However, if we can ignore the 'NaN' value (for example, because of a
+/// 'no-nans-float-math' flag) a combination of a fcmp and select has 'minimum'
+/// semantics. In the presence of 'NaN' we have to preserve the original
+/// select(fcmp(olt/le, L, R), L, R) semantics matched by this predicate.
+///
+///                         min(L, R)  iff L and R are not NaN
+///  m_OrdFMin(L, R) =      R          iff L or R are NaN
+template<typename LHS, typename RHS>
+inline MaxMin_match<FCmpInst, LHS, RHS, ofmin_pred_ty>
+m_OrdFMin(const LHS &L, const RHS &R) {
+  return MaxMin_match<FCmpInst, LHS, RHS, ofmin_pred_ty>(L, R);
+}
+
+/// \brief Match an 'unordered' floating point maximum function.
+/// Floating point has one special value 'NaN'. Therefore, there is no total
+/// order. However, if we can ignore the 'NaN' value (for example, because of a
+/// 'no-nans-float-math' flag) a combination of a fcmp and select has 'maximum'
+/// semantics. In the presence of 'NaN' we have to preserve the original
+/// select(fcmp(ugt/ge, L, R), L, R) semantics matched by this predicate.
+///
+///                         max(L, R)  iff L and R are not NaN
+///  m_UnordFMax(L, R) =    L          iff L or R are NaN
+template<typename LHS, typename RHS>
+inline MaxMin_match<FCmpInst, LHS, RHS, ufmax_pred_ty>
+m_UnordFMax(const LHS &L, const RHS &R) {
+  return MaxMin_match<FCmpInst, LHS, RHS, ufmax_pred_ty>(L, R);
+}
+
+/// \brief Match an 'unordered' floating point minimum function.
+/// Floating point has one special value 'NaN'. Therefore, there is no total
+/// order. However, if we can ignore the 'NaN' value (for example, because of a
+/// 'no-nans-float-math' flag) a combination of a fcmp and select has 'minimum'
+/// semantics. In the presence of 'NaN' we have to preserve the original
+/// select(fcmp(ult/le, L, R), L, R) semantics matched by this predicate.
+///
+///                          min(L, R)  iff L and R are not NaN
+///  m_UnordFMin(L, R) =     L          iff L or R are NaN
+template<typename LHS, typename RHS>
+inline MaxMin_match<FCmpInst, LHS, RHS, ufmin_pred_ty>
+m_UnordFMin(const LHS &L, const RHS &R) {
+  return MaxMin_match<FCmpInst, LHS, RHS, ufmin_pred_ty>(L, R);
 }
 
 template<typename Opnd_t>
diff --git a/include/llvm/Support/Program.h b/include/llvm/Support/Program.h
index bf65011..fb177de 100644
--- a/include/llvm/Support/Program.h
+++ b/include/llvm/Support/Program.h
@@ -14,6 +14,7 @@
 #ifndef LLVM_SUPPORT_PROGRAM_H
 #define LLVM_SUPPORT_PROGRAM_H
 
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/Support/Path.h"
 
 namespace llvm {
@@ -140,6 +141,10 @@ namespace sys {
     /// @}
 
   };
+
+  // Return true if the given arguments fit within system-specific
+  // argument length limits.
+  bool argumentsFitWithinSystemLimits(ArrayRef<const char*> Args);
 }
 }
 
diff --git a/include/llvm/Support/SourceMgr.h b/include/llvm/Support/SourceMgr.h
index 02abf92..d67914a 100644
--- a/include/llvm/Support/SourceMgr.h
+++ b/include/llvm/Support/SourceMgr.h
@@ -145,8 +145,8 @@ public:
   /// @param ShowColors - Display colored messages if output is a terminal and
   /// the default error handler is used.
   void PrintMessage(SMLoc Loc, DiagKind Kind, const Twine &Msg,
-                    ArrayRef<SMRange> Ranges = ArrayRef<SMRange>(),
-                    ArrayRef<SMFixIt> FixIts = ArrayRef<SMFixIt>(),
+                    ArrayRef<SMRange> Ranges = None,
+                    ArrayRef<SMFixIt> FixIts = None,
                     bool ShowColors = true) const;
 
 
@@ -155,9 +155,9 @@ public:
   ///
   /// @param Msg If non-null, the kind of message (e.g., "error") which is
   /// prefixed to the message.
-  SMDiagnostic GetMessage(SMLoc Loc, DiagKind Kind, const Twine &Msg, 
-                          ArrayRef<SMRange> Ranges = ArrayRef<SMRange>(),
-                          ArrayRef<SMFixIt> FixIts = ArrayRef<SMFixIt>()) const;
+  SMDiagnostic GetMessage(SMLoc Loc, DiagKind Kind, const Twine &Msg,
+                          ArrayRef<SMRange> Ranges = None,
+                          ArrayRef<SMFixIt> FixIts = None) const;
 
   /// PrintIncludeStack - Prints the names of included files and the line of the
   /// file they were included from.  A diagnostic handler can use this before
@@ -227,7 +227,7 @@ public:
                int Line, int Col, SourceMgr::DiagKind Kind,
                StringRef Msg, StringRef LineStr,
                ArrayRef<std::pair<unsigned,unsigned> > Ranges,
-               ArrayRef<SMFixIt> FixIts = ArrayRef<SMFixIt>());
+               ArrayRef<SMFixIt> FixIts = None);
 
   const SourceMgr *getSourceMgr() const { return SM; }
   SMLoc getLoc() const { return Loc; }
diff --git a/include/llvm/Target/Target.td b/include/llvm/Target/Target.td
index deee2eb..7de8b38 100644
--- a/include/llvm/Target/Target.td
+++ b/include/llvm/Target/Target.td
@@ -790,8 +790,8 @@ class AsmParser {
   // This can be used to perform target specific instruction post-processing.
   string AsmParserInstCleanup  = "";
 
-  //ShouldEmitMatchRegisterName - Set to false if the target needs a hand
-  //written register name matcher
+  // ShouldEmitMatchRegisterName - Set to false if the target needs a hand
+  // written register name matcher
   bit ShouldEmitMatchRegisterName = 1;
 }
 def DefaultAsmParser : AsmParser;
@@ -807,6 +807,9 @@ class AsmParserVariant {
   // assembly language.
   int Variant = 0;
 
+  // Name - The AsmParser variant name (e.g., AT&T vs Intel).
+  string Name = "";
+
   // CommentDelimiter - If given, the delimiter string used to recognize
   // comments which are hard coded in the .td assembler strings for individual
   // instructions.
@@ -860,9 +863,16 @@ class TokenAlias<string From, string To> {
 ///  def : MnemonicAlias<"pushf", "pushfq">, Requires<[In64BitMode]>;
 ///  def : MnemonicAlias<"pushf", "pushfl">, Requires<[In32BitMode]>;
 ///
-class MnemonicAlias<string From, string To> {
+/// Mnemonic aliases can also be constrained to specific variants, e.g.:
+///
+///  def : MnemonicAlias<"pushf", "pushfq", "att">, Requires<[In64BitMode]>;
+///
+/// If no variant (e.g., "att" or "intel") is specified then the alias is
+/// applied unconditionally.
+class MnemonicAlias<string From, string To, string VariantName = ""> {
   string FromMnemonic = From;
   string ToMnemonic = To;
+  string AsmVariantName = VariantName;
 
   // Predicates - Predicates that must be true for this remapping to happen.
   list<Predicate> Predicates = [];
diff --git a/include/llvm/Target/TargetCallingConv.h b/include/llvm/Target/TargetCallingConv.h
index 2160e37..1fd0bd9 100644
--- a/include/llvm/Target/TargetCallingConv.h
+++ b/include/llvm/Target/TargetCallingConv.h
@@ -36,13 +36,15 @@ namespace ISD {
     static const uint64_t ByValOffs      = 4;
     static const uint64_t Nest           = 1ULL<<5;  ///< Nested fn static chain
     static const uint64_t NestOffs       = 5;
-    static const uint64_t ByValAlign     = 0xFULL << 6; ///< Struct alignment
-    static const uint64_t ByValAlignOffs = 6;
-    static const uint64_t Split          = 1ULL << 10;
-    static const uint64_t SplitOffs      = 10;
+    static const uint64_t Returned       = 1ULL<<6;  ///< Always returned
+    static const uint64_t ReturnedOffs   = 6;
+    static const uint64_t ByValAlign     = 0xFULL<<7; ///< Struct alignment
+    static const uint64_t ByValAlignOffs = 7;
+    static const uint64_t Split          = 1ULL<<11;
+    static const uint64_t SplitOffs      = 11;
     static const uint64_t OrigAlign      = 0x1FULL<<27;
     static const uint64_t OrigAlignOffs  = 27;
-    static const uint64_t ByValSize      = 0xffffffffULL << 32; ///< Struct size
+    static const uint64_t ByValSize      = 0xffffffffULL<<32; ///< Struct size
     static const uint64_t ByValSizeOffs  = 32;
 
     static const uint64_t One            = 1ULL; ///< 1 of this type, for shifts
@@ -51,23 +53,26 @@ namespace ISD {
   public:
     ArgFlagsTy() : Flags(0) { }
 
-    bool isZExt()   const { return Flags & ZExt; }
-    void setZExt()  { Flags |= One << ZExtOffs; }
+    bool isZExt()      const { return Flags & ZExt; }
+    void setZExt()     { Flags |= One << ZExtOffs; }
 
-    bool isSExt()   const { return Flags & SExt; }
-    void setSExt()  { Flags |= One << SExtOffs; }
+    bool isSExt()      const { return Flags & SExt; }
+    void setSExt()     { Flags |= One << SExtOffs; }
 
-    bool isInReg()  const { return Flags & InReg; }
-    void setInReg() { Flags |= One << InRegOffs; }
+    bool isInReg()     const { return Flags & InReg; }
+    void setInReg()    { Flags |= One << InRegOffs; }
 
-    bool isSRet()   const { return Flags & SRet; }
-    void setSRet()  { Flags |= One << SRetOffs; }
+    bool isSRet()      const { return Flags & SRet; }
+    void setSRet()     { Flags |= One << SRetOffs; }
 
-    bool isByVal()  const { return Flags & ByVal; }
-    void setByVal() { Flags |= One << ByValOffs; }
+    bool isByVal()     const { return Flags & ByVal; }
+    void setByVal()    { Flags |= One << ByValOffs; }
 
-    bool isNest()   const { return Flags & Nest; }
-    void setNest()  { Flags |= One << NestOffs; }
+    bool isNest()      const { return Flags & Nest; }
+    void setNest()     { Flags |= One << NestOffs; }
+
+    bool isReturned()  const { return Flags & Returned; }
+    void setReturned() { Flags |= One << ReturnedOffs; }
 
     unsigned getByValAlign() const {
       return (unsigned)
@@ -97,9 +102,6 @@ namespace ISD {
       Flags = (Flags & ~ByValSize) | (uint64_t(S) << ByValSizeOffs);
     }
 
-    /// getArgFlagsString - Returns the flags as a string, eg: "zext align:4".
-    std::string getArgFlagsString();
-
     /// getRawBits - Represent the flags as a bunch of bits.
     uint64_t getRawBits() const { return Flags; }
   };
diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h
index 0ba75e5..d49ce1c 100644
--- a/include/llvm/Target/TargetInstrInfo.h
+++ b/include/llvm/Target/TargetInstrInfo.h
@@ -774,6 +774,10 @@ public:
 
   /// FoldImmediate - 'Reg' is known to be defined by a move immediate
   /// instruction, try to fold the immediate into the use instruction.
+  /// If MRI->hasOneNonDBGUse(Reg) is true, and this function returns true,
+  /// then the caller may assume that DefMI has been erased from its parent
+  /// block. The caller may assume that it will not be erased by this
+  /// function otherwise.
   virtual bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
                              unsigned Reg, MachineRegisterInfo *MRI) const {
     return false;
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index 1786bd2..d5c9ebe 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -135,6 +135,11 @@ public:
                               const TargetLoweringObjectFile *TLOF);
   virtual ~TargetLoweringBase();
 
+protected:
+  /// \brief Initialize all of the actions to default values.
+  void initActions();
+
+public:
   const TargetMachine &getTargetMachine() const { return TM; }
   const DataLayout *getDataLayout() const { return TD; }
   const TargetLoweringObjectFile &getObjFileLowering() const { return TLOF; }
@@ -805,13 +810,6 @@ public:
     return PrefLoopAlignment;
   }
 
-  /// getShouldFoldAtomicFences - return whether the combiner should fold
-  /// fence MEMBARRIER instructions into the atomic intrinsic instructions.
-  ///
-  bool getShouldFoldAtomicFences() const {
-    return ShouldFoldAtomicFences;
-  }
-
   /// getInsertFencesFor - return whether the DAG builder should automatically
   /// insert fences and reduce ordering for atomics.
   ///
@@ -851,6 +849,9 @@ public:
   // the derived class constructor to configure this object for the target.
   //
 
+  /// \brief Reset the operation actions based on target options.
+  virtual void resetOperationActions() {}
+
 protected:
   /// setBooleanContents - Specify how the target extends the result of a
   /// boolean value from i1 to a wider type.  See getBooleanContents.
@@ -951,13 +952,17 @@ protected:
     RegClassForVT[VT.SimpleTy] = RC;
   }
 
-  /// clearRegisterClasses - remove all register classes
+  /// clearRegisterClasses - Remove all register classes.
   void clearRegisterClasses() {
-    for (unsigned i = 0 ; i<array_lengthof(RegClassForVT); i++)
-      RegClassForVT[i] = 0;
+    memset(RegClassForVT, 0,MVT::LAST_VALUETYPE * sizeof(TargetRegisterClass*));
+
     AvailableRegClasses.clear();
   }
 
+  /// \brief Remove all operation actions.
+  void clearOperationActions() {
+  }
+
   /// findRepresentativeClass - Return the largest legal super-reg register class
   /// of the register class for the specified type and its associated "cost".
   virtual std::pair<const TargetRegisterClass*, uint8_t>
@@ -1089,12 +1094,6 @@ protected:
     MinStackArgumentAlignment = Align;
   }
 
-  /// setShouldFoldAtomicFences - Set if the target's implementation of the
-  /// atomic operation intrinsics includes locking. Default is false.
-  void setShouldFoldAtomicFences(bool fold) {
-    ShouldFoldAtomicFences = fold;
-  }
-
   /// setInsertFencesForAtomic - Set if the DAG builder should
   /// automatically insert fences and reduce the order of atomic memory
   /// operations to Monotonic.
@@ -1352,11 +1351,6 @@ private:
   ///
   unsigned PrefLoopAlignment;
 
-  /// ShouldFoldAtomicFences - Whether fencing MEMBARRIER instructions should
-  /// be folded into the enclosed atomic intrinsic instruction by the
-  /// combiner.
-  bool ShouldFoldAtomicFences;
-
   /// InsertFencesForAtomic - Whether the DAG builder should automatically
   /// insert fences and reduce ordering for atomics.  (This will be set for
   /// for most architectures with weak memory ordering.)
@@ -1511,6 +1505,7 @@ public:
       // or until the element integer type is too big. If a legal type was not
       // found, fallback to the usual mechanism of widening/splitting the
       // vector.
+      EVT OldEltVT = EltVT;
       while (1) {
         // Increase the bitwidth of the element to the next pow-of-two
         // (which is greater than 8 bits).
@@ -1529,6 +1524,10 @@ public:
           return LegalizeKind(TypePromoteInteger,
                               EVT::getVectorVT(Context, EltVT, NumElts));
       }
+
+      // Reset the type to the unexpanded type if we did not find a legal vector
+      // type with a promoted vector element type.
+      EltVT = OldEltVT;
     }
 
     // Try to widen the vector until a legal type is found.
@@ -1893,16 +1892,18 @@ public:
   struct ArgListEntry {
     SDValue Node;
     Type* Ty;
-    bool isSExt  : 1;
-    bool isZExt  : 1;
-    bool isInReg : 1;
-    bool isSRet  : 1;
-    bool isNest  : 1;
-    bool isByVal : 1;
+    bool isSExt     : 1;
+    bool isZExt     : 1;
+    bool isInReg    : 1;
+    bool isSRet     : 1;
+    bool isNest     : 1;
+    bool isByVal    : 1;
+    bool isReturned : 1;
     uint16_t Alignment;
 
     ArgListEntry() : isSExt(false), isZExt(false), isInReg(false),
-      isSRet(false), isNest(false), isByVal(false), Alignment(0) { }
+      isSRet(false), isNest(false), isByVal(false), isReturned(false),
+      Alignment(0) { }
   };
   typedef std::vector<ArgListEntry> ArgListTy;
 
diff --git a/include/llvm/Target/TargetMachine.h b/include/llvm/Target/TargetMachine.h
index 66f3a3c..37a79fe 100644
--- a/include/llvm/Target/TargetMachine.h
+++ b/include/llvm/Target/TargetMachine.h
@@ -32,6 +32,7 @@ class MCContext;
 class PassManagerBase;
 class Target;
 class DataLayout;
+class TargetLibraryInfo;
 class TargetFrameLowering;
 class TargetInstrInfo;
 class TargetIntrinsicInfo;
diff --git a/include/llvm/Target/TargetOptions.h b/include/llvm/Target/TargetOptions.h
index c31db24..c763a59 100644
--- a/include/llvm/Target/TargetOptions.h
+++ b/include/llvm/Target/TargetOptions.h
@@ -208,6 +208,7 @@ namespace llvm {
     /// the value of this option.
     FPOpFusion::FPOpFusionMode AllowFPOpFusion;
 
+    bool operator==(const TargetOptions &);
   };
 } // End llvm namespace
 
diff --git a/include/llvm/Target/TargetSelectionDAG.td b/include/llvm/Target/TargetSelectionDAG.td
index 83bd787..d89a6e6 100644
--- a/include/llvm/Target/TargetSelectionDAG.td
+++ b/include/llvm/Target/TargetSelectionDAG.td
@@ -414,9 +414,6 @@ def prefetch   : SDNode<"ISD::PREFETCH"   , SDTPrefetch,
 def readcyclecounter : SDNode<"ISD::READCYCLECOUNTER", SDTIntLeaf,
                      [SDNPHasChain, SDNPSideEffect]>;
 
-def membarrier : SDNode<"ISD::MEMBARRIER" , SDTMemBarrier,
-                        [SDNPHasChain, SDNPSideEffect]>;
-
 def atomic_fence : SDNode<"ISD::ATOMIC_FENCE" , SDTAtomicFence,
                           [SDNPHasChain, SDNPSideEffect]>;
 
diff --git a/include/llvm/Transforms/IPO/PassManagerBuilder.h b/include/llvm/Transforms/IPO/PassManagerBuilder.h
index 209f68d..563721e 100644
--- a/include/llvm/Transforms/IPO/PassManagerBuilder.h
+++ b/include/llvm/Transforms/IPO/PassManagerBuilder.h
@@ -103,7 +103,8 @@ public:
   bool DisableSimplifyLibCalls;
   bool DisableUnitAtATime;
   bool DisableUnrollLoops;
-  bool Vectorize;
+  bool BBVectorize;
+  bool SLPVectorize;
   bool LoopVectorize;
 
 private:
diff --git a/include/llvm/Transforms/Utils/BlackList.h b/include/llvm/Transforms/Utils/BlackList.h
index f19470e..316b364 100644
--- a/include/llvm/Transforms/Utils/BlackList.h
+++ b/include/llvm/Transforms/Utils/BlackList.h
@@ -20,6 +20,7 @@
 // global-init:*global_with_initialization_issues*
 // global-init-type:*Namespace::ClassName*
 // src:file_with_tricky_code.cc
+// global-init-src:ignore-global-initializers-issues.cc
 // ---
 // Note that the wild card is in fact an llvm::Regex, but * is automatically
 // replaced with .*
diff --git a/include/llvm/Transforms/Utils/Local.h b/include/llvm/Transforms/Utils/Local.h
index 687c9d5..2678250 100644
--- a/include/llvm/Transforms/Utils/Local.h
+++ b/include/llvm/Transforms/Utils/Local.h
@@ -234,12 +234,12 @@ Value *EmitGEPOffset(IRBuilderTy *Builder, const DataLayout &TD, User *GEP,
 ///  Dbg Intrinsic utilities
 ///
 
-/// Inserts a llvm.dbg.value instrinsic before the stores to an alloca'd value
+/// Inserts a llvm.dbg.value intrinsic before a store to an alloca'd value
 /// that has an associated llvm.dbg.decl intrinsic.
 bool ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
                                      StoreInst *SI, DIBuilder &Builder);
 
-/// Inserts a llvm.dbg.value instrinsic before the stores to an alloca'd value
+/// Inserts a llvm.dbg.value intrinsic before a load of an alloca'd value
 /// that has an associated llvm.dbg.decl intrinsic.
 bool ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
                                      LoadInst *LI, DIBuilder &Builder);
diff --git a/include/llvm/Transforms/Vectorize.h b/include/llvm/Transforms/Vectorize.h
index d205dbd..8d0db16 100644
--- a/include/llvm/Transforms/Vectorize.h
+++ b/include/llvm/Transforms/Vectorize.h
@@ -117,6 +117,12 @@ createBBVectorizePass(const VectorizeConfig &C = VectorizeConfig());
 Pass *createLoopVectorizePass();
 
 //===----------------------------------------------------------------------===//
+//
+// SLPVectorizer - Create a bottom-up SLP vectorizer pass.
+//
+Pass *createSLPVectorizerPass();
+
+//===----------------------------------------------------------------------===//
 /// @brief Vectorize the BasicBlock.
 ///
 /// @param BB The BasicBlock to be vectorized
diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp
index 66e416cd..349c417 100644
--- a/lib/Analysis/Analysis.cpp
+++ b/lib/Analysis/Analysis.cpp
@@ -11,6 +11,8 @@
 #include "llvm-c/Initialization.h"
 #include "llvm/Analysis/Verifier.h"
 #include "llvm/InitializePasses.h"
+#include "llvm/IR/Module.h"
+#include "llvm/PassRegistry.h"
 #include <cstring>
 
 using namespace llvm;
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index ae6da1a..f8509dd 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -88,7 +88,7 @@ static uint64_t getObjectSize(const Value *V, const DataLayout &TD,
                               const TargetLibraryInfo &TLI,
                               bool RoundToAlign = false) {
   uint64_t Size;
-  if (getUnderlyingObjectSize(V, Size, &TD, &TLI, RoundToAlign))
+  if (getObjectSize(V, Size, &TD, &TLI, RoundToAlign))
     return Size;
   return AliasAnalysis::UnknownSize;
 }
@@ -98,6 +98,35 @@ static uint64_t getObjectSize(const Value *V, const DataLayout &TD,
 static bool isObjectSmallerThan(const Value *V, uint64_t Size,
                                 const DataLayout &TD,
                                 const TargetLibraryInfo &TLI) {
+  // Note that the meanings of the "object" are slightly different in the
+  // following contexts:
+  //    c1: llvm::getObjectSize()
+  //    c2: llvm.objectsize() intrinsic
+  //    c3: isObjectSmallerThan()
+  // c1 and c2 share the same meaning; however, the meaning of "object" in c3
+  // refers to the "entire object".
+  //
+  //  Consider this example:
+  //     char *p = (char*)malloc(100)
+  //     char *q = p+80;
+  //
+  //  In the context of c1 and c2, the "object" pointed to by q refers to the
+  // stretch of memory of q[0:19]. So, getObjectSize(q) should return 20.
+  //
+  //  However, in the context of c3, the "object" refers to the chunk of memory
+  // being allocated. So, the "object" has 100 bytes, and q points to the middle
+  // the "object". In case q is passed to isObjectSmallerThan() as the 1st
+  // parameter, before the llvm::getObjectSize() is called to get the size of
+  // entire object, we should:
+  //    - either rewind the pointer q to the base-address of the object in
+  //      question (in this case rewind to p), or
+  //    - just give up. It is up to caller to make sure the pointer is pointing
+  //      to the base address of the object.
+  // 
+  // We go for 2nd option for simplicity.
+  if (!isIdentifiedObject(V))
+    return false;
+
   // This function needs to use the aligned object size because we allow
   // reads a bit past the end given sufficient alignment.
   uint64_t ObjectSize = getObjectSize(V, TD, TLI, /*RoundToAlign*/true);
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index 09d7608..bc0dffc 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -17,6 +17,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/Analysis/ValueTracking.h"
@@ -550,7 +551,7 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0,
 
 
   if (Opc == Instruction::And && DL) {
-    unsigned BitWidth = DL->getTypeSizeInBits(Op0->getType());
+    unsigned BitWidth = DL->getTypeSizeInBits(Op0->getType()->getScalarType());
     APInt KnownZero0(BitWidth, 0), KnownOne0(BitWidth, 0);
     APInt KnownZero1(BitWidth, 0), KnownOne1(BitWidth, 0);
     ComputeMaskedBits(Op0, KnownZero0, KnownOne0, DL);
@@ -880,19 +881,20 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I,
   return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Ops, TD, TLI);
 }
 
-/// ConstantFoldConstantExpression - Attempt to fold the constant expression
-/// using the specified DataLayout.  If successful, the constant result is
-/// result is returned, if not, null is returned.
-Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE,
-                                               const DataLayout *TD,
-                                               const TargetLibraryInfo *TLI) {
-  SmallVector<Constant*, 8> Ops;
-  for (User::const_op_iterator i = CE->op_begin(), e = CE->op_end();
-       i != e; ++i) {
+static Constant *
+ConstantFoldConstantExpressionImpl(const ConstantExpr *CE, const DataLayout *TD,
+                                   const TargetLibraryInfo *TLI,
+                                   SmallPtrSet<ConstantExpr *, 4> &FoldedOps) {
+  SmallVector<Constant *, 8> Ops;
+  for (User::const_op_iterator i = CE->op_begin(), e = CE->op_end(); i != e;
+       ++i) {
     Constant *NewC = cast<Constant>(*i);
-    // Recursively fold the ConstantExpr's operands.
-    if (ConstantExpr *NewCE = dyn_cast<ConstantExpr>(NewC))
-      NewC = ConstantFoldConstantExpression(NewCE, TD, TLI);
+    // Recursively fold the ConstantExpr's operands. If we have already folded
+    // a ConstantExpr, we don't have to process it again.
+    if (ConstantExpr *NewCE = dyn_cast<ConstantExpr>(NewC)) {
+      if (FoldedOps.insert(NewCE))
+        NewC = ConstantFoldConstantExpressionImpl(NewCE, TD, TLI, FoldedOps);
+    }
     Ops.push_back(NewC);
   }
 
@@ -902,6 +904,16 @@ Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE,
   return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(), Ops, TD, TLI);
 }
 
+/// ConstantFoldConstantExpression - Attempt to fold the constant expression
+/// using the specified DataLayout.  If successful, the constant result is
+/// returned; if not, null is returned.
+Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE,
+                                               const DataLayout *TD,
+                                               const TargetLibraryInfo *TLI) {
+  SmallPtrSet<ConstantExpr *, 4> FoldedOps;
+  return ConstantFoldConstantExpressionImpl(CE, TD, TLI, FoldedOps);
+}
+
 /// ConstantFoldInstOperands - Attempt to constant fold an instruction with the
 /// specified opcode and operands.  If successful, the constant result is
 /// returned, if not, null is returned.  Note that this function can fail when
diff --git a/lib/Analysis/IPA/IPA.cpp b/lib/Analysis/IPA/IPA.cpp
index aa5164e..1c1816d 100644
--- a/lib/Analysis/IPA/IPA.cpp
+++ b/lib/Analysis/IPA/IPA.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/InitializePasses.h"
+#include "llvm/PassRegistry.h"
 #include "llvm-c/Initialization.h"
 
 using namespace llvm;
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index 4a3c74e..bf77451 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -1711,7 +1711,7 @@ static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred,
 //    subobject at its beginning) or function, both are pointers to one past the
 //    last element of the same array object, or one is a pointer to one past the
 //    end of one array object and the other is a pointer to the start of a
-//    different array object that happens to immediately follow the ﬁrst array
+//    different array object that happens to immediately follow the first array
 //    object in the address space.)
 //
 // C11's version is more restrictive, however there's no reason why an argument
diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp
index d490d54..9c0d8ac 100644
--- a/lib/Analysis/MemoryBuiltins.cpp
+++ b/lib/Analysis/MemoryBuiltins.cpp
@@ -364,26 +364,6 @@ bool llvm::getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout *TD,
   return true;
 }
 
-/// \brief Compute the size of the underlying object pointed by Ptr. Returns
-/// true and the object size in Size if successful, and false otherwise.
-/// If RoundToAlign is true, then Size is rounded up to the aligment of allocas,
-/// byval arguments, and global variables.
-bool llvm::getUnderlyingObjectSize(const Value *Ptr, uint64_t &Size,
-                                   const DataLayout *TD,
-                                   const TargetLibraryInfo *TLI,
-                                   bool RoundToAlign) {
-  if (!TD)
-    return false;
-
-  ObjectSizeOffsetVisitor Visitor(TD, TLI, Ptr->getContext(), RoundToAlign);
-  SizeOffsetType Data = Visitor.compute(const_cast<Value*>(Ptr));
-  if (!Visitor.knownSize(Data))
-    return false;
-
-  Size = Data.first.getZExtValue();
-  return true;
-}
-
 
 STATISTIC(ObjectVisitorArgument,
           "Number of arguments with unsolved size and offset");
@@ -409,23 +389,16 @@ ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const DataLayout *TD,
 
 SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) {
   V = V->stripPointerCasts();
+  if (Instruction *I = dyn_cast<Instruction>(V)) {
+    // If we have already seen this instruction, bail out. Cycles can happen in
+    // unreachable code after constant propagation.
+    if (!SeenInsts.insert(I))
+      return unknown();
 
-  if (isa<Instruction>(V) || isa<GEPOperator>(V)) {
-    // Return cached value or insert unknown in cache if size of V was not
-    // computed yet in order to avoid recursions in PHis.
-    std::pair<CacheMapTy::iterator, bool> CacheVal =
-      CacheMap.insert(std::make_pair(V, unknown()));
-    if (!CacheVal.second)
-      return CacheVal.first->second;
-
-    SizeOffsetType Result;
     if (GEPOperator *GEP = dyn_cast<GEPOperator>(V))
-      Result = visitGEPOperator(*GEP);
-    else
-      Result = visit(cast<Instruction>(*V));
-    return CacheMap[V] = Result;
+      return visitGEPOperator(*GEP);
+    return visit(*I);
   }
-
   if (Argument *A = dyn_cast<Argument>(V))
     return visitArgument(*A);
   if (ConstantPointerNull *P = dyn_cast<ConstantPointerNull>(V))
@@ -439,6 +412,8 @@ SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) {
   if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
     if (CE->getOpcode() == Instruction::IntToPtr)
       return unknown(); // clueless
+    if (CE->getOpcode() == Instruction::GetElementPtr)
+      return visitGEPOperator(cast<GEPOperator>(*CE));
   }
 
   DEBUG(dbgs() << "ObjectSizeOffsetVisitor::compute() unhandled value: " << *V
@@ -572,21 +547,9 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitLoadInst(LoadInst&) {
   return unknown();
 }
 
-SizeOffsetType ObjectSizeOffsetVisitor::visitPHINode(PHINode &PHI) {
-  if (PHI.getNumIncomingValues() == 0)
-    return unknown();
-
-  SizeOffsetType Ret = compute(PHI.getIncomingValue(0));
-  if (!bothKnown(Ret))
-    return unknown();
-
-  // Verify that all PHI incoming pointers have the same size and offset.
-  for (unsigned i = 1, e = PHI.getNumIncomingValues(); i != e; ++i) {
-    SizeOffsetType EdgeData = compute(PHI.getIncomingValue(i));
-    if (!bothKnown(EdgeData) || EdgeData != Ret)
-      return unknown();
-  }
-  return Ret;
+SizeOffsetType ObjectSizeOffsetVisitor::visitPHINode(PHINode&) {
+  // too complex to analyze statically.
+  return unknown();
 }
 
 SizeOffsetType ObjectSizeOffsetVisitor::visitSelectInst(SelectInst &I) {
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index 2240e9d..c0009cb 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -47,9 +47,7 @@ STATISTIC(NumCacheCompleteNonLocalPtr,
           "Number of block queries that were completely cached");
 
 // Limit for the number of instructions to scan in a block.
-// FIXME: Figure out what a sane value is for this.
-//        (500 is relatively insane.)
-static const int BlockScanLimit = 500;
+static const int BlockScanLimit = 100;
 
 char MemoryDependenceAnalysis::ID = 0;
 
@@ -913,7 +911,6 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
                             SmallVectorImpl<NonLocalDepResult> &Result,
                             DenseMap<BasicBlock*, Value*> &Visited,
                             bool SkipFirstBlock) {
-
   // Look up the cached info for Pointer.
   ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad);
 
@@ -1001,8 +998,17 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
     for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end();
          I != E; ++I) {
       Visited.insert(std::make_pair(I->getBB(), Addr));
-      if (!I->getResult().isNonLocal() && DT->isReachableFromEntry(I->getBB()))
+      if (I->getResult().isNonLocal()) {
+        continue;
+      }
+
+      if (!DT) {
+        Result.push_back(NonLocalDepResult(I->getBB(),
+                                           MemDepResult::getUnknown(),
+                                           Addr));
+      } else if (DT->isReachableFromEntry(I->getBB())) {
         Result.push_back(NonLocalDepResult(I->getBB(), I->getResult(), Addr));
+      }
     }
     ++NumCacheCompleteNonLocalPtr;
     return false;
@@ -1047,9 +1053,16 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
                                                  NumSortedEntries);
 
       // If we got a Def or Clobber, add this to the list of results.
-      if (!Dep.isNonLocal() && DT->isReachableFromEntry(BB)) {
-        Result.push_back(NonLocalDepResult(BB, Dep, Pointer.getAddr()));
-        continue;
+      if (!Dep.isNonLocal()) {
+        if (!DT) {
+          Result.push_back(NonLocalDepResult(BB,
+                                             MemDepResult::getUnknown(),
+                                             Pointer.getAddr()));
+          continue;
+        } else if (DT->isReachableFromEntry(BB)) {
+          Result.push_back(NonLocalDepResult(BB, Dep, Pointer.getAddr()));
+          continue;
+        }
       }
     }
 
diff --git a/lib/Analysis/RegionInfo.cpp b/lib/Analysis/RegionInfo.cpp
index fad5074..8577025 100644
--- a/lib/Analysis/RegionInfo.cpp
+++ b/lib/Analysis/RegionInfo.cpp
@@ -79,10 +79,43 @@ void Region::replaceExit(BasicBlock *BB) {
   exit = BB;
 }
 
+void Region::replaceEntryRecursive(BasicBlock *NewEntry) {
+  std::vector<Region *> RegionQueue;
+  BasicBlock *OldEntry = getEntry();
+
+  RegionQueue.push_back(this);
+  while (!RegionQueue.empty()) {
+    Region *R = RegionQueue.back();
+    RegionQueue.pop_back();
+
+    R->replaceEntry(NewEntry);
+    for (Region::const_iterator RI = R->begin(), RE = R->end(); RI != RE; ++RI)
+      if ((*RI)->getEntry() == OldEntry)
+        RegionQueue.push_back(*RI);
+  }
+}
+
+void Region::replaceExitRecursive(BasicBlock *NewExit) {
+  std::vector<Region *> RegionQueue;
+  BasicBlock *OldExit = getExit();
+
+  RegionQueue.push_back(this);
+  while (!RegionQueue.empty()) {
+    Region *R = RegionQueue.back();
+    RegionQueue.pop_back();
+
+    R->replaceExit(NewExit);
+    for (Region::const_iterator RI = R->begin(), RE = R->end(); RI != RE; ++RI)
+      if ((*RI)->getExit() == OldExit)
+        RegionQueue.push_back(*RI);
+  }
+}
+
 bool Region::contains(const BasicBlock *B) const {
   BasicBlock *BB = const_cast<BasicBlock*>(B);
 
-  assert(DT->getNode(BB) && "BB not part of the dominance tree");
+  if (!DT->getNode(BB))
+    return false;
 
   BasicBlock *entry = getEntry(), *exit = getExit();
 
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 6ea915f..f876748 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -3937,10 +3937,19 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
 /// before taking the branch. For loops with multiple exits, it may not be the
 /// number times that the loop header executes because the loop may exit
 /// prematurely via another branch.
+///
+/// FIXME: We conservatively call getBackedgeTakenCount(L) instead of
+/// getExitCount(L, ExitingBlock) to compute a safe trip count considering all
+/// loop exits. getExitCount() may return an exact count for this branch
+/// assuming no-signed-wrap. The number of well-defined iterations may actually
+/// be higher than this trip count if this exit test is skipped and the loop
+/// exits via a different branch. Ideally, getExitCount() would know whether it
+/// depends on a NSW assumption, and we would only fall back to a conservative
+/// trip count in that case.
 unsigned ScalarEvolution::
-getSmallConstantTripCount(Loop *L, BasicBlock *ExitingBlock) {
+getSmallConstantTripCount(Loop *L, BasicBlock */*ExitingBlock*/) {
   const SCEVConstant *ExitCount =
-    dyn_cast<SCEVConstant>(getExitCount(L, ExitingBlock));
+    dyn_cast<SCEVConstant>(getBackedgeTakenCount(L));
   if (!ExitCount)
     return 0;
 
@@ -3967,8 +3976,8 @@ getSmallConstantTripCount(Loop *L, BasicBlock *ExitingBlock) {
 /// As explained in the comments for getSmallConstantTripCount, this assumes
 /// that control exits the loop via ExitingBlock.
 unsigned ScalarEvolution::
-getSmallConstantTripMultiple(Loop *L, BasicBlock *ExitingBlock) {
-  const SCEV *ExitCount = getExitCount(L, ExitingBlock);
+getSmallConstantTripMultiple(Loop *L, BasicBlock */*ExitingBlock*/) {
+  const SCEV *ExitCount = getBackedgeTakenCount(L);
   if (ExitCount == getCouldNotCompute())
     return 1;
 
@@ -3997,7 +4006,7 @@ getSmallConstantTripMultiple(Loop *L, BasicBlock *ExitingBlock) {
 }
 
 // getExitCount - Get the expression for the number of loop iterations for which
-// this loop is guaranteed not to exit via ExitintBlock. Otherwise return
+// this loop is guaranteed not to exit via ExitingBlock. Otherwise return
 // SCEVCouldNotCompute.
 const SCEV *ScalarEvolution::getExitCount(Loop *L, BasicBlock *ExitingBlock) {
   return getBackedgeTakenInfo(L).getExact(ExitingBlock, this);
@@ -4382,26 +4391,36 @@ ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) {
   // Proceed to the next level to examine the exit condition expression.
   return ComputeExitLimitFromCond(L, ExitBr->getCondition(),
                                   ExitBr->getSuccessor(0),
-                                  ExitBr->getSuccessor(1));
+                                  ExitBr->getSuccessor(1),
+                                  /*IsSubExpr=*/false);
 }
 
 /// ComputeExitLimitFromCond - Compute the number of times the
 /// backedge of the specified loop will execute if its exit condition
 /// were a conditional branch of ExitCond, TBB, and FBB.
+///
+/// @param IsSubExpr is true if ExitCond does not directly control the exit
+/// branch. In this case, we cannot assume that the loop only exits when the
+/// condition is true and cannot infer that failing to meet the condition prior
+/// to integer wraparound results in undefined behavior.
 ScalarEvolution::ExitLimit
 ScalarEvolution::ComputeExitLimitFromCond(const Loop *L,
                                           Value *ExitCond,
                                           BasicBlock *TBB,
-                                          BasicBlock *FBB) {
+                                          BasicBlock *FBB,
+                                          bool IsSubExpr) {
   // Check if the controlling expression for this loop is an And or Or.
   if (BinaryOperator *BO = dyn_cast<BinaryOperator>(ExitCond)) {
     if (BO->getOpcode() == Instruction::And) {
       // Recurse on the operands of the and.
-      ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB);
-      ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB);
+      bool EitherMayExit = L->contains(TBB);
+      ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB,
+                                               IsSubExpr || EitherMayExit);
+      ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB,
+                                               IsSubExpr || EitherMayExit);
       const SCEV *BECount = getCouldNotCompute();
       const SCEV *MaxBECount = getCouldNotCompute();
-      if (L->contains(TBB)) {
+      if (EitherMayExit) {
         // Both conditions must be true for the loop to continue executing.
         // Choose the less conservative count.
         if (EL0.Exact == getCouldNotCompute() ||
@@ -4429,11 +4448,14 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L,
     }
     if (BO->getOpcode() == Instruction::Or) {
       // Recurse on the operands of the or.
-      ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB);
-      ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB);
+      bool EitherMayExit = L->contains(FBB);
+      ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB,
+                                               IsSubExpr || EitherMayExit);
+      ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB,
+                                               IsSubExpr || EitherMayExit);
       const SCEV *BECount = getCouldNotCompute();
       const SCEV *MaxBECount = getCouldNotCompute();
-      if (L->contains(FBB)) {
+      if (EitherMayExit) {
         // Both conditions must be false for the loop to continue executing.
         // Choose the less conservative count.
         if (EL0.Exact == getCouldNotCompute() ||
@@ -4464,7 +4486,7 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L,
   // With an icmp, it may be feasible to compute an exact backedge-taken count.
   // Proceed to the next level to examine the icmp.
   if (ICmpInst *ExitCondICmp = dyn_cast<ICmpInst>(ExitCond))
-    return ComputeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB);
+    return ComputeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB, IsSubExpr);
 
   // Check for a constant condition. These are normally stripped out by
   // SimplifyCFG, but ScalarEvolution may be used by a pass which wishes to
@@ -4490,7 +4512,8 @@ ScalarEvolution::ExitLimit
 ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L,
                                           ICmpInst *ExitCond,
                                           BasicBlock *TBB,
-                                          BasicBlock *FBB) {
+                                          BasicBlock *FBB,
+                                          bool IsSubExpr) {
 
   // If the condition was exit on true, convert the condition to exit on false
   ICmpInst::Predicate Cond;
@@ -4542,7 +4565,7 @@ ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L,
   switch (Cond) {
   case ICmpInst::ICMP_NE: {                     // while (X != Y)
     // Convert to: while (X-Y != 0)
-    ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L);
+    ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L, IsSubExpr);
     if (EL.hasAnyInfo()) return EL;
     break;
   }
@@ -4553,24 +4576,24 @@ ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L,
     break;
   }
   case ICmpInst::ICMP_SLT: {
-    ExitLimit EL = HowManyLessThans(LHS, RHS, L, true);
+    ExitLimit EL = HowManyLessThans(LHS, RHS, L, true, IsSubExpr);
     if (EL.hasAnyInfo()) return EL;
     break;
   }
   case ICmpInst::ICMP_SGT: {
     ExitLimit EL = HowManyLessThans(getNotSCEV(LHS),
-                                             getNotSCEV(RHS), L, true);
+                                    getNotSCEV(RHS), L, true, IsSubExpr);
     if (EL.hasAnyInfo()) return EL;
     break;
   }
   case ICmpInst::ICMP_ULT: {
-    ExitLimit EL = HowManyLessThans(LHS, RHS, L, false);
+    ExitLimit EL = HowManyLessThans(LHS, RHS, L, false, IsSubExpr);
     if (EL.hasAnyInfo()) return EL;
     break;
   }
   case ICmpInst::ICMP_UGT: {
     ExitLimit EL = HowManyLessThans(getNotSCEV(LHS),
-                                             getNotSCEV(RHS), L, false);
+                                    getNotSCEV(RHS), L, false, IsSubExpr);
     if (EL.hasAnyInfo()) return EL;
     break;
   }
@@ -5439,7 +5462,7 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
 /// effectively V != 0.  We know and take advantage of the fact that this
 /// expression only being used in a comparison by zero context.
 ScalarEvolution::ExitLimit
-ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) {
+ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool IsSubExpr) {
   // If the value is a constant
   if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
     // If the value is already zero, the branch will execute zero times.
@@ -5537,19 +5560,20 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) {
   }
 
   // If the recurrence is known not to wraparound, unsigned divide computes the
-  // back edge count. We know that the value will either become zero (and thus
-  // the loop terminates), that the loop will terminate through some other exit
-  // condition first, or that the loop has undefined behavior.  This means
-  // we can't "miss" the exit value, even with nonunit stride.
+  // back edge count. (Ideally we would have an "isexact" bit for udiv). We know
+  // that the value will either become zero (and thus the loop terminates), that
+  // the loop will terminate through some other exit condition first, or that
+  // the loop has undefined behavior.  This means we can't "miss" the exit
+  // value, even with nonunit stride.
   //
-  // FIXME: Prove that loops always exhibits *acceptable* undefined
-  // behavior. Loops must exhibit defined behavior until a wrapped value is
-  // actually used. So the trip count computed by udiv could be smaller than the
-  // number of well-defined iterations.
-  if (AddRec->getNoWrapFlags(SCEV::FlagNW)) {
-    // FIXME: We really want an "isexact" bit for udiv.
+  // This is only valid for expressions that directly compute the loop exit. It
+  // is invalid for subexpressions in which the loop may exit through this
+  // branch even if this subexpression is false. In that case, the trip count
+  // computed by this udiv could be smaller than the number of well-defined
+  // iterations.
+  if (!IsSubExpr && AddRec->getNoWrapFlags(SCEV::FlagNW))
     return getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step);
-  }
+
   // Then, try to solve the above equation provided that Start is constant.
   if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start))
     return SolveLinEquationWithOverflow(StepC->getValue()->getValue(),
@@ -6315,9 +6339,14 @@ const SCEV *ScalarEvolution::getBECount(const SCEV *Start,
 /// HowManyLessThans - Return the number of times a backedge containing the
 /// specified less-than comparison will execute.  If not computable, return
 /// CouldNotCompute.
+///
+/// @param IsSubExpr is true when the LHS < RHS condition does not directly
+/// control the branch. In this case, we can only compute an iteration count for
+/// a subexpression that cannot overflow before evaluating true.
 ScalarEvolution::ExitLimit
 ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
-                                  const Loop *L, bool isSigned) {
+                                  const Loop *L, bool isSigned,
+                                  bool IsSubExpr) {
   // Only handle:  "ADDREC < LoopInvariant".
   if (!isLoopInvariant(RHS, L)) return getCouldNotCompute();
 
@@ -6326,10 +6355,12 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
     return getCouldNotCompute();
 
   // Check to see if we have a flag which makes analysis easy.
-  bool NoWrap = isSigned ?
-    AddRec->getNoWrapFlags((SCEV::NoWrapFlags)(SCEV::FlagNSW | SCEV::FlagNW)) :
-    AddRec->getNoWrapFlags((SCEV::NoWrapFlags)(SCEV::FlagNUW | SCEV::FlagNW));
-
+  bool NoWrap = false;
+  if (!IsSubExpr) {
+    NoWrap = AddRec->getNoWrapFlags(
+      (SCEV::NoWrapFlags)(((isSigned ? SCEV::FlagNSW : SCEV::FlagNUW))
+                          | SCEV::FlagNW));
+  }
   if (AddRec->isAffine()) {
     unsigned BitWidth = getTypeSizeInBits(AddRec->getType());
     const SCEV *Step = AddRec->getStepRecurrence(*this);
diff --git a/lib/Analysis/TypeBasedAliasAnalysis.cpp b/lib/Analysis/TypeBasedAliasAnalysis.cpp
index 68e43b2..bbf3c3a 100644
--- a/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -71,6 +71,7 @@ using namespace llvm;
 // achieved by stripping the !tbaa tags from IR, but this option is sometimes
 // more convenient.
 static cl::opt<bool> EnableTBAA("enable-tbaa", cl::init(true));
+static cl::opt<bool> EnableStructPathTBAA("struct-path-tbaa", cl::init(false));
 
 namespace {
   /// TBAANode - This is a simple wrapper around an MDNode which provides a
@@ -109,6 +110,97 @@ namespace {
       return CI->getValue()[0];
     }
   };
+
+  /// Wrapper around a struct-path TBAA tag MDNode. The accessors below read
+  /// operand 0 as the base type, operand 1 as the access type, operand 2 as
+  /// the offset, and an optional operand 3 as the "immutable" flag.
+  class TBAAStructTagNode {
+    /// This node should be created with createTBAAStructTagNode.
+    const MDNode *Node;
+
+  public:
+    TBAAStructTagNode() : Node(0) {}
+    explicit TBAAStructTagNode(const MDNode *N) : Node(N) {}
+
+    /// Get the MDNode for this TBAAStructTagNode.
+    const MDNode *getNode() const { return Node; }
+    /// Operand accessors. Node is dereferenced unchecked, so it must be set.
+    const MDNode *getBaseType() const {
+      return dyn_cast_or_null<MDNode>(Node->getOperand(0));
+    }
+    const MDNode *getAccessType() const {
+      return dyn_cast_or_null<MDNode>(Node->getOperand(1));
+    }
+    uint64_t getOffset() const {
+      return cast<ConstantInt>(Node->getOperand(2))->getZExtValue();
+    }
+    /// TypeIsImmutable - Test if this tag marks objects which are not modified
+    /// (by any means) in the context where this AliasAnalysis is relevant.
+    /// True only when operand 3 exists, is a ConstantInt, and has bit 0 set.
+    bool TypeIsImmutable() const {
+      if (Node->getNumOperands() < 4)
+        return false;
+      ConstantInt *CI = dyn_cast<ConstantInt>(Node->getOperand(3));
+      if (!CI)
+        return false;
+      return CI->getValue()[0];
+    }
+  };
+
+  /// Wrapper around a struct-path TBAA type MDNode. As read by getParent(),
+  /// operand 0 is skipped and later operands form (field type, offset) pairs;
+  /// a node with at most three operands is handled as a scalar type.
+  class TBAAStructTypeNode {
+    /// This node should be created with createTBAAStructTypeNode.
+    const MDNode *Node;
+
+  public:
+    TBAAStructTypeNode() : Node(0) {}
+    explicit TBAAStructTypeNode(const MDNode *N) : Node(N) {}
+
+    /// Get the MDNode for this TBAAStructTypeNode.
+    const MDNode *getNode() const { return Node; }
+
+    /// Step to the field type covering Offset (or a scalar node's parent);
+    /// struct nodes also rebase Offset onto that field. Null node if none.
+    TBAAStructTypeNode getParent(uint64_t &Offset) const {
+      // Parent can be omitted for the root node.
+      if (Node->getNumOperands() < 2)
+        return TBAAStructTypeNode();
+
+      // Scalar type node: operand 1 is the parent and Offset is not adjusted.
+      if (Node->getNumOperands() <= 3) {
+        MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1));
+        if (!P)
+          return TBAAStructTypeNode();
+        return TBAAStructTypeNode(P);
+      }
+      // Fields are assumed sorted by offset: pick the field just before the
+      // first one whose offset exceeds Offset. NOTE(review): operand Idx + 1 is
+      // read unchecked, so an odd operand count seems assumed -- confirm.
+      unsigned TheIdx = 0;
+      for (unsigned Idx = 1; Idx < Node->getNumOperands(); Idx += 2) {
+        uint64_t Cur = cast<ConstantInt>(Node->getOperand(Idx + 1))->
+                         getZExtValue();
+        if (Cur > Offset) {
+          assert(Idx >= 3 &&
+                 "TBAAStructTypeNode::getParent should have an offset match!");
+          TheIdx = Idx - 2;
+          break;
+        }
+      }
+      // No field offset exceeded Offset, so use the last (type, offset) pair.
+      if (TheIdx == 0)
+        TheIdx = Node->getNumOperands() - 2;
+      uint64_t Cur = cast<ConstantInt>(Node->getOperand(TheIdx + 1))->
+                       getZExtValue();
+      Offset -= Cur;
+      MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(TheIdx));
+      if (!P)
+        return TBAAStructTypeNode();
+      return TBAAStructTypeNode(P);
+    }
+  };
 }
 
 namespace {
@@ -137,6 +229,7 @@ namespace {
     }
 
     bool Aliases(const MDNode *A, const MDNode *B) const;
+    bool PathAliases(const MDNode *A, const MDNode *B) const;
 
   private:
     virtual void getAnalysisUsage(AnalysisUsage &AU) const;
@@ -171,6 +264,9 @@ TypeBasedAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
 bool
 TypeBasedAliasAnalysis::Aliases(const MDNode *A,
                                 const MDNode *B) const {
+  if (EnableStructPathTBAA)
+    return PathAliases(A, B);
+
   // Keep track of the root node for A and B.
   TBAANode RootA, RootB;
 
@@ -209,6 +305,67 @@ TypeBasedAliasAnalysis::Aliases(const MDNode *A,
   return false;
 }
 
+/// Test whether the struct-path tag represented by A may alias the
+/// struct-path tag represented by B. A and B must be non-null. Returns true
+/// (may alias) conservatively when either tag is not in struct-path form.
+bool
+TypeBasedAliasAnalysis::PathAliases(const MDNode *A,
+                                    const MDNode *B) const {
+  // A struct-path tag is read as (base type, access type, offset[, flag]).
+  // Anything shorter would trip the operand accessors, so answer "may alias".
+  if (A->getNumOperands() < 3 || B->getNumOperands() < 3)
+    return true;
+
+  // Keep track of the root node for A and B.
+  TBAAStructTypeNode RootA, RootB;
+  TBAAStructTagNode TagA(A), TagB(B);
+
+  // TODO: We need to check if AccessType of TagA encloses AccessType of
+  // TagB to support aggregate AccessType. If yes, return true.
+
+  // A non-MDNode base type comes back null; don't walk it, be conservative.
+  const MDNode *BaseA = TagA.getBaseType();
+  const MDNode *BaseB = TagB.getBaseType();
+  if (!BaseA || !BaseB)
+    return true;
+
+  // Climb the type DAG from the base type of A, following the edge with the
+  // matching offset, to see if we reach the base type of B.
+  uint64_t OffsetA = TagA.getOffset(), OffsetB = TagB.getOffset();
+  for (TBAAStructTypeNode T(BaseA); ; ) {
+    if (T.getNode() == BaseB)
+      // Base type of A encloses base type of B, check if the offsets match.
+      return OffsetA == OffsetB;
+    RootA = T;
+    // OffsetA is adjusted to be relative to the field type we step into.
+    T = T.getParent(OffsetA);
+    if (!T.getNode())
+      break;
+  }
+
+  // Reset OffsetA and climb the type DAG from base type of B to see if we reach
+  // base type of A.
+  OffsetA = TagA.getOffset();
+  for (TBAAStructTypeNode T(BaseB); ; ) {
+    if (T.getNode() == BaseA)
+      // Base type of B encloses base type of A, check if the offsets match.
+      return OffsetA == OffsetB;
+    RootB = T;
+    // OffsetB is adjusted to be relative to the field type we step into.
+    T = T.getParent(OffsetB);
+    if (!T.getNode())
+      break;
+  }
+
+  // Neither node is an ancestor of the other. If they have different roots,
+  // they're part of different potentially unrelated type systems, so we must
+  // be conservative.
+  if (RootA.getNode() != RootB.getNode())
+    return true;
+  // If they have the same root, then we've proved there's no alias.
+  return false;
+}
+
 AliasAnalysis::AliasResult
 TypeBasedAliasAnalysis::alias(const Location &LocA,
                               const Location &LocB) {
@@ -240,7 +397,8 @@ bool TypeBasedAliasAnalysis::pointsToConstantMemory(const Location &Loc,
 
   // If this is an "immutable" type, we can assume the pointer is pointing
   // to constant memory.
-  if (TBAANode(M).TypeIsImmutable())
+  if ((!EnableStructPathTBAA && TBAANode(M).TypeIsImmutable()) ||
+      (EnableStructPathTBAA && TBAAStructTagNode(M).TypeIsImmutable()))
     return true;
 
   return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
@@ -256,7 +414,8 @@ TypeBasedAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
   // If this is an "immutable" type, we can assume the call doesn't write
   // to memory.
   if (const MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
-    if (TBAANode(M).TypeIsImmutable())
+    if ((!EnableStructPathTBAA && TBAANode(M).TypeIsImmutable()) ||
+        (EnableStructPathTBAA && TBAAStructTagNode(M).TypeIsImmutable()))
       Min = OnlyReadsMemory;
 
   return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min);
@@ -298,3 +457,55 @@ TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS1,
 
   return AliasAnalysis::getModRefInfo(CS1, CS2);
 }
+
+MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) {
+  // Nothing to merge without both nodes; identical nodes merge to themselves.
+  if (!A || !B)
+    return 0;
+  if (A == B)
+    return A;
+
+  // For struct-path aware TBAA, we use the access type of the tag.
+  if (EnableStructPathTBAA) {
+    A = cast_or_null<MDNode>(A->getOperand(1));
+    if (!A)
+      return 0;
+    B = cast_or_null<MDNode>(B->getOperand(1));
+    if (!B)
+      return 0;
+  }
+
+  // Record each node's chain of operand-1 links, starting at the node itself
+  // and ending at the first node that has fewer than two operands or a null
+  // operand 1.
+  SmallVector<MDNode *, 4> ChainA;
+  for (MDNode *N = A; N;) {
+    ChainA.push_back(N);
+    N = N->getNumOperands() >= 2 ? cast_or_null<MDNode>(N->getOperand(1)) : 0;
+  }
+
+  // Same walk, starting from B.
+  SmallVector<MDNode *, 4> ChainB;
+  for (MDNode *N = B; N;) {
+    ChainB.push_back(N);
+    N = N->getNumOperands() >= 2 ? cast_or_null<MDNode>(N->getOperand(1)) : 0;
+  }
+
+  // Walk both chains from their far ends in lockstep; the last pair that still
+  // matches is the deepest node the two chains have in common.
+  MDNode *Common = 0;
+  int PosA = ChainA.size() - 1;
+  int PosB = ChainB.size() - 1;
+  for (; PosA >= 0 && PosB >= 0 && ChainA[PosA] == ChainB[PosB]; --PosA, --PosB)
+    Common = ChainA[PosA];
+
+  // Outside struct-path mode the common type node is the answer as-is.
+  if (!EnableStructPathTBAA)
+    return Common;
+  if (!Common)
+    return 0;
+
+  // We need to convert from a type node to a tag node.
+  Type *Int64 = IntegerType::get(A->getContext(), 64);
+  Value *Ops[3] = { Common, Common, ConstantInt::get(Int64, 0) };
+  return MDNode::get(A->getContext(), Ops);
+}
diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp
index f46383b..e7a9f2a 100644
--- a/lib/AsmParser/LLLexer.cpp
+++ b/lib/AsmParser/LLLexer.cpp
@@ -582,6 +582,7 @@ lltok::Kind LLLexer::LexIdentifier() {
   KEYWORD(optsize);
   KEYWORD(readnone);
   KEYWORD(readonly);
+  KEYWORD(returned);
   KEYWORD(returns_twice);
   KEYWORD(signext);
   KEYWORD(sret);
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index c8da1f8..62d8070d 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -528,7 +528,7 @@ bool LLParser::ParseMDNodeID(MDNode *&Result) {
   if (Result) return false;
 
   // Otherwise, create MDNode forward reference.
-  MDNode *FwdNode = MDNode::getTemporary(Context, ArrayRef<Value*>());
+  MDNode *FwdNode = MDNode::getTemporary(Context, None);
   ForwardRefMDNodes[MID] = std::make_pair(FwdNode, Lex.getLoc());
 
   if (NumberedMetadata.size() <= MID)
@@ -878,8 +878,9 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B,
 
     // Target-independent attributes:
     case lltok::kw_align: {
-      // As a hack, we allow "align 2" on functions as a synonym for "alignstack
-      // 2".
+      // As a hack, we allow function alignment to be initially parsed as an
+      // attribute on a function declaration/definition or added to an attribute
+      // group and later moved to the alignment field.
       unsigned Alignment;
       if (inAttrGrp) {
         Lex.Lex();
@@ -943,6 +944,7 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B,
     case lltok::kw_nest:
     case lltok::kw_noalias:
     case lltok::kw_nocapture:
+    case lltok::kw_returned:
     case lltok::kw_sret:
       HaveError |=
         Error(Lex.getLoc(),
@@ -1155,21 +1157,35 @@ bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) {
     case lltok::kw_nest:            B.addAttribute(Attribute::Nest); break;
     case lltok::kw_noalias:         B.addAttribute(Attribute::NoAlias); break;
     case lltok::kw_nocapture:       B.addAttribute(Attribute::NoCapture); break;
+    case lltok::kw_returned:        B.addAttribute(Attribute::Returned); break;
     case lltok::kw_signext:         B.addAttribute(Attribute::SExt); break;
     case lltok::kw_sret:            B.addAttribute(Attribute::StructRet); break;
     case lltok::kw_zeroext:         B.addAttribute(Attribute::ZExt); break;
 
-    case lltok::kw_alignstack:      case lltok::kw_nounwind:
-    case lltok::kw_alwaysinline:    case lltok::kw_optsize:
-    case lltok::kw_inlinehint:      case lltok::kw_readnone:
-    case lltok::kw_minsize:         case lltok::kw_readonly:
-    case lltok::kw_naked:           case lltok::kw_returns_twice:
-    case lltok::kw_nobuiltin:       case lltok::kw_sanitize_address:
-    case lltok::kw_noimplicitfloat: case lltok::kw_sanitize_memory:
-    case lltok::kw_noinline:        case lltok::kw_sanitize_thread:
-    case lltok::kw_nonlazybind:     case lltok::kw_ssp:
-    case lltok::kw_noredzone:       case lltok::kw_sspreq:
-    case lltok::kw_noreturn:        case lltok::kw_uwtable:
+    case lltok::kw_alignstack:
+    case lltok::kw_alwaysinline:
+    case lltok::kw_inlinehint:
+    case lltok::kw_minsize:
+    case lltok::kw_naked:
+    case lltok::kw_nobuiltin:
+    case lltok::kw_noduplicate:
+    case lltok::kw_noimplicitfloat:
+    case lltok::kw_noinline:
+    case lltok::kw_nonlazybind:
+    case lltok::kw_noredzone:
+    case lltok::kw_noreturn:
+    case lltok::kw_nounwind:
+    case lltok::kw_optsize:
+    case lltok::kw_readnone:
+    case lltok::kw_readonly:
+    case lltok::kw_returns_twice:
+    case lltok::kw_sanitize_address:
+    case lltok::kw_sanitize_memory:
+    case lltok::kw_sanitize_thread:
+    case lltok::kw_ssp:
+    case lltok::kw_sspreq:
+    case lltok::kw_sspstrong:
+    case lltok::kw_uwtable:
       HaveError |= Error(Lex.getLoc(), "invalid use of function-only attribute");
       break;
     }
@@ -1195,24 +1211,39 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) {
     case lltok::kw_zeroext:         B.addAttribute(Attribute::ZExt); break;
 
     // Error handling.
-    case lltok::kw_sret:  case lltok::kw_nocapture:
-    case lltok::kw_byval: case lltok::kw_nest:
+    case lltok::kw_align:
+    case lltok::kw_byval:
+    case lltok::kw_nest:
+    case lltok::kw_nocapture:
+    case lltok::kw_returned:
+    case lltok::kw_sret:
       HaveError |= Error(Lex.getLoc(), "invalid use of parameter-only attribute");
       break;
 
-    case lltok::kw_align:                 case lltok::kw_noreturn:
-    case lltok::kw_alignstack:            case lltok::kw_nounwind:
-    case lltok::kw_alwaysinline:          case lltok::kw_optsize:
-    case lltok::kw_inlinehint:            case lltok::kw_readnone:
-    case lltok::kw_minsize:               case lltok::kw_readonly:
-    case lltok::kw_naked:                 case lltok::kw_returns_twice:
-    case lltok::kw_nobuiltin:             case lltok::kw_sanitize_address:
-    case lltok::kw_noduplicate:           case lltok::kw_sanitize_memory:
-    case lltok::kw_noimplicitfloat:       case lltok::kw_sanitize_thread:
-    case lltok::kw_noinline:              case lltok::kw_ssp:
-    case lltok::kw_nonlazybind:           case lltok::kw_sspreq:
-    case lltok::kw_noredzone:             case lltok::kw_sspstrong:
-                                          case lltok::kw_uwtable:
+    case lltok::kw_alignstack:
+    case lltok::kw_alwaysinline:
+    case lltok::kw_inlinehint:
+    case lltok::kw_minsize:
+    case lltok::kw_naked:
+    case lltok::kw_nobuiltin:
+    case lltok::kw_noduplicate:
+    case lltok::kw_noimplicitfloat:
+    case lltok::kw_noinline:
+    case lltok::kw_nonlazybind:
+    case lltok::kw_noredzone:
+    case lltok::kw_noreturn:
+    case lltok::kw_nounwind:
+    case lltok::kw_optsize:
+    case lltok::kw_readnone:
+    case lltok::kw_readonly:
+    case lltok::kw_returns_twice:
+    case lltok::kw_sanitize_address:
+    case lltok::kw_sanitize_memory:
+    case lltok::kw_sanitize_thread:
+    case lltok::kw_ssp:
+    case lltok::kw_sspreq:
+    case lltok::kw_sspstrong:
+    case lltok::kw_uwtable:
       HaveError |= Error(Lex.getLoc(), "invalid use of function-only attribute");
       break;
     }
@@ -4232,7 +4263,9 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) {
 
   if (ParseTypeAndValue(Ptr, Loc, PFS)) return true;
 
-  if (!Ptr->getType()->getScalarType()->isPointerTy())
+  Type *BaseType = Ptr->getType();
+  PointerType *BasePointerType = dyn_cast<PointerType>(BaseType->getScalarType());
+  if (!BasePointerType)
     return Error(Loc, "base of getelementptr must be a pointer");
 
   SmallVector<Value*, 16> Indices;
@@ -4257,7 +4290,10 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) {
     Indices.push_back(Val);
   }
 
-  if (!GetElementPtrInst::getIndexedType(Ptr->getType(), Indices))
+  if (!Indices.empty() && !BasePointerType->getElementType()->isSized())
+    return Error(Loc, "base element of getelementptr must be sized");
+
+  if (!GetElementPtrInst::getIndexedType(BaseType, Indices))
     return Error(Loc, "invalid getelementptr indices");
   Inst = GetElementPtrInst::Create(Ptr, Indices);
   if (InBounds)
diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h
index cd25ba3..3bf54fa 100644
--- a/lib/AsmParser/LLToken.h
+++ b/lib/AsmParser/LLToken.h
@@ -114,6 +114,7 @@ namespace lltok {
     kw_optsize,
     kw_readnone,
     kw_readonly,
+    kw_returned,
     kw_returns_twice,
     kw_signext,
     kw_ssp,
diff --git a/lib/Bitcode/Reader/BitReader.cpp b/lib/Bitcode/Reader/BitReader.cpp
index 5cd6c55..23630e5 100644
--- a/lib/Bitcode/Reader/BitReader.cpp
+++ b/lib/Bitcode/Reader/BitReader.cpp
@@ -10,6 +10,7 @@
 #include "llvm-c/BitReader.h"
 #include "llvm/Bitcode/ReaderWriter.h"
 #include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include <cstring>
 #include <string>
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index f348843..e6ff4b4 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -405,7 +405,7 @@ Value *BitcodeReaderMDValueList::getValueFwdRef(unsigned Idx) {
   }
 
   // Create and return a placeholder, which will later be RAUW'd.
-  Value *V = MDNode::getTemporary(Context, ArrayRef<Value*>());
+  Value *V = MDNode::getTemporary(Context, None);
   MDValuePtrs[Idx] = V;
   return V;
 }
diff --git a/lib/Bitcode/Writer/BitWriter.cpp b/lib/Bitcode/Writer/BitWriter.cpp
index 9f51c35..985208c 100644
--- a/lib/Bitcode/Writer/BitWriter.cpp
+++ b/lib/Bitcode/Writer/BitWriter.cpp
@@ -9,6 +9,7 @@
 
 #include "llvm-c/BitWriter.h"
 #include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/IR/Module.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp
index dd7282c..4731af5 100644
--- a/lib/CodeGen/Analysis.cpp
+++ b/lib/CodeGen/Analysis.cpp
@@ -201,62 +201,161 @@ ISD::CondCode llvm::getICmpCondCode(ICmpInst::Predicate Pred) {
   }
 }
 
+/// Return true for same types, pointer pairs, or two target-legal vector types.
+static bool isNoopBitcast(Type *T1, Type *T2, const TargetLowering &TLI) {
+  if (T1 == T2 || (T1->isPointerTy() && T2->isPointerTy())) return true;
+  if (!isa<VectorType>(T1) || !isa<VectorType>(T2)) return false;
+  return TLI.isTypeLegal(EVT::getEVT(T1)) && TLI.isTypeLegal(EVT::getEVT(T2));
+}
 
-/// getNoopInput - If V is a noop (i.e., lowers to no machine code), look
-/// through it (and any transitive noop operands to it) and return its input
-/// value.  This is used to determine if a tail call can be formed.
-///
-static const Value *getNoopInput(const Value *V, const TargetLowering &TLI) {
-  // If V is not an instruction, it can't be looked through.
-  const Instruction *I = dyn_cast<Instruction>(V);
-  if (I == 0 || !I->hasOneUse() || I->getNumOperands() == 0) return V;
-  
-  Value *Op = I->getOperand(0);
+/// sameNoopInput - Return true if V1 and V2 have the same value once no-op
+/// instructions (those lowering to no machine code) on either side are looked
+/// through; undef matches anything.  Els1/Els2 track the insertvalue indices
+/// looked through so far and are restored on return.  Used for tail calls.
+static bool sameNoopInput(const Value *V1, const Value *V2,
+                          SmallVectorImpl<unsigned> &Els1,
+                          SmallVectorImpl<unsigned> &Els2,
+                          const TargetLowering &TLI) {
+  using std::swap;
+  bool swapParity = false;
+  bool equalEls = Els1 == Els2;
+  while (true) {
+    if ((equalEls && V1 == V2) || isa<UndefValue>(V1) || isa<UndefValue>(V2)) {
+      if (swapParity)
+        // Revert to original Els1 and Els2 to avoid confusing recursive calls
+        swap(Els1, Els2);
+      return true;
+    }
 
-  // Look through truly no-op truncates.
-  if (isa<TruncInst>(I) &&
-      TLI.isTruncateFree(I->getOperand(0)->getType(), I->getType()))
-    return getNoopInput(I->getOperand(0), TLI);
-  
-  // Look through truly no-op bitcasts.
-  if (isa<BitCastInst>(I)) {
-    // No type change at all.
-    if (Op->getType() == I->getType())
-      return getNoopInput(Op, TLI);
+    // Try to look through V1; if V1 is not an instruction, it can't be looked
+    // through.
+    const Instruction *I = dyn_cast<Instruction>(V1);
+    const Value *NoopInput = 0;
+    if (I != 0 && I->getNumOperands() > 0) {
+     Value *Op = I->getOperand(0);
+      if (isa<TruncInst>(I)) {
+        // Look through truly no-op truncates.
+        if (TLI.isTruncateFree(Op->getType(), I->getType()))
+          NoopInput = Op;
+      } else if (isa<BitCastInst>(I)) {
+        // Look through truly no-op bitcasts.
+        if (isNoopBitcast(Op->getType(), I->getType(), TLI))
+          NoopInput = Op;
+      } else if (isa<GetElementPtrInst>(I)) {
+        // Look through getelementptr when all of its indices are zero.
+        if (cast<GetElementPtrInst>(I)->hasAllZeroIndices())
+          NoopInput = Op;
+      } else if (isa<IntToPtrInst>(I)) {
+        // Look through inttoptr.
+        // Make sure this isn't a truncating or extending cast.  We could
+        // support this eventually, but don't bother for now.
+        if (!isa<VectorType>(I->getType()) &&
+            TLI.getPointerTy().getSizeInBits() == 
+              cast<IntegerType>(Op->getType())->getBitWidth())
+          NoopInput = Op;
+      } else if (isa<PtrToIntInst>(I)) {
+        // Look through ptrtoint.
+        // Make sure this isn't a truncating or extending cast.  We could
+        // support this eventually, but don't bother for now.
+        if (!isa<VectorType>(I->getType()) &&
+            TLI.getPointerTy().getSizeInBits() == 
+              cast<IntegerType>(I->getType())->getBitWidth())
+          NoopInput = Op;
+      } else if (isa<CallInst>(I)) {
+        // Look through call to its first no-op-castable 'returned' argument.
+        for (User::const_op_iterator i = I->op_begin(),
+                                     // Skip Callee
+                                     e = I->op_end() - 1;
+             i != e; ++i) {
+          unsigned attrInd = i - I->op_begin() + 1;
+          if (cast<CallInst>(I)->paramHasAttr(attrInd, Attribute::Returned) &&
+              isNoopBitcast((*i)->getType(), I->getType(), TLI)) {
+            NoopInput = *i;
+            break;
+          }
+        }
+      } else if (isa<InvokeInst>(I)) {
+        // Look through invoke, handled the same way as a call above.
+        for (User::const_op_iterator i = I->op_begin(),
+                                     // Skip BB, BB, Callee
+                                     e = I->op_end() - 3;
+             i != e; ++i) {
+          unsigned attrInd = i - I->op_begin() + 1;
+          if (cast<InvokeInst>(I)->paramHasAttr(attrInd, Attribute::Returned) &&
+              isNoopBitcast((*i)->getType(), I->getType(), TLI)) {
+            NoopInput = *i;
+            break;
+          }
+        }
+      }
+    }
 
-    // Pointer to pointer cast.
-    if (Op->getType()->isPointerTy() && I->getType()->isPointerTy())
-      return getNoopInput(Op, TLI);
-    
-    if (isa<VectorType>(Op->getType()) && isa<VectorType>(I->getType()) &&
-        TLI.isTypeLegal(EVT::getEVT(Op->getType())) &&
-        TLI.isTypeLegal(EVT::getEVT(I->getType())))
-      return getNoopInput(Op, TLI);
-  }
-  
-  // Look through inttoptr.
-  if (isa<IntToPtrInst>(I) && !isa<VectorType>(I->getType())) {
-    // Make sure this isn't a truncating or extending cast.  We could support
-    // this eventually, but don't bother for now.
-    if (TLI.getPointerTy().getSizeInBits() == 
-          cast<IntegerType>(Op->getType())->getBitWidth())
-      return getNoopInput(Op, TLI);
-  }
+    if (NoopInput) {
+      V1 = NoopInput;
+      continue;
+    }
 
-  // Look through ptrtoint.
-  if (isa<PtrToIntInst>(I) && !isa<VectorType>(I->getType())) {
-    // Make sure this isn't a truncating or extending cast.  We could support
-    // this eventually, but don't bother for now.
-    if (TLI.getPointerTy().getSizeInBits() == 
-        cast<IntegerType>(I->getType())->getBitWidth())
-      return getNoopInput(Op, TLI);
+    // If we already swapped, both sides were tried; avoid an infinite loop.
+    if (swapParity)
+      break;
+
+    // Otherwise, swap V1<->V2, Els1<->Els2
+    swap(V1, V2);
+    swap(Els1, Els2);
+    swapParity = !swapParity;
   }
 
+  for (unsigned n = 0; n < 2; ++n) {
+    if (isa<InsertValueInst>(V1)) {
+      if (isa<StructType>(V1->getType())) {
+        // Look through insertvalue: each struct element must match V2.
+        unsigned i, e;
+        for (i = 0, e = cast<StructType>(V1->getType())->getNumElements();
+             i != e; ++i) {
+          const Value *InScalar = FindInsertedValue(const_cast<Value*>(V1), i);
+          if (InScalar == 0)
+            break;
+          Els1.push_back(i);
+          if (!sameNoopInput(InScalar, V2, Els1, Els2, TLI)) {
+            Els1.pop_back();
+            break;
+          }
+          Els1.pop_back();
+        }
+        if (i == e) {
+          if (swapParity)
+            swap(Els1, Els2);
+          return true;
+        }
+      }
+    } else if (!Els1.empty() && isa<ExtractValueInst>(V1)) {
+      const ExtractValueInst *EVI = cast<ExtractValueInst>(V1);
+      unsigned i = Els1.back();
+      // If the scalar value being inserted is an extractvalue of the right
+      // index from the call, then everything is good.
+      if (isa<StructType>(EVI->getOperand(0)->getType()) &&
+          EVI->getNumIndices() == 1 && EVI->getIndices()[0] == i) {
+        // Look through extractvalue
+        Els1.pop_back();
+        if (sameNoopInput(EVI->getOperand(0), V2, Els1, Els2, TLI)) {
+          Els1.push_back(i);
+          if (swapParity)
+            swap(Els1, Els2);
+          return true;
+        }
+        Els1.push_back(i);
+      }
+    }
 
-  // Otherwise it's not something we can look through.
-  return V;
-}
+    swap(V1, V2);
+    swap(Els1, Els2);
+    swapParity = !swapParity;
+  }
 
+  if (swapParity)
+    swap(Els1, Els2);
+  return false;
+}
 
 /// Test if the given instruction is in a position to be optimized
 /// with a tail-call. This roughly means that it's in a block with
@@ -264,7 +363,8 @@ static const Value *getNoopInput(const Value *V, const TargetLowering &TLI) {
 /// between it and the return.
 ///
 /// This function only tests target-independent requirements.
-bool llvm::isInTailCallPosition(ImmutableCallSite CS,const TargetLowering &TLI){
+bool llvm::isInTailCallPosition(ImmutableCallSite CS,
+                                const TargetLowering &TLI) {
   const Instruction *I = CS.getInstruction();
   const BasicBlock *ExitBB = I->getParent();
   const TerminatorInst *Term = ExitBB->getTerminator();
@@ -322,28 +422,7 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS,const TargetLowering &TLI){
       CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
     return false;
 
-  // Otherwise, make sure the unmodified return value of I is the return value.
-  // We handle two cases: multiple return values + scalars.
-  Value *RetVal = Ret->getOperand(0);
-  if (!isa<InsertValueInst>(RetVal) || !isa<StructType>(RetVal->getType()))
-    // Handle scalars first.
-    return getNoopInput(Ret->getOperand(0), TLI) == I;
-  
-  // If this is an aggregate return, look through the insert/extract values and
-  // see if each is transparent.
-  for (unsigned i = 0, e =cast<StructType>(RetVal->getType())->getNumElements();
-       i != e; ++i) {
-    const Value *InScalar = FindInsertedValue(RetVal, i);
-    if (InScalar == 0) return false;
-    InScalar = getNoopInput(InScalar, TLI);
-    
-    // If the scalar value being inserted is an extractvalue of the right index
-    // from the call, then everything is good.
-    const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(InScalar);
-    if (EVI == 0 || EVI->getOperand(0) != I || EVI->getNumIndices() != 1 ||
-        EVI->getIndices()[0] != i)
-      return false;
-  }
-  
-  return true;
+  // Otherwise, make sure the return value and I have the same value
+  SmallVector<unsigned, 4> Els1, Els2;
+  return sameNoopInput(Ret->getOperand(0), I, Els1, Els2, TLI);
 }
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index d4a745d..84162ac 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -133,9 +133,13 @@ const DataLayout &AsmPrinter::getDataLayout() const {
   return *TM.getDataLayout();
 }
 
+/// getTargetTriple - Return the target triple string, forwarding the query
+/// to the TM member.
+StringRef AsmPrinter::getTargetTriple() const { return TM.getTargetTriple(); }
+
 /// getCurrentSection() - Return the current section we are emitting to.
 const MCSection *AsmPrinter::getCurrentSection() const {
-  return OutStreamer.getCurrentSection();
+  return OutStreamer.getCurrentSection().first;
 }
 
 
@@ -813,7 +817,7 @@ void AsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const {
   // caller might be in the middle of an dwarf expression. We should
   // probably assert that Reg >= 0 once debug info generation is more mature.
 
-  if (int Offset =  MLoc.getOffset()) {
+  if (MLoc.isIndirect()) {
     if (Reg < 32) {
       OutStreamer.AddComment(
         dwarf::OperationEncodingString(dwarf::DW_OP_breg0 + Reg));
@@ -824,7 +828,7 @@ void AsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const {
       OutStreamer.AddComment(Twine(Reg));
       EmitULEB128(Reg);
     }
-    EmitSLEB128(Offset);
+    EmitSLEB128(MLoc.getOffset());
   } else {
     if (Reg < 32) {
       OutStreamer.AddComment(
@@ -1213,7 +1217,7 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
 bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
   if (GV->getName() == "llvm.used") {
     if (MAI->hasNoDeadStrip())    // No need to emit this at all.
-      EmitLLVMUsedList(GV->getInitializer());
+      EmitLLVMUsedList(cast<ConstantArray>(GV->getInitializer()));
     return true;
   }
 
@@ -1256,11 +1260,8 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
 /// EmitLLVMUsedList - For targets that define a MAI::UsedDirective, mark each
 /// global in the specified llvm.used list for which emitUsedDirectiveFor
 /// is true, as being used with this directive.
-void AsmPrinter::EmitLLVMUsedList(const Constant *List) {
+void AsmPrinter::EmitLLVMUsedList(const ConstantArray *InitList) {
   // Should be an array of 'i8*'.
-  const ConstantArray *InitList = dyn_cast<ConstantArray>(List);
-  if (InitList == 0) return;
-
   for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
     const GlobalValue *GV =
       dyn_cast<GlobalValue>(InitList->getOperand(i)->stripPointerCasts());
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index 156acac..31e42d4 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -141,7 +141,7 @@ void AsmPrinter::EmitTTypeReference(const GlobalValue *GV,
 void AsmPrinter::EmitSectionOffset(const MCSymbol *Label,
                                    const MCSymbol *SectionLabel) const {
   // On COFF targets, we have to emit the special .secrel32 directive.
-  if (MAI->getDwarfSectionOffsetDirective()) {
+  if (MAI->needsDwarfSectionOffsetDirective()) {
     OutStreamer.EmitCOFFSecRel32(Label);
     return;
   }
diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp
index 57e0acd..673867a 100644
--- a/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -112,8 +112,9 @@ DIE::~DIE() {
     delete Children[i];
 }
 
-/// Climb up the parent chain to get the compile unit DIE this DIE belongs to.
-DIE *DIE::getCompileUnit() const{
+/// Climb up the parent chain to get the compile unit DIE to which this DIE
+/// belongs.
+DIE *DIE::getCompileUnit() const {
   DIE *p = getParent();
   while (p) {
     if (p->getTag() == dwarf::DW_TAG_compile_unit)
@@ -124,8 +125,7 @@ DIE *DIE::getCompileUnit() const{
 }
 
 #ifndef NDEBUG
-void DIE::print(raw_ostream &O, unsigned IncIndent) {
-  IndentCount += IncIndent;
+void DIE::print(raw_ostream &O, unsigned IndentCount) const {
   const std::string Indent(IndentCount, ' ');
   bool isBlock = Abbrev.getTag() == 0;
 
@@ -164,11 +164,10 @@ void DIE::print(raw_ostream &O, unsigned IncIndent) {
   IndentCount -= 2;
 
   for (unsigned j = 0, M = Children.size(); j < M; ++j) {
-    Children[j]->print(O, 4);
+    Children[j]->print(O, IndentCount+4);
   }
 
   if (!isBlock) O << "\n";
-  IndentCount -= IncIndent;
 }
 
 void DIE::dump() {
diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h
index c332aa2..3c06001 100644
--- a/lib/CodeGen/AsmPrinter/DIE.h
+++ b/lib/CodeGen/AsmPrinter/DIE.h
@@ -139,8 +139,7 @@ namespace llvm {
     mutable unsigned IndentCount;
   public:
     explicit DIE(unsigned Tag)
-      : Offset(0), Size(0), Abbrev(Tag, dwarf::DW_CHILDREN_no), Parent(0),
-        IndentCount(0) {}
+      : Offset(0), Size(0), Abbrev(Tag, dwarf::DW_CHILDREN_no), Parent(0) {}
     virtual ~DIE();
 
     // Accessors.
@@ -179,7 +178,7 @@ namespace llvm {
     }
 
 #ifndef NDEBUG
-    void print(raw_ostream &O, unsigned IncIndent = 0);
+    void print(raw_ostream &O, unsigned IndentCount = 0) const;
     void dump();
 #endif
   };
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index f9b6f94..89abcff 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -685,7 +685,7 @@ bool CompileUnit::addConstantValue(DIE *Die, const APInt &Val,
   return true;
 }
 
-/// addTemplateParams - Add template parameters in buffer.
+/// addTemplateParams - Add template parameters into buffer.
 void CompileUnit::addTemplateParams(DIE &Buffer, DIArray TParams) {
   // Add template parameters.
   for (unsigned i = 0, e = TParams.getNumElements(); i != e; ++i) {
@@ -707,7 +707,7 @@ DIE *CompileUnit::getOrCreateContextDIE(DIDescriptor Context) {
     return getOrCreateNameSpace(DINameSpace(Context));
   else if (Context.isSubprogram())
     return getOrCreateSubprogramDIE(DISubprogram(Context));
-  else 
+  else
     return getDIE(Context);
 }
 
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index 2b180c6..8f08c63 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -94,9 +94,6 @@ class CompileUnit {
   /// DWARF version doesn't handle the language, return -1.
   int64_t getDefaultLowerBound() const;
 
-  /// getOrCreateContextDIE - Get context owner's DIE.
-  DIE *getOrCreateContextDIE(DIDescriptor Context);
-
 public:
   CompileUnit(unsigned UID, unsigned L, DIE *D, AsmPrinter *A, DwarfDebug *DW,
               DwarfUnits *);
@@ -372,6 +369,9 @@ public:
   /// createStaticMemberDIE - Create new static data member DIE.
   DIE *createStaticMemberDIE(DIDerivedType DT);
 
+  /// getOrCreateContextDIE - Get context owner's DIE.
+  DIE *getOrCreateContextDIE(DIDescriptor Context);
+
 private:
 
   // DIEValueAllocator - All DIEValues are allocated through this allocator.
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index d3cb4f9..73bba69 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -94,6 +94,12 @@ static cl::opt<DefaultOnOff> SplitDwarf("split-dwarf", cl::Hidden,
 namespace {
   const char *DWARFGroupName = "DWARF Emission";
   const char *DbgTimerName = "DWARF Debug Writer";
+
+  struct CompareFirst { // Orders pair-like values by 'first' alone.
+    template <typename P> bool operator()(const P &A, const P &B) const {
+      return A.first < B.first;
+    }
+  };
 } // end anonymous namespace
 
 //===----------------------------------------------------------------------===//
@@ -170,12 +176,13 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
   DwarfInfoSectionSym = DwarfAbbrevSectionSym = 0;
   DwarfStrSectionSym = TextSectionSym = 0;
   DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = DwarfLineSectionSym = 0;
+  DwarfAddrSectionSym = 0;
   DwarfAbbrevDWOSectionSym = DwarfStrDWOSectionSym = 0;
   FunctionBeginSym = FunctionEndSym = 0;
 
   // Turn on accelerator tables and older gdb compatibility
   // for Darwin.
-  bool IsDarwin = Triple(M->getTargetTriple()).isOSDarwin();
+  bool IsDarwin = Triple(A->getTargetTriple()).isOSDarwin();
   if (DarwinGDBCompat == Default) {
     if (IsDarwin)
       IsDarwinGDBCompat = true;
@@ -596,9 +603,16 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) {
   }
   else {
     // There is no need to emit empty lexical block DIE.
-    if (Children.empty())
+    std::pair<ImportedEntityMap::const_iterator,
+              ImportedEntityMap::const_iterator> Range = std::equal_range(
+        ScopesWithImportedEntities.begin(), ScopesWithImportedEntities.end(),
+        std::pair<const MDNode *, const MDNode *>(DS, (const MDNode*)0),
+        CompareFirst());
+    if (Children.empty() && Range.first == Range.second)
       return NULL;
     ScopeDIE = constructLexicalScopeDIE(TheCU, Scope);
+    for (ImportedEntityMap::const_iterator i = Range.first; i != Range.second; ++i)
+      constructImportedModuleDIE(TheCU, i->second, ScopeDIE);
   }
 
   if (!ScopeDIE) return NULL;
@@ -643,7 +657,7 @@ unsigned DwarfDebug::getOrCreateSourceID(StringRef FileName,
 
   // We look up the CUID/file/dir by concatenating them with a zero byte.
   SmallString<128> NamePair;
-  NamePair += CUID;
+  NamePair += utostr(CUID);
   NamePair += '\0';
   NamePair += DirName;
   NamePair += '\0'; // Zero bytes are not allowed in paths.
@@ -681,9 +695,12 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) {
   NewCU->addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
                  DIUnit.getLanguage());
   NewCU->addString(Die, dwarf::DW_AT_name, FN);
+
   // 2.17.1 requires that we use DW_AT_low_pc for a single entry point
-  // into an entity. We're using 0 (or a NULL label) for this.
-  NewCU->addLabelAddress(Die, dwarf::DW_AT_low_pc, NULL);
+  // into an entity. We're using 0 (or a NULL label) for this. For
+  // split dwarf it's in the skeleton CU so omit it here.
+  if (!useSplitDwarf())
+    NewCU->addLabelAddress(Die, dwarf::DW_AT_low_pc, NULL);
 
   // Define start line table label for each Compile Unit.
   MCSymbol *LineTableStartSym = Asm->GetTempSymbol("line_table_start",
@@ -691,21 +708,32 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) {
   Asm->OutStreamer.getContext().setMCLineTableSymbol(LineTableStartSym,
                                                      NewCU->getUniqueID());
 
+  // Use a single line table if we are using .loc and generating assembly.
+  bool UseTheFirstCU =
+    (Asm->TM.hasMCUseLoc() &&
+     Asm->OutStreamer.getKind() == MCStreamer::SK_AsmStreamer) ||
+    (NewCU->getUniqueID() == 0);
+
   // DW_AT_stmt_list is a offset of line number information for this
-  // compile unit in debug_line section.
+  // compile unit in debug_line section. For split dwarf this is
+  // left in the skeleton CU and so not included.
   // The line table entries are not always emitted in assembly, so it
   // is not okay to use line_table_start here.
-  if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
-    NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4,
-                    NewCU->getUniqueID() == 0 ?
-                    Asm->GetTempSymbol("section_line") : LineTableStartSym);
-  else if (NewCU->getUniqueID() == 0)
-    NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0);
-  else
-    NewCU->addDelta(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4,
-                    LineTableStartSym, DwarfLineSectionSym);
+  if (!useSplitDwarf()) {
+    if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
+      NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4,
+                      UseTheFirstCU ?
+                      Asm->GetTempSymbol("section_line") : LineTableStartSym);
+    else if (UseTheFirstCU)
+      NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0);
+    else
+      NewCU->addDelta(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4,
+                      LineTableStartSym, DwarfLineSectionSym);
+  }
 
-  if (!CompilationDir.empty())
+  // If we're using split dwarf the compilation dir is going to be in the
+  // skeleton CU and so we don't need to duplicate it here.
+  if (!useSplitDwarf() && !CompilationDir.empty())
     NewCU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir);
   if (DIUnit.isOptimized())
     NewCU->addFlag(Die, dwarf::DW_AT_APPLE_optimized);
@@ -754,6 +782,41 @@ void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU,
     TheCU->addGlobalName(SP.getName(), SubprogramDie);
 }
 
+// Build the imported-module DIE for N under the DIE of the context N names.
+// Nothing is emitted if N fails Verify() or no context DIE can be obtained.
+void DwarfDebug::constructImportedModuleDIE(CompileUnit *TheCU,
+                                            const MDNode *N) {
+  DIImportedModule IM(N);
+  DIE *Ctx = IM.Verify() ? TheCU->getOrCreateContextDIE(IM.getContext()) : 0;
+  if (Ctx) constructImportedModuleDIE(TheCU, IM, Ctx);
+}
+
+// Overload for a caller-supplied parent DIE; metadata failing Verify() is
+// silently skipped.
+void DwarfDebug::constructImportedModuleDIE(CompileUnit *TheCU, const MDNode *N,
+                                            DIE *Context) {
+  DIImportedModule IM(N);
+  if (IM.Verify()) constructImportedModuleDIE(TheCU, IM, Context);
+}
+
+// Add a DW_TAG_imported_module DIE for Module as a child of Context.
+void DwarfDebug::constructImportedModuleDIE(CompileUnit *TheCU,
+                                            const DIImportedModule &Module,
+                                            DIE *Context) {
+  assert(Module.Verify() &&
+         "Use one of the MDNode * overloads to handle invalid metadata");
+  assert(Context && "Should always have a context for an imported_module");
+  DIE *Import = new DIE(dwarf::DW_TAG_imported_module);
+  TheCU->insertDIE(Module, Import);
+  DIE *NS = TheCU->getOrCreateNameSpace(Module.getNameSpace());
+  unsigned SrcID = getOrCreateSourceID(Module.getContext().getFilename(),
+      Module.getContext().getDirectory(), TheCU->getUniqueID());
+  TheCU->addUInt(Import, dwarf::DW_AT_decl_file, 0, SrcID);
+  TheCU->addUInt(Import, dwarf::DW_AT_decl_line, 0, Module.getLineNumber());
+  TheCU->addDIEEntry(Import, dwarf::DW_AT_import, dwarf::DW_FORM_ref4, NS);
+  Context->addChild(Import);
+}
+
 // Emit all Dwarf sections that should come prior to the content. Create
 // global DIEs and emit initial debug info sections. This is invoked by
 // the target AsmPrinter.
@@ -775,6 +838,13 @@ void DwarfDebug::beginModule() {
   for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
     DICompileUnit CUNode(CU_Nodes->getOperand(i));
     CompileUnit *CU = constructCompileUnit(CUNode);
+    DIArray ImportedModules = CUNode.getImportedModules(); // index by scope
+    for (unsigned i = 0, e = ImportedModules.getNumElements(); i != e; ++i)
+      ScopesWithImportedEntities.push_back(std::make_pair(
+          DIImportedModule(ImportedModules.getElement(i)).getContext(),
+          ImportedModules.getElement(i)));
+    std::stable_sort(ScopesWithImportedEntities.begin(), // keeps same-key order
+                     ScopesWithImportedEntities.end(), CompareFirst());
     DIArray GVs = CUNode.getGlobalVariables();
     for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i)
       CU->createGlobalVariableDIE(GVs.getElement(i));
@@ -787,11 +857,16 @@ void DwarfDebug::beginModule() {
     DIArray RetainedTypes = CUNode.getRetainedTypes();
     for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i)
       CU->getOrCreateTypeDIE(RetainedTypes.getElement(i));
+    // Emit imported_modules last so that the relevant context is already
+    // available.
+    for (unsigned i = 0, e = ImportedModules.getNumElements(); i != e; ++i)
+      constructImportedModuleDIE(CU, ImportedModules.getElement(i));
     // If we're splitting the dwarf out now that we've got the entire
     // CU then construct a skeleton CU based upon it.
     if (useSplitDwarf()) {
-    // This should be a unique identifier when we want to build .dwp files.
-      CU->addUInt(CU->getCUDie(), dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8, 0);
+      // This should be a unique identifier when we want to build .dwp files.
+      CU->addUInt(CU->getCUDie(), dwarf::DW_AT_GNU_dwo_id,
+                  dwarf::DW_FORM_data8, 0);
       // Now construct the skeleton CU associated.
       constructSkeletonCU(CUNode);
     }
@@ -1099,7 +1174,13 @@ static DotDebugLocEntry getDebugLocEntry(AsmPrinter *Asm,
   }
   if (MI->getOperand(0).isReg() && MI->getOperand(1).isImm()) {
     MachineLocation MLoc;
-    MLoc.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm());
+    // TODO: Currently an offset of 0 in a DBG_VALUE means
+    // we need to generate a direct register value.
+    // There is no way to specify an indirect value with offset 0.
+    if (MI->getOperand(1).getImm() == 0)
+      MLoc.set(MI->getOperand(0).getReg());
+    else
+      MLoc.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm());
     return DotDebugLocEntry(FLabel, SLabel, MLoc, Var);
   }
   if (MI->getOperand(0).isImm())
@@ -1366,7 +1447,12 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
   LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
   CompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode());
   assert(TheCU && "Unable to find compile unit!");
-  Asm->OutStreamer.getContext().setDwarfCompileUnitID(TheCU->getUniqueID());
+  if (Asm->TM.hasMCUseLoc() &&
+      Asm->OutStreamer.getKind() == MCStreamer::SK_AsmStreamer)
+    // Use a single line table if we are using .loc and generating assembly.
+    Asm->OutStreamer.getContext().setDwarfCompileUnitID(0);
+  else
+    Asm->OutStreamer.getContext().setDwarfCompileUnitID(TheCU->getUniqueID());
 
   FunctionBeginSym = Asm->GetTempSymbol("func_begin",
                                         Asm->getFunctionNumber());
@@ -1740,9 +1826,12 @@ void DwarfDebug::emitSectionLabels() {
   emitSectionSym(Asm, TLOF.getDwarfPubTypesSection());
   DwarfStrSectionSym =
     emitSectionSym(Asm, TLOF.getDwarfStrSection(), "info_string");
-  if (useSplitDwarf())
+  if (useSplitDwarf()) {
     DwarfStrDWOSectionSym =
       emitSectionSym(Asm, TLOF.getDwarfStrDWOSection(), "skel_string");
+    DwarfAddrSectionSym =
+      emitSectionSym(Asm, TLOF.getDwarfAddrSection(), "addr_sec");
+  }
   DwarfDebugRangeSectionSym = emitSectionSym(Asm, TLOF.getDwarfRangesSection(),
                                              "debug_range");
 
@@ -2510,9 +2599,14 @@ CompileUnit *DwarfDebug::constructSkeletonCU(const MDNode *N) {
   // This should be a unique identifier when we want to build .dwp files.
   NewCU->addUInt(Die, dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8, 0);
 
-  // FIXME: The addr base should be relative for each compile unit, however,
-  // this one is going to be 0 anyhow.
-  NewCU->addUInt(Die, dwarf::DW_AT_GNU_addr_base, dwarf::DW_FORM_sec_offset, 0);
+  // Emit a relocation to the start of the address section when the target uses
+  // cross-section relocations; otherwise use 0, the start of this unit's table.
+  if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
+    NewCU->addLabel(Die, dwarf::DW_AT_GNU_addr_base, dwarf::DW_FORM_sec_offset,
+                    DwarfAddrSectionSym);
+  else
+    NewCU->addUInt(Die, dwarf::DW_AT_GNU_addr_base,
+                   dwarf::DW_FORM_sec_offset, 0);
 
   // 2.17.1 requires that we use DW_AT_low_pc for a single entry point
   // into an entity. We're using 0, or a NULL label for this.
@@ -2520,6 +2614,7 @@ CompileUnit *DwarfDebug::constructSkeletonCU(const MDNode *N) {
 
   // DW_AT_stmt_list is a offset of line number information for this
   // compile unit in debug_line section.
+  // FIXME: Should handle multiple compile units.
   if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
     NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_sec_offset,
                     DwarfLineSectionSym);
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 81e345e..24f758d 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -392,7 +392,7 @@ class DwarfDebug {
   // section offsets and are created by EmitSectionLabels.
   MCSymbol *DwarfInfoSectionSym, *DwarfAbbrevSectionSym;
   MCSymbol *DwarfStrSectionSym, *TextSectionSym, *DwarfDebugRangeSectionSym;
-  MCSymbol *DwarfDebugLocSectionSym, *DwarfLineSectionSym;
+  MCSymbol *DwarfDebugLocSectionSym, *DwarfLineSectionSym, *DwarfAddrSectionSym;
   MCSymbol *FunctionBeginSym, *FunctionEndSym;
   MCSymbol *DwarfAbbrevDWOSectionSym, *DwarfStrDWOSectionSym;
 
@@ -433,6 +433,10 @@ class DwarfDebug {
   // Holder for the skeleton information.
   DwarfUnits SkeletonHolder;
 
+  typedef SmallVector<std::pair<const MDNode *, const MDNode *>, 32>
+    ImportedEntityMap;
+  ImportedEntityMap ScopesWithImportedEntities;
+
 private:
 
   void addScopeVariable(LexicalScope *LS, DbgVariable *Var);
@@ -555,6 +559,18 @@ private:
   /// \brief Construct subprogram DIE.
   void constructSubprogramDIE(CompileUnit *TheCU, const MDNode *N);
 
+  /// \brief Construct imported_module DIE in the DIE of N's own context.
+  void constructImportedModuleDIE(CompileUnit *TheCU, const MDNode *N);
+
+  /// \brief Construct imported_module DIE for N as a child of Context.
+  void constructImportedModuleDIE(CompileUnit *TheCU, const MDNode *N,
+                                  DIE *Context);
+
+  /// \brief Construct imported_module DIE for verified Module under Context.
+  void constructImportedModuleDIE(CompileUnit *TheCU,
+                                  const DIImportedModule &Module,
+                                  DIE *Context);
+
   /// \brief Register a source line with debug info. Returns the unique
   /// label that was emitted and which provides correspondence to the
   /// source line list.
diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp
index 012ff8a..4a99184 100644
--- a/lib/CodeGen/BasicTargetTransformInfo.cpp
+++ b/lib/CodeGen/BasicTargetTransformInfo.cpp
@@ -204,20 +204,25 @@ unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
 
   std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
 
+  bool IsFloat = Ty->getScalarType()->isFloatingPointTy();
+  // Assume that floating point arithmetic operations cost twice as much as
+  // integer operations.
+  unsigned OpCost = (IsFloat ? 2 : 1);
+
   if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
     // The operation is legal. Assume it costs 1.
-    // If the type is split to multiple registers, assume that thre is some
+    // If the type is split to multiple registers, assume that there is some
     // overhead to this.
     // TODO: Once we have extract/insert subvector cost we need to use them.
     if (LT.first > 1)
-      return LT.first * 2;
-    return LT.first * 1;
+      return LT.first * 2 * OpCost;
+    return LT.first * 1 * OpCost;
   }
 
   if (!TLI->isOperationExpand(ISD, LT.second)) {
     // If the operation is custom lowered then assume
     // thare the code is twice as expensive.
-    return LT.first * 2;
+    return LT.first * 2 * OpCost;
   }
 
   // Else, assume that we need to scalarize this op.
@@ -230,7 +235,7 @@ unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
   }
 
   // We don't know anything about this scalar instruction.
-  return 1;
+  return OpCost;
 }
 
 unsigned BasicTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp
index dee339a..38ae17d 100644
--- a/lib/CodeGen/CalcSpillWeights.cpp
+++ b/lib/CodeGen/CalcSpillWeights.cpp
@@ -117,7 +117,7 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) {
   float totalWeight = 0;
   SmallPtrSet<MachineInstr*, 8> visited;
 
-  // Find the best physreg hist and the best virtreg hint.
+  // Find the best physreg hint and the best virtreg hint.
   float bestPhys = 0, bestVirt = 0;
   unsigned hintPhys = 0, hintVirt = 0;
 
diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp
index f1d4ace..75f4b96 100644
--- a/lib/CodeGen/CallingConvLower.cpp
+++ b/lib/CodeGen/CallingConvLower.cpp
@@ -32,7 +32,7 @@ CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf,
   // No stack is used.
   StackOffset = 0;
 
-  clearFirstByValReg();
+  clearByValRegsInfo();
   UsedRegs.resize((TRI.getNumRegs()+31)/32);
 }
 
diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp
index 35ec68d..c641991 100644
--- a/lib/CodeGen/CodeGen.cpp
+++ b/lib/CodeGen/CodeGen.cpp
@@ -13,6 +13,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/InitializePasses.h"
+#include "llvm/PassRegistry.h"
 #include "llvm-c/Initialization.h"
 
 using namespace llvm;
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index 9958d7d..8264d6d 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -1039,6 +1039,10 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
     return false;
   }
 
+  if (CvtBBI->BB->hasAddressTaken())
+    // Conservatively abort if-conversion if BB's address is taken.
+    return false;
+
   if (Kind == ICSimpleFalse)
     if (TII->ReverseBranchCondition(Cond))
       llvm_unreachable("Unable to reverse branch condition!");
@@ -1054,6 +1058,10 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
     // Copy instructions in the true block, predicate them, and add them to
     // the entry block.
     CopyAndPredicateBlock(BBI, *CvtBBI, Cond, Redefs);
+
+    // RemoveExtraEdges won't work if the block has an unanalyzable branch, so
+    // explicitly remove CvtBBI as a successor.
+    BBI.BB->removeSuccessor(CvtBBI->BB);
   } else {
     PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond, Redefs);
 
@@ -1112,6 +1120,10 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
     return false;
   }
 
+  if (CvtBBI->BB->hasAddressTaken())
+    // Conservatively abort if-conversion if BB's address is taken.
+    return false;
+
   if (Kind == ICTriangleFalse || Kind == ICTriangleFRev)
     if (TII->ReverseBranchCondition(Cond))
       llvm_unreachable("Unable to reverse branch condition!");
@@ -1146,6 +1158,10 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
     // Copy instructions in the true block, predicate them, and add them to
     // the entry block.
     CopyAndPredicateBlock(BBI, *CvtBBI, Cond, Redefs, true);
+
+    // RemoveExtraEdges won't work if the block has an unanalyzable branch, so
+    // explicitly remove CvtBBI as a successor.
+    BBI.BB->removeSuccessor(CvtBBI->BB);
   } else {
     // Predicate the 'true' block after removing its branch.
     CvtBBI->NonPredSize -= TII->RemoveBranch(*CvtBBI->BB);
@@ -1176,7 +1192,8 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
     // block. By not merging them, we make it possible to iteratively
     // ifcvt the blocks.
     if (!HasEarlyExit &&
-        NextBBI->BB->pred_size() == 1 && !NextBBI->HasFallThrough) {
+        NextBBI->BB->pred_size() == 1 && !NextBBI->HasFallThrough &&
+        !NextBBI->BB->hasAddressTaken()) {
       MergeBlocks(BBI, *NextBBI);
       FalseBBDead = true;
     } else {
@@ -1226,6 +1243,10 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
     return false;
   }
 
+  if (TrueBBI.BB->hasAddressTaken() || FalseBBI.BB->hasAddressTaken())
+    // Conservatively abort if-conversion if either BB has its address taken.
+    return false;
+
   // Put the predicated instructions from the 'true' block before the
   // instructions from the 'false' block, unless the true block would clobber
   // the predicate, in which case, do the opposite.
@@ -1374,7 +1395,8 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
   // tail, add a unconditional branch to it.
   if (TailBB) {
     BBInfo &TailBBI = BBAnalysis[TailBB->getNumber()];
-    bool CanMergeTail = !TailBBI.HasFallThrough;
+    bool CanMergeTail = !TailBBI.HasFallThrough &&
+      !TailBBI.BB->hasAddressTaken();
     // There may still be a fall-through edge from BBI1 or BBI2 to TailBB;
     // check if there are any other predecessors besides those.
     unsigned NumPreds = TailBB->pred_size();
@@ -1543,6 +1565,9 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
 /// i.e., when FromBBI's branch is being moved, add those successor edges to
 /// ToBBI.
 void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
+  assert(!FromBBI.BB->hasAddressTaken() &&
+         "Removing a BB whose address is taken!");
+
   ToBBI.BB->splice(ToBBI.BB->end(),
                    FromBBI.BB, FromBBI.BB->begin(), FromBBI.BB->end());
 
diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp
index c6d1a18..35295fe 100644
--- a/lib/CodeGen/InlineSpiller.cpp
+++ b/lib/CodeGen/InlineSpiller.cpp
@@ -955,18 +955,21 @@ void InlineSpiller::reMaterializeAll() {
   Edit->eliminateDeadDefs(DeadDefs, RegsToSpill);
 
   // Get rid of deleted and empty intervals.
-  for (unsigned i = RegsToSpill.size(); i != 0; --i) {
-    unsigned Reg = RegsToSpill[i-1];
-    if (!LIS.hasInterval(Reg)) {
-      RegsToSpill.erase(RegsToSpill.begin() + (i - 1));
+  unsigned ResultPos = 0;
+  for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) {
+    unsigned Reg = RegsToSpill[i];
+    if (!LIS.hasInterval(Reg))
       continue;
-    }
+
     LiveInterval &LI = LIS.getInterval(Reg);
-    if (!LI.empty())
+    if (LI.empty()) {
+      Edit->eraseVirtReg(Reg);
       continue;
-    Edit->eraseVirtReg(Reg);
-    RegsToSpill.erase(RegsToSpill.begin() + (i - 1));
+    }
+
+    RegsToSpill[ResultPos++] = Reg;
   }
+  RegsToSpill.erase(RegsToSpill.begin() + ResultPos, RegsToSpill.end());
   DEBUG(dbgs() << RegsToSpill.size() << " registers to spill after remat.\n");
 }
 
diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp
index 07f0ccf..d894f66 100644
--- a/lib/CodeGen/IntrinsicLowering.cpp
+++ b/lib/CodeGen/IntrinsicLowering.cpp
@@ -453,6 +453,12 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
     CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1));
     break;
 
+  case Intrinsic::annotation:
+  case Intrinsic::ptr_annotation:
+    // Just drop the annotation, but forward the value
+    CI->replaceAllUsesWith(CI->getOperand(0));
+    break;
+
   case Intrinsic::var_annotation:
     break;   // Strip out annotate intrinsic
     
diff --git a/lib/CodeGen/LocalStackSlotAllocation.cpp b/lib/CodeGen/LocalStackSlotAllocation.cpp
index 352ef94..26a1176 100644
--- a/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -46,13 +46,16 @@ namespace {
   class FrameRef {
     MachineBasicBlock::iterator MI; // Instr referencing the frame
     int64_t LocalOffset;            // Local offset of the frame idx referenced
+    int FrameIdx;                   // The frame index
   public:
-    FrameRef(MachineBasicBlock::iterator I, int64_t Offset) :
-      MI(I), LocalOffset(Offset) {}
+    FrameRef(MachineBasicBlock::iterator I, int64_t Offset, int Idx) :
+      MI(I), LocalOffset(Offset), FrameIdx(Idx) {}
     bool operator<(const FrameRef &RHS) const {
       return LocalOffset < RHS.LocalOffset;
     }
-    MachineBasicBlock::iterator getMachineInstr() { return MI; }
+    MachineBasicBlock::iterator getMachineInstr() const { return MI; }
+    int64_t getLocalOffset() const { return LocalOffset; }
+    int getFrameIndex() const { return FrameIdx; }
   };
 
   class LocalStackSlotPass: public MachineFunctionPass {
@@ -194,22 +197,15 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
 }
 
 static inline bool
-lookupCandidateBaseReg(const SmallVector<std::pair<unsigned, int64_t>, 8> &Regs,
-                       std::pair<unsigned, int64_t> &RegOffset,
+lookupCandidateBaseReg(int64_t BaseOffset,
                        int64_t FrameSizeAdjust,
                        int64_t LocalFrameOffset,
                        const MachineInstr *MI,
                        const TargetRegisterInfo *TRI) {
-  unsigned e = Regs.size();
-  for (unsigned i = 0; i < e; ++i) {
-    RegOffset = Regs[i];
-    // Check if the relative offset from the where the base register references
-    // to the target address is in range for the instruction.
-    int64_t Offset = FrameSizeAdjust + LocalFrameOffset - RegOffset.second;
-    if (TRI->isFrameOffsetLegal(MI, Offset))
-      return true;
-  }
-  return false;
+  // Check if the relative offset from where the base register references
+  // to the target address is in range for the instruction.
+  int64_t Offset = FrameSizeAdjust + LocalFrameOffset - BaseOffset;
+  return TRI->isFrameOffsetLegal(MI, Offset);
 }
 
 bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
@@ -233,9 +229,6 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
   // choose the first one).
   SmallVector<FrameRef, 64> FrameReferenceInsns;
 
-  // A base register definition is a register + offset pair.
-  SmallVector<std::pair<unsigned, int64_t>, 8> BaseRegisters;
-
   for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
     for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
       MachineInstr *MI = I;
@@ -258,8 +251,12 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
           // Don't try this with values not in the local block.
           if (!MFI->isObjectPreAllocated(MI->getOperand(i).getIndex()))
             break;
+          int Idx = MI->getOperand(i).getIndex();
+          int64_t LocalOffset = LocalOffsets[Idx];
+          if (!TRI->needsFrameBaseReg(MI, LocalOffset))
+            break;
           FrameReferenceInsns.
-            push_back(FrameRef(MI, LocalOffsets[MI->getOperand(i).getIndex()]));
+            push_back(FrameRef(MI, LocalOffset, Idx));
           break;
         }
       }
@@ -271,86 +268,106 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
 
   MachineBasicBlock *Entry = Fn.begin();
 
+  unsigned BaseReg = 0;
+  int64_t BaseOffset = 0;
+
   // Loop through the frame references and allocate for them as necessary.
   for (int ref = 0, e = FrameReferenceInsns.size(); ref < e ; ++ref) {
-    MachineBasicBlock::iterator I =
-      FrameReferenceInsns[ref].getMachineInstr();
+    FrameRef &FR = FrameReferenceInsns[ref];
+    MachineBasicBlock::iterator I = FR.getMachineInstr();
     MachineInstr *MI = I;
-    for (unsigned idx = 0, e = MI->getNumOperands(); idx != e; ++idx) {
-      // Consider replacing all frame index operands that reference
-      // an object allocated in the local block.
-      if (MI->getOperand(idx).isFI()) {
-        int FrameIdx = MI->getOperand(idx).getIndex();
-
-        assert(MFI->isObjectPreAllocated(FrameIdx) &&
-               "Only pre-allocated locals expected!");
-
-        DEBUG(dbgs() << "Considering: " << *MI);
-        if (TRI->needsFrameBaseReg(MI, LocalOffsets[FrameIdx])) {
-          unsigned BaseReg = 0;
-          int64_t Offset = 0;
-          int64_t FrameSizeAdjust =
-            StackGrowsDown ? MFI->getLocalFrameSize() : 0;
-
-          DEBUG(dbgs() << "  Replacing FI in: " << *MI);
-
-          // If we have a suitable base register available, use it; otherwise
-          // create a new one. Note that any offset encoded in the
-          // instruction itself will be taken into account by the target,
-          // so we don't have to adjust for it here when reusing a base
-          // register.
-          std::pair<unsigned, int64_t> RegOffset;
-          if (lookupCandidateBaseReg(BaseRegisters, RegOffset,
-                                     FrameSizeAdjust,
-                                     LocalOffsets[FrameIdx],
-                                     MI, TRI)) {
-            DEBUG(dbgs() << "  Reusing base register " <<
-                  RegOffset.first << "\n");
-            // We found a register to reuse.
-            BaseReg = RegOffset.first;
-            Offset = FrameSizeAdjust + LocalOffsets[FrameIdx] -
-              RegOffset.second;
-          } else {
-            // No previously defined register was in range, so create a
-            // new one.
-            int64_t InstrOffset = TRI->getFrameIndexInstrOffset(MI, idx);
-            const MachineFunction *MF = MI->getParent()->getParent();
-            const TargetRegisterClass *RC = TRI->getPointerRegClass(*MF);
-            BaseReg = Fn.getRegInfo().createVirtualRegister(RC);
-
-            DEBUG(dbgs() << "  Materializing base register " << BaseReg <<
-                  " at frame local offset " <<
-                  LocalOffsets[FrameIdx] + InstrOffset << "\n");
-
-            // Tell the target to insert the instruction to initialize
-            // the base register.
-            //            MachineBasicBlock::iterator InsertionPt = Entry->begin();
-            TRI->materializeFrameBaseRegister(Entry, BaseReg, FrameIdx,
-                                              InstrOffset);
-
-            // The base register already includes any offset specified
-            // by the instruction, so account for that so it doesn't get
-            // applied twice.
-            Offset = -InstrOffset;
-
-            int64_t BaseOffset = FrameSizeAdjust + LocalOffsets[FrameIdx] +
-              InstrOffset;
-            BaseRegisters.push_back(
-              std::pair<unsigned, int64_t>(BaseReg, BaseOffset));
-            ++NumBaseRegisters;
-            UsedBaseReg = true;
-          }
-          assert(BaseReg != 0 && "Unable to allocate virtual base register!");
-
-          // Modify the instruction to use the new base register rather
-          // than the frame index operand.
-          TRI->resolveFrameIndex(I, BaseReg, Offset);
-          DEBUG(dbgs() << "Resolved: " << *MI);
-
-          ++NumReplacements;
-        }
+    int64_t LocalOffset = FR.getLocalOffset();
+    int FrameIdx = FR.getFrameIndex();
+    assert(MFI->isObjectPreAllocated(FrameIdx) &&
+           "Only pre-allocated locals expected!");
+
+    DEBUG(dbgs() << "Considering: " << *MI);
+
+    unsigned idx = 0;
+    for (unsigned f = MI->getNumOperands(); idx != f; ++idx) {
+      if (!MI->getOperand(idx).isFI())
+        continue;
+
+      if (FrameIdx == I->getOperand(idx).getIndex())
+        break;
+    }
+
+    assert(idx < MI->getNumOperands() && "Cannot find FI operand");
+
+    int64_t Offset = 0;
+    int64_t FrameSizeAdjust = StackGrowsDown ? MFI->getLocalFrameSize() : 0;
+
+    DEBUG(dbgs() << "  Replacing FI in: " << *MI);
+
+    // If we have a suitable base register available, use it; otherwise
+    // create a new one. Note that any offset encoded in the
+    // instruction itself will be taken into account by the target,
+    // so we don't have to adjust for it here when reusing a base
+    // register.
+    if (UsedBaseReg && lookupCandidateBaseReg(BaseOffset, FrameSizeAdjust,
+                                              LocalOffset, MI, TRI)) {
+      DEBUG(dbgs() << "  Reusing base register " << BaseReg << "\n");
+      // We found a register to reuse.
+      Offset = FrameSizeAdjust + LocalOffset - BaseOffset;
+    } else {
+      // No previously defined register was in range, so create a new one.
+ 
+      int64_t InstrOffset = TRI->getFrameIndexInstrOffset(MI, idx);
+
+      int64_t PrevBaseOffset = BaseOffset;
+      BaseOffset = FrameSizeAdjust + LocalOffset + InstrOffset;
+
+      // We'd like to avoid creating single-use virtual base registers.
+      // Because the FrameRefs are in sorted order, and we've already
+      // processed all FrameRefs before this one, just check whether or not
+      // the next FrameRef will be able to reuse this new register. If not,
+      // then don't bother creating it.
+      bool CanReuse = false;
+      for (int refn = ref + 1; refn < e; ++refn) {
+        FrameRef &FRN = FrameReferenceInsns[refn];
+        MachineBasicBlock::iterator J = FRN.getMachineInstr();
+        MachineInstr *MIN = J;
+
+        CanReuse = lookupCandidateBaseReg(BaseOffset, FrameSizeAdjust,
+                                          FRN.getLocalOffset(), MIN, TRI);
+        break;
       }
+
+      if (!CanReuse) {
+        BaseOffset = PrevBaseOffset;
+        continue;
+      }
+
+      const MachineFunction *MF = MI->getParent()->getParent();
+      const TargetRegisterClass *RC = TRI->getPointerRegClass(*MF);
+      BaseReg = Fn.getRegInfo().createVirtualRegister(RC);
+
+      DEBUG(dbgs() << "  Materializing base register " << BaseReg <<
+            " at frame local offset " << LocalOffset + InstrOffset << "\n");
+
+      // Tell the target to insert the instruction to initialize
+      // the base register.
+      //            MachineBasicBlock::iterator InsertionPt = Entry->begin();
+      TRI->materializeFrameBaseRegister(Entry, BaseReg, FrameIdx,
+                                        InstrOffset);
+
+      // The base register already includes any offset specified
+      // by the instruction, so account for that so it doesn't get
+      // applied twice.
+      Offset = -InstrOffset;
+
+      ++NumBaseRegisters;
+      UsedBaseReg = true;
     }
+    assert(BaseReg != 0 && "Unable to allocate virtual base register!");
+
+    // Modify the instruction to use the new base register rather
+    // than the frame index operand.
+    TRI->resolveFrameIndex(I, BaseReg, Offset);
+    DEBUG(dbgs() << "Resolved: " << *MI);
+
+    ++NumReplacements;
   }
+
   return UsedBaseReg;
 }
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 898e165..78e9950 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -37,7 +37,7 @@ using namespace llvm;
 
 MachineBasicBlock::MachineBasicBlock(MachineFunction &mf, const BasicBlock *bb)
   : BB(bb), Number(-1), xParent(&mf), Alignment(0), IsLandingPad(false),
-    AddressTaken(false) {
+    AddressTaken(false), CachedMCSymbol(NULL) {
   Insts.Parent = this;
 }
 
@@ -48,12 +48,16 @@ MachineBasicBlock::~MachineBasicBlock() {
 /// getSymbol - Return the MCSymbol for this basic block.
 ///
 MCSymbol *MachineBasicBlock::getSymbol() const {
-  const MachineFunction *MF = getParent();
-  MCContext &Ctx = MF->getContext();
-  const char *Prefix = Ctx.getAsmInfo().getPrivateGlobalPrefix();
-  return Ctx.GetOrCreateSymbol(Twine(Prefix) + "BB" +
-                               Twine(MF->getFunctionNumber()) + "_" +
-                               Twine(getNumber()));
+  if (!CachedMCSymbol) {
+    const MachineFunction *MF = getParent();
+    MCContext &Ctx = MF->getContext();
+    const char *Prefix = Ctx.getAsmInfo().getPrivateGlobalPrefix();
+    CachedMCSymbol = Ctx.GetOrCreateSymbol(Twine(Prefix) + "BB" +
+                                           Twine(MF->getFunctionNumber()) +
+                                           "_" + Twine(getNumber()));
+  }
+
+  return CachedMCSymbol;
 }
 
 
diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp
index cd948e2..bfba503 100644
--- a/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/lib/CodeGen/MachineBlockPlacement.cpp
@@ -39,6 +39,7 @@
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/Support/Allocator.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetLowering.h"
@@ -52,6 +53,11 @@ STATISTIC(CondBranchTakenFreq,
 STATISTIC(UncondBranchTakenFreq,
           "Potential frequency of taking unconditional branches");
 
+static cl::opt<unsigned> AlignAllBlock("align-all-blocks",
+                                       cl::desc("Force the alignment of all "
+                                                "blocks in the function."),
+                                       cl::init(0), cl::Hidden);
+
 namespace {
 class BlockChain;
 /// \brief Type for our function-wide basic block -> block chain mapping.
@@ -1088,6 +1094,12 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) {
   BlockToChain.clear();
   ChainAllocator.DestroyAll();
 
+  if (AlignAllBlock)
+    // Align all of the blocks in the function to a specific alignment.
+    for (MachineFunction::iterator FI = F.begin(), FE = F.end();
+         FI != FE; ++FI)
+      FI->setAlignment(AlignAllBlock);
+
   // We always return true as we have no way to track whether the final order
   // differs from the original order.
   return true;
diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp
index 0ea9ae0..8af9d05 100644
--- a/lib/CodeGen/MachineModuleInfo.cpp
+++ b/lib/CodeGen/MachineModuleInfo.cpp
@@ -326,8 +326,7 @@ void MachineModuleInfo::AnalyzeModule(const Module &M) {
   if (!GV || !GV->hasInitializer()) return;
 
   // Should be an array of 'i8*'.
-  const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
-  if (InitList == 0) return;
+  const ConstantArray *InitList = cast<ConstantArray>(GV->getInitializer());
 
   for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
     if (const Function *F =
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index 1af00e8..68372f6 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -15,6 +15,8 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/raw_os_ostream.h"
+
 using namespace llvm;
 
 MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI)
@@ -106,13 +108,69 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){
 /// clearVirtRegs - Remove all virtual registers (after physreg assignment).
 void MachineRegisterInfo::clearVirtRegs() {
 #ifndef NDEBUG
-  for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i)
-    assert(VRegInfo[TargetRegisterInfo::index2VirtReg(i)].second == 0 &&
-           "Vreg use list non-empty still?");
+  for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i) {
+    unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+    if (!VRegInfo[Reg].second)
+      continue;
+    // A non-empty use list is a bug: check it for corruption, then abort.
+    verifyUseList(Reg);
+    llvm_unreachable("Remaining virtual register operands");
+  }
 #endif
   VRegInfo.clear();
 }
 
+/// verifyUseList - Check the use list of Reg for consistency. Each problem
+/// found is reported on errs(), and an assertion fires if any entry is
+/// invalid. Does nothing in NDEBUG builds.
+void MachineRegisterInfo::verifyUseList(unsigned Reg) const {
+#ifndef NDEBUG
+  bool Valid = true;
+  for (reg_iterator I = reg_begin(Reg), E = reg_end(); I != E; ++I) {
+    MachineOperand *MO = &I.getOperand();
+    MachineInstr *MI = MO->getParent();
+    if (!MI) {
+      errs() << PrintReg(Reg, TRI) << " use list MachineOperand " << MO
+             << " has no parent instruction.\n";
+      Valid = false;
+      // MI is dereferenced below, so skip the other checks for this operand.
+      continue;
+    }
+    MachineOperand *MO0 = &MI->getOperand(0);
+    unsigned NumOps = MI->getNumOperands();
+    if (!(MO >= MO0 && MO < MO0+NumOps)) {
+      errs() << PrintReg(Reg, TRI) << " use list MachineOperand " << MO
+             << " doesn't belong to parent MI: " << *MI;
+      Valid = false;
+    }
+    if (!MO->isReg()) {
+      errs() << PrintReg(Reg, TRI) << " MachineOperand " << MO << ": " << *MO
+             << " is not a register\n";
+      Valid = false;
+      // getReg() is only meaningful for register operands.
+      continue;
+    }
+    if (MO->getReg() != Reg) {
+      errs() << PrintReg(Reg, TRI) << " use-list MachineOperand " << MO << ": "
+             << *MO << " is the wrong register\n";
+      Valid = false;
+    }
+  }
+  assert(Valid && "Invalid use list");
+#endif
+}
+
+/// verifyUseLists - Run verifyUseList on every virtual register and on every
+/// register number in [1, TRI->getNumRegs()). Does nothing in NDEBUG builds.
+void MachineRegisterInfo::verifyUseLists() const {
+#ifndef NDEBUG
+  for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i)
+    verifyUseList(TargetRegisterInfo::index2VirtReg(i));
+  for (unsigned i = 1, e = TRI->getNumRegs(); i != e; ++i)
+    verifyUseList(i);
+#endif
+}
+
 /// Add MO to the linked list of operands for its register.
 void MachineRegisterInfo::addRegOperandToUseList(MachineOperand *MO) {
   assert(!MO->isOnRegUseList() && "Already on list");
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index 5bd2349..fff6b2b 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -51,7 +51,11 @@ static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
 static bool ViewMISchedDAGs = false;
 #endif // NDEBUG
 
-// Experimental heuristics
+// FIXME: remove this flag after initial testing. It should always be a good
+// thing.
+static cl::opt<bool> EnableCopyConstrain("misched-vcopy", cl::Hidden,
+    cl::desc("Constrain vreg copies."), cl::init(true));
+
 static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden,
   cl::desc("Enable load clustering."), cl::init(true));
 
@@ -323,6 +327,10 @@ ScheduleDAGMI::~ScheduleDAGMI() {
   delete SchedImpl;
 }
 
+bool ScheduleDAGMI::canAddEdge(SUnit *SuccSU, SUnit *PredSU) {
+  return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU);
+}
+
 bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
   if (SuccSU != &ExitSU) {
     // Do not use WillCreateCycle, it assumes SD scheduling.
@@ -404,6 +412,8 @@ void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
   }
 }
 
+/// This is normally called from the main scheduler loop but may also be invoked
+/// by the scheduling strategy to perform additional code motion.
 void ScheduleDAGMI::moveInstruction(MachineInstr *MI,
                                     MachineBasicBlock::iterator InsertPos) {
   // Advance RegionBegin if the first instruction moves down.
@@ -505,6 +515,14 @@ updateScheduledPressure(const std::vector<unsigned> &NewMaxPressure) {
     if ((int)NewMaxPressure[ID] > MaxUnits)
       MaxUnits = NewMaxPressure[ID];
   }
+  DEBUG(
+    for (unsigned i = 0, e = NewMaxPressure.size(); i < e; ++i) {
+      unsigned Limit = TRI->getRegPressureSetLimit(i);
+      if (NewMaxPressure[i] > Limit ) {
+        dbgs() << "  " << TRI->getRegPressureSetName(i) << ": "
+               << NewMaxPressure[i] << " > " << Limit << "\n";
+      }
+    });
 }
 
 /// schedule - Called back from MachineScheduler::runOnMachineFunction
@@ -905,6 +923,184 @@ void MacroFusion::apply(ScheduleDAGMI *DAG) {
 }
 
 //===----------------------------------------------------------------------===//
+// CopyConstrain - DAG post-processing to encourage copy elimination.
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// \brief Post-process the DAG to create weak edges from all uses of a copy to
+/// the one use that defines the copy's source vreg, most likely an induction
+/// variable increment.
+class CopyConstrain : public ScheduleDAGMutation {
+  // Transient state.
+  SlotIndex RegionBeginIdx;
+  // RegionEndIdx is the slot index of the last non-debug instruction in the
+  // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
+  SlotIndex RegionEndIdx;
+public:
+  CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
+
+  virtual void apply(ScheduleDAGMI *DAG);
+
+protected:
+  void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMI *DAG);
+};
+} // anonymous
+
+/// constrainLocalCopy handles two possibilities:
+/// 1) Local src:
+/// I0:     = dst
+/// I1: src = ...
+/// I2:     = dst
+/// I3: dst = src (copy)
+/// (create pred->succ edges I0->I1, I2->I1)
+///
+/// 2) Local copy:
+/// I0: dst = src (copy)
+/// I1:     = dst
+/// I2: src = ...
+/// I3:     = dst
+/// (create pred->succ edges I1->I2, I3->I2)
+///
+/// Although the MachineScheduler is currently constrained to single blocks,
+/// this algorithm should handle extended blocks. An EBB is a set of
+/// contiguously numbered blocks such that the previous block in the EBB is
+/// always the single predecessor.
+void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMI *DAG) {
+  LiveIntervals *LIS = DAG->getLIS();
+  MachineInstr *Copy = CopySU->getInstr();
+
+  // Check for pure vreg copies.
+  unsigned SrcReg = Copy->getOperand(1).getReg();
+  if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+    return;
+
+  unsigned DstReg = Copy->getOperand(0).getReg();
+  if (!TargetRegisterInfo::isVirtualRegister(DstReg))
+    return;
+
+  // Check if either the dest or source is local. If it's live across a back
+  // edge, it's not local. Note that if both vregs are live across the back
+  // edge, we cannot successfully constrain the copy without cyclic scheduling.
+  unsigned LocalReg = DstReg;
+  unsigned GlobalReg = SrcReg;
+  LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
+  if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
+    LocalReg = SrcReg;
+    GlobalReg = DstReg;
+    LocalLI = &LIS->getInterval(LocalReg);
+    if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
+      return;
+  }
+  LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);
+
+  // Find the global segment after the start of the local LI.
+  LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
+  // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
+  // local live range. We could create edges from other global uses to the local
+  // start, but the coalescer should have already eliminated these cases, so
+  // don't bother dealing with it.
+  if (GlobalSegment == GlobalLI->end())
+    return;
+
+  // If GlobalSegment is killed at the LocalLI->start, the call to find()
+  // returned the next global segment. But if GlobalSegment overlaps with
+  // LocalLI->start, then advance to the next segment. If a hole in GlobalLI
+  // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
+  if (GlobalSegment->contains(LocalLI->beginIndex()))
+    ++GlobalSegment;
+
+  if (GlobalSegment == GlobalLI->end())
+    return;
+
+  // Check if GlobalLI contains a hole in the vicinity of LocalLI.
+  if (GlobalSegment != GlobalLI->begin()) {
+    // Two address defs have no hole.
+    if (SlotIndex::isSameInstr(llvm::prior(GlobalSegment)->end,
+                               GlobalSegment->start)) {
+      return;
+    }
+    // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
+    // it would be a disconnected component in the live range.
+    assert(llvm::prior(GlobalSegment)->start < LocalLI->beginIndex() &&
+           "Disconnected LRG within the scheduling region.");
+  }
+  MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
+  if (!GlobalDef)
+    return;
+
+  SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
+  if (!GlobalSU)
+    return;
+
+  // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
+  // constraining the uses of the last local def to precede GlobalDef.
+  SmallVector<SUnit*,8> LocalUses;
+  const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
+  MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
+  SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
+  for (SUnit::const_succ_iterator
+         I = LastLocalSU->Succs.begin(), E = LastLocalSU->Succs.end();
+       I != E; ++I) {
+    if (I->getKind() != SDep::Data || I->getReg() != LocalReg)
+      continue;
+    if (I->getSUnit() == GlobalSU)
+      continue;
+    if (!DAG->canAddEdge(GlobalSU, I->getSUnit()))
+      return;
+    LocalUses.push_back(I->getSUnit());
+  }
+  // Open the top of the GlobalLI hole by constraining any earlier global uses
+  // to precede the start of LocalLI.
+  SmallVector<SUnit*,8> GlobalUses;
+  MachineInstr *FirstLocalDef =
+    LIS->getInstructionFromIndex(LocalLI->beginIndex());
+  SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
+  for (SUnit::const_pred_iterator
+         I = GlobalSU->Preds.begin(), E = GlobalSU->Preds.end(); I != E; ++I) {
+    if (I->getKind() != SDep::Anti || I->getReg() != GlobalReg)
+      continue;
+    if (I->getSUnit() == FirstLocalSU)
+      continue;
+    if (!DAG->canAddEdge(FirstLocalSU, I->getSUnit()))
+      return;
+    GlobalUses.push_back(I->getSUnit());
+  }
+  DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
+  // Add the weak edges.
+  for (SmallVectorImpl<SUnit*>::const_iterator
+         I = LocalUses.begin(), E = LocalUses.end(); I != E; ++I) {
+    DEBUG(dbgs() << "  Local use SU(" << (*I)->NodeNum << ") -> SU("
+          << GlobalSU->NodeNum << ")\n");
+    DAG->addEdge(GlobalSU, SDep(*I, SDep::Weak));
+  }
+  for (SmallVectorImpl<SUnit*>::const_iterator
+         I = GlobalUses.begin(), E = GlobalUses.end(); I != E; ++I) {
+    DEBUG(dbgs() << "  Global use SU(" << (*I)->NodeNum << ") -> SU("
+          << FirstLocalSU->NodeNum << ")\n");
+    DAG->addEdge(FirstLocalSU, SDep(*I, SDep::Weak));
+  }
+}
+
+/// \brief Callback from DAG postProcessing to create weak edges to encourage
+/// copy elimination.
+void CopyConstrain::apply(ScheduleDAGMI *DAG) {
+  MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
+  if (FirstPos == DAG->end())
+    return;
+  RegionBeginIdx = DAG->getLIS()->getInstructionIndex(&*FirstPos);
+  RegionEndIdx = DAG->getLIS()->getInstructionIndex(
+    &*priorNonDebug(DAG->end(), DAG->begin()));
+
+  for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
+    SUnit *SU = &DAG->SUnits[Idx];
+    if (!SU->getInstr()->isCopy())
+      continue;
+
+    constrainLocalCopy(SU, DAG);
+  }
+}
+
+//===----------------------------------------------------------------------===//
 // ConvergingScheduler - Implementation of the standard MachineSchedStrategy.
 //===----------------------------------------------------------------------===//
 
@@ -916,7 +1112,7 @@ public:
   /// Represent the type of SchedCandidate found within a single queue.
   /// pickNodeBidirectional depends on these listed by decreasing priority.
   enum CandReason {
-    NoCand, SingleExcess, SingleCritical, Cluster,
+    NoCand, PhysRegCopy, SingleExcess, SingleCritical, Cluster, Weak,
     ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce,
     TopDepthReduce, TopPathReduce, SingleMax, MultiPressure, NextDefUse,
     NodeOrder};
@@ -1191,6 +1387,8 @@ protected:
                          const RegPressureTracker &RPTracker,
                          SchedCandidate &Candidate);
 
+  void reschedulePhysRegCopies(SUnit *SU, bool isTop);
+
 #ifndef NDEBUG
   void traceCandidate(const SchedCandidate &Cand);
 #endif
@@ -1339,6 +1537,8 @@ void ConvergingScheduler::SchedBoundary::setLatencyPolicy(CandPolicy &Policy) {
   for (ReadyQueue::iterator I = Available.begin(), E = Available.end();
        I != E; ++I) {
     unsigned L = getUnscheduledLatency(*I);
+    DEBUG(dbgs() << "  " << Available.getName()
+          << " RemLatency SU(" << (*I)->NodeNum << ") " << L << '\n');
     if (L > RemLatency)
       RemLatency = L;
   }
@@ -1349,10 +1549,13 @@ void ConvergingScheduler::SchedBoundary::setLatencyPolicy(CandPolicy &Policy) {
       RemLatency = L;
   }
   unsigned CriticalPathLimit = Rem->CriticalPath + SchedModel->getILPWindow();
+  DEBUG(dbgs() << "  " << Available.getName()
+        << " ExpectedLatency " << ExpectedLatency
+        << " CP Limit " << CriticalPathLimit << '\n');
   if (RemLatency + ExpectedLatency >= CriticalPathLimit
       && RemLatency > Rem->getMaxRemainingCount(SchedModel)) {
     Policy.ReduceLatency = true;
-    DEBUG(dbgs() << "Increase ILP: " << Available.getName() << '\n');
+    DEBUG(dbgs() << "  Increase ILP: " << Available.getName() << '\n');
   }
 }
 
@@ -1569,7 +1772,8 @@ void ConvergingScheduler::balanceZones(
   if ((int)(Rem->getMaxRemainingCount(SchedModel) - RemainingCritCount)
       > (int)SchedModel->getLatencyFactor()) {
     CriticalCand.Policy.ReduceResIdx = CriticalZone.CritResIdx;
-    DEBUG(dbgs() << "Balance " << CriticalZone.Available.getName() << " reduce "
+    DEBUG(dbgs() << "  Balance " << CriticalZone.Available.getName()
+          << " reduce "
           << SchedModel->getProcResource(CriticalZone.CritResIdx)->Name
           << '\n');
   }
@@ -1580,7 +1784,8 @@ void ConvergingScheduler::balanceZones(
   if ((int)(OppositeZone.ExpectedCount - OppositeCount)
       > (int)SchedModel->getLatencyFactor()) {
     OppositeCand.Policy.DemandResIdx = CriticalZone.CritResIdx;
-    DEBUG(dbgs() << "Balance " << OppositeZone.Available.getName() << " demand "
+    DEBUG(dbgs() << "  Balance " << OppositeZone.Available.getName()
+          << " demand "
           << SchedModel->getProcResource(OppositeZone.CritResIdx)->Name
           << '\n');
   }
@@ -1604,7 +1809,7 @@ void ConvergingScheduler::checkResourceLimits(
     if (Top.CritResIdx != Rem.CritResIdx) {
       TopCand.Policy.ReduceResIdx = Top.CritResIdx;
       BotCand.Policy.ReduceResIdx = Bot.CritResIdx;
-      DEBUG(dbgs() << "Reduce scheduled "
+      DEBUG(dbgs() << "  Reduce scheduled "
             << SchedModel->getProcResource(Top.CritResIdx)->Name << '\n');
     }
     return;
@@ -1621,7 +1826,7 @@ void ConvergingScheduler::checkResourceLimits(
         && (Rem.CriticalPath > Top.CurrCycle + Bot.CurrCycle)) {
       TopCand.Policy.ReduceLatency = true;
       BotCand.Policy.ReduceLatency = true;
-      DEBUG(dbgs() << "Reduce scheduled latency " << Top.ExpectedLatency
+      DEBUG(dbgs() << "  Reduce scheduled latency " << Top.ExpectedLatency
             << " + " << Bot.ExpectedLatency << '\n');
     }
     return;
@@ -1696,6 +1901,34 @@ static unsigned getWeakLeft(const SUnit *SU, bool isTop) {
   return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
 }
 
+/// Minimize physical register live ranges. Regalloc wants them adjacent to
+/// their physreg def/use. Returns 1 to prefer the copy, -1 to defer, else 0.
+///
+/// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
+/// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
+/// with the operation that produces or consumes the physreg. We'll do this when
+/// regalloc has support for parallel copies.
+static int biasPhysRegCopy(const SUnit *SU, bool isTop) {
+  const MachineInstr *MI = SU->getInstr();
+  if (!MI->isCopy())
+    return 0;
+
+  unsigned ScheduledOper = isTop ? 1 : 0;
+  unsigned UnscheduledOper = isTop ? 0 : 1;
+  // If we have already scheduled the physreg producer/consumer, immediately
+  // schedule the copy.
+  if (TargetRegisterInfo::isPhysicalRegister(
+        MI->getOperand(ScheduledOper).getReg()))
+    return 1;
+  // If the physreg is at the boundary, defer it. Otherwise schedule it
+  // immediately to free the dependent. We can hoist the copy later.
+  bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
+  if (TargetRegisterInfo::isPhysicalRegister(
+        MI->getOperand(UnscheduledOper).getReg()))
+    return AtBoundary ? -1 : 1;
+  return 0; // Neither copy operand is a physical register: no bias.
+}
+
 /// Apply a set of heursitics to a new candidate. Heuristics are currently
 /// hierarchical. This may be more efficient than a graduated cost model because
 /// we don't need to evaluate all aspects of the model for each node in the
@@ -1723,6 +1956,12 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
     TryCand.Reason = NodeOrder;
     return;
   }
+
+  if (tryGreater(biasPhysRegCopy(TryCand.SU, Zone.isTop()),
+                 biasPhysRegCopy(Cand.SU, Zone.isTop()),
+                 TryCand, Cand, PhysRegCopy))
+    return;
+
   // Avoid exceeding the target's limit.
   if (tryLess(TryCand.RPDelta.Excess.UnitIncrease,
               Cand.RPDelta.Excess.UnitIncrease, TryCand, Cand, SingleExcess))
@@ -1749,12 +1988,16 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
   if (tryGreater(TryCand.SU == NextClusterSU, Cand.SU == NextClusterSU,
                  TryCand, Cand, Cluster))
     return;
-  // Currently, weak edges are for clustering, so we hard-code that reason.
-  // However, deferring the current TryCand will not change Cand's reason.
+
+  // Weak edges are for clustering and other constraints.
+  //
+  // Deferring TryCand here does not change Cand's reason. This is good in the
+  // sense that a bad candidate shouldn't affect a previous candidate's
+  // goodness, but bad in that it is asymmetric and depends on queue order.
   CandReason OrigReason = Cand.Reason;
   if (tryLess(getWeakLeft(TryCand.SU, Zone.isTop()),
               getWeakLeft(Cand.SU, Zone.isTop()),
-              TryCand, Cand, Cluster)) {
+              TryCand, Cand, Weak)) {
     Cand.Reason = OrigReason;
     return;
   }
@@ -1825,20 +2068,20 @@ static bool compareRPDelta(const RegPressureDelta &LHS,
 
   // Avoid increasing the max critical pressure in the scheduled region.
   if (LHS.Excess.UnitIncrease != RHS.Excess.UnitIncrease) {
-    DEBUG(dbgs() << "RP excess top - bot: "
+    DEBUG(dbgs() << "  RP excess top - bot: "
           << (LHS.Excess.UnitIncrease - RHS.Excess.UnitIncrease) << '\n');
     return LHS.Excess.UnitIncrease < RHS.Excess.UnitIncrease;
   }
   // Avoid increasing the max critical pressure in the scheduled region.
   if (LHS.CriticalMax.UnitIncrease != RHS.CriticalMax.UnitIncrease) {
-    DEBUG(dbgs() << "RP critical top - bot: "
+    DEBUG(dbgs() << "  RP critical top - bot: "
           << (LHS.CriticalMax.UnitIncrease - RHS.CriticalMax.UnitIncrease)
           << '\n');
     return LHS.CriticalMax.UnitIncrease < RHS.CriticalMax.UnitIncrease;
   }
   // Avoid increasing the max pressure of the entire region.
   if (LHS.CurrentMax.UnitIncrease != RHS.CurrentMax.UnitIncrease) {
-    DEBUG(dbgs() << "RP current top - bot: "
+    DEBUG(dbgs() << "  RP current top - bot: "
           << (LHS.CurrentMax.UnitIncrease - RHS.CurrentMax.UnitIncrease)
           << '\n');
     return LHS.CurrentMax.UnitIncrease < RHS.CurrentMax.UnitIncrease;
@@ -1851,9 +2094,11 @@ const char *ConvergingScheduler::getReasonStr(
   ConvergingScheduler::CandReason Reason) {
   switch (Reason) {
   case NoCand:         return "NOCAND    ";
+  case PhysRegCopy:    return "PREG-COPY ";
   case SingleExcess:   return "REG-EXCESS";
   case SingleCritical: return "REG-CRIT  ";
   case Cluster:        return "CLUSTER   ";
+  case Weak:           return "WEAK      ";
   case SingleMax:      return "REG-MAX   ";
   case MultiPressure:  return "REG-MULTI ";
   case ResourceReduce: return "RES-REDUCE";
@@ -1953,8 +2198,7 @@ void ConvergingScheduler::pickNodeFromQueue(SchedBoundary &Zone,
 
 static void tracePick(const ConvergingScheduler::SchedCandidate &Cand,
                       bool IsTop) {
-  DEBUG(dbgs() << "Pick " << (IsTop ? "Top" : "Bot")
-        << " SU(" << Cand.SU->NodeNum << ") "
+  DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
         << ConvergingScheduler::getReasonStr(Cand.Reason) << '\n');
 }
 
@@ -1964,10 +2208,12 @@ SUnit *ConvergingScheduler::pickNodeBidirectional(bool &IsTopNode) {
   // efficient, but also provides the best heuristics for CriticalPSets.
   if (SUnit *SU = Bot.pickOnlyChoice()) {
     IsTopNode = false;
+    DEBUG(dbgs() << "Pick Bot NOCAND\n"); // Bot queue had only one choice.
     return SU;
   }
   if (SUnit *SU = Top.pickOnlyChoice()) {
     IsTopNode = true;
+    DEBUG(dbgs() << "Pick Top NOCAND\n"); // Top queue had only one choice.
     return SU;
   }
   CandPolicy NoPolicy;
@@ -2065,21 +2311,53 @@ SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) {
   if (SU->isBottomReady())
     Bot.removeReady(SU);
 
-  DEBUG(dbgs() << "Scheduling " << *SU->getInstr());
+  DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
   return SU;
 }
 
+void ConvergingScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
+  // Keep physreg copies adjacent to SU, the node that was just scheduled.
+  MachineBasicBlock::iterator InsertPos = SU->getInstr();
+  if (!isTop) // Bottom-up: step past SU so the position follows it.
+    ++InsertPos;
+  SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
+
+  // Find already scheduled copies with a single physreg dependence and move
+  // them to InsertPos: at SU when top-down, just past SU when bottom-up.
+  for (SmallVectorImpl<SDep>::iterator I = Deps.begin(), E = Deps.end();
+       I != E; ++I) {
+    if (I->getKind() != SDep::Data || !TRI->isPhysicalRegister(I->getReg()))
+      continue;
+    SUnit *DepSU = I->getSUnit();
+    if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
+      continue; // More than one dependence on DepSU; leave it in place.
+    MachineInstr *Copy = DepSU->getInstr();
+    if (!Copy->isCopy())
+      continue;
+    DEBUG(dbgs() << "  Rescheduling physreg copy ";
+          I->getSUnit()->dump(DAG));
+    DAG->moveInstruction(Copy, InsertPos);
+  }
+}
+
 /// Update the scheduler's state after scheduling a node. This is the same node
 /// that was just returned by pickNode(). However, ScheduleDAGMI needs to update
 /// it's state based on the current cycle before MachineSchedStrategy does.
+///
+/// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
+/// them here. See comments in biasPhysRegCopy.
 void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) {
   if (IsTopNode) {
     SU->TopReadyCycle = Top.CurrCycle;
     Top.bumpNode(SU);
+    if (SU->hasPhysRegUses)
+      reschedulePhysRegCopies(SU, true);
   }
   else {
     SU->BotReadyCycle = Bot.CurrCycle;
     Bot.bumpNode(SU);
+    if (SU->hasPhysRegDefs)
+      reschedulePhysRegCopies(SU, false);
   }
 }
 
@@ -2090,6 +2368,12 @@ static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
          "-misched-topdown incompatible with -misched-bottomup");
   ScheduleDAGMI *DAG = new ScheduleDAGMI(C, new ConvergingScheduler());
   // Register DAG post-processors.
+  //
+  // FIXME: extend the mutation API to allow earlier mutations to instantiate
+  // data and pass it to later mutations. Have a single mutation that gathers
+  // the interesting nodes in one pass.
+  if (EnableCopyConstrain)
+    DAG->addMutation(new CopyConstrain(DAG->TII, DAG->TRI));
   if (EnableLoadCluster)
     DAG->addMutation(new LoadClusterMutation(DAG->TII, DAG->TRI));
   if (EnableMacroFusion)
@@ -2179,12 +2463,12 @@ public:
     SUnit *SU = ReadyQ.back();
     ReadyQ.pop_back();
     IsTopNode = false;
-    DEBUG(dbgs() << "*** Scheduling " << "SU(" << SU->NodeNum << "): "
-          << *SU->getInstr()
+    DEBUG(dbgs() << "Pick node " << "SU(" << SU->NodeNum << ") "
           << " ILP: " << DAG->getDFSResult()->getILP(SU)
           << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU) << " @"
           << DAG->getDFSResult()->getSubtreeLevel(
-            DAG->getDFSResult()->getSubtreeID(SU)) << '\n');
+            DAG->getDFSResult()->getSubtreeID(SU)) << '\n'
+          << "Scheduling " << *SU->getInstr());
     return SU;
   }
 
diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp
index 49d8c4e..00f702c 100644
--- a/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/lib/CodeGen/MachineTraceMetrics.cpp
@@ -1200,8 +1200,10 @@ unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const {
   return std::max(Instrs, PRMax);
 }
 
+
 unsigned MachineTraceMetrics::Trace::
-getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks) const {
+getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks,
+                  ArrayRef<const MCSchedClassDesc*> ExtraInstrs) const {
   // Add up resources above and below the center block.
   ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum());
   ArrayRef<unsigned> PRHeights = TE.getProcResourceHeights(getBlockNum());
@@ -1210,6 +1212,18 @@ getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks) const {
     unsigned PRCycles = PRDepths[K] + PRHeights[K];
     for (unsigned I = 0; I != Extrablocks.size(); ++I)
       PRCycles += TE.MTM.getProcResourceCycles(Extrablocks[I]->getNumber())[K];
+    for (unsigned I = 0; I != ExtraInstrs.size(); ++I) {
+      const MCSchedClassDesc* SC = ExtraInstrs[I];
+      if (!SC->isValid())
+        continue;
+      for (TargetSchedModel::ProcResIter
+             PI = TE.MTM.SchedModel.getWriteProcResBegin(SC),
+             PE = TE.MTM.SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) {
+        if (PI->ProcResourceIdx != K)
+          continue;
+        PRCycles += (PI->Cycles * TE.MTM.SchedModel.getResourceFactor(K));
+      }
+    }
     PRMax = std::max(PRMax, PRCycles);
   }
   // Convert to cycle count.
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index 4b12300..037043f 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -472,6 +472,9 @@ void MachineVerifier::visitMachineFunctionBefore() {
     if (MInfo.Succs.size() != I->succ_size())
       report("MBB has duplicate entries in its successor list.", I);
   }
+
+  // Check that the register use lists are sane.
+  MRI->verifyUseLists();
 }
 
 // Does iterator point to a and b as the first two elements?
diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp
index 1af65c8..bfbc062 100644
--- a/lib/CodeGen/Passes.cpp
+++ b/lib/CodeGen/Passes.cpp
@@ -93,9 +93,10 @@ static cl::opt<bool> EarlyLiveIntervals("early-live-intervals", cl::Hidden,
 /// simple binary flags that either suppress the pass or do nothing.
 /// i.e. -disable-mypass=false has no effect.
 /// These should be converted to boolOrDefault in order to use applyOverride.
-static AnalysisID applyDisable(AnalysisID PassID, bool Override) {
+static IdentifyingPassPtr applyDisable(IdentifyingPassPtr PassID,
+                                       bool Override) {
   if (Override)
-    return 0;
+    return IdentifyingPassPtr();
   return PassID;
 }
 
@@ -103,19 +104,20 @@ static AnalysisID applyDisable(AnalysisID PassID, bool Override) {
 /// flags with ternary conditions. TargetID is passed through by default. The
 /// pass is suppressed when the option is false. When the option is true, the
 /// StandardID is selected if the target provides no default.
-static AnalysisID applyOverride(AnalysisID TargetID, cl::boolOrDefault Override,
-                                AnalysisID StandardID) {
+static IdentifyingPassPtr applyOverride(IdentifyingPassPtr TargetID,
+                                        cl::boolOrDefault Override,
+                                        AnalysisID StandardID) {
   switch (Override) {
   case cl::BOU_UNSET:
     return TargetID;
   case cl::BOU_TRUE:
-    if (TargetID)
+    if (TargetID.isValid())
       return TargetID;
     if (StandardID == 0)
       report_fatal_error("Target cannot enable pass");
     return StandardID;
   case cl::BOU_FALSE:
-    return 0;
+    return IdentifyingPassPtr();
   }
   llvm_unreachable("Invalid command line option state");
 }
@@ -132,7 +134,8 @@ static AnalysisID applyOverride(AnalysisID TargetID, cl::boolOrDefault Override,
 /// StandardID may be a pseudo ID. In that case TargetID is the name of the real
 /// pass to run. This allows multiple options to control a single pass depending
 /// on where in the pipeline that pass is added.
-static AnalysisID overridePass(AnalysisID StandardID, AnalysisID TargetID) {
+static IdentifyingPassPtr overridePass(AnalysisID StandardID,
+                                       IdentifyingPassPtr TargetID) {
   if (StandardID == &PostRASchedulerID)
     return applyDisable(TargetID, DisablePostRA);
 
@@ -200,11 +203,11 @@ public:
   // user interface. For example, a target may disable a standard pass by
   // default by substituting a pass ID of zero, and the user may still enable
   // that standard pass with an explicit command line option.
-  DenseMap<AnalysisID,AnalysisID> TargetPasses;
+  DenseMap<AnalysisID,IdentifyingPassPtr> TargetPasses;
 
   /// Store the pairs of <AnalysisID, AnalysisID> of which the second pass
   /// is inserted after each instance of the first one.
-  SmallVector<std::pair<AnalysisID, AnalysisID>, 4> InsertedPasses;
+  SmallVector<std::pair<AnalysisID, IdentifyingPassPtr>, 4> InsertedPasses;
 };
 } // namespace llvm
 
@@ -239,9 +242,13 @@ TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm)
 
 /// Insert InsertedPassID pass after TargetPassID.
 void TargetPassConfig::insertPass(AnalysisID TargetPassID,
-                                  AnalysisID InsertedPassID) {
-  assert(TargetPassID != InsertedPassID && "Insert a pass after itself!");
-  std::pair<AnalysisID, AnalysisID> P(TargetPassID, InsertedPassID);
+                                  IdentifyingPassPtr InsertedPassID) {
+  assert(((!InsertedPassID.isInstance() &&
+           TargetPassID != InsertedPassID.getID()) ||
+          (InsertedPassID.isInstance() &&
+           TargetPassID != InsertedPassID.getInstance()->getPassID())) &&
+         "Insert a pass after itself!");
+  std::pair<AnalysisID, IdentifyingPassPtr> P(TargetPassID, InsertedPassID);
   Impl->InsertedPasses.push_back(P);
 }
 
@@ -265,12 +272,12 @@ void TargetPassConfig::setOpt(bool &Opt, bool Val) {
 }
 
 void TargetPassConfig::substitutePass(AnalysisID StandardID,
-                                      AnalysisID TargetID) {
+                                      IdentifyingPassPtr TargetID) {
   Impl->TargetPasses[StandardID] = TargetID;
 }
 
-AnalysisID TargetPassConfig::getPassSubstitution(AnalysisID ID) const {
-  DenseMap<AnalysisID, AnalysisID>::const_iterator
+IdentifyingPassPtr TargetPassConfig::getPassSubstitution(AnalysisID ID) const {
+  DenseMap<AnalysisID, IdentifyingPassPtr>::const_iterator
     I = Impl->TargetPasses.find(ID);
   if (I == Impl->TargetPasses.end())
     return ID;
@@ -303,24 +310,39 @@ void TargetPassConfig::addPass(Pass *P) {
 
 /// Add a CodeGen pass at this point in the pipeline after checking for target
 /// and command line overrides.
+///
+/// addPass cannot return a pointer to the pass instance because it is internal
+/// to the PassManager and the instance we create here may already be freed.
 AnalysisID TargetPassConfig::addPass(AnalysisID PassID) {
-  AnalysisID TargetID = getPassSubstitution(PassID);
-  AnalysisID FinalID = overridePass(PassID, TargetID);
-  if (FinalID == 0)
-    return FinalID;
-
-  Pass *P = Pass::createPass(FinalID);
-  if (!P)
-    llvm_unreachable("Pass ID not registered");
-  addPass(P);
+  IdentifyingPassPtr TargetID = getPassSubstitution(PassID);
+  IdentifyingPassPtr FinalPtr = overridePass(PassID, TargetID);
+  if (!FinalPtr.isValid())
+    return 0;
+
+  Pass *P;
+  if (FinalPtr.isInstance())
+    P = FinalPtr.getInstance();
+  else {
+    P = Pass::createPass(FinalPtr.getID());
+    if (!P)
+      llvm_unreachable("Pass ID not registered");
+  }
+  AnalysisID FinalID = P->getPassID();
+  addPass(P); // Ends the lifetime of P.
+
   // Add the passes after the pass P if there is any.
-  for (SmallVector<std::pair<AnalysisID, AnalysisID>, 4>::iterator
+  for (SmallVector<std::pair<AnalysisID, IdentifyingPassPtr>, 4>::iterator
          I = Impl->InsertedPasses.begin(), E = Impl->InsertedPasses.end();
        I != E; ++I) {
     if ((*I).first == PassID) {
-      assert((*I).second && "Illegal Pass ID!");
-      Pass *NP = Pass::createPass((*I).second);
-      assert(NP && "Pass ID not registered");
+      assert((*I).second.isValid() && "Illegal Pass ID!");
+      Pass *NP;
+      if ((*I).second.isInstance())
+        NP = (*I).second.getInstance();
+      else {
+        NP = Pass::createPass((*I).second.getID());
+        assert(NP && "Pass ID not registered");
+      }
       addPass(NP);
     }
   }
@@ -687,14 +709,6 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
   addPass(&VirtRegRewriterID);
   printAndVerify("After Virtual Register Rewriter");
 
-  // FinalizeRegAlloc is convenient until MachineInstrBundles is more mature,
-  // but eventually, all users of it should probably be moved to addPostRA and
-  // it can go away.  Currently, it's the intended place for targets to run
-  // FinalizeMachineBundles, because passes other than MachineScheduling an
-  // RegAlloc itself may not be aware of bundles.
-  if (addFinalizeRegAlloc())
-    printAndVerify("After RegAlloc finalization");
-
   // Perform stack slot coloring and post-ra machine LICM.
   //
   // FIXME: Re-enable coloring with register when it's capable of adding
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index e5872df..959dd7d 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -824,6 +824,12 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
     // The instruction stream may change in the loop, so check BB->end()
     // directly.
     for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) {
+      // We might end up here again with a NULL iterator if we scavenged a
+      // register for which we inserted spill code for definition by what was
+      // originally the first instruction in BB.
+      if (I == MachineBasicBlock::iterator(NULL))
+        I = BB->begin();
+
       MachineInstr *MI = I;
       MachineBasicBlock::iterator J = llvm::next(I);
       MachineBasicBlock::iterator P = I == BB->begin() ?
@@ -883,8 +889,6 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
           "The register scavenger has an unexpected position");
         I = P;
         RS->unprocess(P);
-
-        // RS->skipTo(I == BB->begin() ? NULL : llvm::prior(I));
       } else
         ++I;
     }
diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp
index 0b6dc68..7fcfe9e 100644
--- a/lib/CodeGen/RegAllocBasic.cpp
+++ b/lib/CodeGen/RegAllocBasic.cpp
@@ -63,7 +63,7 @@ class RABasic : public MachineFunctionPass, public RegAllocBase
   MachineFunction *MF;
 
   // state
-  std::auto_ptr<Spiller> SpillerInstance;
+  OwningPtr<Spiller> SpillerInstance;
   std::priority_queue<LiveInterval*, std::vector<LiveInterval*>,
                       CompSpillWeight> Queue;
 
diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp
index 6d84176..9eed1fc 100644
--- a/lib/CodeGen/RegAllocGreedy.cpp
+++ b/lib/CodeGen/RegAllocGreedy.cpp
@@ -78,7 +78,7 @@ class RAGreedy : public MachineFunctionPass,
   LiveDebugVariables *DebugVars;
 
   // state
-  std::auto_ptr<Spiller> SpillerInstance;
+  OwningPtr<Spiller> SpillerInstance;
   std::priority_queue<std::pair<unsigned, unsigned> > Queue;
   unsigned NextCascade;
 
@@ -166,8 +166,8 @@ class RAGreedy : public MachineFunctionPass,
   };
 
   // splitting state.
-  std::auto_ptr<SplitAnalysis> SA;
-  std::auto_ptr<SplitEditor> SE;
+  OwningPtr<SplitAnalysis> SA;
+  OwningPtr<SplitEditor> SE;
 
   /// Cached per-block interference maps
   InterferenceCache IntfCache;
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index 607edac..15a88e2 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -34,6 +34,7 @@
 #include "llvm/CodeGen/RegAllocPBQP.h"
 #include "RegisterCoalescer.h"
 #include "Spiller.h"
+#include "llvm/ADT/OwningPtr.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/CodeGen/CalcSpillWeights.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
@@ -89,8 +90,8 @@ public:
   static char ID;
 
   /// Construct a PBQP register allocator.
-  RegAllocPBQP(std::auto_ptr<PBQPBuilder> b, char *cPassID=0)
-      : MachineFunctionPass(ID), builder(b), customPassID(cPassID) {
+  RegAllocPBQP(OwningPtr<PBQPBuilder> &b, char *cPassID=0)
+      : MachineFunctionPass(ID), builder(b.take()), customPassID(cPassID) {
     initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
     initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
     initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
@@ -121,7 +122,7 @@ private:
   typedef std::set<unsigned> RegSet;
 
 
-  std::auto_ptr<PBQPBuilder> builder;
+  OwningPtr<PBQPBuilder> builder;
 
   char *customPassID;
 
@@ -132,7 +133,7 @@ private:
   const MachineLoopInfo *loopInfo;
   MachineRegisterInfo *mri;
 
-  std::auto_ptr<Spiller> spiller;
+  OwningPtr<Spiller> spiller;
   LiveIntervals *lis;
   LiveStacks *lss;
   VirtRegMap *vrm;
@@ -186,16 +187,15 @@ unsigned PBQPRAProblem::getPRegForOption(unsigned vreg, unsigned option) const {
   return allowedSet[option - 1];
 }
 
-std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf,
-                                                const LiveIntervals *lis,
-                                                const MachineLoopInfo *loopInfo,
-                                                const RegSet &vregs) {
+PBQPRAProblem *PBQPBuilder::build(MachineFunction *mf, const LiveIntervals *lis,
+                                  const MachineLoopInfo *loopInfo,
+                                  const RegSet &vregs) {
 
   LiveIntervals *LIS = const_cast<LiveIntervals*>(lis);
   MachineRegisterInfo *mri = &mf->getRegInfo();
   const TargetRegisterInfo *tri = mf->getTarget().getRegisterInfo();
 
-  std::auto_ptr<PBQPRAProblem> p(new PBQPRAProblem());
+  OwningPtr<PBQPRAProblem> p(new PBQPRAProblem());
   PBQP::Graph &g = p->getGraph();
   RegSet pregs;
 
@@ -282,7 +282,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf,
     }
   }
 
-  return p;
+  return p.take();
 }
 
 void PBQPBuilder::addSpillCosts(PBQP::Vector &costVec,
@@ -311,13 +311,12 @@ void PBQPBuilder::addInterferenceCosts(
   }
 }
 
-std::auto_ptr<PBQPRAProblem> PBQPBuilderWithCoalescing::build(
-                                                MachineFunction *mf,
+PBQPRAProblem *PBQPBuilderWithCoalescing::build(MachineFunction *mf,
                                                 const LiveIntervals *lis,
                                                 const MachineLoopInfo *loopInfo,
                                                 const RegSet &vregs) {
 
-  std::auto_ptr<PBQPRAProblem> p = PBQPBuilder::build(mf, lis, loopInfo, vregs);
+  OwningPtr<PBQPRAProblem> p(PBQPBuilder::build(mf, lis, loopInfo, vregs));
   PBQP::Graph &g = p->getGraph();
 
   const TargetMachine &tm = mf->getTarget();
@@ -391,7 +390,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilderWithCoalescing::build(
     }
   }
 
-  return p;
+  return p.take();
 }
 
 void PBQPBuilderWithCoalescing::addPhysRegCoalesce(PBQP::Vector &costVec,
@@ -584,8 +583,8 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
     while (!pbqpAllocComplete) {
       DEBUG(dbgs() << "  PBQP Regalloc round " << round << ":\n");
 
-      std::auto_ptr<PBQPRAProblem> problem =
-        builder->build(mf, lis, loopInfo, vregsToAlloc);
+      OwningPtr<PBQPRAProblem> problem(
+        builder->build(mf, lis, loopInfo, vregsToAlloc));
 
 #ifndef NDEBUG
       if (pbqpDumpGraphs) {
@@ -621,18 +620,18 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
 }
 
 FunctionPass* llvm::createPBQPRegisterAllocator(
-                                           std::auto_ptr<PBQPBuilder> builder,
+                                           OwningPtr<PBQPBuilder> &builder,
                                            char *customPassID) {
   return new RegAllocPBQP(builder, customPassID);
 }
 
 FunctionPass* llvm::createDefaultPBQPRegisterAllocator() {
-  if (pbqpCoalescing) {
-    return createPBQPRegisterAllocator(
-             std::auto_ptr<PBQPBuilder>(new PBQPBuilderWithCoalescing()));
-  } // else
-  return createPBQPRegisterAllocator(
-           std::auto_ptr<PBQPBuilder>(new PBQPBuilder()));
+  OwningPtr<PBQPBuilder> Builder;
+  if (pbqpCoalescing)
+    Builder.reset(new PBQPBuilderWithCoalescing());
+  else
+    Builder.reset(new PBQPBuilder());
+  return createPBQPRegisterAllocator(Builder);
 }
 
 #undef DEBUG_TYPE
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
index 07ace7a..f82ccbe 100644
--- a/lib/CodeGen/RegisterScavenging.cpp
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -154,14 +154,13 @@ void RegScavenger::unprocess() {
   assert(Tracking && "Cannot unprocess because we're not tracking");
 
   MachineInstr *MI = MBBI;
-  if (MI->isDebugValue())
-    return;
-
-  determineKillsAndDefs();
+  if (!MI->isDebugValue()) {
+    determineKillsAndDefs();
 
-  // Commit the changes.
-  setUsed(KillRegs);
-  setUnused(DefRegs);
+    // Commit the changes.
+    setUsed(KillRegs);
+    setUnused(DefRegs);
+  }
 
   if (MBBI == MBB->begin()) {
     MBBI = MachineBasicBlock::iterator(NULL);
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index 71e7a21..e4da6a4 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -262,6 +262,9 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
       if (UseOp < 0)
         Dep = SDep(SU, SDep::Artificial);
       else {
+        // Set the hasPhysRegDefs only for physreg defs that have a use within
+        // the scheduling region.
+        SU->hasPhysRegDefs = true;
         Dep = SDep(SU, SDep::Data, *Alias);
         RegUse = UseSU->getInstr();
         Dep.setMinLatency(
@@ -318,6 +321,7 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
   }
 
   if (!MO.isDef()) {
+    SU->hasPhysRegUses = true;
     // Either insert a new Reg2SUnits entry with an empty SUnits list, or
     // retrieve the existing SUnits list for this register's uses.
     // Push this SUnit on the use list.
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index eb16095..2e09ec0 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -205,6 +205,7 @@ namespace {
     SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
     SDValue visitCTPOP(SDNode *N);
     SDValue visitSELECT(SDNode *N);
+    SDValue visitVSELECT(SDNode *N);
     SDValue visitSELECT_CC(SDNode *N);
     SDValue visitSETCC(SDNode *N);
     SDValue visitSIGN_EXTEND(SDNode *N);
@@ -243,7 +244,6 @@ namespace {
     SDValue visitCONCAT_VECTORS(SDNode *N);
     SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
     SDValue visitVECTOR_SHUFFLE(SDNode *N);
-    SDValue visitMEMBARRIER(SDNode *N);
 
     SDValue XformToShuffleWithZero(SDNode *N);
     SDValue ReassociateOps(unsigned Opc, DebugLoc DL, SDValue LHS, SDValue RHS);
@@ -1127,6 +1127,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
   case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
   case ISD::CTPOP:              return visitCTPOP(N);
   case ISD::SELECT:             return visitSELECT(N);
+  case ISD::VSELECT:            return visitVSELECT(N);
   case ISD::SELECT_CC:          return visitSELECT_CC(N);
   case ISD::SETCC:              return visitSETCC(N);
   case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
@@ -1165,7 +1166,6 @@ SDValue DAGCombiner::visit(SDNode *N) {
   case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
   case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
   case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
-  case ISD::MEMBARRIER:         return visitMEMBARRIER(N);
   }
   return SDValue();
 }
@@ -4164,6 +4164,46 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
   return SDValue();
 }
 
+SDValue DAGCombiner::visitVSELECT(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  SDValue N2 = N->getOperand(2);
+  DebugLoc DL = N->getDebugLoc();
+
+  // Canonicalize integer abs.
+  // vselect (setg[te] X,  0),  X, -X ->
+  // vselect (setgt    X, -1),  X, -X ->
+  // vselect (setl[te] X,  0), -X,  X ->
+  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
+  if (N0.getOpcode() == ISD::SETCC) {
+    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
+    ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+    bool isAbs = false;
+    bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
+
+    if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
+         (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
+        N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
+      isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
+    else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
+             N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
+      isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
+
+    if (isAbs) {
+      EVT VT = LHS.getValueType();
+      SDValue Shift = DAG.getNode(
+          ISD::SRA, DL, VT, LHS,
+          DAG.getConstant(VT.getScalarType().getSizeInBits() - 1, VT));
+      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
+      AddToWorkList(Shift.getNode());
+      AddToWorkList(Add.getNode());
+      return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
+    }
+  }
+
+  return SDValue();
+}
+
 SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
@@ -4453,7 +4493,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
   if (N0.getOpcode() == ISD::SETCC) {
     // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
     // Only do this before legalize for now.
-    if (VT.isVector() && !LegalOperations) {
+    if (VT.isVector() && !LegalOperations &&
+        TLI.getBooleanContents(true) == 
+          TargetLowering::ZeroOrNegativeOneBooleanContent) {
       EVT N0VT = N0.getOperand(0).getValueType();
       // On some architectures (such as SSE/NEON/etc) the SETCC result type is
       // of the same size as the compared operands. Only optimize sext(setcc())
@@ -7110,25 +7152,40 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
     assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
            BasePtr.getNode() && "Expected BasePtr operand");
 
-    APInt OV =
-      cast<ConstantSDNode>(Offset)->getAPIntValue();
-    if (AM == ISD::PRE_DEC)
-      OV = -OV;
+    // We need to replace ptr0 in the following expression:
+    //   x0 * offset0 + y0 * ptr0 = t0
+    // knowing that
+    //   x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
+    // 
+    // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
+    // indexed load/store and the expression that needs to be re-written.
+    //
+    // Therefore, we have:
+    //   t0 = (x0 * offset0 - x1 * y0 * y1 * offset1) + (y0 * y1) * t1
 
     ConstantSDNode *CN =
       cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
-    APInt CNV = CN->getAPIntValue();
-    if (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1)
-      CNV += OV;
-    else
-      CNV -= OV;
+    int X0, X1, Y0, Y1;
+    APInt Offset0 = CN->getAPIntValue();
+    APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
 
-    SDValue NewOp1 = Result.getValue(isLoad ? 1 : 0);
-    SDValue NewOp2 = DAG.getConstant(CNV, CN->getValueType(0));
-    if (OffsetIdx == 0)
-      std::swap(NewOp1, NewOp2);
+    X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
+    Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
+    X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
+    Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
 
-    SDValue NewUse = DAG.getNode(OtherUses[i]->getOpcode(),
+    unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
+
+    APInt CNV = Offset0;
+    if (X0 < 0) CNV = -CNV;
+    if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
+    else CNV = CNV - Offset1;
+
+    // We can now generate the new expression.
+    SDValue NewOp1 = DAG.getConstant(CNV, CN->getValueType(0));
+    SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);
+
+    SDValue NewUse = DAG.getNode(Opcode,
                                  OtherUses[i]->getDebugLoc(),
                                  OtherUses[i]->getValueType(0), NewOp1, NewOp2);
     DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
@@ -9065,6 +9122,51 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
   if (ISD::allOperandsUndef(N))
     return DAG.getUNDEF(N->getValueType(0));
 
+  // Type legalization of vectors and DAG canonicalization of VECTOR_SHUFFLE
+  // nodes often generate nop CONCAT_VECTORS nodes.
+  // Scan the CONCAT_VECTORS operands and look for CONCAT operations that
+  // place the incoming vectors at the exact same location.
+  SDValue SingleSource = SDValue();
+  unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements();
+
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+    SDValue Op = N->getOperand(i);
+
+    if (Op.getOpcode() == ISD::UNDEF)
+      continue;
+
+    // Check if this is the identity extract:
+    if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
+      return SDValue();
+
+    // Find the single incoming vector for the extract_subvector.
+    if (SingleSource.getNode()) {
+      if (Op.getOperand(0) != SingleSource)
+        return SDValue();
+    } else {
+      SingleSource = Op.getOperand(0);
+
+      // Check the source type is the same as the type of the result.
+      // If not, this concat may extend the vector, so we can not
+      // optimize it away.
+      if (SingleSource.getValueType() != N->getValueType(0))
+        return SDValue();
+    }
+
+    unsigned IdentityIndex = i * PartNumElem;
+    ConstantSDNode *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+    // The extract index must be constant.
+    if (!CS)
+      return SDValue();
+    
+    // Check that we are reading from the identity index.
+    if (CS->getZExtValue() != IdentityIndex)
+      return SDValue();
+  }
+
+  if (SingleSource.getNode())
+    return SingleSource;
+  
   return SDValue();
 }
 
@@ -9125,6 +9227,44 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
   return SDValue();
 }
 
+// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat.
+static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
+  EVT VT = N->getValueType(0);
+  unsigned NumElts = VT.getVectorNumElements();
+
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
+
+  SmallVector<SDValue, 4> Ops;
+  EVT ConcatVT = N0.getOperand(0).getValueType();
+  unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
+  unsigned NumConcats = NumElts / NumElemsPerConcat;
+
+  // Look at every vector that's inserted. We're looking for exact
+  // subvector-sized copies from a concatenated vector
+  for (unsigned I = 0; I != NumConcats; ++I) {
+    // Make sure we're dealing with a copy.
+    unsigned Begin = I * NumElemsPerConcat;
+    if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0)
+      return SDValue();
+
+    for (unsigned J = 1; J != NumElemsPerConcat; ++J) {
+      if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J))
+        return SDValue();
+    }
+
+    unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat;
+    if (FirstElt < N0.getNumOperands())
+      Ops.push_back(N0.getOperand(FirstElt));
+    else
+      Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands()));
+  }
+
+  return DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT, Ops.data(),
+                     Ops.size());
+}
+
 SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
   EVT VT = N->getValueType(0);
   unsigned NumElts = VT.getVectorNumElements();
@@ -9226,6 +9366,17 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
     }
   }
 
+  if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
+      Level < AfterLegalizeVectorOps &&
+      (N1.getOpcode() == ISD::UNDEF ||
+      (N1.getOpcode() == ISD::CONCAT_VECTORS &&
+       N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
+    SDValue V = partitionShuffleOfConcats(N, DAG);
+
+    if (V.getNode())
+      return V;
+  }
+
   // If this shuffle node is simply a swizzle of another shuffle node,
   // and it reverses the swizzle of the previous shuffle then we can
   // optimize shuffle(shuffle(x, undef), undef) -> x.
@@ -9262,59 +9413,6 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
   return SDValue();
 }
 
-SDValue DAGCombiner::visitMEMBARRIER(SDNode* N) {
-  if (!TLI.getShouldFoldAtomicFences())
-    return SDValue();
-
-  SDValue atomic = N->getOperand(0);
-  switch (atomic.getOpcode()) {
-    case ISD::ATOMIC_CMP_SWAP:
-    case ISD::ATOMIC_SWAP:
-    case ISD::ATOMIC_LOAD_ADD:
-    case ISD::ATOMIC_LOAD_SUB:
-    case ISD::ATOMIC_LOAD_AND:
-    case ISD::ATOMIC_LOAD_OR:
-    case ISD::ATOMIC_LOAD_XOR:
-    case ISD::ATOMIC_LOAD_NAND:
-    case ISD::ATOMIC_LOAD_MIN:
-    case ISD::ATOMIC_LOAD_MAX:
-    case ISD::ATOMIC_LOAD_UMIN:
-    case ISD::ATOMIC_LOAD_UMAX:
-      break;
-    default:
-      return SDValue();
-  }
-
-  SDValue fence = atomic.getOperand(0);
-  if (fence.getOpcode() != ISD::MEMBARRIER)
-    return SDValue();
-
-  switch (atomic.getOpcode()) {
-    case ISD::ATOMIC_CMP_SWAP:
-      return SDValue(DAG.UpdateNodeOperands(atomic.getNode(),
-                                    fence.getOperand(0),
-                                    atomic.getOperand(1), atomic.getOperand(2),
-                                    atomic.getOperand(3)), atomic.getResNo());
-    case ISD::ATOMIC_SWAP:
-    case ISD::ATOMIC_LOAD_ADD:
-    case ISD::ATOMIC_LOAD_SUB:
-    case ISD::ATOMIC_LOAD_AND:
-    case ISD::ATOMIC_LOAD_OR:
-    case ISD::ATOMIC_LOAD_XOR:
-    case ISD::ATOMIC_LOAD_NAND:
-    case ISD::ATOMIC_LOAD_MIN:
-    case ISD::ATOMIC_LOAD_MAX:
-    case ISD::ATOMIC_LOAD_UMIN:
-    case ISD::ATOMIC_LOAD_UMAX:
-      return SDValue(DAG.UpdateNodeOperands(atomic.getNode(),
-                                    fence.getOperand(0),
-                                    atomic.getOperand(1), atomic.getOperand(2)),
-                     atomic.getResNo());
-    default:
-      return SDValue();
-  }
-}
-
 /// XformToShuffleWithZero - Returns a vector_shuffle if it able to transform
 /// an AND to a vector_shuffle with the destination vector and a zero vector.
 /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 9ac738e..288499a 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -1505,3 +1505,61 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
 
   return true;
 }
+
+bool FastISel::tryToFoldLoad(const LoadInst *LI, const Instruction *FoldInst) {
+  assert(LI->hasOneUse() &&
+      "tryToFoldLoad expected a LoadInst with a single use");
+  // We know that the load has a single use, but don't know what it is.  If it
+  // isn't one of the folded instructions, then we can't succeed here.  Handle
+  // this by scanning the single-use users of the load until we get to FoldInst.
+  unsigned MaxUsers = 6;  // Don't scan down huge single-use chains of instrs.
+
+  const Instruction *TheUser = LI->use_back();
+  while (TheUser != FoldInst &&   // Scan up until we find FoldInst.
+         // Stay in the right block.
+         TheUser->getParent() == FoldInst->getParent() &&
+         --MaxUsers) {  // Don't scan too far.
+    // If there are multiple or no uses of this instruction, then bail out.
+    if (!TheUser->hasOneUse())
+      return false;
+
+    TheUser = TheUser->use_back();
+  }
+
+  // If we didn't find the fold instruction, then we failed to collapse the
+  // sequence.
+  if (TheUser != FoldInst)
+    return false;
+
+  // Don't try to fold volatile loads.  Target has to deal with alignment
+  // constraints.
+  if (LI->isVolatile())
+    return false;
+
+  // Figure out which vreg this is going into.  If there is no assigned vreg yet
+  // then there actually was no reference to it.  Perhaps the load is referenced
+  // by a dead instruction.
+  unsigned LoadReg = getRegForValue(LI);
+  if (LoadReg == 0)
+    return false;
+
+  // We can't fold if this vreg has no uses or more than one use.  Multiple uses
+  // may mean that the instruction got lowered to multiple MIs, or the use of
+  // the loaded value ended up being multiple operands of the result.
+  if (!MRI.hasOneUse(LoadReg))
+    return false;
+
+  MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(LoadReg);
+  MachineInstr *User = &*RI;
+
+  // Set the insertion point properly.  Folding the load can cause generation of
+  // other random instructions (like sign extends) for addressing modes; make
+  // sure they get inserted in a logical place before the new instruction.
+  FuncInfo.InsertPt = User;
+  FuncInfo.MBB = User->getParent();
+
+  // Ask the target to try folding the load.
+  return tryToFoldLoadIntoMI(User, RI.getOperandNo(), LI);
+}
+
+
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 51cc254..2a1d8c2 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -2759,8 +2759,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
     Results.push_back(DAG.getConstant(0, MVT::i32));
     Results.push_back(Node->getOperand(0));
     break;
-  case ISD::ATOMIC_FENCE:
-  case ISD::MEMBARRIER: {
+  case ISD::ATOMIC_FENCE: {
     // If the target didn't lower this, lower it to '__sync_synchronize()' call
     // FIXME: handle "fence singlethread" more efficiently.
     TargetLowering::ArgListTy Args;
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index d19c13b..cd2f060 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -777,7 +777,6 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
                           Res = PromoteIntOp_CONVERT_RNDSAT(N); break;
   case ISD::INSERT_VECTOR_ELT:
                           Res = PromoteIntOp_INSERT_VECTOR_ELT(N, OpNo);break;
-  case ISD::MEMBARRIER:   Res = PromoteIntOp_MEMBARRIER(N); break;
   case ISD::SCALAR_TO_VECTOR:
                           Res = PromoteIntOp_SCALAR_TO_VECTOR(N); break;
   case ISD::VSELECT:
@@ -961,17 +960,6 @@ SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N,
                                 N->getOperand(1), Idx), 0);
 }
 
-SDValue DAGTypeLegalizer::PromoteIntOp_MEMBARRIER(SDNode *N) {
-  SDValue NewOps[6];
-  DebugLoc dl = N->getDebugLoc();
-  NewOps[0] = N->getOperand(0);
-  for (unsigned i = 1; i < array_lengthof(NewOps); ++i) {
-    SDValue Flag = GetPromotedInteger(N->getOperand(i));
-    NewOps[i] = DAG.getZeroExtendInReg(Flag, dl, MVT::i1);
-  }
-  return SDValue(DAG.UpdateNodeOperands(N, NewOps, array_lengthof(NewOps)), 0);
-}
-
 SDValue DAGTypeLegalizer::PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N) {
   // Integer SCALAR_TO_VECTOR operands are implicitly truncated, so just promote
   // the operand in place.
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 54ea926..1c4274a 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -270,7 +270,6 @@ private:
   SDValue PromoteIntOp_EXTRACT_ELEMENT(SDNode *N);
   SDValue PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N);
   SDValue PromoteIntOp_CONCAT_VECTORS(SDNode *N);
-  SDValue PromoteIntOp_MEMBARRIER(SDNode *N);
   SDValue PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N);
   SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo);
   SDValue PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo);
@@ -582,6 +581,7 @@ private:
   SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
   SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo);
   SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N);
+  SDValue SplitVecOp_TRUNCATE(SDNode *N);
   SDValue SplitVecOp_VSETCC(SDNode *N);
   SDValue SplitVecOp_FP_ROUND(SDNode *N);
 
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 5ec8535..04c6bfd 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1046,6 +1046,7 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
     case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break;
     case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break;
     case ISD::CONCAT_VECTORS:    Res = SplitVecOp_CONCAT_VECTORS(N); break;
+    case ISD::TRUNCATE:          Res = SplitVecOp_TRUNCATE(N); break;
     case ISD::FP_ROUND:          Res = SplitVecOp_FP_ROUND(N); break;
     case ISD::STORE:
       Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo);
@@ -1062,7 +1063,6 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
     case ISD::SINT_TO_FP:
     case ISD::UINT_TO_FP:
     case ISD::FTRUNC:
-    case ISD::TRUNCATE:
     case ISD::SIGN_EXTEND:
     case ISD::ZERO_EXTEND:
     case ISD::ANY_EXTEND:
@@ -1272,8 +1272,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
 SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) {
   DebugLoc DL = N->getDebugLoc();
 
-  // The input operands all must have the same type, and we know the result the
-  // result type is valid.  Convert this to a buildvector which extracts all the
+  // The input operands all must have the same type, and we know the result
+  // type is valid.  Convert this to a buildvector which extracts all the
   // input elements.
   // TODO: If the input elements are power-two vectors, we could convert this to
   // a new CONCAT_VECTORS node with elements that are half-wide.
@@ -1293,6 +1293,66 @@ SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) {
                      &Elts[0], Elts.size());
 }
 
+SDValue DAGTypeLegalizer::SplitVecOp_TRUNCATE(SDNode *N) {
+  // The result type is legal, but the input type is illegal.  If splitting
+  // ends up with the result type of each half still being legal, just
+  // do that.  If, however, that would result in an illegal result type,
+  // we can try to get more clever with power-two vectors. Specifically,
+  // split the input type, but also widen the result element size, then
+  // concatenate the halves and truncate again.  For example, consider a target
+  // where v8i8 is legal and v8i32 is not (ARM, which doesn't have 256-bit
+  // vectors). To perform a "%res = v8i8 trunc v8i32 %in" we do:
+  //   %inlo = v4i32 extract_subvector %in, 0
+  //   %inhi = v4i32 extract_subvector %in, 4
+  //   %lo16 = v4i16 trunc v4i32 %inlo
+  //   %hi16 = v4i16 trunc v4i32 %inhi
+  //   %in16 = v8i16 concat_vectors v4i16 %lo16, v4i16 %hi16
+  //   %res = v8i8 trunc v8i16 %in16
+  //
+  // Without this transform, the original truncate would end up being
+  // scalarized, which is pretty much always a last resort.
+  SDValue InVec = N->getOperand(0);
+  EVT InVT = InVec->getValueType(0);
+  EVT OutVT = N->getValueType(0);
+  unsigned NumElements = OutVT.getVectorNumElements();
+  // Widening should have already made sure this is a power-two vector
+  // if we're trying to split it at all. assert() that's true, just in case.
+  assert(!(NumElements & 1) && "Splitting vector, but not in half!");
+
+  unsigned InElementSize = InVT.getVectorElementType().getSizeInBits();
+  unsigned OutElementSize = OutVT.getVectorElementType().getSizeInBits();
+
+  // If the input elements are at most twice the width of the result elements,
+  // just use the normal splitting. Our trick only works if there's room
+  // to split more than once.
+  if (InElementSize <= OutElementSize * 2)
+    return SplitVecOp_UnaryOp(N);
+  DebugLoc DL = N->getDebugLoc();
+
+  // Extract the halves of the input via extract_subvector.
+  EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
+                                 InVT.getVectorElementType(), NumElements/2);
+  SDValue InLoVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, InVec,
+                                DAG.getIntPtrConstant(0));
+  SDValue InHiVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, InVec,
+                                DAG.getIntPtrConstant(NumElements/2));
+  // Truncate them to 1/2 the element size.
+  EVT HalfElementVT = EVT::getIntegerVT(*DAG.getContext(), InElementSize/2);
+  EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT,
+                                NumElements/2);
+  SDValue HalfLo = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, InLoVec);
+  SDValue HalfHi = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, InHiVec);
+  // Concatenate them to get the full intermediate truncation result.
+  EVT InterVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, NumElements);
+  SDValue InterVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InterVT, HalfLo,
+                                 HalfHi);
+  // Now finish up by truncating all the way down to the original result
+  // type. This should normally be something that ends up being legal directly,
+  // but in theory if a target has very wide vectors and an annoyingly
+  // restricted set of legal types, this split can chain to build things up.
+  return DAG.getNode(ISD::TRUNCATE, DL, OutVT, InterVec);
+}
+
 SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) {
   assert(N->getValueType(0).isVector() &&
          N->getOperand(0).getValueType().isVector() &&
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 6424431..15235c8 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2785,7 +2785,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, EVT VT,
   }
 
   // Handle the scalar case first.
-  if (Outputs.size() == 1)
+  if (Scalar1 && Scalar2)
     return Outputs.back();
 
   // Otherwise build a big vector out of the scalar elements we generated.
@@ -5252,14 +5252,14 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
 MachineSDNode *
 SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT) {
   SDVTList VTs = getVTList(VT);
-  return getMachineNode(Opcode, dl, VTs, 0, 0);
+  return getMachineNode(Opcode, dl, VTs, None);
 }
 
 MachineSDNode *
 SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, SDValue Op1) {
   SDVTList VTs = getVTList(VT);
   SDValue Ops[] = { Op1 };
-  return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+  return getMachineNode(Opcode, dl, VTs, Ops);
 }
 
 MachineSDNode *
@@ -5267,7 +5267,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT,
                              SDValue Op1, SDValue Op2) {
   SDVTList VTs = getVTList(VT);
   SDValue Ops[] = { Op1, Op2 };
-  return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+  return getMachineNode(Opcode, dl, VTs, Ops);
 }
 
 MachineSDNode *
@@ -5275,20 +5275,20 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT,
                              SDValue Op1, SDValue Op2, SDValue Op3) {
   SDVTList VTs = getVTList(VT);
   SDValue Ops[] = { Op1, Op2, Op3 };
-  return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+  return getMachineNode(Opcode, dl, VTs, Ops);
 }
 
 MachineSDNode *
 SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT,
-                             const SDValue *Ops, unsigned NumOps) {
+                             ArrayRef<SDValue> Ops) {
   SDVTList VTs = getVTList(VT);
-  return getMachineNode(Opcode, dl, VTs, Ops, NumOps);
+  return getMachineNode(Opcode, dl, VTs, Ops);
 }
 
 MachineSDNode *
 SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2) {
   SDVTList VTs = getVTList(VT1, VT2);
-  return getMachineNode(Opcode, dl, VTs, 0, 0);
+  return getMachineNode(Opcode, dl, VTs, None);
 }
 
 MachineSDNode *
@@ -5296,7 +5296,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
                              EVT VT1, EVT VT2, SDValue Op1) {
   SDVTList VTs = getVTList(VT1, VT2);
   SDValue Ops[] = { Op1 };
-  return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+  return getMachineNode(Opcode, dl, VTs, Ops);
 }
 
 MachineSDNode *
@@ -5304,7 +5304,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
                              EVT VT1, EVT VT2, SDValue Op1, SDValue Op2) {
   SDVTList VTs = getVTList(VT1, VT2);
   SDValue Ops[] = { Op1, Op2 };
-  return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+  return getMachineNode(Opcode, dl, VTs, Ops);
 }
 
 MachineSDNode *
@@ -5313,15 +5313,15 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
                              SDValue Op2, SDValue Op3) {
   SDVTList VTs = getVTList(VT1, VT2);
   SDValue Ops[] = { Op1, Op2, Op3 };
-  return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+  return getMachineNode(Opcode, dl, VTs, Ops);
 }
 
 MachineSDNode *
 SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
                              EVT VT1, EVT VT2,
-                             const SDValue *Ops, unsigned NumOps) {
+                             ArrayRef<SDValue> Ops) {
   SDVTList VTs = getVTList(VT1, VT2);
-  return getMachineNode(Opcode, dl, VTs, Ops, NumOps);
+  return getMachineNode(Opcode, dl, VTs, Ops);
 }
 
 MachineSDNode *
@@ -5330,7 +5330,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
                              SDValue Op1, SDValue Op2) {
   SDVTList VTs = getVTList(VT1, VT2, VT3);
   SDValue Ops[] = { Op1, Op2 };
-  return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+  return getMachineNode(Opcode, dl, VTs, Ops);
 }
 
 MachineSDNode *
@@ -5339,39 +5339,41 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
                              SDValue Op1, SDValue Op2, SDValue Op3) {
   SDVTList VTs = getVTList(VT1, VT2, VT3);
   SDValue Ops[] = { Op1, Op2, Op3 };
-  return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops));
+  return getMachineNode(Opcode, dl, VTs, Ops);
 }
 
 MachineSDNode *
 SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
                              EVT VT1, EVT VT2, EVT VT3,
-                             const SDValue *Ops, unsigned NumOps) {
+                             ArrayRef<SDValue> Ops) {
   SDVTList VTs = getVTList(VT1, VT2, VT3);
-  return getMachineNode(Opcode, dl, VTs, Ops, NumOps);
+  return getMachineNode(Opcode, dl, VTs, Ops);
 }
 
 MachineSDNode *
 SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1,
                              EVT VT2, EVT VT3, EVT VT4,
-                             const SDValue *Ops, unsigned NumOps) {
+                             ArrayRef<SDValue> Ops) {
   SDVTList VTs = getVTList(VT1, VT2, VT3, VT4);
-  return getMachineNode(Opcode, dl, VTs, Ops, NumOps);
+  return getMachineNode(Opcode, dl, VTs, Ops);
 }
 
 MachineSDNode *
 SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
                              ArrayRef<EVT> ResultTys,
-                             const SDValue *Ops, unsigned NumOps) {
+                             ArrayRef<SDValue> Ops) {
   SDVTList VTs = getVTList(&ResultTys[0], ResultTys.size());
-  return getMachineNode(Opcode, dl, VTs, Ops, NumOps);
+  return getMachineNode(Opcode, dl, VTs, Ops);
 }
 
 MachineSDNode *
 SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs,
-                             const SDValue *Ops, unsigned NumOps) {
+                             ArrayRef<SDValue> OpsArray) {
   bool DoCSE = VTs.VTs[VTs.NumVTs-1] != MVT::Glue;
   MachineSDNode *N;
   void *IP = 0;
+  const SDValue *Ops = OpsArray.data();
+  unsigned NumOps = OpsArray.size();
 
   if (DoCSE) {
     FoldingSetNodeID ID;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index ce40cd6..67db211 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -314,7 +314,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
     } else {
       Ctx.emitError(ErrMsg);
     }
-    report_fatal_error("Cannot handle scalar-to-vector conversion!");
+    return DAG.getUNDEF(ValueVT);
   }
 
   if (ValueVT.getVectorNumElements() == 1 &&
@@ -5034,6 +5034,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     setValue(&I, Res);
     return 0;
   }
+  case Intrinsic::annotation:
+  case Intrinsic::ptr_annotation:
+    // Drop the intrinsic, but forward the value
+    setValue(&I, getValue(I.getOperand(0)));
+    return 0;
   case Intrinsic::var_annotation:
     // Discard annotate attributes
     return 0;
@@ -5232,6 +5237,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
     Entry.isSRet = true;
     Entry.isNest = false;
     Entry.isByVal = false;
+    Entry.isReturned = false;
     Entry.Alignment = Align;
     Args.push_back(Entry);
     RetTy = Type::getVoidTy(FTy->getContext());
@@ -5249,13 +5255,14 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
     Entry.Node = ArgNode; Entry.Ty = V->getType();
 
     unsigned attrInd = i - CS.arg_begin() + 1;
-    Entry.isSExt  = CS.paramHasAttr(attrInd, Attribute::SExt);
-    Entry.isZExt  = CS.paramHasAttr(attrInd, Attribute::ZExt);
-    Entry.isInReg = CS.paramHasAttr(attrInd, Attribute::InReg);
-    Entry.isSRet  = CS.paramHasAttr(attrInd, Attribute::StructRet);
-    Entry.isNest  = CS.paramHasAttr(attrInd, Attribute::Nest);
-    Entry.isByVal = CS.paramHasAttr(attrInd, Attribute::ByVal);
-    Entry.Alignment = CS.getParamAlignment(attrInd);
+    Entry.isSExt     = CS.paramHasAttr(attrInd, Attribute::SExt);
+    Entry.isZExt     = CS.paramHasAttr(attrInd, Attribute::ZExt);
+    Entry.isInReg    = CS.paramHasAttr(attrInd, Attribute::InReg);
+    Entry.isSRet     = CS.paramHasAttr(attrInd, Attribute::StructRet);
+    Entry.isNest     = CS.paramHasAttr(attrInd, Attribute::Nest);
+    Entry.isByVal    = CS.paramHasAttr(attrInd, Attribute::ByVal);
+    Entry.isReturned = CS.paramHasAttr(attrInd, Attribute::Returned);
+    Entry.Alignment  = CS.getParamAlignment(attrInd);
     Args.push_back(Entry);
   }
 
@@ -6169,10 +6176,17 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
           MatchedRegs.RegVTs.push_back(RegVT);
           MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo();
           for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag);
-               i != e; ++i)
-            MatchedRegs.Regs.push_back
-              (RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT)));
-
+               i != e; ++i) {
+            if (const TargetRegisterClass *RC = TLI.getRegClassFor(RegVT))
+              MatchedRegs.Regs.push_back(RegInfo.createVirtualRegister(RC));
+            else {
+              LLVMContext &Ctx = *DAG.getContext();
+              Ctx.emitError(CS.getInstruction(), "inline asm error: This value"
+                            " type register class is not natively supported!");
+              report_fatal_error("inline asm error: This value type register "
+                                 "class is not natively supported!");
+            }
+          }
           // Use the produced MatchedRegs object to
           MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
                                     Chain, &Flag, CS.getInstruction());
@@ -6389,6 +6403,28 @@ void SelectionDAGBuilder::visitVACopy(const CallInst &I) {
 /// migrated to using LowerCall, this hook should be integrated into SDISel.
 std::pair<SDValue, SDValue>
 TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
+  // Handle the incoming return values from the call.
+  CLI.Ins.clear();
+  SmallVector<EVT, 4> RetTys;
+  ComputeValueVTs(*this, CLI.RetTy, RetTys);
+  for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
+    EVT VT = RetTys[I];
+    MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT);
+    unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT);
+    for (unsigned i = 0; i != NumRegs; ++i) {
+      ISD::InputArg MyFlags;
+      MyFlags.VT = RegisterVT;
+      MyFlags.Used = CLI.IsReturnValueUsed;
+      if (CLI.RetSExt)
+        MyFlags.Flags.setSExt();
+      if (CLI.RetZExt)
+        MyFlags.Flags.setZExt();
+      if (CLI.IsInReg)
+        MyFlags.Flags.setInReg();
+      CLI.Ins.push_back(MyFlags);
+    }
+  }
+
   // Handle all of the outgoing arguments.
   CLI.Outs.clear();
   CLI.OutVals.clear();
@@ -6442,6 +6478,26 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
       else if (Args[i].isZExt)
         ExtendKind = ISD::ZERO_EXTEND;
 
+      // Conservatively only handle 'returned' on non-vectors for now
+      if (Args[i].isReturned && !Op.getValueType().isVector()) {
+        assert(CLI.RetTy == Args[i].Ty && RetTys.size() == NumValues &&
+               "unexpected use of 'returned'");
+        // Before passing 'returned' to the target lowering code, ensure that
+        // either the register MVT and the actual EVT are the same size or that
+        // the return value and argument are extended in the same way; in these
+        // cases it's safe to pass the argument register value unchanged as the
+        // return register value (although it's at the target's option whether
+        // to do so)
+        // TODO: allow code generation to take advantage of partially preserved
+        // registers rather than clobbering the entire register when the
+        // parameter extension method is not compatible with the return
+        // extension method
+        if ((NumParts * PartVT.getSizeInBits() == VT.getSizeInBits()) ||
+            (ExtendKind != ISD::ANY_EXTEND &&
+             CLI.RetSExt == Args[i].isSExt && CLI.RetZExt == Args[i].isZExt))
+        Flags.setReturned();
+      }
+
       getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts,
                      PartVT, CLI.CS ? CLI.CS->getInstruction() : 0, ExtendKind);
 
@@ -6461,28 +6517,6 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
     }
   }
 
-  // Handle the incoming return values from the call.
-  CLI.Ins.clear();
-  SmallVector<EVT, 4> RetTys;
-  ComputeValueVTs(*this, CLI.RetTy, RetTys);
-  for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
-    EVT VT = RetTys[I];
-    MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT);
-    unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT);
-    for (unsigned i = 0; i != NumRegs; ++i) {
-      ISD::InputArg MyFlags;
-      MyFlags.VT = RegisterVT;
-      MyFlags.Used = CLI.IsReturnValueUsed;
-      if (CLI.RetSExt)
-        MyFlags.Flags.setSExt();
-      if (CLI.RetZExt)
-        MyFlags.Flags.setZExt();
-      if (CLI.IsInReg)
-        MyFlags.Flags.setInReg();
-      CLI.Ins.push_back(MyFlags);
-    }
-  }
-
   SmallVector<SDValue, 4> InVals;
   CLI.Chain = LowerCall(CLI, InVals);
 
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 3b5823b..47b0391 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -54,7 +54,6 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
   case ISD::DELETED_NODE:               return "<<Deleted Node!>>";
 #endif
   case ISD::PREFETCH:                   return "Prefetch";
-  case ISD::MEMBARRIER:                 return "MemBarrier";
   case ISD::ATOMIC_FENCE:               return "AtomicFence";
   case ISD::ATOMIC_CMP_SWAP:            return "AtomicCmpSwap";
   case ISD::ATOMIC_SWAP:                return "AtomicSwap";
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index eeea9e4..e21f26e 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -63,12 +63,16 @@ STATISTIC(NumFastIselSuccess, "Number of instructions fast isel selected");
 STATISTIC(NumFastIselBlocks, "Number of blocks selected entirely by fast isel");
 STATISTIC(NumDAGBlocks, "Number of blocks selected using DAG");
 STATISTIC(NumDAGIselRetries,"Number of times dag isel has to try another path");
+STATISTIC(NumEntryBlocks, "Number of entry blocks encountered");
+STATISTIC(NumFastIselFailLowerArguments,
+          "Number of entry blocks where fast isel failed to lower arguments");
 
 #ifndef NDEBUG
 static cl::opt<bool>
 EnableFastISelVerbose2("fast-isel-verbose2", cl::Hidden,
           cl::desc("Enable extra verbose messages in the \"fast\" "
                    "instruction selector"));
+
   // Terminators
 STATISTIC(NumFastIselFailRet,"Fast isel fails on Ret");
 STATISTIC(NumFastIselFailBr,"Fast isel fails on Br");
@@ -742,7 +746,7 @@ public:
 } // end anonymous namespace
 
 void SelectionDAGISel::DoInstructionSelection() {
-  DEBUG(errs() << "===== Instruction selection begins: BB#"
+  DEBUG(dbgs() << "===== Instruction selection begins: BB#"
         << FuncInfo->MBB->getNumber()
         << " '" << FuncInfo->MBB->getName() << "'\n");
 
@@ -801,7 +805,7 @@ void SelectionDAGISel::DoInstructionSelection() {
     CurDAG->setRoot(Dummy.getValue());
   }
 
-  DEBUG(errs() << "===== Instruction selection ends:\n");
+  DEBUG(dbgs() << "===== Instruction selection ends:\n");
 
   PostprocessISelDAG();
 }
@@ -831,84 +835,6 @@ void SelectionDAGISel::PrepareEHLandingPad() {
   if (Reg) MBB->addLiveIn(Reg);
 }
 
-/// TryToFoldFastISelLoad - We're checking to see if we can fold the specified
-/// load into the specified FoldInst.  Note that we could have a sequence where
-/// multiple LLVM IR instructions are folded into the same machineinstr.  For
-/// example we could have:
-///   A: x = load i32 *P
-///   B: y = icmp A, 42
-///   C: br y, ...
-///
-/// In this scenario, LI is "A", and FoldInst is "C".  We know about "B" (and
-/// any other folded instructions) because it is between A and C.
-///
-/// If we succeed in folding the load into the operation, return true.
-///
-bool SelectionDAGISel::TryToFoldFastISelLoad(const LoadInst *LI,
-                                             const Instruction *FoldInst,
-                                             FastISel *FastIS) {
-  // We know that the load has a single use, but don't know what it is.  If it
-  // isn't one of the folded instructions, then we can't succeed here.  Handle
-  // this by scanning the single-use users of the load until we get to FoldInst.
-  unsigned MaxUsers = 6;  // Don't scan down huge single-use chains of instrs.
-
-  const Instruction *TheUser = LI->use_back();
-  while (TheUser != FoldInst &&   // Scan up until we find FoldInst.
-         // Stay in the right block.
-         TheUser->getParent() == FoldInst->getParent() &&
-         --MaxUsers) {  // Don't scan too far.
-    // If there are multiple or no uses of this instruction, then bail out.
-    if (!TheUser->hasOneUse())
-      return false;
-
-    TheUser = TheUser->use_back();
-  }
-
-  // If we didn't find the fold instruction, then we failed to collapse the
-  // sequence.
-  if (TheUser != FoldInst)
-    return false;
-
-  // Don't try to fold volatile loads.  Target has to deal with alignment
-  // constraints.
-  if (LI->isVolatile()) return false;
-
-  // Figure out which vreg this is going into.  If there is no assigned vreg yet
-  // then there actually was no reference to it.  Perhaps the load is referenced
-  // by a dead instruction.
-  unsigned LoadReg = FastIS->getRegForValue(LI);
-  if (LoadReg == 0)
-    return false;
-
-  // Check to see what the uses of this vreg are.  If it has no uses, or more
-  // than one use (at the machine instr level) then we can't fold it.
-  MachineRegisterInfo::reg_iterator RI = RegInfo->reg_begin(LoadReg);
-  if (RI == RegInfo->reg_end())
-    return false;
-
-  // See if there is exactly one use of the vreg.  If there are multiple uses,
-  // then the instruction got lowered to multiple machine instructions or the
-  // use of the loaded value ended up being multiple operands of the result, in
-  // either case, we can't fold this.
-  MachineRegisterInfo::reg_iterator PostRI = RI; ++PostRI;
-  if (PostRI != RegInfo->reg_end())
-    return false;
-
-  assert(RI.getOperand().isUse() &&
-         "The only use of the vreg must be a use, we haven't emitted the def!");
-
-  MachineInstr *User = &*RI;
-
-  // Set the insertion point properly.  Folding the load can cause generation of
-  // other random instructions (like sign extends) for addressing modes, make
-  // sure they get inserted in a logical place before the new instruction.
-  FuncInfo->InsertPt = User;
-  FuncInfo->MBB = User->getParent();
-
-  // Ask the target to try folding the load.
-  return FastIS->TryToFoldLoad(User, RI.getOperandNo(), LI);
-}
-
 /// isFoldedOrDeadInstruction - Return true if the specified instruction is
 /// side-effect free and is either dead or folded into a generated instruction.
 /// Return false if it needs to be emitted.
@@ -1054,9 +980,12 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
       // Emit code for any incoming arguments. This must happen before
       // beginning FastISel on the entry block.
       if (LLVMBB == &Fn.getEntryBlock()) {
+        ++NumEntryBlocks;
+
         // Lower any arguments needed in this block if this is the entry block.
         if (!FastIS->LowerArguments()) {
           // Fast isel failed to lower these arguments
+          ++NumFastIselFailLowerArguments;
           if (EnableFastISelAbortArgs)
             llvm_unreachable("FastISel didn't lower all arguments");
 
@@ -1106,7 +1035,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
           }
           if (BeforeInst != Inst && isa<LoadInst>(BeforeInst) &&
               BeforeInst->hasOneUse() &&
-              TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), Inst, FastIS)) {
+              FastIS->tryToFoldLoad(cast<LoadInst>(BeforeInst), Inst)) {
             // If we succeeded, don't re-select the load.
             BI = llvm::next(BasicBlock::const_iterator(BeforeInst));
             --NumFastIselRemaining;
@@ -1178,8 +1107,10 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
       FastIS->recomputeInsertPt();
     } else {
       // Lower any arguments needed in this block if this is the entry block.
-      if (LLVMBB == &Fn.getEntryBlock())
+      if (LLVMBB == &Fn.getEntryBlock()) {
+        ++NumEntryBlocks;
         LowerArguments(Fn);
+      }
     }
 
     if (Begin != BI)
@@ -1771,7 +1702,7 @@ UpdateChainsAndGlue(SDNode *NodeToMatch, SDValue InputChain,
   if (!NowDeadNodes.empty())
     CurDAG->RemoveDeadNodes(NowDeadNodes);
 
-  DEBUG(errs() << "ISEL: Match complete!\n");
+  DEBUG(dbgs() << "ISEL: Match complete!\n");
 }
 
 enum ChainResult {
@@ -2276,9 +2207,9 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
   SmallVector<SDNode*, 3> ChainNodesMatched;
   SmallVector<SDNode*, 3> GlueResultNodesMatched;
 
-  DEBUG(errs() << "ISEL: Starting pattern match on root node: ";
+  DEBUG(dbgs() << "ISEL: Starting pattern match on root node: ";
         NodeToMatch->dump(CurDAG);
-        errs() << '\n');
+        dbgs() << '\n');
 
   // Determine where to start the interpreter.  Normally we start at opcode #0,
   // but if the state machine starts with an OPC_SwitchOpcode, then we
@@ -2290,7 +2221,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
     // Already computed the OpcodeOffset table, just index into it.
     if (N.getOpcode() < OpcodeOffset.size())
       MatcherIndex = OpcodeOffset[N.getOpcode()];
-    DEBUG(errs() << "  Initial Opcode index to " << MatcherIndex << "\n");
+    DEBUG(dbgs() << "  Initial Opcode index to " << MatcherIndex << "\n");
 
   } else if (MatcherTable[0] == OPC_SwitchOpcode) {
     // Otherwise, the table isn't computed, but the state machine does start
@@ -2357,7 +2288,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
         if (!Result)
           break;
 
-        DEBUG(errs() << "  Skipped scope entry (due to false predicate) at "
+        DEBUG(dbgs() << "  Skipped scope entry (due to false predicate) at "
                      << "index " << MatcherIndexOfPredicate
                      << ", continuing at " << FailIndex << "\n");
         ++NumDAGIselRetries;
@@ -2487,7 +2418,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
       if (CaseSize == 0) break;
 
       // Otherwise, execute the case we found.
-      DEBUG(errs() << "  OpcodeSwitch from " << SwitchStart
+      DEBUG(dbgs() << "  OpcodeSwitch from " << SwitchStart
                    << " to " << MatcherIndex << "\n");
       continue;
     }
@@ -2519,7 +2450,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
       if (CaseSize == 0) break;
 
       // Otherwise, execute the case we found.
-      DEBUG(errs() << "  TypeSwitch[" << EVT(CurNodeVT).getEVTString()
+      DEBUG(dbgs() << "  TypeSwitch[" << EVT(CurNodeVT).getEVTString()
                    << "] from " << SwitchStart << " to " << MatcherIndex<<'\n');
       continue;
     }
@@ -2787,7 +2718,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
         // If this is a normal EmitNode command, just create the new node and
         // add the results to the RecordedNodes list.
         Res = CurDAG->getMachineNode(TargetOpc, NodeToMatch->getDebugLoc(),
-                                     VTList, Ops.data(), Ops.size());
+                                     VTList, Ops);
 
         // Add all the non-glue/non-chain results to the RecordedNodes list.
         for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
@@ -2863,9 +2794,9 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
           ->setMemRefs(MemRefs, MemRefs + NumMemRefs);
       }
 
-      DEBUG(errs() << "  "
+      DEBUG(dbgs() << "  "
                    << (Opcode == OPC_MorphNodeTo ? "Morphed" : "Created")
-                   << " node: "; Res->dump(CurDAG); errs() << "\n");
+                   << " node: "; Res->dump(CurDAG); dbgs() << "\n");
 
       // If this was a MorphNodeTo then we're completely done!
       if (Opcode == OPC_MorphNodeTo) {
@@ -2940,7 +2871,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
     // If the code reached this point, then the match failed.  See if there is
     // another child to try in the current 'Scope', otherwise pop it until we
     // find a case to check.
-    DEBUG(errs() << "  Match failed at index " << CurrentOpcodeIndex << "\n");
+    DEBUG(dbgs() << "  Match failed at index " << CurrentOpcodeIndex << "\n");
     ++NumDAGIselRetries;
     while (1) {
       if (MatchScopes.empty()) {
@@ -2960,7 +2891,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
         MatchedMemRefs.resize(LastScope.NumMatchedMemRefs);
       MatcherIndex = LastScope.FailIndex;
 
-      DEBUG(errs() << "  Continuing at " << MatcherIndex << "\n");
+      DEBUG(dbgs() << "  Continuing at " << MatcherIndex << "\n");
 
       InputChain = LastScope.InputChain;
       InputGlue = LastScope.InputGlue;
diff --git a/lib/CodeGen/ShrinkWrapping.cpp b/lib/CodeGen/ShrinkWrapping.cpp
index 9ab4918..2feea59 100644
--- a/lib/CodeGen/ShrinkWrapping.cpp
+++ b/lib/CodeGen/ShrinkWrapping.cpp
@@ -70,14 +70,14 @@ ShrinkWrapFunc("shrink-wrap-func", cl::Hidden,
 
 // Debugging level for shrink wrapping.
 enum ShrinkWrapDebugLevel {
-  None, BasicInfo, Iterations, Details
+  Disabled, BasicInfo, Iterations, Details
 };
 
 static cl::opt<enum ShrinkWrapDebugLevel>
 ShrinkWrapDebugging("shrink-wrap-dbg", cl::Hidden,
   cl::desc("Print shrink wrapping debugging information"),
   cl::values(
-    clEnumVal(None      , "disable debug output"),
+    clEnumVal(Disabled  , "disable debug output"),
     clEnumVal(BasicInfo , "print basic DF sets"),
     clEnumVal(Iterations, "print SR sets for each iteration"),
     clEnumVal(Details   , "print all DF sets"),
diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp
index f42bdbd..8074d16 100644
--- a/lib/CodeGen/TargetLoweringBase.cpp
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@@ -620,12 +620,55 @@ static void InitCmpLibcallCCs(ISD::CondCode *CCs) {
 TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm,
                                        const TargetLoweringObjectFile *tlof)
   : TM(tm), TD(TM.getDataLayout()), TLOF(*tlof) {
+  initActions();
+
+  // Perform these initializations only once.
+  IsLittleEndian = TD->isLittleEndian();
+  PointerTy = MVT::getIntegerVT(8*TD->getPointerSize(0));
+  MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove = 8;
+  MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize
+    = MaxStoresPerMemmoveOptSize = 4;
+  UseUnderscoreSetJmp = false;
+  UseUnderscoreLongJmp = false;
+  SelectIsExpensive = false;
+  IntDivIsCheap = false;
+  Pow2DivIsCheap = false;
+  JumpIsExpensive = false;
+  PredictableSelectIsExpensive = false;
+  StackPointerRegisterToSaveRestore = 0;
+  ExceptionPointerRegister = 0;
+  ExceptionSelectorRegister = 0;
+  BooleanContents = UndefinedBooleanContent;
+  BooleanVectorContents = UndefinedBooleanContent;
+  SchedPreferenceInfo = Sched::ILP;
+  JumpBufSize = 0;
+  JumpBufAlignment = 0;
+  MinFunctionAlignment = 0;
+  PrefFunctionAlignment = 0;
+  PrefLoopAlignment = 0;
+  MinStackArgumentAlignment = 1;
+  InsertFencesForAtomic = false;
+  SupportJumpTables = true;
+  MinimumJumpTableEntries = 4;
+
+  InitLibcallNames(LibcallRoutineNames, TM);
+  InitCmpLibcallCCs(CmpLibcallCCs);
+  InitLibcallCallingConvs(LibcallCallingConvs);
+}
+
+TargetLoweringBase::~TargetLoweringBase() {
+  delete &TLOF;
+}
+
+void TargetLoweringBase::initActions() {
   // All operations default to being supported.
   memset(OpActions, 0, sizeof(OpActions));
   memset(LoadExtActions, 0, sizeof(LoadExtActions));
   memset(TruncStoreActions, 0, sizeof(TruncStoreActions));
   memset(IndexedModeActions, 0, sizeof(IndexedModeActions));
   memset(CondCodeActions, 0, sizeof(CondCodeActions));
+  memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*));
+  memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray));
 
   // Set default actions for various operations.
   for (unsigned VT = 0; VT != (unsigned)MVT::LAST_VALUETYPE; ++VT) {
@@ -702,45 +745,6 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm,
   // here is to inform DAG Legalizer to replace DEBUGTRAP with TRAP.
   //
   setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand);
-
-  IsLittleEndian = TD->isLittleEndian();
-  PointerTy = MVT::getIntegerVT(8*TD->getPointerSize(0));
-  memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*));
-  memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray));
-  MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove = 8;
-  MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize
-    = MaxStoresPerMemmoveOptSize = 4;
-  UseUnderscoreSetJmp = false;
-  UseUnderscoreLongJmp = false;
-  SelectIsExpensive = false;
-  IntDivIsCheap = false;
-  Pow2DivIsCheap = false;
-  JumpIsExpensive = false;
-  PredictableSelectIsExpensive = false;
-  StackPointerRegisterToSaveRestore = 0;
-  ExceptionPointerRegister = 0;
-  ExceptionSelectorRegister = 0;
-  BooleanContents = UndefinedBooleanContent;
-  BooleanVectorContents = UndefinedBooleanContent;
-  SchedPreferenceInfo = Sched::ILP;
-  JumpBufSize = 0;
-  JumpBufAlignment = 0;
-  MinFunctionAlignment = 0;
-  PrefFunctionAlignment = 0;
-  PrefLoopAlignment = 0;
-  MinStackArgumentAlignment = 1;
-  ShouldFoldAtomicFences = false;
-  InsertFencesForAtomic = false;
-  SupportJumpTables = true;
-  MinimumJumpTableEntries = 4;
-
-  InitLibcallNames(LibcallRoutineNames, TM);
-  InitCmpLibcallCCs(CmpLibcallCCs);
-  InitLibcallCallingConvs(LibcallCallingConvs);
-}
-
-TargetLoweringBase::~TargetLoweringBase() {
-  delete &TLOF;
 }
 
 MVT TargetLoweringBase::getScalarShiftAmountTy(EVT LHSTy) const {
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 3bdca4c..7e7359a 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -523,11 +523,6 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
 const MCSection *TargetLoweringObjectFileMachO::
 SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
                        Mangler *Mang, const TargetMachine &TM) const {
-
-  // Handle thread local data.
-  if (Kind.isThreadBSS()) return TLSBSSSection;
-  if (Kind.isThreadData()) return TLSDataSection;
-
   if (Kind.isText())
     return GV->isWeakForLinker() ? TextCoalSection : TextSection;
 
@@ -580,6 +575,10 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
   if (Kind.isBSSLocal())
     return DataBSSSection;
 
+  // Handle thread local data.
+  if (Kind.isThreadBSS()) return TLSBSSSection;
+  if (Kind.isThreadData()) return TLSDataSection;
+
   // Otherwise, just drop the variable in the normal data section.
   return DataSection;
 }
@@ -782,3 +781,51 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
   return getDataSection();
 }
 
+/// emitModuleFlags - Emit the "Linker Options" module flag, if present, into
+/// the COFF .drectve section.  All other module flags are ignored.
+void TargetLoweringObjectFileCOFF::
+emitModuleFlags(MCStreamer &Streamer,
+                ArrayRef<Module::ModuleFlagEntry> ModuleFlags,
+                Mangler *Mang, const TargetMachine &TM) const {
+  typedef ArrayRef<Module::ModuleFlagEntry>::iterator FlagIterator;
+
+  // "Linker Options" is the only supported flag; stop at the first match.
+  MDNode *LinkerOptions = 0;
+  for (FlagIterator Flag = ModuleFlags.begin(), FlagEnd = ModuleFlags.end();
+       Flag != FlagEnd; ++Flag) {
+    if (Flag->Key->getString() != "Linker Options")
+      continue;
+    LinkerOptions = cast<MDNode>(Flag->Val);
+    break;
+  }
+  if (!LinkerOptions)
+    return;
+
+  // According to the spec, the .drectve section is a space-separated string
+  // of linker flags, so every option is written out as one more token.
+  Streamer.SwitchSection(getDrectveSection());
+  for (unsigned GroupIdx = 0, NumGroups = LinkerOptions->getNumOperands();
+       GroupIdx != NumGroups; ++GroupIdx) {
+    MDNode *Group = cast<MDNode>(LinkerOptions->getOperand(GroupIdx));
+    for (unsigned ArgIdx = 0, NumArgs = Group->getNumOperands();
+         ArgIdx != NumArgs; ++ArgIdx) {
+      MDString *ArgMD = cast<MDString>(Group->getOperand(ArgIdx));
+      StringRef Arg = ArgMD->getString();
+
+      // The PE-COFF spec says args with spaces must be quoted.  It doesn't say
+      // how to escape quotes, but it probably uses this algorithm:
+      // http://msdn.microsoft.com/en-us/library/17w5ykft(v=vs.85).aspx
+      // FIXME: Reuse escaping code from Support/Windows/Program.inc
+      const bool NeedsQuotes = Arg.find(" ") != StringRef::npos;
+
+      // Lead with a space for consistency with our dllexport implementation.
+      std::string Directive(" ");
+      if (NeedsQuotes)
+        Directive.push_back('\"');
+      Directive.append(Arg);
+      if (NeedsQuotes)
+        Directive.push_back('\"');
+      Streamer.EmitBytes(Directive);
+    }
+  }
+}
diff --git a/lib/CodeGen/TargetOptionsImpl.cpp b/lib/CodeGen/TargetOptionsImpl.cpp
index 0f59d01..435a5e7 100644
--- a/lib/CodeGen/TargetOptionsImpl.cpp
+++ b/lib/CodeGen/TargetOptionsImpl.cpp
@@ -50,3 +50,34 @@ StringRef TargetOptions::getTrapFunctionName() const {
   return TrapFuncName;
 }
 
+/// operator== - Compare two TargetOptions objects member by member.  Returns
+/// true only if every option named in the list below has the same value in
+/// this object and in \p TO; members that are not listed are not compared.
+bool TargetOptions::operator==(const TargetOptions &TO) {
+  // The expansion is parenthesized so each use is a single operand no matter
+  // which operators surround it.
+#define ARE_EQUAL(X) (X == TO.X)
+  return
+    ARE_EQUAL(UnsafeFPMath) &&
+    ARE_EQUAL(NoInfsFPMath) &&
+    ARE_EQUAL(NoNaNsFPMath) &&
+    ARE_EQUAL(HonorSignDependentRoundingFPMathOption) &&
+    ARE_EQUAL(UseSoftFloat) &&
+    ARE_EQUAL(NoZerosInBSS) &&
+    ARE_EQUAL(JITExceptionHandling) &&
+    ARE_EQUAL(JITEmitDebugInfo) &&
+    ARE_EQUAL(JITEmitDebugInfoToDisk) &&
+    ARE_EQUAL(GuaranteedTailCallOpt) &&
+    ARE_EQUAL(DisableTailCalls) &&
+    ARE_EQUAL(StackAlignmentOverride) &&
+    ARE_EQUAL(RealignStack) &&
+    ARE_EQUAL(SSPBufferSize) &&
+    ARE_EQUAL(EnableFastISel) &&
+    ARE_EQUAL(PositionIndependentExecutable) &&
+    ARE_EQUAL(EnableSegmentedStacks) &&
+    ARE_EQUAL(UseInitArray) &&
+    ARE_EQUAL(TrapFuncName) &&
+    ARE_EQUAL(FloatABIType) &&
+    ARE_EQUAL(AllowFPOpFusion);
+#undef ARE_EQUAL
+}
diff --git a/lib/CodeGen/TargetSchedule.cpp b/lib/CodeGen/TargetSchedule.cpp
index 783bfa1..1bf14db 100644
--- a/lib/CodeGen/TargetSchedule.cpp
+++ b/lib/CodeGen/TargetSchedule.cpp
@@ -128,6 +128,8 @@ resolveSchedClass(const MachineInstr *MI) const {
   // Get the definition's scheduling class descriptor from this machine model.
   unsigned SchedClass = MI->getDesc().getSchedClass();
   const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SchedClass);
+  if (!SCDesc->isValid())
+    return SCDesc;
 
 #ifndef NDEBUG
   unsigned NIter = 0;
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index e6dfe10..7ca2bee 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -43,6 +43,7 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/IR/Function.h"
 #include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Target/TargetInstrInfo.h"
@@ -58,6 +59,12 @@ STATISTIC(Num3AddrSunk,        "Number of 3-address instructions sunk");
 STATISTIC(NumReSchedUps,       "Number of instructions re-scheduled up");
 STATISTIC(NumReSchedDowns,     "Number of instructions re-scheduled down");
 
+// Temporary flag to disable rescheduling.
+static cl::opt<bool>
+EnableRescheduling("twoaddr-reschedule",
+                   cl::desc("Coalesce copies by rescheduling (default=true)"),
+                   cl::init(true), cl::Hidden);
+
 namespace {
 class TwoAddressInstructionPass : public MachineFunctionPass {
   MachineFunction *MF;
@@ -426,10 +433,7 @@ static bool isKilled(MachineInstr &MI, unsigned Reg,
 /// isTwoAddrUse - Return true if the specified MI uses the specified register
 /// as a two-address use. If so, return the destination register by reference.
 static bool isTwoAddrUse(MachineInstr &MI, unsigned Reg, unsigned &DstReg) {
-  const MCInstrDesc &MCID = MI.getDesc();
-  unsigned NumOps = MI.isInlineAsm()
-    ? MI.getNumOperands() : MCID.getNumOperands();
-  for (unsigned i = 0; i != NumOps; ++i) {
+  for (unsigned i = 0, NumOps = MI.getNumOperands(); i != NumOps; ++i) {
     const MachineOperand &MO = MI.getOperand(i);
     if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg)
       continue;
@@ -1144,7 +1148,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
 
   // If there is one more use of regB later in the same MBB, consider
   // re-schedule this MI below it.
-  if (rescheduleMIBelowKill(mi, nmi, regB)) {
+  if (EnableRescheduling && rescheduleMIBelowKill(mi, nmi, regB)) {
     ++NumReSchedDowns;
     return true;
   }
@@ -1163,7 +1167,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
 
   // If there is one more use of regB later in the same MBB, consider
   // re-schedule it before this MI if it's legal.
-  if (rescheduleKillAboveMI(mi, nmi, regB)) {
+  if (EnableRescheduling && rescheduleKillAboveMI(mi, nmi, regB)) {
     ++NumReSchedUps;
     return true;
   }
diff --git a/lib/DebugInfo/DWARFCompileUnit.cpp b/lib/DebugInfo/DWARFCompileUnit.cpp
index e3e4ccd..4f0eed4 100644
--- a/lib/DebugInfo/DWARFCompileUnit.cpp
+++ b/lib/DebugInfo/DWARFCompileUnit.cpp
@@ -9,7 +9,7 @@
 
 #include "DWARFCompileUnit.h"
 #include "DWARFContext.h"
-#include "DWARFFormValue.h"
+#include "llvm/DebugInfo/DWARFFormValue.h"
 #include "llvm/Support/Dwarf.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/raw_ostream.h"
@@ -165,7 +165,7 @@ size_t DWARFCompileUnit::extractDIEsIfNeeded(bool cu_die_only) {
   // we were told to parse
 
   const uint8_t *fixed_form_sizes =
-    DWARFFormValue::getFixedFormSizesForAddressSize(getAddressByteSize());
+    DWARFFormValue::getFixedFormSizes(getAddressByteSize(), getVersion());
 
   while (offset < next_cu_offset &&
          die.extractFast(this, fixed_form_sizes, &offset)) {
diff --git a/lib/DebugInfo/DWARFContext.cpp b/lib/DebugInfo/DWARFContext.cpp
index 9e19310..9f52133 100644
--- a/lib/DebugInfo/DWARFContext.cpp
+++ b/lib/DebugInfo/DWARFContext.cpp
@@ -9,6 +9,9 @@
 
 #include "DWARFContext.h"
 #include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Compression.h"
 #include "llvm/Support/Dwarf.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/Path.h"
@@ -107,36 +110,43 @@ void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType) {
   }
 
   if (DumpType == DIDT_All || DumpType == DIDT_AbbrevDwo) {
-    OS << "\n.debug_abbrev.dwo contents:\n";
-    getDebugAbbrevDWO()->dump(OS);
+    const DWARFDebugAbbrev *D = getDebugAbbrevDWO();
+    if (D) {
+      OS << "\n.debug_abbrev.dwo contents:\n";
+      D->dump(OS); // use the pointer that was just null-checked
+    }
   }
 
-  if (DumpType == DIDT_All || DumpType == DIDT_InfoDwo) {
-    OS << "\n.debug_info.dwo contents:\n";
-    for (unsigned i = 0, e = getNumDWOCompileUnits(); i != e; ++i)
-      getDWOCompileUnitAtIndex(i)->dump(OS);
-  }
+  if (DumpType == DIDT_All || DumpType == DIDT_InfoDwo)
+    if (getNumDWOCompileUnits()) {
+      OS << "\n.debug_info.dwo contents:\n";
+      for (unsigned i = 0, e = getNumDWOCompileUnits(); i != e; ++i)
+        getDWOCompileUnitAtIndex(i)->dump(OS);
+    }
 
-  if (DumpType == DIDT_All || DumpType == DIDT_StrDwo) {
-    OS << "\n.debug_str.dwo contents:\n";
-    DataExtractor strDWOData(getStringDWOSection(), isLittleEndian(), 0);
-    offset = 0;
-    uint32_t strDWOOffset = 0;
-    while (const char *s = strDWOData.getCStr(&offset)) {
-      OS << format("0x%8.8x: \"%s\"\n", strDWOOffset, s);
-      strDWOOffset = offset;
+  if (DumpType == DIDT_All || DumpType == DIDT_StrDwo)
+    if (!getStringDWOSection().empty()) {
+      OS << "\n.debug_str.dwo contents:\n";
+      DataExtractor strDWOData(getStringDWOSection(), isLittleEndian(), 0);
+      offset = 0;
+      uint32_t strDWOOffset = 0;
+      while (const char *s = strDWOData.getCStr(&offset)) {
+        OS << format("0x%8.8x: \"%s\"\n", strDWOOffset, s);
+        strDWOOffset = offset;
+      }
     }
-  }
 
-  if (DumpType == DIDT_All || DumpType == DIDT_StrOffsetsDwo) {
-    OS << "\n.debug_str_offsets.dwo contents:\n";
-    DataExtractor strOffsetExt(getStringOffsetDWOSection(), isLittleEndian(), 0);
-    offset = 0;
-    while (offset < getStringOffsetDWOSection().size()) {
-      OS << format("0x%8.8x: ", offset);
-      OS << format("%8.8x\n", strOffsetExt.getU32(&offset));
+  if (DumpType == DIDT_All || DumpType == DIDT_StrOffsetsDwo)
+    if (!getStringOffsetDWOSection().empty()) {
+      OS << "\n.debug_str_offsets.dwo contents:\n";
+      DataExtractor strOffsetExt(getStringOffsetDWOSection(), isLittleEndian(), 0);
+      offset = 0;
+      uint64_t size = getStringOffsetDWOSection().size();
+      while (offset < size) {
+        OS << format("0x%8.8x: ", offset);
+        OS << format("%8.8x\n", strOffsetExt.getU32(&offset));
+      }
     }
-  }
 }
 
 const DWARFDebugAbbrev *DWARFContext::getDebugAbbrev() {
@@ -482,6 +492,22 @@ DIInliningInfo DWARFContext::getInliningInfoForAddress(uint64_t Address,
   return InliningInfo;
 }
 
+static bool consumeCompressedDebugSectionHeader(StringRef &data,
+                                                uint64_t &OriginalSize) {
+  // A compressed section opens with the 4-byte magic "ZLIB"; strip it.
+  if (!data.startswith("ZLIB"))
+    return false;
+  data = data.substr(4);
+  // Next comes the uncompressed size as a big-endian 64-bit integer.
+  uint32_t Cursor = 0;
+  OriginalSize = DataExtractor(data, false, 8).getU64(&Cursor);
+  // A cursor still at 0 means the size field could not be read.
+  if (Cursor == 0)
+    return false;
+  data = data.substr(Cursor);
+  return true;
+}
+
 DWARFContextInMemory::DWARFContextInMemory(object::ObjectFile *Obj) :
   IsLittleEndian(Obj->isLittleEndian()),
   AddressSize(Obj->getBytesInAddress()) {
@@ -495,49 +521,55 @@ DWARFContextInMemory::DWARFContextInMemory(object::ObjectFile *Obj) :
     i->getContents(data);
 
     name = name.substr(name.find_first_not_of("._")); // Skip . and _ prefixes.
-    if (name == "debug_info")
-      InfoSection = data;
-    else if (name == "debug_abbrev")
-      AbbrevSection = data;
-    else if (name == "debug_line")
-      LineSection = data;
-    else if (name == "debug_aranges")
-      ARangeSection = data;
-    else if (name == "debug_frame")
-      DebugFrameSection = data;
-    else if (name == "debug_str")
-      StringSection = data;
-    else if (name == "debug_ranges") {
+
+    // Check if debug info section is compressed with zlib.
+    if (name.startswith("zdebug_")) {
+      uint64_t OriginalSize;
+      if (!zlib::isAvailable() ||
+          !consumeCompressedDebugSectionHeader(data, OriginalSize))
+        continue;
+      OwningPtr<MemoryBuffer> UncompressedSection;
+      if (zlib::uncompress(data, UncompressedSection, OriginalSize) !=
+          zlib::StatusOK)
+        continue;
+      // Make data point to uncompressed section contents and save its contents.
+      name = name.substr(1);
+      data = UncompressedSection->getBuffer();
+      UncompressedSections.push_back(UncompressedSection.take());
+    }
+
+    StringRef *Section = StringSwitch<StringRef*>(name)
+        .Case("debug_info", &InfoSection)
+        .Case("debug_abbrev", &AbbrevSection)
+        .Case("debug_line", &LineSection)
+        .Case("debug_aranges", &ARangeSection)
+        .Case("debug_frame", &DebugFrameSection)
+        .Case("debug_str", &StringSection)
+        .Case("debug_ranges", &RangeSection)
+        .Case("debug_pubnames", &PubNamesSection)
+        .Case("debug_info.dwo", &InfoDWOSection)
+        .Case("debug_abbrev.dwo", &AbbrevDWOSection)
+        .Case("debug_str.dwo", &StringDWOSection)
+        .Case("debug_str_offsets.dwo", &StringOffsetDWOSection)
+        .Case("debug_addr", &AddrSection)
+        // Any more debug info sections go here.
+        .Default(0);
+    if (!Section)
+      continue;
+    *Section = data;
+    if (name == "debug_ranges") {
       // FIXME: Use the other dwo range section when we emit it.
       RangeDWOSection = data;
-      RangeSection = data;
     }
-    else if (name == "debug_pubnames")
-      PubNamesSection = data;
-    else if (name == "debug_info.dwo")
-      InfoDWOSection = data;
-    else if (name == "debug_abbrev.dwo")
-      AbbrevDWOSection = data;
-    else if (name == "debug_str.dwo")
-      StringDWOSection = data;
-    else if (name == "debug_str_offsets.dwo")
-      StringOffsetDWOSection = data;
-    else if (name == "debug_addr")
-      AddrSection = data;
-    // Any more debug info sections go here.
-    else
-      continue;
 
     // TODO: Add support for relocations in other sections as needed.
     // Record relocations for the debug_info and debug_line sections.
-    RelocAddrMap *Map;
-    if (name == "debug_info")
-      Map = &InfoRelocMap;
-    else if (name == "debug_info.dwo")
-      Map = &InfoDWORelocMap;
-    else if (name == "debug_line")
-      Map = &LineRelocMap;
-    else
+    RelocAddrMap *Map = StringSwitch<RelocAddrMap*>(name)
+        .Case("debug_info", &InfoRelocMap)
+        .Case("debug_info.dwo", &InfoDWORelocMap)
+        .Case("debug_line", &LineRelocMap)
+        .Default(0);
+    if (!Map)
       continue;
 
     if (i->begin_relocations() != i->end_relocations()) {
@@ -547,7 +579,7 @@ DWARFContextInMemory::DWARFContextInMemory(object::ObjectFile *Obj) :
              reloc_e = i->end_relocations();
            reloc_i != reloc_e; reloc_i.increment(ec)) {
         uint64_t Address;
-        reloc_i->getAddress(Address);
+        reloc_i->getOffset(Address);
         uint64_t Type;
         reloc_i->getType(Type);
         uint64_t SymAddr = 0;
@@ -593,4 +625,8 @@ DWARFContextInMemory::DWARFContextInMemory(object::ObjectFile *Obj) :
   }
 }
 
+DWARFContextInMemory::~DWARFContextInMemory() {
+  DeleteContainerPointers(UncompressedSections); // owned zlib output buffers
+}
+
 void DWARFContextInMemory::anchor() { }
diff --git a/lib/DebugInfo/DWARFContext.h b/lib/DebugInfo/DWARFContext.h
index 37b2729..78c18e6 100644
--- a/lib/DebugInfo/DWARFContext.h
+++ b/lib/DebugInfo/DWARFContext.h
@@ -161,8 +161,11 @@ class DWARFContextInMemory : public DWARFContext {
   StringRef RangeDWOSection;
   StringRef AddrSection;
 
+  SmallVector<MemoryBuffer*, 4> UncompressedSections;
+
 public:
   DWARFContextInMemory(object::ObjectFile *);
+  ~DWARFContextInMemory();
   virtual bool isLittleEndian() const { return IsLittleEndian; }
   virtual uint8_t getAddressSize() const { return AddressSize; }
   virtual const RelocAddrMap &infoRelocMap() const { return InfoRelocMap; }
diff --git a/lib/DebugInfo/DWARFDebugInfoEntry.cpp b/lib/DebugInfo/DWARFDebugInfoEntry.cpp
index 02b15d6..10be7b4 100644
--- a/lib/DebugInfo/DWARFDebugInfoEntry.cpp
+++ b/lib/DebugInfo/DWARFDebugInfoEntry.cpp
@@ -11,7 +11,7 @@
 #include "DWARFCompileUnit.h"
 #include "DWARFContext.h"
 #include "DWARFDebugAbbrev.h"
-#include "DWARFFormValue.h"
+#include "llvm/DebugInfo/DWARFFormValue.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Dwarf.h"
 #include "llvm/Support/Format.h"
@@ -94,279 +94,87 @@ void DWARFDebugInfoEntryMinimal::dumpAttribute(raw_ostream &OS,
   OS << ")\n";
 }
 
-bool DWARFDebugInfoEntryMinimal::extractFast(const DWARFCompileUnit *cu,
-                                             const uint8_t *fixed_form_sizes,
-                                             uint32_t *offset_ptr) {
-  Offset = *offset_ptr;
-
-  DataExtractor debug_info_data = cu->getDebugInfoExtractor();
-  uint64_t abbrCode = debug_info_data.getULEB128(offset_ptr);
-
-  assert(fixed_form_sizes); // For best performance this should be specified!
-
-  if (abbrCode) {
-    uint32_t offset = *offset_ptr;
-
-    AbbrevDecl = cu->getAbbreviations()->getAbbreviationDeclaration(abbrCode);
-
-    // Skip all data in the .debug_info for the attributes
-    const uint32_t numAttributes = AbbrevDecl->getNumAttributes();
-    uint32_t i;
-    uint16_t form;
-    for (i=0; i<numAttributes; ++i) {
-
-      form = AbbrevDecl->getFormByIndex(i);
-
-      // FIXME: Currently we're checking if this is less than the last
-      // entry in the fixed_form_sizes table, but this should be changed
-      // to use dynamic dispatch.
-      const uint8_t fixed_skip_size = (form < DW_FORM_ref_sig8) ?
-                                       fixed_form_sizes[form] : 0;
-      if (fixed_skip_size)
-        offset += fixed_skip_size;
-      else {
-        bool form_is_indirect = false;
-        do {
-          form_is_indirect = false;
-          uint32_t form_size = 0;
-          switch (form) {
-          // Blocks if inlined data that have a length field and the data bytes
-          // inlined in the .debug_info.
-          case DW_FORM_exprloc:
-          case DW_FORM_block:
-            form_size = debug_info_data.getULEB128(&offset);
-            break;
-          case DW_FORM_block1:
-            form_size = debug_info_data.getU8(&offset);
-            break;
-          case DW_FORM_block2:
-            form_size = debug_info_data.getU16(&offset);
-            break;
-          case DW_FORM_block4:
-            form_size = debug_info_data.getU32(&offset);
-            break;
-
-          // Inlined NULL terminated C-strings
-          case DW_FORM_string:
-            debug_info_data.getCStr(&offset);
-            break;
-
-          // Compile unit address sized values
-          case DW_FORM_addr:
-          case DW_FORM_ref_addr:
-            form_size = cu->getAddressByteSize();
-            break;
-
-          // 0 sized form.
-          case DW_FORM_flag_present:
-            form_size = 0;
-            break;
-
-          // 1 byte values
-          case DW_FORM_data1:
-          case DW_FORM_flag:
-          case DW_FORM_ref1:
-            form_size = 1;
-            break;
-
-          // 2 byte values
-          case DW_FORM_data2:
-          case DW_FORM_ref2:
-            form_size = 2;
-            break;
-
-          // 4 byte values
-          case DW_FORM_strp:
-          case DW_FORM_data4:
-          case DW_FORM_ref4:
-            form_size = 4;
-            break;
-
-          // 8 byte values
-          case DW_FORM_data8:
-          case DW_FORM_ref8:
-          case DW_FORM_ref_sig8:
-            form_size = 8;
-            break;
-
-          // signed or unsigned LEB 128 values
-          case DW_FORM_sdata:
-          case DW_FORM_udata:
-          case DW_FORM_ref_udata:
-          case DW_FORM_GNU_str_index:
-          case DW_FORM_GNU_addr_index:
-            debug_info_data.getULEB128(&offset);
-            break;
-
-          case DW_FORM_indirect:
-            form_is_indirect = true;
-            form = debug_info_data.getULEB128(&offset);
-            break;
-
-            // FIXME: 64-bit for DWARF64
-          case DW_FORM_sec_offset:
-            debug_info_data.getU32(offset_ptr);
-            break;
-
-          default:
-            *offset_ptr = Offset;
-            return false;
-          }
-          offset += form_size;
-        } while (form_is_indirect);
-      }
-    }
-    *offset_ptr = offset;
-    return true;
-  } else {
+bool DWARFDebugInfoEntryMinimal::extractFast(const DWARFCompileUnit *CU,
+                                             const uint8_t *FixedFormSizes,
+                                             uint32_t *OffsetPtr) {
+  Offset = *OffsetPtr;
+  DataExtractor DebugInfoData = CU->getDebugInfoExtractor();
+  uint64_t AbbrCode = DebugInfoData.getULEB128(OffsetPtr);
+  if (0 == AbbrCode) {
+    // NULL debug tag entry.
     AbbrevDecl = NULL;
-    return true; // NULL debug tag entry
+    return true;
+  }
+  AbbrevDecl = CU->getAbbreviations()->getAbbreviationDeclaration(AbbrCode);
+  assert(AbbrevDecl);
+  assert(FixedFormSizes); // For best performance this should be specified!
+
+  // Skip all data in the .debug_info for the attributes
+  for (uint32_t i = 0, n = AbbrevDecl->getNumAttributes(); i < n; ++i) {
+    uint16_t Form = AbbrevDecl->getFormByIndex(i);
+
+    // The fixed-size table has one slot per contiguous form code, ending at
+    // DW_FORM_flag_present; larger codes must not index it (that would read
+    // past the array). FIXME: replace this bound with dynamic dispatch.
+    uint8_t FixedFormSize =
+        (Form <= DW_FORM_flag_present) ? FixedFormSizes[Form] : 0;
+    if (FixedFormSize)
+      *OffsetPtr += FixedFormSize;
+    else if (!DWARFFormValue::skipValue(Form, DebugInfoData, OffsetPtr,
+                                        CU)) {
+      // Restore the original offset.
+      *OffsetPtr = Offset;
+      return false;
+    }
   }
+  return true;
 }
 
 bool
-DWARFDebugInfoEntryMinimal::extract(const DWARFCompileUnit *cu,
-                                    uint32_t *offset_ptr) {
-  DataExtractor debug_info_data = cu->getDebugInfoExtractor();
-  const uint32_t cu_end_offset = cu->getNextCompileUnitOffset();
-  const uint8_t cu_addr_size = cu->getAddressByteSize();
-  uint32_t offset = *offset_ptr;
-  if ((offset < cu_end_offset) && debug_info_data.isValidOffset(offset)) {
-    Offset = offset;
-
-    uint64_t abbrCode = debug_info_data.getULEB128(&offset);
-
-    if (abbrCode) {
-      AbbrevDecl = cu->getAbbreviations()->getAbbreviationDeclaration(abbrCode);
-
-      if (AbbrevDecl) {
-        uint16_t tag = AbbrevDecl->getTag();
-
-        bool isCompileUnitTag = tag == DW_TAG_compile_unit;
-        if(cu && isCompileUnitTag)
-          const_cast<DWARFCompileUnit*>(cu)->setBaseAddress(0);
-
-        // Skip all data in the .debug_info for the attributes
-        const uint32_t numAttributes = AbbrevDecl->getNumAttributes();
-        for (uint32_t i = 0; i != numAttributes; ++i) {
-          uint16_t attr = AbbrevDecl->getAttrByIndex(i);
-          uint16_t form = AbbrevDecl->getFormByIndex(i);
-
-          if (isCompileUnitTag &&
-              ((attr == DW_AT_entry_pc) || (attr == DW_AT_low_pc))) {
-            DWARFFormValue form_value(form);
-            if (form_value.extractValue(debug_info_data, &offset, cu)) {
-              if (attr == DW_AT_low_pc || attr == DW_AT_entry_pc)
-                const_cast<DWARFCompileUnit*>(cu)
-                  ->setBaseAddress(form_value.getUnsigned());
-            }
-          } else {
-            bool form_is_indirect = false;
-            do {
-              form_is_indirect = false;
-              register uint32_t form_size = 0;
-              switch (form) {
-              // Blocks if inlined data that have a length field and the data
-              // bytes // inlined in the .debug_info
-              case DW_FORM_exprloc:
-              case DW_FORM_block:
-                form_size = debug_info_data.getULEB128(&offset);
-                break;
-              case DW_FORM_block1:
-                form_size = debug_info_data.getU8(&offset);
-                break;
-              case DW_FORM_block2:
-                form_size = debug_info_data.getU16(&offset);
-                break;
-              case DW_FORM_block4:
-                form_size = debug_info_data.getU32(&offset);
-                break;
-
-              // Inlined NULL terminated C-strings
-              case DW_FORM_string:
-                debug_info_data.getCStr(&offset);
-                break;
-
-              // Compile unit address sized values
-              case DW_FORM_addr:
-              case DW_FORM_ref_addr:
-                form_size = cu_addr_size;
-                break;
-
-              // 0 byte value
-              case DW_FORM_flag_present:
-                form_size = 0;
-                break;
-
-              // 1 byte values
-              case DW_FORM_data1:
-              case DW_FORM_flag:
-              case DW_FORM_ref1:
-                form_size = 1;
-                break;
-
-              // 2 byte values
-              case DW_FORM_data2:
-              case DW_FORM_ref2:
-                form_size = 2;
-                break;
-
-                // 4 byte values
-              case DW_FORM_strp:
-                form_size = 4;
-                break;
-
-              case DW_FORM_data4:
-              case DW_FORM_ref4:
-                form_size = 4;
-                break;
-
-              // 8 byte values
-              case DW_FORM_data8:
-              case DW_FORM_ref8:
-              case DW_FORM_ref_sig8:
-                form_size = 8;
-                break;
-
-              // signed or unsigned LEB 128 values
-              case DW_FORM_sdata:
-              case DW_FORM_udata:
-              case DW_FORM_ref_udata:
-              case DW_FORM_GNU_str_index:
-              case DW_FORM_GNU_addr_index:
-                debug_info_data.getULEB128(&offset);
-                break;
-
-              case DW_FORM_indirect:
-                form = debug_info_data.getULEB128(&offset);
-                form_is_indirect = true;
-                break;
-
-                // FIXME: 64-bit for DWARF64.
-              case DW_FORM_sec_offset:
-                debug_info_data.getU32(offset_ptr);
-                break;
-
-              default:
-                *offset_ptr = offset;
-                return false;
-              }
-
-              offset += form_size;
-            } while (form_is_indirect);
-          }
-        }
-        *offset_ptr = offset;
-        return true;
+DWARFDebugInfoEntryMinimal::extract(const DWARFCompileUnit *CU,
+                                    uint32_t *OffsetPtr) {
+  DataExtractor DebugInfoData = CU->getDebugInfoExtractor();
+  const uint32_t CUEndOffset = CU->getNextCompileUnitOffset();
+  Offset = *OffsetPtr;
+  if ((Offset >= CUEndOffset) || !DebugInfoData.isValidOffset(Offset))
+    return false;
+  uint64_t AbbrCode = DebugInfoData.getULEB128(OffsetPtr);
+  if (0 == AbbrCode) {
+    // NULL debug tag entry.
+    AbbrevDecl = NULL;
+    return true;
+  }
+  AbbrevDecl = CU->getAbbreviations()->getAbbreviationDeclaration(AbbrCode);
+  if (0 == AbbrevDecl) {
+    // Restore the original offset.
+    *OffsetPtr = Offset;
+    return false;
+  }
+  bool IsCompileUnitTag = (AbbrevDecl->getTag() == DW_TAG_compile_unit);
+  if (IsCompileUnitTag)
+    const_cast<DWARFCompileUnit*>(CU)->setBaseAddress(0);
+
+  // Skip all data in the .debug_info for the attributes
+  for (uint32_t i = 0, n = AbbrevDecl->getNumAttributes(); i < n; ++i) {
+    uint16_t Attr = AbbrevDecl->getAttrByIndex(i);
+    uint16_t Form = AbbrevDecl->getFormByIndex(i);
+
+    if (IsCompileUnitTag &&
+        ((Attr == DW_AT_entry_pc) || (Attr == DW_AT_low_pc))) {
+      DWARFFormValue FormValue(Form);
+      if (FormValue.extractValue(DebugInfoData, OffsetPtr, CU)) {
+        if (Attr == DW_AT_low_pc || Attr == DW_AT_entry_pc)
+          const_cast<DWARFCompileUnit*>(CU)
+            ->setBaseAddress(FormValue.getUnsigned());
       }
-    } else {
-      AbbrevDecl = NULL;
-      *offset_ptr = offset;
-      return true;    // NULL debug tag entry
+    } else if (!DWARFFormValue::skipValue(Form, DebugInfoData, OffsetPtr,
+                                          CU)) {
+      // Restore the original offset.
+      *OffsetPtr = Offset;
+      return false;
     }
   }
-
-  return false;
+  return true;
 }
 
 bool DWARFDebugInfoEntryMinimal::isSubprogramDIE() const {
diff --git a/lib/DebugInfo/DWARFDebugInfoEntry.h b/lib/DebugInfo/DWARFDebugInfoEntry.h
index 9c1b2be..9003591 100644
--- a/lib/DebugInfo/DWARFDebugInfoEntry.h
+++ b/lib/DebugInfo/DWARFDebugInfoEntry.h
@@ -45,12 +45,17 @@ public:
                      uint32_t *offset_ptr, uint16_t attr, uint16_t form,
                      unsigned indent = 0) const;
 
-  bool extractFast(const DWARFCompileUnit *cu, const uint8_t *fixed_form_sizes,
-                   uint32_t *offset_ptr);
+  /// Extracts a debug info entry, which is a child of a given compile unit,
+  /// starting at a given offset. If DIE can't be extracted, returns false and
+  /// doesn't change OffsetPtr.
+  bool extractFast(const DWARFCompileUnit *CU, const uint8_t *FixedFormSizes,
+                   uint32_t *OffsetPtr);
 
   /// Extract a debug info entry for a given compile unit from the
   /// .debug_info and .debug_abbrev data starting at the given offset.
-  bool extract(const DWARFCompileUnit *cu, uint32_t *offset_ptr);
+  /// If the entry can't be parsed, returns false and doesn't change
+  /// OffsetPtr.
+  bool extract(const DWARFCompileUnit *CU, uint32_t *OffsetPtr);
 
   uint32_t getTag() const { return AbbrevDecl ? AbbrevDecl->getTag() : 0; }
   bool isNULL() const { return AbbrevDecl == 0; }
diff --git a/lib/DebugInfo/DWARFFormValue.cpp b/lib/DebugInfo/DWARFFormValue.cpp
index 9f807aa..c5583f9 100644
--- a/lib/DebugInfo/DWARFFormValue.cpp
+++ b/lib/DebugInfo/DWARFFormValue.cpp
@@ -7,7 +7,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "DWARFFormValue.h"
+#include "llvm/DebugInfo/DWARFFormValue.h"
 #include "DWARFCompileUnit.h"
 #include "DWARFContext.h"
 #include "llvm/Support/Debug.h"
@@ -18,39 +18,16 @@
 using namespace llvm;
 using namespace dwarf;
 
-static const uint8_t form_sizes_addr4[] = {
-  0, // 0x00 unused
-  4, // 0x01 DW_FORM_addr
-  0, // 0x02 unused
-  0, // 0x03 DW_FORM_block2
-  0, // 0x04 DW_FORM_block4
-  2, // 0x05 DW_FORM_data2
-  4, // 0x06 DW_FORM_data4
-  8, // 0x07 DW_FORM_data8
-  0, // 0x08 DW_FORM_string
-  0, // 0x09 DW_FORM_block
-  0, // 0x0a DW_FORM_block1
-  1, // 0x0b DW_FORM_data1
-  1, // 0x0c DW_FORM_flag
-  0, // 0x0d DW_FORM_sdata
-  4, // 0x0e DW_FORM_strp
-  0, // 0x0f DW_FORM_udata
-  4, // 0x10 DW_FORM_ref_addr
-  1, // 0x11 DW_FORM_ref1
-  2, // 0x12 DW_FORM_ref2
-  4, // 0x13 DW_FORM_ref4
-  8, // 0x14 DW_FORM_ref8
-  0, // 0x15 DW_FORM_ref_udata
-  0, // 0x16 DW_FORM_indirect
-  4, // 0x17 DW_FORM_sec_offset
-  0, // 0x18 DW_FORM_exprloc
-  0, // 0x19 DW_FORM_flag_present
-  8, // 0x20 DW_FORM_ref_sig8
+namespace {
+template <uint8_t AddrSize, uint8_t RefAddrSize> struct FixedFormSizes {
+  static const uint8_t sizes[]; // byte size per DW_FORM code; 0 = not fixed
+};
+}
 
-static const uint8_t form_sizes_addr8[] = {
+template <uint8_t AddrSize, uint8_t RefAddrSize>
+const uint8_t FixedFormSizes<AddrSize, RefAddrSize>::sizes[] = {
   0, // 0x00 unused
-  8, // 0x01 DW_FORM_addr
+  AddrSize, // 0x01 DW_FORM_addr
   0, // 0x02 unused
   0, // 0x03 DW_FORM_block2
   0, // 0x04 DW_FORM_block4
@@ -65,7 +42,7 @@ static const uint8_t form_sizes_addr8[] = {
   0, // 0x0d DW_FORM_sdata
   4, // 0x0e DW_FORM_strp
   0, // 0x0f DW_FORM_udata
-  8, // 0x10 DW_FORM_ref_addr
+  RefAddrSize, // 0x10 DW_FORM_ref_addr
   1, // 0x11 DW_FORM_ref1
   2, // 0x12 DW_FORM_ref2
   4, // 0x13 DW_FORM_ref4
@@ -78,13 +55,23 @@ static const uint8_t form_sizes_addr8[] = {
   8, // 0x20 DW_FORM_ref_sig8
 };
 
+static uint8_t getRefAddrSize(uint8_t AddrSize, uint16_t Version) {
+  // Address-sized in DWARF 2, a 4-byte offset otherwise. FIXME: DWARF64.
+  return (Version != 2) ? 4 : AddrSize;
+}
+
 const uint8_t *
-DWARFFormValue::getFixedFormSizesForAddressSize(uint8_t addr_size) {
-  switch (addr_size) {
-  case 4: return form_sizes_addr4;
-  case 8: return form_sizes_addr8;
-  }
-  return NULL;
+DWARFFormValue::getFixedFormSizes(uint8_t AddrSize, uint16_t Version) {
+  // One table exists per supported (address size, ref_addr size) pair.
+  const uint8_t RefSize = getRefAddrSize(AddrSize, Version);
+  if (AddrSize == 4) {
+    if (RefSize == 4) return FixedFormSizes<4, 4>::sizes;
+    if (RefSize == 8) return FixedFormSizes<4, 8>::sizes;
+  } else if (AddrSize == 8) {
+    if (RefSize == 4) return FixedFormSizes<8, 4>::sizes;
+    if (RefSize == 8) return FixedFormSizes<8, 8>::sizes;
+  }
+  return 0;
 }
 
 bool
@@ -100,14 +87,16 @@ DWARFFormValue::extractValue(DataExtractor data, uint32_t *offset_ptr,
     switch (Form) {
     case DW_FORM_addr:
     case DW_FORM_ref_addr: {
-      RelocAddrMap::const_iterator AI
-        = cu->getRelocMap()->find(*offset_ptr);
+      uint16_t AddrSize =
+          (Form == DW_FORM_addr)
+              ? cu->getAddressByteSize()
+              : getRefAddrSize(cu->getAddressByteSize(), cu->getVersion());
+      RelocAddrMap::const_iterator AI = cu->getRelocMap()->find(*offset_ptr);
       if (AI != cu->getRelocMap()->end()) {
         const std::pair<uint8_t, int64_t> &R = AI->second;
-        Value.uval = data.getUnsigned(offset_ptr, cu->getAddressByteSize()) +
-                     R.second;
+        Value.uval = data.getUnsigned(offset_ptr, AddrSize) + R.second;
       } else
-        Value.uval = data.getUnsigned(offset_ptr, cu->getAddressByteSize());
+        Value.uval = data.getUnsigned(offset_ptr, AddrSize);
       break;
     }
     case DW_FORM_exprloc:
@@ -172,10 +161,17 @@ DWARFFormValue::extractValue(DataExtractor data, uint32_t *offset_ptr,
       Form = data.getULEB128(offset_ptr);
       indirect = true;
       break;
-    case DW_FORM_sec_offset:
+    case DW_FORM_sec_offset: {
       // FIXME: This is 64-bit for DWARF64.
-      Value.uval = data.getU32(offset_ptr);
+      RelocAddrMap::const_iterator AI
+        = cu->getRelocMap()->find(*offset_ptr);
+      if (AI != cu->getRelocMap()->end()) {
+        const std::pair<uint8_t, int64_t> &R = AI->second;
+        Value.uval = data.getU32(offset_ptr) + R.second;
+      } else
+        Value.uval = data.getU32(offset_ptr);
       break;
+    }
     case DW_FORM_flag_present:
       Value.uval = 1;
       break;
@@ -216,7 +212,6 @@ DWARFFormValue::skipValue(uint16_t form, DataExtractor debug_info_data,
                           uint32_t *offset_ptr, const DWARFCompileUnit *cu) {
   bool indirect = false;
   do {
-    indirect = false;
     switch (form) {
     // Blocks if inlined data that have a length field and the data bytes
     // inlined in the .debug_info
@@ -249,9 +244,11 @@ DWARFFormValue::skipValue(uint16_t form, DataExtractor debug_info_data,
 
     // Compile unit address sized values
     case DW_FORM_addr:
-    case DW_FORM_ref_addr:
       *offset_ptr += cu->getAddressByteSize();
       return true;
+    case DW_FORM_ref_addr:
+      *offset_ptr += getRefAddrSize(cu->getAddressByteSize(), cu->getVersion());
+      return true;
 
     // 0 byte values - implied from the form.
     case DW_FORM_flag_present:
diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp
index 906a3a3..e43ba4f 100644
--- a/lib/ExecutionEngine/ExecutionEngine.cpp
+++ b/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -948,7 +948,7 @@ static void StoreIntToMemory(const APInt &IntVal, uint8_t *Dst,
   assert((IntVal.getBitWidth()+7)/8 >= StoreBytes && "Integer too small!");
   const uint8_t *Src = (const uint8_t *)IntVal.getRawData();
 
-  if (sys::isLittleEndianHost()) {
+  if (sys::IsLittleEndianHost) {
     // Little-endian host - the source is ordered from LSB to MSB.  Order the
     // destination from LSB to MSB: Do a straight copy.
     memcpy(Dst, Src, StoreBytes);
@@ -1009,7 +1009,7 @@ void ExecutionEngine::StoreValueToMemory(const GenericValue &Val,
     break;
   }
 
-  if (sys::isLittleEndianHost() != getDataLayout()->isLittleEndian())
+  if (sys::IsLittleEndianHost != getDataLayout()->isLittleEndian())
     // Host and target are different endian - reverse the stored bytes.
     std::reverse((uint8_t*)Ptr, StoreBytes + (uint8_t*)Ptr);
 }
@@ -1021,7 +1021,7 @@ static void LoadIntFromMemory(APInt &IntVal, uint8_t *Src, unsigned LoadBytes) {
   uint8_t *Dst = reinterpret_cast<uint8_t *>(
                    const_cast<uint64_t *>(IntVal.getRawData()));
 
-  if (sys::isLittleEndianHost())
+  if (sys::IsLittleEndianHost)
     // Little-endian host - the destination must be ordered from LSB to MSB.
     // The source is ordered from LSB to MSB: Do a straight copy.
     memcpy(Dst, Src, LoadBytes);
diff --git a/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/lib/ExecutionEngine/ExecutionEngineBindings.cpp
index f4e8246..f9b08a0 100644
--- a/lib/ExecutionEngine/ExecutionEngineBindings.cpp
+++ b/lib/ExecutionEngine/ExecutionEngineBindings.cpp
@@ -15,11 +15,33 @@
 #include "llvm-c/ExecutionEngine.h"
 #include "llvm/ExecutionEngine/ExecutionEngine.h"
 #include "llvm/ExecutionEngine/GenericValue.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Module.h"
 #include "llvm/Support/ErrorHandling.h"
 #include <cstring>
 
 using namespace llvm;
 
+// Wrapping the C bindings types.
+DEFINE_SIMPLE_CONVERSION_FUNCTIONS(GenericValue, LLVMGenericValueRef)
+
+inline DataLayout *unwrap(LLVMTargetDataRef P) {
+  return reinterpret_cast<DataLayout *>(P); // opaque C handle -> C++ object
+}
+
+inline LLVMTargetDataRef wrap(const DataLayout *P) {
+  return reinterpret_cast<LLVMTargetDataRef>(const_cast<DataLayout *>(P));
+}
+
+inline TargetLibraryInfo *unwrap(LLVMTargetLibraryInfoRef P) {
+  return reinterpret_cast<TargetLibraryInfo *>(P); // same, for library info
+}
+
+inline LLVMTargetLibraryInfoRef wrap(const TargetLibraryInfo *P) {
+  TargetLibraryInfo *NonConst = const_cast<TargetLibraryInfo *>(P);
+  return reinterpret_cast<LLVMTargetLibraryInfoRef>(NonConst);
+}
+
 /*===-- Operations on generic values --------------------------------------===*/
 
 LLVMGenericValueRef LLVMCreateGenericValueOfInt(LLVMTypeRef Ty,
@@ -132,6 +154,59 @@ LLVMBool LLVMCreateJITCompilerForModule(LLVMExecutionEngineRef *OutJIT,
   return 1;
 }
 
+void LLVMInitializeMCJITCompilerOptions(LLVMMCJITCompilerOptions *PassedOptions,
+                                        size_t SizeOfPassedOptions) {
+  // Zero the whole struct first so every field defaults to 0/false and no
+  // indeterminate padding bytes get copied out; only CodeModel is non-zero.
+  LLVMMCJITCompilerOptions options;
+  memset(&options, 0, sizeof(options));
+  options.CodeModel = LLVMCodeModelJITDefault;
+  // Copy no more than the caller's (possibly older, smaller) struct holds.
+  const size_t BytesToCopy = std::min(sizeof(options), SizeOfPassedOptions);
+  memcpy(PassedOptions, &options, BytesToCopy);
+}
+
+LLVMBool LLVMCreateMCJITCompilerForModule(
+    LLVMExecutionEngineRef *OutJIT, LLVMModuleRef M,
+    LLVMMCJITCompilerOptions *PassedOptions, size_t SizeOfPassedOptions,
+    char **OutError) {
+  // A caller-side struct bigger than ours has fields this library does not
+  // know about, i.e. the caller was built against a newer LLVM: refuse.
+  LLVMMCJITCompilerOptions Opts;
+  if (sizeof(Opts) < SizeOfPassedOptions) {
+    *OutError = strdup(
+      "Refusing to use options struct that is larger than my own; assuming "
+      "LLVM library mismatch.");
+    return 1;
+  }
+
+  // A smaller caller-side struct comes from an older API: start from the
+  // defaults and overlay only the bytes the caller supplied, so that any
+  // field the caller does not know about keeps its default value.
+  LLVMInitializeMCJITCompilerOptions(&Opts, sizeof(Opts));
+  memcpy(&Opts, PassedOptions, SizeOfPassedOptions);
+
+  TargetOptions TargetOpts;
+  TargetOpts.NoFramePointerElim = Opts.NoFramePointerElim;
+  TargetOpts.EnableFastISel = Opts.EnableFastISel;
+
+  std::string ErrMsg;
+  EngineBuilder Builder(unwrap(M));
+  Builder.setEngineKind(EngineKind::JIT);
+  Builder.setErrorStr(&ErrMsg);
+  Builder.setUseMCJIT(true);
+  Builder.setOptLevel((CodeGenOpt::Level)Opts.OptLevel);
+  Builder.setCodeModel(unwrap(Opts.CodeModel));
+  Builder.setTargetOptions(TargetOpts);
+  ExecutionEngine *Engine = Builder.create();
+  if (!Engine) {
+    *OutError = strdup(ErrMsg.c_str());
+    return 1;
+  }
+  *OutJIT = wrap(Engine);
+  return 0;
+}
+
 LLVMBool LLVMCreateExecutionEngine(LLVMExecutionEngineRef *OutEE,
                                    LLVMModuleProviderRef MP,
                                    char **OutError) {
@@ -176,6 +251,8 @@ void LLVMRunStaticDestructors(LLVMExecutionEngineRef EE) {
 int LLVMRunFunctionAsMain(LLVMExecutionEngineRef EE, LLVMValueRef F,
                           unsigned ArgC, const char * const *ArgV,
                           const char * const *EnvP) {
+  unwrap(EE)->finalizeObject();
+  
   std::vector<std::string> ArgVec;
   for (unsigned I = 0; I != ArgC; ++I)
     ArgVec.push_back(ArgV[I]);
@@ -186,6 +263,8 @@ int LLVMRunFunctionAsMain(LLVMExecutionEngineRef EE, LLVMValueRef F,
 LLVMGenericValueRef LLVMRunFunction(LLVMExecutionEngineRef EE, LLVMValueRef F,
                                     unsigned NumArgs,
                                     LLVMGenericValueRef *Args) {
+  unwrap(EE)->finalizeObject();
+  
   std::vector<GenericValue> ArgVec;
   ArgVec.reserve(NumArgs);
   for (unsigned I = 0; I != NumArgs; ++I)
@@ -234,7 +313,8 @@ LLVMBool LLVMFindFunction(LLVMExecutionEngineRef EE, const char *Name,
   return 1;
 }
 
-void *LLVMRecompileAndRelinkFunction(LLVMExecutionEngineRef EE, LLVMValueRef Fn) {
+void *LLVMRecompileAndRelinkFunction(LLVMExecutionEngineRef EE,
+                                     LLVMValueRef Fn) {
   return unwrap(EE)->recompileAndRelinkFunction(unwrap<Function>(Fn));
 }
 
@@ -248,5 +328,7 @@ void LLVMAddGlobalMapping(LLVMExecutionEngineRef EE, LLVMValueRef Global,
 }
 
 void *LLVMGetPointerToGlobal(LLVMExecutionEngineRef EE, LLVMValueRef Global) {
+  unwrap(EE)->finalizeObject();
+  
   return unwrap(EE)->getPointerToGlobal(unwrap<GlobalValue>(Global));
 }
diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp
index 526c04e..b95a9e8 100644
--- a/lib/ExecutionEngine/Interpreter/Execution.cpp
+++ b/lib/ExecutionEngine/Interpreter/Execution.cpp
@@ -114,6 +114,15 @@ static void executeFRemInst(GenericValue &Dest, GenericValue Src1,
       Dest.IntVal = APInt(1,Src1.IntVal.OP(Src2.IntVal)); \
       break;
 
+#define IMPLEMENT_VECTOR_INTEGER_ICMP(OP, TY)                        \
+  case Type::VectorTyID: {                                           \
+    assert(Src1.AggregateVal.size() == Src2.AggregateVal.size());    \
+    Dest.AggregateVal.resize( Src1.AggregateVal.size() );            \
+    for( uint32_t _i=0;_i<Src1.AggregateVal.size();_i++)             \
+      Dest.AggregateVal[_i].IntVal = APInt(1,                        \
+      Src1.AggregateVal[_i].IntVal.OP(Src2.AggregateVal[_i].IntVal));\
+  } break;
+
 // Handle pointers specially because they must be compared with only as much
 // width as the host has.  We _do not_ want to be comparing 64 bit values when
 // running on a 32-bit target, otherwise the upper 32 bits might mess up
@@ -129,6 +138,7 @@ static GenericValue executeICMP_EQ(GenericValue Src1, GenericValue Src2,
   GenericValue Dest;
   switch (Ty->getTypeID()) {
     IMPLEMENT_INTEGER_ICMP(eq,Ty);
+    IMPLEMENT_VECTOR_INTEGER_ICMP(eq,Ty);
     IMPLEMENT_POINTER_ICMP(==);
   default:
     dbgs() << "Unhandled type for ICMP_EQ predicate: " << *Ty << "\n";
@@ -142,6 +152,7 @@ static GenericValue executeICMP_NE(GenericValue Src1, GenericValue Src2,
   GenericValue Dest;
   switch (Ty->getTypeID()) {
     IMPLEMENT_INTEGER_ICMP(ne,Ty);
+    IMPLEMENT_VECTOR_INTEGER_ICMP(ne,Ty);
     IMPLEMENT_POINTER_ICMP(!=);
   default:
     dbgs() << "Unhandled type for ICMP_NE predicate: " << *Ty << "\n";
@@ -155,6 +166,7 @@ static GenericValue executeICMP_ULT(GenericValue Src1, GenericValue Src2,
   GenericValue Dest;
   switch (Ty->getTypeID()) {
     IMPLEMENT_INTEGER_ICMP(ult,Ty);
+    IMPLEMENT_VECTOR_INTEGER_ICMP(ult,Ty);
     IMPLEMENT_POINTER_ICMP(<);
   default:
     dbgs() << "Unhandled type for ICMP_ULT predicate: " << *Ty << "\n";
@@ -168,6 +180,7 @@ static GenericValue executeICMP_SLT(GenericValue Src1, GenericValue Src2,
   GenericValue Dest;
   switch (Ty->getTypeID()) {
     IMPLEMENT_INTEGER_ICMP(slt,Ty);
+    IMPLEMENT_VECTOR_INTEGER_ICMP(slt,Ty);
     IMPLEMENT_POINTER_ICMP(<);
   default:
     dbgs() << "Unhandled type for ICMP_SLT predicate: " << *Ty << "\n";
@@ -181,6 +194,7 @@ static GenericValue executeICMP_UGT(GenericValue Src1, GenericValue Src2,
   GenericValue Dest;
   switch (Ty->getTypeID()) {
     IMPLEMENT_INTEGER_ICMP(ugt,Ty);
+    IMPLEMENT_VECTOR_INTEGER_ICMP(ugt,Ty);
     IMPLEMENT_POINTER_ICMP(>);
   default:
     dbgs() << "Unhandled type for ICMP_UGT predicate: " << *Ty << "\n";
@@ -194,6 +208,7 @@ static GenericValue executeICMP_SGT(GenericValue Src1, GenericValue Src2,
   GenericValue Dest;
   switch (Ty->getTypeID()) {
     IMPLEMENT_INTEGER_ICMP(sgt,Ty);
+    IMPLEMENT_VECTOR_INTEGER_ICMP(sgt,Ty);
     IMPLEMENT_POINTER_ICMP(>);
   default:
     dbgs() << "Unhandled type for ICMP_SGT predicate: " << *Ty << "\n";
@@ -207,6 +222,7 @@ static GenericValue executeICMP_ULE(GenericValue Src1, GenericValue Src2,
   GenericValue Dest;
   switch (Ty->getTypeID()) {
     IMPLEMENT_INTEGER_ICMP(ule,Ty);
+    IMPLEMENT_VECTOR_INTEGER_ICMP(ule,Ty);
     IMPLEMENT_POINTER_ICMP(<=);
   default:
     dbgs() << "Unhandled type for ICMP_ULE predicate: " << *Ty << "\n";
@@ -220,6 +236,7 @@ static GenericValue executeICMP_SLE(GenericValue Src1, GenericValue Src2,
   GenericValue Dest;
   switch (Ty->getTypeID()) {
     IMPLEMENT_INTEGER_ICMP(sle,Ty);
+    IMPLEMENT_VECTOR_INTEGER_ICMP(sle,Ty);
     IMPLEMENT_POINTER_ICMP(<=);
   default:
     dbgs() << "Unhandled type for ICMP_SLE predicate: " << *Ty << "\n";
@@ -233,6 +250,7 @@ static GenericValue executeICMP_UGE(GenericValue Src1, GenericValue Src2,
   GenericValue Dest;
   switch (Ty->getTypeID()) {
     IMPLEMENT_INTEGER_ICMP(uge,Ty);
+    IMPLEMENT_VECTOR_INTEGER_ICMP(uge,Ty);
     IMPLEMENT_POINTER_ICMP(>=);
   default:
     dbgs() << "Unhandled type for ICMP_UGE predicate: " << *Ty << "\n";
@@ -246,6 +264,7 @@ static GenericValue executeICMP_SGE(GenericValue Src1, GenericValue Src2,
   GenericValue Dest;
   switch (Ty->getTypeID()) {
     IMPLEMENT_INTEGER_ICMP(sge,Ty);
+    IMPLEMENT_VECTOR_INTEGER_ICMP(sge,Ty);
     IMPLEMENT_POINTER_ICMP(>=);
   default:
     dbgs() << "Unhandled type for ICMP_SGE predicate: " << *Ty << "\n";
@@ -285,12 +304,29 @@ void Interpreter::visitICmpInst(ICmpInst &I) {
      Dest.IntVal = APInt(1,Src1.TY##Val OP Src2.TY##Val); \
      break
 
+#define IMPLEMENT_VECTOR_FCMP_T(OP, TY)                             \
+  assert(Src1.AggregateVal.size() == Src2.AggregateVal.size());     \
+  Dest.AggregateVal.resize( Src1.AggregateVal.size() );             \
+  for( uint32_t _i=0;_i<Src1.AggregateVal.size();_i++)              \
+    Dest.AggregateVal[_i].IntVal = APInt(1,                         \
+    Src1.AggregateVal[_i].TY##Val OP Src2.AggregateVal[_i].TY##Val);\
+  break;
+
+#define IMPLEMENT_VECTOR_FCMP(OP)                                   \
+  case Type::VectorTyID: /* float lanes, else assumed double */     \
+    if (cast<VectorType>(Ty)->getElementType()->isFloatTy()) {      \
+      IMPLEMENT_VECTOR_FCMP_T(OP, Float);                           \
+    } else {                                                        \
+      IMPLEMENT_VECTOR_FCMP_T(OP, Double);                          \
+    }
+
 static GenericValue executeFCMP_OEQ(GenericValue Src1, GenericValue Src2,
                                    Type *Ty) {
   GenericValue Dest;
   switch (Ty->getTypeID()) {
     IMPLEMENT_FCMP(==, Float);
     IMPLEMENT_FCMP(==, Double);
+    IMPLEMENT_VECTOR_FCMP(==);
   default:
     dbgs() << "Unhandled type for FCmp EQ instruction: " << *Ty << "\n";
     llvm_unreachable(0);
@@ -298,17 +334,65 @@ static GenericValue executeFCMP_OEQ(GenericValue Src1, GenericValue Src2,
   return Dest;
 }
 
+#define IMPLEMENT_SCALAR_NANS(TY, X,Y)                                      \
+  if (TY->isFloatTy()) {                                                    \
+    if (X.FloatVal != X.FloatVal || Y.FloatVal != Y.FloatVal) {             \
+      Dest.IntVal = APInt(1,false);                                         \
+      return Dest;                                                          \
+    }                                                                       \
+  } else if (TY->isDoubleTy()) { /* scalar double only, never vectors */    \
+    if (X.DoubleVal != X.DoubleVal || Y.DoubleVal != Y.DoubleVal) {         \
+      Dest.IntVal = APInt(1,false);                                         \
+      return Dest;                                                          \
+    }                                                                       \
+  }
+
+#define MASK_VECTOR_NANS_T(X,Y, TZ, FLAG)                                   \
+  assert(X.AggregateVal.size() == Y.AggregateVal.size());                   \
+  Dest.AggregateVal.resize( X.AggregateVal.size() );                        \
+  for( uint32_t _i=0;_i<X.AggregateVal.size();_i++) {                       \
+    if (X.AggregateVal[_i].TZ##Val != X.AggregateVal[_i].TZ##Val ||         \
+        Y.AggregateVal[_i].TZ##Val != Y.AggregateVal[_i].TZ##Val)           \
+      Dest.AggregateVal[_i].IntVal = APInt(1,FLAG);                         \
+    else  {                                                                 \
+      Dest.AggregateVal[_i].IntVal = APInt(1,!FLAG);                        \
+    }                                                                       \
+  }
+
+#define MASK_VECTOR_NANS(TY, X,Y, FLAG)                                     \
+  if (TY->isVectorTy()) { /* scalars are left untouched */                  \
+    if (cast<VectorType>(TY)->getElementType()->isFloatTy()) {              \
+      MASK_VECTOR_NANS_T(X, Y, Float, FLAG)                                 \
+    } else {                                                                \
+      MASK_VECTOR_NANS_T(X, Y, Double, FLAG)                                \
+    }                                                                       \
+  }
+
+
+
 static GenericValue executeFCMP_ONE(GenericValue Src1, GenericValue Src2,
-                                   Type *Ty) {
+                                    Type *Ty)
+{
   GenericValue Dest;
+  // if input is scalar value and Src1 or Src2 is NaN return false
+  IMPLEMENT_SCALAR_NANS(Ty, Src1, Src2)
+  // if vector input detect NaNs and fill mask
+  MASK_VECTOR_NANS(Ty, Src1, Src2, false)
+  GenericValue DestMask = Dest;
   switch (Ty->getTypeID()) {
     IMPLEMENT_FCMP(!=, Float);
     IMPLEMENT_FCMP(!=, Double);
-
-  default:
-    dbgs() << "Unhandled type for FCmp NE instruction: " << *Ty << "\n";
-    llvm_unreachable(0);
+    IMPLEMENT_VECTOR_FCMP(!=);
+    default:
+      dbgs() << "Unhandled type for FCmp NE instruction: " << *Ty << "\n";
+      llvm_unreachable(0);
   }
+  // in vector case mask out NaN elements
+  if (Ty->isVectorTy())
+    for( size_t _i=0; _i<Src1.AggregateVal.size(); _i++)
+      if (DestMask.AggregateVal[_i].IntVal == false)
+        Dest.AggregateVal[_i].IntVal = APInt(1,false);
+
   return Dest;
 }
 
@@ -318,6 +402,7 @@ static GenericValue executeFCMP_OLE(GenericValue Src1, GenericValue Src2,
   switch (Ty->getTypeID()) {
     IMPLEMENT_FCMP(<=, Float);
     IMPLEMENT_FCMP(<=, Double);
+    IMPLEMENT_VECTOR_FCMP(<=);
   default:
     dbgs() << "Unhandled type for FCmp LE instruction: " << *Ty << "\n";
     llvm_unreachable(0);
@@ -331,6 +416,7 @@ static GenericValue executeFCMP_OGE(GenericValue Src1, GenericValue Src2,
   switch (Ty->getTypeID()) {
     IMPLEMENT_FCMP(>=, Float);
     IMPLEMENT_FCMP(>=, Double);
+    IMPLEMENT_VECTOR_FCMP(>=);
   default:
     dbgs() << "Unhandled type for FCmp GE instruction: " << *Ty << "\n";
     llvm_unreachable(0);
@@ -344,6 +430,7 @@ static GenericValue executeFCMP_OLT(GenericValue Src1, GenericValue Src2,
   switch (Ty->getTypeID()) {
     IMPLEMENT_FCMP(<, Float);
     IMPLEMENT_FCMP(<, Double);
+    IMPLEMENT_VECTOR_FCMP(<);
   default:
     dbgs() << "Unhandled type for FCmp LT instruction: " << *Ty << "\n";
     llvm_unreachable(0);
@@ -357,6 +444,7 @@ static GenericValue executeFCMP_OGT(GenericValue Src1, GenericValue Src2,
   switch (Ty->getTypeID()) {
     IMPLEMENT_FCMP(>, Float);
     IMPLEMENT_FCMP(>, Double);
+    IMPLEMENT_VECTOR_FCMP(>);
   default:
     dbgs() << "Unhandled type for FCmp GT instruction: " << *Ty << "\n";
     llvm_unreachable(0);
@@ -375,18 +463,32 @@ static GenericValue executeFCMP_OGT(GenericValue Src1, GenericValue Src2,
     return Dest;                                                         \
   }
 
+#define IMPLEMENT_VECTOR_UNORDERED(TY, X,Y, FUNC)                        \
+  if (TY->isVectorTy()) { /* Dest holds the NaN mask on entry */         \
+    GenericValue DestMask = Dest;                                        \
+    Dest = FUNC(X, Y, TY);                                               \
+    for (size_t _i = 0; _i < X.AggregateVal.size(); _i++)                \
+      if (DestMask.AggregateVal[_i].IntVal == true)                      \
+        Dest.AggregateVal[_i].IntVal = APInt(1,true);                    \
+    return Dest;                                                         \
+  }
 
 static GenericValue executeFCMP_UEQ(GenericValue Src1, GenericValue Src2,
                                    Type *Ty) {
   GenericValue Dest;
   IMPLEMENT_UNORDERED(Ty, Src1, Src2)
+  MASK_VECTOR_NANS(Ty, Src1, Src2, true)
+  IMPLEMENT_VECTOR_UNORDERED(Ty, Src1, Src2, executeFCMP_OEQ)
   return executeFCMP_OEQ(Src1, Src2, Ty);
+
 }
 
 static GenericValue executeFCMP_UNE(GenericValue Src1, GenericValue Src2,
                                    Type *Ty) {
   GenericValue Dest;
   IMPLEMENT_UNORDERED(Ty, Src1, Src2)
+  MASK_VECTOR_NANS(Ty, Src1, Src2, true)
+  IMPLEMENT_VECTOR_UNORDERED(Ty, Src1, Src2, executeFCMP_ONE)
   return executeFCMP_ONE(Src1, Src2, Ty);
 }
 
@@ -394,6 +496,8 @@ static GenericValue executeFCMP_ULE(GenericValue Src1, GenericValue Src2,
                                    Type *Ty) {
   GenericValue Dest;
   IMPLEMENT_UNORDERED(Ty, Src1, Src2)
+  MASK_VECTOR_NANS(Ty, Src1, Src2, true)
+  IMPLEMENT_VECTOR_UNORDERED(Ty, Src1, Src2, executeFCMP_OLE)
   return executeFCMP_OLE(Src1, Src2, Ty);
 }
 
@@ -401,6 +505,8 @@ static GenericValue executeFCMP_UGE(GenericValue Src1, GenericValue Src2,
                                    Type *Ty) {
   GenericValue Dest;
   IMPLEMENT_UNORDERED(Ty, Src1, Src2)
+  MASK_VECTOR_NANS(Ty, Src1, Src2, true)
+  IMPLEMENT_VECTOR_UNORDERED(Ty, Src1, Src2, executeFCMP_OGE)
   return executeFCMP_OGE(Src1, Src2, Ty);
 }
 
@@ -408,6 +514,8 @@ static GenericValue executeFCMP_ULT(GenericValue Src1, GenericValue Src2,
                                    Type *Ty) {
   GenericValue Dest;
   IMPLEMENT_UNORDERED(Ty, Src1, Src2)
+  MASK_VECTOR_NANS(Ty, Src1, Src2, true)
+  IMPLEMENT_VECTOR_UNORDERED(Ty, Src1, Src2, executeFCMP_OLT)
   return executeFCMP_OLT(Src1, Src2, Ty);
 }
 
@@ -415,33 +523,88 @@ static GenericValue executeFCMP_UGT(GenericValue Src1, GenericValue Src2,
                                      Type *Ty) {
   GenericValue Dest;
   IMPLEMENT_UNORDERED(Ty, Src1, Src2)
+  MASK_VECTOR_NANS(Ty, Src1, Src2, true)
+  IMPLEMENT_VECTOR_UNORDERED(Ty, Src1, Src2, executeFCMP_OGT)
   return executeFCMP_OGT(Src1, Src2, Ty);
 }
 
 static GenericValue executeFCMP_ORD(GenericValue Src1, GenericValue Src2,
                                      Type *Ty) {
   GenericValue Dest;
-  if (Ty->isFloatTy())
+  if (Ty->isVectorTy()) {
+    // A lane is ordered iff neither operand is NaN; x == x fails only for NaN.
+    assert(Src1.AggregateVal.size() == Src2.AggregateVal.size());
+    const size_t NumElts = Src1.AggregateVal.size();
+    Dest.AggregateVal.resize(NumElts);
+    if (cast<VectorType>(Ty)->getElementType()->isFloatTy()) {
+      for (size_t i = 0; i != NumElts; ++i) {
+        const float A = Src1.AggregateVal[i].FloatVal;
+        const float B = Src2.AggregateVal[i].FloatVal;
+        Dest.AggregateVal[i].IntVal = APInt(1, A == A && B == B);
+      }
+    } else {
+      for (size_t i = 0; i != NumElts; ++i) {
+        const double A = Src1.AggregateVal[i].DoubleVal;
+        const double B = Src2.AggregateVal[i].DoubleVal;
+        Dest.AggregateVal[i].IntVal = APInt(1, A == A && B == B);
+      }
+    }
+  } else if (Ty->isFloatTy())
     Dest.IntVal = APInt(1,(Src1.FloatVal == Src1.FloatVal && 
                            Src2.FloatVal == Src2.FloatVal));
-  else
+  else {
     Dest.IntVal = APInt(1,(Src1.DoubleVal == Src1.DoubleVal && 
                            Src2.DoubleVal == Src2.DoubleVal));
+  }
   return Dest;
 }
 
 static GenericValue executeFCMP_UNO(GenericValue Src1, GenericValue Src2,
                                      Type *Ty) {
   GenericValue Dest;
-  if (Ty->isFloatTy())
+  if (Ty->isVectorTy()) {
+    // A lane is unordered iff an operand is NaN; x != x holds only for NaN.
+    assert(Src1.AggregateVal.size() == Src2.AggregateVal.size());
+    const size_t NumElts = Src1.AggregateVal.size();
+    Dest.AggregateVal.resize(NumElts);
+    if (cast<VectorType>(Ty)->getElementType()->isFloatTy()) {
+      for (size_t i = 0; i != NumElts; ++i) {
+        const float A = Src1.AggregateVal[i].FloatVal;
+        const float B = Src2.AggregateVal[i].FloatVal;
+        Dest.AggregateVal[i].IntVal = APInt(1, A != A || B != B);
+      }
+    } else {
+      for (size_t i = 0; i != NumElts; ++i) {
+        const double A = Src1.AggregateVal[i].DoubleVal;
+        const double B = Src2.AggregateVal[i].DoubleVal;
+        Dest.AggregateVal[i].IntVal = APInt(1, A != A || B != B);
+      }
+    }
+  } else if (Ty->isFloatTy())
     Dest.IntVal = APInt(1,(Src1.FloatVal != Src1.FloatVal || 
                            Src2.FloatVal != Src2.FloatVal));
-  else
+  else {
     Dest.IntVal = APInt(1,(Src1.DoubleVal != Src1.DoubleVal || 
                            Src2.DoubleVal != Src2.DoubleVal));
+  }
   return Dest;
 }
 
+static GenericValue executeFCMP_BOOL(GenericValue Src1, GenericValue Src2,
+                                    const Type *Ty, const bool val) {
+  // The predicate is constant: yield 'val' (per lane for vector types).
+  GenericValue Dest;
+  if (!Ty->isVectorTy()) {
+    Dest.IntVal = APInt(1, val);
+    return Dest;
+  }
+  assert(Src1.AggregateVal.size() == Src2.AggregateVal.size());
+  Dest.AggregateVal.resize(Src1.AggregateVal.size());
+  for (size_t Idx = 0, E = Dest.AggregateVal.size(); Idx != E; ++Idx)
+    Dest.AggregateVal[Idx].IntVal = APInt(1, val);
+  return Dest;
+}
+
 void Interpreter::visitFCmpInst(FCmpInst &I) {
   ExecutionContext &SF = ECStack.back();
   Type *Ty    = I.getOperand(0)->getType();
@@ -450,8 +613,14 @@ void Interpreter::visitFCmpInst(FCmpInst &I) {
   GenericValue R;   // Result
   
   switch (I.getPredicate()) {
-  case FCmpInst::FCMP_FALSE: R.IntVal = APInt(1,false); break;
-  case FCmpInst::FCMP_TRUE:  R.IntVal = APInt(1,true); break;
+  default:
+    dbgs() << "Don't know how to handle this FCmp predicate!\n-->" << I;
+    llvm_unreachable(0);
+  break;
+  case FCmpInst::FCMP_FALSE: R = executeFCMP_BOOL(Src1, Src2, Ty, false); 
+  break;
+  case FCmpInst::FCMP_TRUE:  R = executeFCMP_BOOL(Src1, Src2, Ty, true); 
+  break;
   case FCmpInst::FCMP_ORD:   R = executeFCMP_ORD(Src1, Src2, Ty); break;
   case FCmpInst::FCMP_UNO:   R = executeFCMP_UNO(Src1, Src2, Ty); break;
   case FCmpInst::FCMP_UEQ:   R = executeFCMP_UEQ(Src1, Src2, Ty); break;
@@ -466,9 +635,6 @@ void Interpreter::visitFCmpInst(FCmpInst &I) {
   case FCmpInst::FCMP_OLE:   R = executeFCMP_OLE(Src1, Src2, Ty); break;
   case FCmpInst::FCMP_UGE:   R = executeFCMP_UGE(Src1, Src2, Ty); break;
   case FCmpInst::FCMP_OGE:   R = executeFCMP_OGE(Src1, Src2, Ty); break;
-  default:
-    dbgs() << "Don't know how to handle this FCmp predicate!\n-->" << I;
-    llvm_unreachable(0);
   }
  
   SetValue(&I, R, SF);
@@ -502,16 +668,8 @@ static GenericValue executeCmpInst(unsigned predicate, GenericValue Src1,
   case FCmpInst::FCMP_ULE:   return executeFCMP_ULE(Src1, Src2, Ty);
   case FCmpInst::FCMP_OGE:   return executeFCMP_OGE(Src1, Src2, Ty);
   case FCmpInst::FCMP_UGE:   return executeFCMP_UGE(Src1, Src2, Ty);
-  case FCmpInst::FCMP_FALSE: { 
-    GenericValue Result;
-    Result.IntVal = APInt(1, false);
-    return Result;
-  }
-  case FCmpInst::FCMP_TRUE: {
-    GenericValue Result;
-    Result.IntVal = APInt(1, true);
-    return Result;
-  }
+  case FCmpInst::FCMP_FALSE: return executeFCMP_BOOL(Src1, Src2, Ty, false);
+  case FCmpInst::FCMP_TRUE:  return executeFCMP_BOOL(Src1, Src2, Ty, true);
   default:
     dbgs() << "Unhandled Cmp predicate\n";
     llvm_unreachable(0);
@@ -525,27 +683,105 @@ void Interpreter::visitBinaryOperator(BinaryOperator &I) {
   GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
   GenericValue R;   // Result
 
-  switch (I.getOpcode()) {
-  case Instruction::Add:   R.IntVal = Src1.IntVal + Src2.IntVal; break;
-  case Instruction::Sub:   R.IntVal = Src1.IntVal - Src2.IntVal; break;
-  case Instruction::Mul:   R.IntVal = Src1.IntVal * Src2.IntVal; break;
-  case Instruction::FAdd:  executeFAddInst(R, Src1, Src2, Ty); break;
-  case Instruction::FSub:  executeFSubInst(R, Src1, Src2, Ty); break;
-  case Instruction::FMul:  executeFMulInst(R, Src1, Src2, Ty); break;
-  case Instruction::FDiv:  executeFDivInst(R, Src1, Src2, Ty); break;
-  case Instruction::FRem:  executeFRemInst(R, Src1, Src2, Ty); break;
-  case Instruction::UDiv:  R.IntVal = Src1.IntVal.udiv(Src2.IntVal); break;
-  case Instruction::SDiv:  R.IntVal = Src1.IntVal.sdiv(Src2.IntVal); break;
-  case Instruction::URem:  R.IntVal = Src1.IntVal.urem(Src2.IntVal); break;
-  case Instruction::SRem:  R.IntVal = Src1.IntVal.srem(Src2.IntVal); break;
-  case Instruction::And:   R.IntVal = Src1.IntVal & Src2.IntVal; break;
-  case Instruction::Or:    R.IntVal = Src1.IntVal | Src2.IntVal; break;
-  case Instruction::Xor:   R.IntVal = Src1.IntVal ^ Src2.IntVal; break;
-  default:
-    dbgs() << "Don't know how to handle this binary operator!\n-->" << I;
-    llvm_unreachable(0);
+  // First process vector operation
+  if (Ty->isVectorTy()) {
+    assert(Src1.AggregateVal.size() == Src2.AggregateVal.size());
+    R.AggregateVal.resize(Src1.AggregateVal.size());
+
+    // Macro to execute the infix binary operator 'OP' over integer vectors
+#define INTEGER_VECTOR_OPERATION(OP)                               \
+    for (unsigned i = 0; i < R.AggregateVal.size(); ++i)           \
+      R.AggregateVal[i].IntVal =                                   \
+      Src1.AggregateVal[i].IntVal OP Src2.AggregateVal[i].IntVal;
+
+    // Additional macro for udiv/sdiv/urem/srem, which are invoked as member
+    // functions (a.OP(b)) rather than written as infix operators.
+#define INTEGER_VECTOR_FUNCTION(OP)                                \
+    for (unsigned i = 0; i < R.AggregateVal.size(); ++i)           \
+      R.AggregateVal[i].IntVal =                                   \
+      Src1.AggregateVal[i].IntVal.OP(Src2.AggregateVal[i].IntVal);
+
+    // Macro to execute the infix binary operator 'OP' over floating point
+    // vectors; TY names the per-lane GenericValue field (FloatVal/DoubleVal).
+#define FLOAT_VECTOR_FUNCTION(OP, TY)                               \
+      for (unsigned i = 0; i < R.AggregateVal.size(); ++i)          \
+        R.AggregateVal[i].TY =                                      \
+        Src1.AggregateVal[i].TY OP Src2.AggregateVal[i].TY;
+
+    // Macros to choose appropriate TY: float or double and run operation
+    // execution
+#define FLOAT_VECTOR_OP(OP) {                                              \
+  if (cast<VectorType>(Ty)->getElementType()->isFloatTy())                 \
+    FLOAT_VECTOR_FUNCTION(OP, FloatVal)                                    \
+  else {                                                                   \
+    if (cast<VectorType>(Ty)->getElementType()->isDoubleTy())              \
+      FLOAT_VECTOR_FUNCTION(OP, DoubleVal)                                 \
+    else {                                                                 \
+      dbgs() << "Unhandled type for " #OP " instruction: " << *Ty << "\n"; \
+      llvm_unreachable(0);                                                 \
+    }                                                                      \
+  }                                                                        \
+}
+
+    switch(I.getOpcode()){
+    default:
+      dbgs() << "Don't know how to handle this binary operator!\n-->" << I;
+      llvm_unreachable(0);
+      break;
+    case Instruction::Add:   INTEGER_VECTOR_OPERATION(+) break;
+    case Instruction::Sub:   INTEGER_VECTOR_OPERATION(-) break;
+    case Instruction::Mul:   INTEGER_VECTOR_OPERATION(*) break;
+    case Instruction::UDiv:  INTEGER_VECTOR_FUNCTION(udiv) break;
+    case Instruction::SDiv:  INTEGER_VECTOR_FUNCTION(sdiv) break;
+    case Instruction::URem:  INTEGER_VECTOR_FUNCTION(urem) break;
+    case Instruction::SRem:  INTEGER_VECTOR_FUNCTION(srem) break;
+    case Instruction::And:   INTEGER_VECTOR_OPERATION(&) break;
+    case Instruction::Or:    INTEGER_VECTOR_OPERATION(|) break;
+    case Instruction::Xor:   INTEGER_VECTOR_OPERATION(^) break;
+    case Instruction::FAdd:  FLOAT_VECTOR_OP(+) break;
+    case Instruction::FSub:  FLOAT_VECTOR_OP(-) break;
+    case Instruction::FMul:  FLOAT_VECTOR_OP(*) break;
+    case Instruction::FDiv:  FLOAT_VECTOR_OP(/) break;
+    case Instruction::FRem: {
+      Type *EltTy = cast<VectorType>(Ty)->getElementType();
+      if (EltTy->isFloatTy())
+        for (unsigned i = 0; i < R.AggregateVal.size(); ++i)
+          R.AggregateVal[i].FloatVal = fmod(Src1.AggregateVal[i].FloatVal,
+                                            Src2.AggregateVal[i].FloatVal);
+      else if (EltTy->isDoubleTy())
+        for (unsigned i = 0; i < R.AggregateVal.size(); ++i)
+          R.AggregateVal[i].DoubleVal = fmod(Src1.AggregateVal[i].DoubleVal,
+                                             Src2.AggregateVal[i].DoubleVal);
+      else {
+        dbgs() << "Unhandled type for Rem instruction: " << *Ty << "\n";
+        llvm_unreachable(0);
+      }
+      break;
+    }
+    }
+  } else {
+    switch (I.getOpcode()) {
+    default:
+      dbgs() << "Don't know how to handle this binary operator!\n-->" << I;
+      llvm_unreachable(0);
+      break;
+    case Instruction::Add:   R.IntVal = Src1.IntVal + Src2.IntVal; break;
+    case Instruction::Sub:   R.IntVal = Src1.IntVal - Src2.IntVal; break;
+    case Instruction::Mul:   R.IntVal = Src1.IntVal * Src2.IntVal; break;
+    case Instruction::FAdd:  executeFAddInst(R, Src1, Src2, Ty); break;
+    case Instruction::FSub:  executeFSubInst(R, Src1, Src2, Ty); break;
+    case Instruction::FMul:  executeFMulInst(R, Src1, Src2, Ty); break;
+    case Instruction::FDiv:  executeFDivInst(R, Src1, Src2, Ty); break;
+    case Instruction::FRem:  executeFRemInst(R, Src1, Src2, Ty); break;
+    case Instruction::UDiv:  R.IntVal = Src1.IntVal.udiv(Src2.IntVal); break;
+    case Instruction::SDiv:  R.IntVal = Src1.IntVal.sdiv(Src2.IntVal); break;
+    case Instruction::URem:  R.IntVal = Src1.IntVal.urem(Src2.IntVal); break;
+    case Instruction::SRem:  R.IntVal = Src1.IntVal.srem(Src2.IntVal); break;
+    case Instruction::And:   R.IntVal = Src1.IntVal & Src2.IntVal; break;
+    case Instruction::Or:    R.IntVal = Src1.IntVal | Src2.IntVal; break;
+    case Instruction::Xor:   R.IntVal = Src1.IntVal ^ Src2.IntVal; break;
+    }
   }
-
   SetValue(&I, R, SF);
 }
 
diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp
index fee10e1..38aa547 100644
--- a/lib/ExecutionEngine/MCJIT/MCJIT.cpp
+++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp
@@ -14,6 +14,7 @@
 #include "llvm/ExecutionEngine/MCJIT.h"
 #include "llvm/ExecutionEngine/ObjectBuffer.h"
 #include "llvm/ExecutionEngine/ObjectImage.h"
+#include "llvm/ExecutionEngine/SectionMemoryManager.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
@@ -46,13 +47,14 @@ ExecutionEngine *MCJIT::createJIT(Module *M,
   // FIXME: Don't do this here.
   sys::DynamicLibrary::LoadLibraryPermanently(0, NULL);
 
-  return new MCJIT(M, TM, JMM, GVsWithCode);
+  return new MCJIT(M, TM, JMM ? JMM : new SectionMemoryManager(), GVsWithCode);
 }
 
 MCJIT::MCJIT(Module *m, TargetMachine *tm, RTDyldMemoryManager *MM,
              bool AllocateGVsWithCode)
-  : ExecutionEngine(m), TM(tm), Ctx(0), MemMgr(MM), Dyld(MM),
-    isCompiled(false), M(m)  {
+  : ExecutionEngine(m), TM(tm), Ctx(0),
+    MemMgr(MM ? MM : new SectionMemoryManager()), Dyld(MemMgr),
+    IsLoaded(false), M(m), ObjCache(0)  {
 
   setDataLayout(TM->getDataLayout());
 }
@@ -64,7 +66,11 @@ MCJIT::~MCJIT() {
   delete TM;
 }
 
-void MCJIT::emitObject(Module *m) {
+void MCJIT::setObjectCache(ObjectCache* NewCache) {
+  ObjCache = NewCache;
+}
+
+ObjectBufferStream* MCJIT::emitObject(Module *m) {
   /// Currently, MCJIT only supports a single module and the module passed to
   /// this function call is expected to be the contained module.  The module
   /// is passed as a parameter here to prepare for multiple module support in
@@ -77,30 +83,66 @@ void MCJIT::emitObject(Module *m) {
   // FIXME: Track compilation state on a per-module basis when multiple modules
   //        are supported.
   // Re-compilation is not supported
-  if (isCompiled)
-    return;
+  assert(!IsLoaded);
 
   PassManager PM;
 
   PM.add(new DataLayout(*TM->getDataLayout()));
 
   // The RuntimeDyld will take ownership of this shortly
-  OwningPtr<ObjectBufferStream> Buffer(new ObjectBufferStream());
+  OwningPtr<ObjectBufferStream> CompiledObject(new ObjectBufferStream());
 
   // Turn the machine code intermediate representation into bytes in memory
   // that may be executed.
-  if (TM->addPassesToEmitMC(PM, Ctx, Buffer->getOStream(), false)) {
+  if (TM->addPassesToEmitMC(PM, Ctx, CompiledObject->getOStream(), false)) {
     report_fatal_error("Target does not support MC emission!");
   }
 
   // Initialize passes.
   PM.run(*m);
   // Flush the output buffer to get the generated code into memory
-  Buffer->flush();
+  CompiledObject->flush();
+
+  // If we have an object cache, tell it about the new object.
+  // Note that we're using the compiled image, not the loaded image (as below).
+  if (ObjCache) {
+    // MemoryBuffer is a thin wrapper around the actual memory, so it's OK
+    // to create a temporary object here and delete it after the call.
+    OwningPtr<MemoryBuffer> MB(CompiledObject->getMemBuffer());
+    ObjCache->notifyObjectCompiled(m, MB.get());
+  }
+
+  return CompiledObject.take();
+}
+
+void MCJIT::loadObject(Module *M) {
+
+  // Get a thread lock to make sure we aren't trying to load multiple times
+  MutexGuard locked(lock);
+
+  // FIXME: Track compilation state on a per-module basis when multiple modules
+  //        are supported.
+  // Re-compilation is not supported
+  if (IsLoaded)
+    return;
+
+  OwningPtr<ObjectBuffer> ObjectToLoad;
+  // Try to load the pre-compiled object from cache if possible
+  if (0 != ObjCache) {
+    OwningPtr<MemoryBuffer> PreCompiledObject(ObjCache->getObjectCopy(M));
+    if (0 != PreCompiledObject.get())
+      ObjectToLoad.reset(new ObjectBuffer(PreCompiledObject.take()));
+  }
+
+  // If the cache did not contain a suitable object, compile the object
+  if (!ObjectToLoad) {
+    ObjectToLoad.reset(emitObject(M));
+    assert(ObjectToLoad.get() && "Compilation did not produce an object.");
+  }
 
   // Load the object into the dynamic linker.
   // handing off ownership of the buffer
-  LoadedObject.reset(Dyld.loadObject(Buffer.take()));
+  LoadedObject.reset(Dyld.loadObject(ObjectToLoad.take()));
   if (!LoadedObject)
     report_fatal_error(Dyld.getErrorString());
 
@@ -113,7 +155,7 @@ void MCJIT::emitObject(Module *m) {
   NotifyObjectEmitted(*LoadedObject);
 
   // FIXME: Add support for per-module compilation state
-  isCompiled = true;
+  IsLoaded = true;
 }
 
 // FIXME: Add a parameter to identify which object is being finalized when
@@ -122,19 +164,18 @@ void MCJIT::emitObject(Module *m) {
 // protection in the interface.
 void MCJIT::finalizeObject() {
   // If the module hasn't been compiled, just do that.
-  if (!isCompiled) {
-    // If the call to Dyld.resolveRelocations() is removed from emitObject()
+  if (!IsLoaded) {
+    // If the call to Dyld.resolveRelocations() is removed from loadObject()
     // we'll need to do that here.
-    emitObject(M);
-
-    // Set page permissions.
-    MemMgr->applyPermissions();
-
-    return;
+    loadObject(M);
+  } else {
+    // Resolve any relocations.
+    Dyld.resolveRelocations();
   }
 
-  // Resolve any relocations.
-  Dyld.resolveRelocations();
+  StringRef EHData = Dyld.getEHFrameSection();
+  if (!EHData.empty())
+    MemMgr->registerEHFrames(EHData);
 
   // Set page permissions.
   MemMgr->applyPermissions();
@@ -151,8 +192,8 @@ void *MCJIT::getPointerToFunction(Function *F) {
   // dies.
 
   // FIXME: Add support for per-module compilation state
-  if (!isCompiled)
-    emitObject(M);
+  if (!IsLoaded)
+    loadObject(M);
 
   if (F->isDeclaration() || F->hasAvailableExternallyLinkage()) {
     bool AbortOnFailure = !F->hasExternalWeakLinkage();
@@ -284,8 +325,8 @@ GenericValue MCJIT::runFunction(Function *F,
 void *MCJIT::getPointerToNamedFunction(const std::string &Name,
                                        bool AbortOnFailure) {
   // FIXME: Add support for per-module compilation state
-  if (!isCompiled)
-    emitObject(M);
+  if (!IsLoaded)
+    loadObject(M);
 
   if (!isSymbolSearchingDisabled() && MemMgr) {
     void *ptr = MemMgr->getPointerToNamedFunction(Name, false);
diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.h b/lib/ExecutionEngine/MCJIT/MCJIT.h
index 283a8e5..8c4bf6e 100644
--- a/lib/ExecutionEngine/MCJIT/MCJIT.h
+++ b/lib/ExecutionEngine/MCJIT/MCJIT.h
@@ -12,6 +12,7 @@
 
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/ExecutionEngine/ObjectCache.h"
 #include "llvm/ExecutionEngine/RuntimeDyld.h"
 #include "llvm/PassManager.h"
 
@@ -34,16 +35,23 @@ class MCJIT : public ExecutionEngine {
   SmallVector<JITEventListener*, 2> EventListeners;
 
   // FIXME: Add support for multiple modules
-  bool isCompiled;
+  bool IsLoaded;
   Module *M;
   OwningPtr<ObjectImage> LoadedObject;
 
+  // An optional ObjectCache to be notified of compiled objects and used to
+  // perform lookup of pre-compiled code to avoid re-compilation.
+  ObjectCache *ObjCache;
+
 public:
   ~MCJIT();
 
   /// @name ExecutionEngine interface implementation
   /// @{
 
+  /// Sets the object cache that MCJIT should use to avoid compilation.
+  virtual void setObjectCache(ObjectCache *manager);
+
   virtual void finalizeObject();
 
   virtual void *getPointerToBasicBlock(BasicBlock *BB);
@@ -102,7 +110,9 @@ protected:
   /// this function call is expected to be the contained module.  The module
   /// is passed as a parameter here to prepare for multiple module support in 
   /// the future.
-  void emitObject(Module *M);
+  ObjectBufferStream* emitObject(Module *M);
+
+  void loadObject(Module *M);
 
   void NotifyObjectEmitted(const ObjectImage& Obj);
   void NotifyFreeingObject(const ObjectImage& Obj);
diff --git a/lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp b/lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp
index fa35acd..bac77ce 100644
--- a/lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp
+++ b/lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp
@@ -138,9 +138,46 @@ bool SectionMemoryManager::applyPermissions(std::string *ErrMsg)
 
   // Read-write data memory already has the correct permissions
 
+  // Some platforms with separate data cache and instruction cache require
+  // explicit cache flush, otherwise JIT code manipulations (like resolved
+  // relocations) will get to the data cache but not to the instruction cache.
+  invalidateInstructionCache();
+
   return false;
 }
 
+// Determine whether we can register EH tables.
+#if (defined(__GNUC__) && !defined(__ARM_EABI__) && \
+     !defined(__USING_SJLJ_EXCEPTIONS__))
+#define HAVE_EHTABLE_SUPPORT 1
+#else
+#define HAVE_EHTABLE_SUPPORT 0
+#endif
+
+#if HAVE_EHTABLE_SUPPORT
+extern "C" void __register_frame(void*);
+
+// Registers Entry if its second 32-bit word is nonzero; returns next entry.
+static const char *processFDE(const char *Entry) {
+  uint32_t Length = *((uint32_t*)Entry);
+  const char *Body = Entry + 4;
+  uint32_t Offset = *((uint32_t*)Body);
+  if (Offset != 0)
+    __register_frame((void*)Entry);
+  return Body + Length;
+}
+#endif
+
+// Registers each EH frame entry in SectionData; empty input is a no-op.
+void SectionMemoryManager::registerEHFrames(StringRef SectionData) {
+#if HAVE_EHTABLE_SUPPORT
+  const char *P = SectionData.data();
+  const char *End = P + SectionData.size();
+  while (P < End)
+    P = processFDE(P);
+#endif
+}
+
 error_code SectionMemoryManager::applyMemoryGroupPermissions(MemoryGroup &MemGroup,
                                                              unsigned Permissions) {
 
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
index 409b25f..a08b508 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -25,10 +25,15 @@ using namespace llvm::object;
 
 // Empty out-of-line virtual destructor as the key function.
 RTDyldMemoryManager::~RTDyldMemoryManager() {}
+void RTDyldMemoryManager::registerEHFrames(StringRef SectionData) {}
 RuntimeDyldImpl::~RuntimeDyldImpl() {}
 
 namespace llvm {
 
+StringRef RuntimeDyldImpl::getEHFrameSection() {
+  return StringRef(); // this implementation reports no EH frame data
+}
+
 // Resolve the relocations for all symbols we currently know about.
 void RuntimeDyldImpl::resolveRelocations() {
   // First, resolve relocations associated with external symbols.
@@ -96,7 +101,8 @@ ObjectImage *RuntimeDyldImpl::loadObject(ObjectBuffer *InputBuffer) {
     bool isCommon = flags & SymbolRef::SF_Common;
     if (isCommon) {
       // Add the common symbols to a list.  We'll allocate them all below.
-      uint64_t Align = getCommonSymbolAlignment(*i);
+      uint32_t Align;
+      Check(i->getAlignment(Align));
       uint64_t Size = 0;
       Check(i->getSize(Size));
       CommonSize += Size + Align;
@@ -154,18 +160,8 @@ ObjectImage *RuntimeDyldImpl::loadObject(ObjectBuffer *InputBuffer) {
         isFirstRelocation = false;
       }
 
-      ObjRelocationInfo RI;
-      RI.SectionID = SectionID;
-      Check(i->getAdditionalInfo(RI.AdditionalInfo));
-      Check(i->getOffset(RI.Offset));
-      Check(i->getSymbol(RI.Symbol));
-      Check(i->getType(RI.Type));
-
-      DEBUG(dbgs() << "\t\tAddend: " << RI.AdditionalInfo
-                   << " Offset: " << format("%p", (uintptr_t)RI.Offset)
-                   << " Type: " << (uint32_t)(RI.Type & 0xffffffffL)
-                   << "\n");
-      processRelocationRef(RI, *obj, LocalSections, LocalSymbols, Stubs);
+      processRelocationRef(SectionID, *i, *obj, LocalSections, LocalSymbols,
+			   Stubs);
     }
   }
 
@@ -183,7 +179,7 @@ void RuntimeDyldImpl::emitCommonSymbols(ObjectImage &Obj,
   if (!Addr)
     report_fatal_error("Unable to allocate memory for common symbols!");
   uint64_t Offset = 0;
-  Sections.push_back(SectionEntry(StringRef(), Addr, TotalSize, TotalSize, 0));
+  Sections.push_back(SectionEntry(StringRef(), Addr, TotalSize, 0));
   memset(Addr, 0, TotalSize);
 
   DEBUG(dbgs() << "emitCommonSection SectionID: " << SectionID
@@ -243,6 +239,12 @@ unsigned RuntimeDyldImpl::emitSection(ObjectImage &Obj,
   Check(Section.isReadOnlyData(IsReadOnly));
   Check(Section.getSize(DataSize));
   Check(Section.getName(Name));
+  if (StubSize > 0) {
+    unsigned StubAlignment = getStubAlignment();
+    unsigned EndAlignment = (DataSize | Alignment) & -(DataSize | Alignment);
+    if (StubAlignment > EndAlignment)
+      StubBufSize += StubAlignment - EndAlignment;
+  }
 
   unsigned Allocate;
   unsigned SectionID = Sections.size();
@@ -295,8 +297,7 @@ unsigned RuntimeDyldImpl::emitSection(ObjectImage &Obj,
                  << "\n");
   }
 
-  Sections.push_back(SectionEntry(Name, Addr, Allocate, DataSize,
-				  (uintptr_t)pData));
+  Sections.push_back(SectionEntry(Name, Addr, DataSize, (uintptr_t)pData));
   return SectionID;
 }
 
@@ -339,7 +340,25 @@ void RuntimeDyldImpl::addRelocationForSymbol(const RelocationEntry &RE,
 }
 
 uint8_t *RuntimeDyldImpl::createStubFunction(uint8_t *Addr) {
-  if (Arch == Triple::arm) {
+  if (Arch == Triple::aarch64) {
+    // This stub has to be able to access the full address space,
+    // since symbol lookup won't necessarily find a handy, in-range,
+    // PLT stub for functions which could be anywhere.
+    uint32_t *StubAddr = (uint32_t*)Addr;
+
+    // Stub can use ip0 (== x16) to calculate address
+    *StubAddr = 0xd2e00010; // movz ip0, #:abs_g3:<addr>
+    StubAddr++;
+    *StubAddr = 0xf2c00010; // movk ip0, #:abs_g2_nc:<addr>
+    StubAddr++;
+    *StubAddr = 0xf2a00010; // movk ip0, #:abs_g1_nc:<addr>
+    StubAddr++;
+    *StubAddr = 0xf2800010; // movk ip0, #:abs_g0_nc:<addr>
+    StubAddr++;
+    *StubAddr = 0xd61f0200; // br ip0
+
+    return Addr;
+  } else if (Arch == Triple::arm) {
     // TODO: There is only ARM far stub now. We should add the Thumb stub,
     // and stubs for branches Thumb - ARM and ARM - Thumb.
     uint32_t *StubAddr = (uint32_t*)Addr;
@@ -380,6 +399,13 @@ uint8_t *RuntimeDyldImpl::createStubFunction(uint8_t *Addr) {
     writeInt32BE(Addr+40, 0x4E800420); // bctr
 
     return Addr;
+  } else if (Arch == Triple::systemz) {
+    writeInt16BE(Addr,    0xC418);     // lgrl %r1,.+8
+    writeInt16BE(Addr+2,  0x0000);
+    writeInt16BE(Addr+4,  0x0004);
+    writeInt16BE(Addr+6,  0x07F1);     // bcr 15,%r1
+    // 8-byte address stored at Addr + 8
+    return Addr;
   }
   return Addr;
 }
@@ -401,26 +427,14 @@ void RuntimeDyldImpl::reassignSectionAddress(unsigned SectionID,
   Sections[SectionID].LoadAddress = Addr;
 }
 
-void RuntimeDyldImpl::resolveRelocationEntry(const RelocationEntry &RE,
-                                             uint64_t Value) {
-  // Ignore relocations for sections that were not loaded
-  if (Sections[RE.SectionID].Address != 0) {
-    DEBUG(dbgs() << "\tSectionID: " << RE.SectionID
-          << " + " << RE.Offset << " ("
-          << format("%p", Sections[RE.SectionID].Address + RE.Offset) << ")"
-          << " RelType: " << RE.RelType
-          << " Addend: " << RE.Addend
-          << "\n");
-
-    resolveRelocation(Sections[RE.SectionID], RE.Offset,
-                      Value, RE.RelType, RE.Addend);
-  }
-}
-
 void RuntimeDyldImpl::resolveRelocationList(const RelocationList &Relocs,
                                             uint64_t Value) {
   for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
-    resolveRelocationEntry(Relocs[i], Value);
+    const RelocationEntry &RE = Relocs[i];
+    // Ignore relocations for sections that were not loaded
+    if (Sections[RE.SectionID].Address == 0)
+      continue;
+    resolveRelocation(RE, Value);
   }
 }
 
@@ -534,4 +548,8 @@ StringRef RuntimeDyld::getErrorString() {
   return Dyld->getErrorString();
 }
 
+StringRef RuntimeDyld::getEHFrameSection() {
+  return Dyld->getEHFrameSection(); // delegate to the Dyld object
+}
+
 } // end namespace llvm
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
index b8537b1..d4d84d3 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
@@ -41,7 +41,7 @@ error_code check(error_code Err) {
 template<class ELFT>
 class DyldELFObject
   : public ELFObjectFile<ELFT> {
-  LLVM_ELF_IMPORT_TYPES(ELFT)
+  LLVM_ELF_IMPORT_TYPES_ELFT(ELFT)
 
   typedef Elf_Shdr_Impl<ELFT> Elf_Shdr;
   typedef Elf_Sym_Impl<ELFT> Elf_Sym;
@@ -151,6 +151,14 @@ void DyldELFObject<ELFT>::updateSymbolAddress(const SymbolRef &SymRef,
 
 namespace llvm {
 
+// Returns the bytes of the first entry in Sections named ".eh_frame", if any.
+StringRef RuntimeDyldELF::getEHFrameSection() {
+  for (int Idx = 0, Count = Sections.size(); Idx != Count; ++Idx)
+    if (Sections[Idx].Name == ".eh_frame")
+      return StringRef((const char*)Sections[Idx].Address, Sections[Idx].Size);
+  return StringRef();
+}
+
 ObjectImage *RuntimeDyldELF::createObjectImage(ObjectBuffer *Buffer) {
   if (Buffer->getBufferSize() < ELF::EI_NIDENT)
     llvm_unreachable("Unexpected ELF object size");
@@ -269,6 +277,85 @@ void RuntimeDyldELF::resolveX86Relocation(const SectionEntry &Section,
   }
 }
 
+// Applies one AArch64 ELF relocation of kind Type at Section.Address + Offset.
+// PC-relative kinds are computed against Section.LoadAddress + Offset.
+void RuntimeDyldELF::resolveAArch64Relocation(const SectionEntry &Section,
+                                              uint64_t Offset, uint64_t Value,
+                                              uint32_t Type, int64_t Addend) {
+  uint32_t *TargetPtr = reinterpret_cast<uint32_t*>(Section.Address + Offset);
+  uint64_t FinalAddress = Section.LoadAddress + Offset;
+
+  DEBUG(dbgs() << "resolveAArch64Relocation, LocalAddress: 0x"
+               << format("%llx", Section.Address + Offset)
+               << " FinalAddress: 0x" << format("%llx",FinalAddress)
+               << " Value: 0x" << format("%llx",Value)
+               << " Type: 0x" << format("%x",Type)
+               << " Addend: 0x" << format("%llx",Addend)
+               << "\n");
+
+  switch (Type) {
+  default:
+    llvm_unreachable("Relocation type not implemented yet!");
+    break;
+  case ELF::R_AARCH64_ABS64: {
+    uint64_t *TargetPtr = reinterpret_cast<uint64_t*>(Section.Address + Offset);
+    *TargetPtr = Value + Addend;
+    break;
+  }
+  case ELF::R_AARCH64_PREL32: { // test-shift.ll (.eh_frame)
+    uint64_t Result = Value + Addend - FinalAddress;
+    assert(static_cast<int64_t>(Result) >= INT32_MIN &&
+           static_cast<int64_t>(Result) <= UINT32_MAX);
+    *TargetPtr = static_cast<uint32_t>(Result & 0xffffffffU);
+    break;
+  }
+  case ELF::R_AARCH64_CALL26: // fallthrough
+  case ELF::R_AARCH64_JUMP26: {
+    // Operation: S+A-P. Set Call or B immediate value to bits fff_fffc of the
+    // calculation.
+    uint64_t BranchImm = Value + Addend - FinalAddress;
+
+    // "Check that -2^27 <= result < 2^27".
+    assert(-(1LL << 27) <= static_cast<int64_t>(BranchImm) &&
+           static_cast<int64_t>(BranchImm) < (1LL << 27));
+    // Immediate goes in bits 25:0 of B and BL.
+    *TargetPtr |= static_cast<uint32_t>(BranchImm & 0xffffffcU) >> 2;
+    break;
+  }
+  case ELF::R_AARCH64_MOVW_UABS_G3: {
+    uint64_t Result = Value + Addend;
+    // Bits 63:48 go in bits 20:5 of MOVZ/MOVK; mask so bits 4:0 stay intact
+    *TargetPtr |= ((Result & 0xffff000000000000ULL) >> (48 - 5));
+    // Shift is "lsl #48", in bits 22:21
+    *TargetPtr |= 3 << 21;
+    break;
+  }
+  case ELF::R_AARCH64_MOVW_UABS_G2_NC: {
+    uint64_t Result = Value + Addend;
+    // Immediate goes in bits 20:5 of MOVZ/MOVK instruction
+    *TargetPtr |= ((Result & 0xffff00000000ULL) >> (32 - 5));
+    // Shift is "lsl #32", in bits 22:21
+    *TargetPtr |= 2 << 21;
+    break;
+  }
+  case ELF::R_AARCH64_MOVW_UABS_G1_NC: {
+    uint64_t Result = Value + Addend;
+    // Immediate goes in bits 20:5 of MOVZ/MOVK instruction
+    *TargetPtr |= ((Result & 0xffff0000U) >> (16 - 5));
+    // Shift is "lsl #16", in bits 22:21
+    *TargetPtr |= 1 << 21;
+    break;
+  }
+  case ELF::R_AARCH64_MOVW_UABS_G0_NC: {
+    uint64_t Result = Value + Addend;
+    // Immediate goes in bits 20:5 of MOVZ/MOVK instruction
+    *TargetPtr |= ((Result & 0xffffU) << 5);
+    // Shift is "lsl #0", in bits 22:21. No action needed.
+    break;
+  }
+  }
+}
+
 void RuntimeDyldELF::resolveARMRelocation(const SectionEntry &Section,
                                           uint64_t Offset,
                                           uint32_t Value,
@@ -541,6 +628,11 @@ void RuntimeDyldELF::resolvePPC64Relocation(const SectionEntry &Section,
       llvm_unreachable("Relocation R_PPC64_REL32 overflow");
     writeInt32BE(LocalAddress, delta);
   } break;
+  case ELF::R_PPC64_REL64: {
+    uint64_t FinalAddress = (Section.LoadAddress + Offset);
+    uint64_t Delta = Value - FinalAddress + Addend;
+    writeInt64BE(LocalAddress, Delta);
+  } break;
   case ELF::R_PPC64_ADDR64 :
     writeInt64BE(LocalAddress, Value + Addend);
     break;
@@ -560,6 +652,48 @@ void RuntimeDyldELF::resolvePPC64Relocation(const SectionEntry &Section,
   }
 }
 
+// Applies one SystemZ ELF relocation of kind Type at Section.Address + Offset.
+// PC-relative kinds are measured from Section.LoadAddress + Offset.
+void RuntimeDyldELF::resolveSystemZRelocation(const SectionEntry &Section,
+                                              uint64_t Offset, uint64_t Value,
+                                              uint32_t Type, int64_t Addend) {
+  uint8_t *Dest = Section.Address + Offset;
+  switch (Type) {
+  case ELF::R_390_PC16DBL:
+  case ELF::R_390_PLT16DBL: {
+    int64_t Disp = (Value + Addend) - (Section.LoadAddress + Offset);
+    assert(int16_t(Disp / 2) * 2 == Disp && "R_390_PC16DBL overflow");
+    writeInt16BE(Dest, Disp / 2);
+    break;
+  }
+  case ELF::R_390_PC32DBL:
+  case ELF::R_390_PLT32DBL: {
+    int64_t Disp = (Value + Addend) - (Section.LoadAddress + Offset);
+    assert(int32_t(Disp / 2) * 2 == Disp && "R_390_PC32DBL overflow");
+    writeInt32BE(Dest, Disp / 2);
+    break;
+  }
+  case ELF::R_390_PC32: {
+    int64_t Disp = (Value + Addend) - (Section.LoadAddress + Offset);
+    assert(int32_t(Disp) == Disp && "R_390_PC32 overflow");
+    writeInt32BE(Dest, Disp);
+    break;
+  }
+  case ELF::R_390_64:
+    writeInt64BE(Dest, Value + Addend);
+    break;
+  default:
+    llvm_unreachable("Relocation type not implemented yet!");
+    break;
+  }
+}
+
+void RuntimeDyldELF::resolveRelocation(const RelocationEntry &RE,
+                                       uint64_t Value) {
+  const SectionEntry &Sec = Sections[RE.SectionID]; // section RE applies to
+  resolveRelocation(Sec, RE.Offset, Value, RE.RelType, RE.Addend);
+}
+
 void RuntimeDyldELF::resolveRelocation(const SectionEntry &Section,
                                        uint64_t Offset,
                                        uint64_t Value,
@@ -574,6 +708,9 @@ void RuntimeDyldELF::resolveRelocation(const SectionEntry &Section,
                          (uint32_t)(Value & 0xffffffffL), Type,
                          (uint32_t)(Addend & 0xffffffffL));
     break;
+  case Triple::aarch64:
+    resolveAArch64Relocation(Section, Offset, Value, Type, Addend);
+    break;
   case Triple::arm:    // Fall through.
   case Triple::thumb:
     resolveARMRelocation(Section, Offset,
@@ -589,19 +726,25 @@ void RuntimeDyldELF::resolveRelocation(const SectionEntry &Section,
   case Triple::ppc64:
     resolvePPC64Relocation(Section, Offset, Value, Type, Addend);
     break;
+  case Triple::systemz:
+    resolveSystemZRelocation(Section, Offset, Value, Type, Addend);
+    break;
   default: llvm_unreachable("Unsupported CPU type!");
   }
 }
 
-void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel,
+void RuntimeDyldELF::processRelocationRef(unsigned SectionID,
+                                          RelocationRef RelI,
                                           ObjectImage &Obj,
                                           ObjSectionToIDMap &ObjSectionToID,
                                           const SymbolTableMap &Symbols,
                                           StubMap &Stubs) {
-
-  uint32_t RelType = (uint32_t)(Rel.Type & 0xffffffffL);
-  intptr_t Addend = (intptr_t)Rel.AdditionalInfo;
-  const SymbolRef &Symbol = Rel.Symbol;
+  uint64_t RelType;
+  Check(RelI.getType(RelType));
+  int64_t Addend;
+  Check(RelI.getAdditionalInfo(Addend));
+  SymbolRef Symbol;
+  Check(RelI.getSymbol(Symbol));
 
   // Obtain the symbol name which is referenced in the relocation
   StringRef TargetName;
@@ -617,14 +760,14 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel,
   Symbol.getType(SymType);
   if (lsi != Symbols.end()) {
     Value.SectionID = lsi->second.first;
-    Value.Addend = lsi->second.second;
+    Value.Addend = lsi->second.second + Addend;
   } else {
     // Search for the symbol in the global symbol table
     SymbolTableMap::const_iterator gsi =
         GlobalSymbolTable.find(TargetName.data());
     if (gsi != GlobalSymbolTable.end()) {
       Value.SectionID = gsi->second.first;
-      Value.Addend = gsi->second.second;
+      Value.Addend = gsi->second.second + Addend;
     } else {
       switch (SymType) {
         case SymbolRef::ST_Debug: {
@@ -657,21 +800,73 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel,
       }
     }
   }
-  DEBUG(dbgs() << "\t\tRel.SectionID: " << Rel.SectionID
-               << " Rel.Offset: " << Rel.Offset
+  uint64_t Offset;
+  Check(RelI.getOffset(Offset));
+
+  DEBUG(dbgs() << "\t\tSectionID: " << SectionID
+               << " Offset: " << Offset
                << "\n");
-  if (Arch == Triple::arm &&
+  if (Arch == Triple::aarch64 &&
+      (RelType == ELF::R_AARCH64_CALL26 ||
+       RelType == ELF::R_AARCH64_JUMP26)) {
+    // This is an AArch64 branch relocation, need to use a stub function.
+    DEBUG(dbgs() << "\t\tThis is an AArch64 branch relocation.");
+    SectionEntry &Section = Sections[SectionID];
+
+    // Look for an existing stub.
+    StubMap::const_iterator i = Stubs.find(Value);
+    if (i != Stubs.end()) {
+        resolveRelocation(Section, Offset,
+                          (uint64_t)Section.Address + i->second, RelType, 0);
+      DEBUG(dbgs() << " Stub function found\n");
+    } else {
+      // Create a new stub function.
+      DEBUG(dbgs() << " Create a new stub function\n");
+      Stubs[Value] = Section.StubOffset;
+      uint8_t *StubTargetAddr = createStubFunction(Section.Address +
+                                                   Section.StubOffset);
+
+      RelocationEntry REmovz_g3(SectionID,
+                                StubTargetAddr - Section.Address,
+                                ELF::R_AARCH64_MOVW_UABS_G3, Value.Addend);
+      RelocationEntry REmovk_g2(SectionID,
+                                StubTargetAddr - Section.Address + 4,
+                                ELF::R_AARCH64_MOVW_UABS_G2_NC, Value.Addend);
+      RelocationEntry REmovk_g1(SectionID,
+                                StubTargetAddr - Section.Address + 8,
+                                ELF::R_AARCH64_MOVW_UABS_G1_NC, Value.Addend);
+      RelocationEntry REmovk_g0(SectionID,
+                                StubTargetAddr - Section.Address + 12,
+                                ELF::R_AARCH64_MOVW_UABS_G0_NC, Value.Addend);
+
+      if (Value.SymbolName) {
+        addRelocationForSymbol(REmovz_g3, Value.SymbolName);
+        addRelocationForSymbol(REmovk_g2, Value.SymbolName);
+        addRelocationForSymbol(REmovk_g1, Value.SymbolName);
+        addRelocationForSymbol(REmovk_g0, Value.SymbolName);
+      } else {
+        addRelocationForSection(REmovz_g3, Value.SectionID);
+        addRelocationForSection(REmovk_g2, Value.SectionID);
+        addRelocationForSection(REmovk_g1, Value.SectionID);
+        addRelocationForSection(REmovk_g0, Value.SectionID);
+      }
+      resolveRelocation(Section, Offset,
+                        (uint64_t)Section.Address + Section.StubOffset,
+                        RelType, 0);
+      Section.StubOffset += getMaxStubSize();
+    }
+  } else if (Arch == Triple::arm &&
       (RelType == ELF::R_ARM_PC24 ||
        RelType == ELF::R_ARM_CALL ||
        RelType == ELF::R_ARM_JUMP24)) {
     // This is an ARM branch relocation, need to use a stub function.
     DEBUG(dbgs() << "\t\tThis is an ARM branch relocation.");
-    SectionEntry &Section = Sections[Rel.SectionID];
+    SectionEntry &Section = Sections[SectionID];
 
     // Look for an existing stub.
     StubMap::const_iterator i = Stubs.find(Value);
     if (i != Stubs.end()) {
-        resolveRelocation(Section, Rel.Offset,
+        resolveRelocation(Section, Offset,
                           (uint64_t)Section.Address + i->second, RelType, 0);
       DEBUG(dbgs() << " Stub function found\n");
     } else {
@@ -680,14 +875,14 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel,
       Stubs[Value] = Section.StubOffset;
       uint8_t *StubTargetAddr = createStubFunction(Section.Address +
                                                    Section.StubOffset);
-      RelocationEntry RE(Rel.SectionID, StubTargetAddr - Section.Address,
+      RelocationEntry RE(SectionID, StubTargetAddr - Section.Address,
                          ELF::R_ARM_ABS32, Value.Addend);
       if (Value.SymbolName)
         addRelocationForSymbol(RE, Value.SymbolName);
       else
         addRelocationForSection(RE, Value.SectionID);
 
-      resolveRelocation(Section, Rel.Offset,
+      resolveRelocation(Section, Offset,
                         (uint64_t)Section.Address + Section.StubOffset,
                         RelType, 0);
       Section.StubOffset += getMaxStubSize();
@@ -696,8 +891,8 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel,
              RelType == ELF::R_MIPS_26) {
     // This is an Mips branch relocation, need to use a stub function.
     DEBUG(dbgs() << "\t\tThis is a Mips branch relocation.");
-    SectionEntry &Section = Sections[Rel.SectionID];
-    uint8_t *Target = Section.Address + Rel.Offset;
+    SectionEntry &Section = Sections[SectionID];
+    uint8_t *Target = Section.Address + Offset;
     uint32_t *TargetAddress = (uint32_t *)Target;
 
     // Extract the addend from the instruction.
@@ -708,7 +903,7 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel,
     //  Look up for existing stub.
     StubMap::const_iterator i = Stubs.find(Value);
     if (i != Stubs.end()) {
-      resolveRelocation(Section, Rel.Offset,
+      resolveRelocation(Section, Offset,
                         (uint64_t)Section.Address + i->second, RelType, 0);
       DEBUG(dbgs() << " Stub function found\n");
     } else {
@@ -719,10 +914,10 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel,
                                                    Section.StubOffset);
 
       // Creating Hi and Lo relocations for the filled stub instructions.
-      RelocationEntry REHi(Rel.SectionID,
+      RelocationEntry REHi(SectionID,
                            StubTargetAddr - Section.Address,
                            ELF::R_MIPS_HI16, Value.Addend);
-      RelocationEntry RELo(Rel.SectionID,
+      RelocationEntry RELo(SectionID,
                            StubTargetAddr - Section.Address + 4,
                            ELF::R_MIPS_LO16, Value.Addend);
 
@@ -734,7 +929,7 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel,
         addRelocationForSection(RELo, Value.SectionID);
       }
 
-      resolveRelocation(Section, Rel.Offset,
+      resolveRelocation(Section, Offset,
                         (uint64_t)Section.Address + Section.StubOffset,
                         RelType, 0);
       Section.StubOffset += getMaxStubSize();
@@ -744,8 +939,8 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel,
       // A PPC branch relocation will need a stub function if the target is
       // an external symbol (Symbol::ST_Unknown) or if the target address
       // is not within the signed 24-bits branch address.
-      SectionEntry &Section = Sections[Rel.SectionID];
-      uint8_t *Target = Section.Address + Rel.Offset;
+      SectionEntry &Section = Sections[SectionID];
+      uint8_t *Target = Section.Address + Offset;
       bool RangeOverflow = false;
       if (SymType != SymbolRef::ST_Unknown) {
         // A function call may points to the .opd entry, so the final symbol value
@@ -755,7 +950,7 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel,
         int32_t delta = static_cast<int32_t>(Target - RelocTarget);
         // If it is within 24-bits branch range, just set the branch target
         if (SignExtend32<24>(delta) == delta) {
-          RelocationEntry RE(Rel.SectionID, Rel.Offset, RelType, Value.Addend);
+          RelocationEntry RE(SectionID, Offset, RelType, Value.Addend);
           if (Value.SymbolName)
             addRelocationForSymbol(RE, Value.SymbolName);
           else
@@ -770,7 +965,7 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel,
         StubMap::const_iterator i = Stubs.find(Value);
         if (i != Stubs.end()) {
           // Symbol function stub already created, just relocate to it
-          resolveRelocation(Section, Rel.Offset,
+          resolveRelocation(Section, Offset,
                             (uint64_t)Section.Address + i->second, RelType, 0);
           DEBUG(dbgs() << " Stub function found\n");
         } else {
@@ -779,21 +974,21 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel,
           Stubs[Value] = Section.StubOffset;
           uint8_t *StubTargetAddr = createStubFunction(Section.Address +
                                                        Section.StubOffset);
-          RelocationEntry RE(Rel.SectionID, StubTargetAddr - Section.Address,
+          RelocationEntry RE(SectionID, StubTargetAddr - Section.Address,
                              ELF::R_PPC64_ADDR64, Value.Addend);
 
           // Generates the 64-bits address loads as exemplified in section
           // 4.5.1 in PPC64 ELF ABI.
-          RelocationEntry REhst(Rel.SectionID,
+          RelocationEntry REhst(SectionID,
                                 StubTargetAddr - Section.Address + 2,
                                 ELF::R_PPC64_ADDR16_HIGHEST, Value.Addend);
-          RelocationEntry REhr(Rel.SectionID,
+          RelocationEntry REhr(SectionID,
                                StubTargetAddr - Section.Address + 6,
                                ELF::R_PPC64_ADDR16_HIGHER, Value.Addend);
-          RelocationEntry REh(Rel.SectionID,
+          RelocationEntry REh(SectionID,
                               StubTargetAddr - Section.Address + 14,
                               ELF::R_PPC64_ADDR16_HI, Value.Addend);
-          RelocationEntry REl(Rel.SectionID,
+          RelocationEntry REl(SectionID,
                               StubTargetAddr - Section.Address + 18,
                               ELF::R_PPC64_ADDR16_LO, Value.Addend);
 
@@ -809,7 +1004,7 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel,
             addRelocationForSection(REl,   Value.SectionID);
           }
 
-          resolveRelocation(Section, Rel.Offset,
+          resolveRelocation(Section, Offset,
                             (uint64_t)Section.Address + Section.StubOffset,
                             RelType, 0);
           if (SymType == SymbolRef::ST_Unknown)
@@ -819,7 +1014,7 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel,
         }
       }
     } else {
-      RelocationEntry RE(Rel.SectionID, Rel.Offset, RelType, Value.Addend);
+      RelocationEntry RE(SectionID, Offset, RelType, Value.Addend);
       // Extra check to avoid relocation againt empty symbols (usually
       // the R_PPC64_TOC).
       if (Value.SymbolName && !TargetName.empty())
@@ -827,8 +1022,55 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel,
       else
         addRelocationForSection(RE, Value.SectionID);
     }
+  } else if (Arch == Triple::systemz &&
+             (RelType == ELF::R_390_PLT32DBL ||
+              RelType == ELF::R_390_GOTENT)) {
+    // Create function stubs for both PLT and GOT references, regardless of
+    // whether the GOT reference is to data or code.  The stub contains the
+    // full address of the symbol, as needed by GOT references, and the
+    // executable part only adds an overhead of 8 bytes.
+    //
+    // We could try to conserve space by allocating the code and data
+    // parts of the stub separately.  However, as things stand, we allocate
+    // a stub for every relocation, so using a GOT in JIT code should be
+    // no less space efficient than using an explicit constant pool.
+    DEBUG(dbgs() << "\t\tThis is a SystemZ indirect relocation.");
+    SectionEntry &Section = Sections[SectionID];
+
+    // Look for an existing stub.
+    StubMap::const_iterator i = Stubs.find(Value);
+    uintptr_t StubAddress;
+    if (i != Stubs.end()) {
+      StubAddress = uintptr_t(Section.Address) + i->second;
+      DEBUG(dbgs() << " Stub function found\n");
+    } else {
+      // Create a new stub function.
+      DEBUG(dbgs() << " Create a new stub function\n");
+
+      uintptr_t BaseAddress = uintptr_t(Section.Address);
+      uintptr_t StubAlignment = getStubAlignment();
+      StubAddress = (BaseAddress + Section.StubOffset +
+                     StubAlignment - 1) & -StubAlignment;
+      unsigned StubOffset = StubAddress - BaseAddress;
+
+      Stubs[Value] = StubOffset;
+      createStubFunction((uint8_t *)StubAddress);
+      RelocationEntry RE(SectionID, StubOffset + 8,
+                         ELF::R_390_64, Value.Addend - Addend);
+      if (Value.SymbolName)
+        addRelocationForSymbol(RE, Value.SymbolName);
+      else
+        addRelocationForSection(RE, Value.SectionID);
+      Section.StubOffset = StubOffset + getMaxStubSize();
+    }
+
+    if (RelType == ELF::R_390_GOTENT)
+      resolveRelocation(Section, Offset, StubAddress + 8,
+                        ELF::R_390_PC32DBL, Addend);
+    else
+      resolveRelocation(Section, Offset, StubAddress, RelType, Addend);
   } else {
-    RelocationEntry RE(Rel.SectionID, Rel.Offset, RelType, Value.Addend);
+    RelocationEntry RE(SectionID, Offset, RelType, Value.Addend);
     if (Value.SymbolName)
       addRelocationForSymbol(RE, Value.SymbolName);
     else
@@ -836,13 +1078,6 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel,
   }
 }
 
-unsigned RuntimeDyldELF::getCommonSymbolAlignment(const SymbolRef &Sym) {
-  // In ELF, the value of an SHN_COMMON symbol is its alignment requirement.
-  uint64_t Align;
-  Check(Sym.getValue(Align));
-  return Align;
-}
-
 bool RuntimeDyldELF::isCompatibleFormat(const ObjectBuffer *Buffer) const {
   if (Buffer->getBufferSize() < strlen(ELF::ElfMagic))
     return false;
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
index 07e704b..794c7ec 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
@@ -31,7 +31,12 @@ namespace {
 } // end anonymous namespace
 
 class RuntimeDyldELF : public RuntimeDyldImpl {
-protected:
+  void resolveRelocation(const SectionEntry &Section,
+                         uint64_t Offset,
+                         uint64_t Value,
+                         uint32_t Type,
+                         int64_t Addend);
+
   void resolveX86_64Relocation(const SectionEntry &Section,
                                uint64_t Offset,
                                uint64_t Value,
@@ -44,6 +49,12 @@ protected:
                             uint32_t Type,
                             int32_t Addend);
 
+  void resolveAArch64Relocation(const SectionEntry &Section,
+                                uint64_t Offset,
+                                uint64_t Value,
+                                uint32_t Type,
+                                int64_t Addend);
+
   void resolveARMRelocation(const SectionEntry &Section,
                             uint64_t Offset,
                             uint32_t Value,
@@ -62,21 +73,11 @@ protected:
                               uint32_t Type,
                               int64_t Addend);
 
-  virtual void resolveRelocation(const SectionEntry &Section,
-                                 uint64_t Offset,
-                                 uint64_t Value,
-                                 uint32_t Type,
-                                 int64_t Addend);
-
-  virtual void processRelocationRef(const ObjRelocationInfo &Rel,
-                                    ObjectImage &Obj,
-                                    ObjSectionToIDMap &ObjSectionToID,
-                                    const SymbolTableMap &Symbols,
-                                    StubMap &Stubs);
-
-  unsigned getCommonSymbolAlignment(const SymbolRef &Sym);
-
-  virtual ObjectImage *createObjectImage(ObjectBuffer *InputBuffer);
+  void resolveSystemZRelocation(const SectionEntry &Section,
+                                uint64_t Offset,
+                                uint64_t Value,
+                                uint32_t Type,
+                                int64_t Addend);
 
   uint64_t findPPC64TOC() const;
   void findOPDEntrySection(ObjectImage &Obj,
@@ -84,12 +85,19 @@ protected:
                            RelocationValueRef &Rel);
 
 public:
-  RuntimeDyldELF(RTDyldMemoryManager *mm)
-      : RuntimeDyldImpl(mm) {}
+  RuntimeDyldELF(RTDyldMemoryManager *mm) : RuntimeDyldImpl(mm) {}
 
+  virtual void resolveRelocation(const RelocationEntry &RE, uint64_t Value);
+  virtual void processRelocationRef(unsigned SectionID,
+                                    RelocationRef RelI,
+                                    ObjectImage &Obj,
+                                    ObjSectionToIDMap &ObjSectionToID,
+                                    const SymbolTableMap &Symbols,
+                                    StubMap &Stubs);
+  virtual bool isCompatibleFormat(const ObjectBuffer *Buffer) const;
+  virtual ObjectImage *createObjectImage(ObjectBuffer *InputBuffer);
+  virtual StringRef getEHFrameSection();
   virtual ~RuntimeDyldELF();
-
-  bool isCompatibleFormat(const ObjectBuffer *Buffer) const;
 };
 
 } // end namespace llvm
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
index f100994..383ffab 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
@@ -49,7 +49,7 @@ public:
   /// Address - address in the linker's memory where the section resides.
   uint8_t *Address;
 
-  /// Size - section size.
+  /// Size - section size. Doesn't include the stubs.
   size_t Size;
 
   /// LoadAddress - the address of the section in the target process's memory.
@@ -67,9 +67,9 @@ public:
   uintptr_t ObjAddress;
 
   SectionEntry(StringRef name, uint8_t *address, size_t size,
-	       uintptr_t stubOffset, uintptr_t objAddress)
+               uintptr_t objAddress)
     : Name(name), Address(address), Size(size), LoadAddress((uintptr_t)address),
-      StubOffset(stubOffset), ObjAddress(objAddress) {}
+      StubOffset(size), ObjAddress(objAddress) {}
 };
 
 /// RelocationEntry - used to represent relocations internally in the dynamic
@@ -89,20 +89,20 @@ public:
   /// used to make a relocation section relative instead of symbol relative.
   intptr_t Addend;
 
+  /// True if this is a PCRel relocation (MachO specific).
+  bool IsPCRel;
+
+  /// The size of this relocation (MachO specific).
+  unsigned Size;
+
   RelocationEntry(unsigned id, uint64_t offset, uint32_t type, int64_t addend)
-    : SectionID(id), Offset(offset), RelType(type), Addend(addend) {}
-};
+    : SectionID(id), Offset(offset), RelType(type), Addend(addend),
+      IsPCRel(false), Size(0) {}
 
-/// ObjRelocationInfo - relocation information as read from the object file.
-/// Used to pass around data taken from object::RelocationRef, together with
-/// the section to which the relocation points (represented by a SectionID).
-class ObjRelocationInfo {
-public:
-  unsigned  SectionID;
-  uint64_t  Offset;
-  SymbolRef Symbol;
-  uint64_t  Type;
-  int64_t   AdditionalInfo;
+  RelocationEntry(unsigned id, uint64_t offset, uint32_t type, int64_t addend,
+                  bool IsPCRel, unsigned Size)
+    : SectionID(id), Offset(offset), RelType(type), Addend(addend),
+      IsPCRel(IsPCRel), Size(Size) {}
 };
 
 class RelocationValueRef {
@@ -166,16 +166,29 @@ protected:
   Triple::ArchType Arch;
 
   inline unsigned getMaxStubSize() {
+    if (Arch == Triple::aarch64)
+      return 20; // movz; movk; movk; movk; br
     if (Arch == Triple::arm || Arch == Triple::thumb)
       return 8; // 32-bit instruction and 32-bit address
     else if (Arch == Triple::mipsel || Arch == Triple::mips)
       return 16;
     else if (Arch == Triple::ppc64)
       return 44;
+    else if (Arch == Triple::x86_64)
+      return 8; // GOT
+    else if (Arch == Triple::systemz)
+      return 16;
     else
       return 0;
   }
 
+  // Alignment, in bytes, that stub addresses are rounded up to for the
+  // current Arch: 8 on SystemZ, 1 (i.e. no rounding) for every other target.
+  inline unsigned getStubAlignment() {
+    const bool IsSystemZ = Arch == Triple::systemz;
+    return IsSystemZ ? 8 : 1;
+  }
+
   bool HasError;
   std::string ErrorStr;
 
@@ -194,22 +207,15 @@ protected:
     return (uint8_t*)Sections[SectionID].Address;
   }
 
-  // Subclasses can override this method to get the alignment requirement of
-  // a common symbol. Returns no alignment requirement if not implemented.
-  virtual unsigned getCommonSymbolAlignment(const SymbolRef &Sym) {
-    return 0;
-  }
-
-
   void writeInt16BE(uint8_t *Addr, uint16_t Value) {
-    if (sys::isLittleEndianHost())
+    if (sys::IsLittleEndianHost)
       Value = sys::SwapByteOrder(Value);
     *Addr     = (Value >> 8) & 0xFF;
     *(Addr+1) = Value & 0xFF;
   }
 
   void writeInt32BE(uint8_t *Addr, uint32_t Value) {
-    if (sys::isLittleEndianHost())
+    if (sys::IsLittleEndianHost)
       Value = sys::SwapByteOrder(Value);
     *Addr     = (Value >> 24) & 0xFF;
     *(Addr+1) = (Value >> 16) & 0xFF;
@@ -218,7 +224,7 @@ protected:
   }
 
   void writeInt64BE(uint8_t *Addr, uint64_t Value) {
-    if (sys::isLittleEndianHost())
+    if (sys::IsLittleEndianHost)
       Value = sys::SwapByteOrder(Value);
     *Addr     = (Value >> 56) & 0xFF;
     *(Addr+1) = (Value >> 48) & 0xFF;
@@ -269,24 +275,16 @@ protected:
 
   /// \brief Resolves relocations from Relocs list with address from Value.
   void resolveRelocationList(const RelocationList &Relocs, uint64_t Value);
-  void resolveRelocationEntry(const RelocationEntry &RE, uint64_t Value);
 
   /// \brief A object file specific relocation resolver
-  /// \param Section The section where the relocation is being applied
-  /// \param Offset The offset into the section for this relocation
+  /// \param RE The relocation to be resolved
   /// \param Value Target symbol address to apply the relocation action
-  /// \param Type object file specific relocation type
-  /// \param Addend A constant addend used to compute the value to be stored
-  ///        into the relocatable field
-  virtual void resolveRelocation(const SectionEntry &Section,
-                                 uint64_t Offset,
-                                 uint64_t Value,
-                                 uint32_t Type,
-                                 int64_t Addend) = 0;
+  virtual void resolveRelocation(const RelocationEntry &RE, uint64_t Value) = 0;
 
   /// \brief Parses the object file relocation and stores it to Relocations
   ///        or SymbolRelocations (this depends on the object file type).
-  virtual void processRelocationRef(const ObjRelocationInfo &Rel,
+  virtual void processRelocationRef(unsigned SectionID,
+                                    RelocationRef RelI,
                                     ObjectImage &Obj,
                                     ObjSectionToIDMap &ObjSectionToID,
                                     const SymbolTableMap &Symbols,
@@ -336,6 +334,8 @@ public:
   StringRef getErrorString() { return ErrorStr; }
 
   virtual bool isCompatibleFormat(const ObjectBuffer *Buffer) const = 0;
+
+  virtual StringRef getEHFrameSection();
 };
 
 } // end namespace llvm
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
index bcc3df1..01a3fd9 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
@@ -21,16 +21,87 @@ using namespace llvm::object;
 
 namespace llvm {
 
+// Rebases one __eh_frame record at P (CIEs are left as-is); returns its end.
+static unsigned char *processFDE(unsigned char *P, intptr_t DeltaForText,
+                                 intptr_t DeltaForEH) {
+  uint32_t Length, Offset; // memcpy below: no aligned-pointer assumption
+  memcpy(&Length, P, sizeof(Length));
+  P += 4;
+  unsigned char *Ret = P + Length;
+  memcpy(&Offset, P, sizeof(Offset));
+  if (Offset == 0) // is a CIE
+    return Ret;
+  P += 4;
+  intptr_t FDELocation;
+  memcpy(&FDELocation, P, sizeof(intptr_t));
+  FDELocation -= DeltaForText;
+  memcpy(P, &FDELocation, sizeof(intptr_t));
+  // Skip the location just rewritten and the FDE address range
+  P += 2 * sizeof(intptr_t);
+
+  uint8_t Augmentationsize = *P++;
+  if (Augmentationsize != 0) {
+    intptr_t LSDA; // follows the 1-byte size field, so it is unaligned
+    memcpy(&LSDA, P, sizeof(intptr_t));
+    LSDA -= DeltaForEH;
+    memcpy(P, &LSDA, sizeof(intptr_t));
+  }
+  return Ret;
+}
+
+// Distance from B to A in the object file minus the same distance once loaded.
+static intptr_t computeDelta(SectionEntry *A, SectionEntry *B) {
+  intptr_t InObject = A->ObjAddress - B->ObjAddress;
+  return InObject - intptr_t(A->LoadAddress - B->LoadAddress);
+}
+
+// Rebases every FDE in the loaded __eh_frame section by the object-vs-load
+// deltas of __text and __gcc_except_tab; returns its bytes (empty if absent).
+StringRef RuntimeDyldMachO::getEHFrameSection() {
+  SectionEntry *Text = NULL;
+  SectionEntry *EHFrame = NULL;
+  SectionEntry *ExceptTab = NULL;
+  for (int i = 0, e = Sections.size(); i != e; ++i) {
+    if (Sections[i].Name == "__eh_frame")
+      EHFrame = &Sections[i];
+    else if (Sections[i].Name == "__text")
+      Text = &Sections[i];
+    else if (Sections[i].Name == "__gcc_except_tab")
+      ExceptTab = &Sections[i];
+  }
+  if (Text == NULL || EHFrame == NULL)
+    return StringRef();
+
+  intptr_t DeltaForText = computeDelta(Text, EHFrame);
+  intptr_t DeltaForEH = ExceptTab ? computeDelta(ExceptTab, EHFrame) : 0;
+
+  unsigned char *P = EHFrame->Address;
+  unsigned char *End = P + EHFrame->Size;
+  // '<', not '!=': also stops for an empty section or an overrunning record.
+  while (P < End)
+    P = processFDE(P, DeltaForText, DeltaForEH);
+
+  return StringRef((char*)EHFrame->Address, EHFrame->Size);
+}
+
+// Unpacks RE and forwards to the section-based resolveRelocation overload.
+void RuntimeDyldMachO::resolveRelocation(const RelocationEntry &RE,
+                                         uint64_t Value) {
+  resolveRelocation(Sections[RE.SectionID], RE.Offset, Value, RE.RelType,
+                    RE.Addend, RE.IsPCRel, RE.Size);
+}
+
 void RuntimeDyldMachO::resolveRelocation(const SectionEntry &Section,
                                          uint64_t Offset,
                                          uint64_t Value,
                                          uint32_t Type,
-                                         int64_t Addend) {
+                                         int64_t Addend,
+                                         bool isPCRel,
+                                         unsigned LogSize) {
   uint8_t *LocalAddress = Section.Address + Offset;
   uint64_t FinalAddress = Section.LoadAddress + Offset;
-  bool isPCRel = (Type >> 24) & 1;
-  unsigned MachoType = (Type >> 28) & 0xf;
-  unsigned Size = 1 << ((Type >> 25) & 3);
+  unsigned MachoType = Type;
+  unsigned Size = 1 << LogSize;
 
   DEBUG(dbgs() << "resolveRelocation LocalAddress: " 
         << format("%p", LocalAddress)
@@ -205,89 +276,111 @@ bool RuntimeDyldMachO::resolveARMRelocation(uint8_t *LocalAddress,
   return false;
 }
 
-void RuntimeDyldMachO::processRelocationRef(const ObjRelocationInfo &Rel,
+void RuntimeDyldMachO::processRelocationRef(unsigned SectionID,
+                                            RelocationRef RelI,
                                             ObjectImage &Obj,
                                             ObjSectionToIDMap &ObjSectionToID,
                                             const SymbolTableMap &Symbols,
                                             StubMap &Stubs) {
+  const ObjectFile *OF = Obj.getObjectFile();
+  const MachOObjectFile *MachO = static_cast<const MachOObjectFile*>(OF);
+  macho::RelocationEntry RE = MachO->getRelocation(RelI.getRawDataRefImpl());
 
-  uint32_t RelType = (uint32_t) (Rel.Type & 0xffffffffL);
+  uint32_t RelType = MachO->getAnyRelocationType(RE);
   RelocationValueRef Value;
-  SectionEntry &Section = Sections[Rel.SectionID];
+  SectionEntry &Section = Sections[SectionID];
+
+  bool isExtern = MachO->getPlainRelocationExternal(RE);
+  bool IsPCRel = MachO->getAnyRelocationPCRel(RE);
+  unsigned Size = MachO->getAnyRelocationLength(RE);
+  uint64_t Offset;
+  RelI.getOffset(Offset);
+  uint8_t *LocalAddress = Section.Address + Offset;
+  unsigned NumBytes = 1 << Size;
+  uint64_t Addend = 0;
+  memcpy(&Addend, LocalAddress, NumBytes);
 
-  bool isExtern = (RelType >> 27) & 1;
   if (isExtern) {
     // Obtain the symbol name which is referenced in the relocation
+    SymbolRef Symbol;
+    RelI.getSymbol(Symbol);
     StringRef TargetName;
-    const SymbolRef &Symbol = Rel.Symbol;
     Symbol.getName(TargetName);
     // First search for the symbol in the local symbol table
     SymbolTableMap::const_iterator lsi = Symbols.find(TargetName.data());
     if (lsi != Symbols.end()) {
       Value.SectionID = lsi->second.first;
-      Value.Addend = lsi->second.second;
+      Value.Addend = lsi->second.second + Addend;
     } else {
       // Search for the symbol in the global symbol table
       SymbolTableMap::const_iterator gsi = GlobalSymbolTable.find(TargetName.data());
       if (gsi != GlobalSymbolTable.end()) {
         Value.SectionID = gsi->second.first;
-        Value.Addend = gsi->second.second;
-      } else
+        Value.Addend = gsi->second.second + Addend;
+      } else {
         Value.SymbolName = TargetName.data();
+        Value.Addend = Addend;
+      }
     }
   } else {
-    error_code err;
-    uint8_t sectionIndex = static_cast<uint8_t>(RelType & 0xFF);
-    section_iterator si = Obj.begin_sections(),
-                     se = Obj.end_sections();
-    for (uint8_t i = 1; i < sectionIndex; i++) {
-      error_code err;
-      si.increment(err);
-      if (si == se)
-        break;
-    }
-    assert(si != se && "No section containing relocation!");
-    Value.SectionID = findOrEmitSection(Obj, *si, true, ObjSectionToID);
-    Value.Addend = 0;
-    // FIXME: The size and type of the relocation determines if we can
-    // encode an Addend in the target location itself, and if so, how many
-    // bytes we should read in order to get it. We don't yet support doing
-    // that, and just assuming it's sizeof(intptr_t) is blatantly wrong.
-    //Value.Addend = *(const intptr_t *)Target;
-    if (Value.Addend) {
-      // The MachO addend is an offset from the current section.  We need it
-      // to be an offset from the destination section
-      Value.Addend += Section.ObjAddress - Sections[Value.SectionID].ObjAddress;
-    }
+    SectionRef Sec = MachO->getRelocationSection(RE);
+    Value.SectionID = findOrEmitSection(Obj, Sec, true, ObjSectionToID);
+    uint64_t Addr;
+    Sec.getAddress(Addr);
+    Value.Addend = Addend - Addr;
   }
 
-  if (Arch == Triple::arm && (RelType & 0xf) == macho::RIT_ARM_Branch24Bit) {
+  if (Arch == Triple::x86_64 && RelType == macho::RIT_X86_64_GOT) {
+    assert(IsPCRel);
+    assert(Size == 2);
+    StubMap::const_iterator i = Stubs.find(Value);
+    uint8_t *Addr;
+    if (i != Stubs.end()) {
+      Addr = Section.Address + i->second;
+    } else {
+      Stubs[Value] = Section.StubOffset;
+      uint8_t *GOTEntry = Section.Address + Section.StubOffset;
+      RelocationEntry RE(SectionID, Section.StubOffset,
+                         macho::RIT_X86_64_Unsigned, Value.Addend - 4, false,
+                         3);
+      if (Value.SymbolName)
+        addRelocationForSymbol(RE, Value.SymbolName);
+      else
+        addRelocationForSection(RE, Value.SectionID);
+      Section.StubOffset += 8;
+      Addr = GOTEntry;
+    }
+    resolveRelocation(Section, Offset, (uint64_t)Addr,
+                      macho::RIT_X86_64_Unsigned, 4, true, 2);
+  } else if (Arch == Triple::arm &&
+             (RelType & 0xf) == macho::RIT_ARM_Branch24Bit) {
     // This is an ARM branch relocation, need to use a stub function.
 
     //  Look up for existing stub.
     StubMap::const_iterator i = Stubs.find(Value);
     if (i != Stubs.end())
-      resolveRelocation(Section, Rel.Offset,
+      resolveRelocation(Section, Offset,
                         (uint64_t)Section.Address + i->second,
-                        RelType, 0);
+                        RelType, 0, IsPCRel, Size);
     else {
       // Create a new stub function.
       Stubs[Value] = Section.StubOffset;
       uint8_t *StubTargetAddr = createStubFunction(Section.Address +
                                                    Section.StubOffset);
-      RelocationEntry RE(Rel.SectionID, StubTargetAddr - Section.Address,
+      RelocationEntry RE(SectionID, StubTargetAddr - Section.Address,
                          macho::RIT_Vanilla, Value.Addend);
       if (Value.SymbolName)
         addRelocationForSymbol(RE, Value.SymbolName);
       else
         addRelocationForSection(RE, Value.SectionID);
-      resolveRelocation(Section, Rel.Offset,
+      resolveRelocation(Section, Offset,
                         (uint64_t)Section.Address + Section.StubOffset,
-                        RelType, 0);
+                        RelType, 0, IsPCRel, Size);
       Section.StubOffset += getMaxStubSize();
     }
   } else {
-    RelocationEntry RE(Rel.SectionID, Rel.Offset, RelType, Value.Addend);
+    RelocationEntry RE(SectionID, Offset, RelType, Value.Addend,
+                       IsPCRel, Size);
     if (Value.SymbolName)
       addRelocationForSymbol(RE, Value.SymbolName);
     else
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
index 62d8487..df8d3bb 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
@@ -16,7 +16,7 @@
 
 #include "RuntimeDyldImpl.h"
 #include "llvm/ADT/IndexedMap.h"
-#include "llvm/Object/MachOObject.h"
+#include "llvm/Object/MachO.h"
 #include "llvm/Support/Format.h"
 
 using namespace llvm;
@@ -25,7 +25,6 @@ using namespace llvm::object;
 
 namespace llvm {
 class RuntimeDyldMachO : public RuntimeDyldImpl {
-protected:
   bool resolveI386Relocation(uint8_t *LocalAddress,
                              uint64_t FinalAddress,
                              uint64_t Value,
@@ -48,22 +47,25 @@ protected:
                             unsigned Size,
                             int64_t Addend);
 
-  virtual void processRelocationRef(const ObjRelocationInfo &Rel,
+  void resolveRelocation(const SectionEntry &Section,
+                         uint64_t Offset,
+                         uint64_t Value,
+                         uint32_t Type,
+                         int64_t Addend,
+                         bool isPCRel,
+                         unsigned Size);
+public:
+  RuntimeDyldMachO(RTDyldMemoryManager *mm) : RuntimeDyldImpl(mm) {}
+
+  virtual void resolveRelocation(const RelocationEntry &RE, uint64_t Value);
+  virtual void processRelocationRef(unsigned SectionID,
+                                    RelocationRef RelI,
                                     ObjectImage &Obj,
                                     ObjSectionToIDMap &ObjSectionToID,
                                     const SymbolTableMap &Symbols,
                                     StubMap &Stubs);
-
-public:
-  virtual void resolveRelocation(const SectionEntry &Section,
-                                 uint64_t Offset,
-                                 uint64_t Value,
-                                 uint32_t Type,
-                                 int64_t Addend);
-
-  RuntimeDyldMachO(RTDyldMemoryManager *mm) : RuntimeDyldImpl(mm) {}
-
-  bool isCompatibleFormat(const ObjectBuffer *Buffer) const;
+  virtual bool isCompatibleFormat(const ObjectBuffer *Buffer) const;
+  virtual StringRef getEHFrameSection();
 };
 
 } // end namespace llvm
diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp
index fb591a8..7761127d 100644
--- a/lib/IR/AsmWriter.cpp
+++ b/lib/IR/AsmWriter.cpp
@@ -1605,6 +1605,29 @@ void AssemblyWriter::printFunction(const Function *F) {
   if (F->isMaterializable())
     Out << "; Materializable\n";
 
+  const AttributeSet &Attrs = F->getAttributes();
+  if (Attrs.hasAttributes(AttributeSet::FunctionIndex)) {
+    AttributeSet AS = Attrs.getFnAttributes();
+    std::string AttrStr;
+
+    unsigned Idx = 0;
+    for (unsigned E = AS.getNumSlots(); Idx != E; ++Idx)
+      if (AS.getSlotIndex(Idx) == AttributeSet::FunctionIndex)
+        break;
+
+    for (AttributeSet::iterator I = AS.begin(Idx), E = AS.end(Idx);
+         I != E; ++I) {
+      Attribute Attr = *I;
+      if (!Attr.isStringAttribute()) {
+        if (!AttrStr.empty()) AttrStr += ' ';
+        AttrStr += Attr.getAsString();
+      }
+    }
+
+    if (!AttrStr.empty())
+      Out << "; Function Attrs: " << AttrStr << '\n';
+  }
+
   if (F->isDeclaration())
     Out << "declare ";
   else
@@ -1620,7 +1643,6 @@ void AssemblyWriter::printFunction(const Function *F) {
   }
 
   FunctionType *FT = F->getFunctionType();
-  const AttributeSet &Attrs = F->getAttributes();
   if (Attrs.hasAttributes(AttributeSet::ReturnIndex))
     Out <<  Attrs.getAsString(AttributeSet::ReturnIndex) << ' ';
   TypePrinter.print(F->getReturnType(), Out);
@@ -1761,10 +1783,8 @@ void AssemblyWriter::printBasicBlock(const BasicBlock *BB) {
 /// which slot it occupies.
 ///
 void AssemblyWriter::printInfoComment(const Value &V) {
-  if (AnnotationWriter) {
+  if (AnnotationWriter)
     AnnotationWriter->printInfoComment(V, Out);
-    return;
-  }
 }
 
 // This member is called for each Instruction in a function..
diff --git a/lib/IR/AttributeImpl.h b/lib/IR/AttributeImpl.h
index ad2670d..0b6228b 100644
--- a/lib/IR/AttributeImpl.h
+++ b/lib/IR/AttributeImpl.h
@@ -228,7 +228,7 @@ public:
   /// is the index of the return, parameter, or function object that the
   /// attributes are applied to, not the index into the AttrNodes list where the
   /// attributes reside.
-  uint64_t getSlotIndex(unsigned Slot) const {
+  unsigned getSlotIndex(unsigned Slot) const {
     return AttrNodes[Slot].first;
   }
 
@@ -248,15 +248,15 @@ public:
   typedef AttributeSetNode::iterator       iterator;
   typedef AttributeSetNode::const_iterator const_iterator;
 
-  iterator begin(unsigned Idx)
-    { return AttrNodes[Idx].second->begin(); }
-  iterator end(unsigned Idx)
-    { return AttrNodes[Idx].second->end(); }
+  iterator begin(unsigned Slot)
+    { return AttrNodes[Slot].second->begin(); }
+  iterator end(unsigned Slot)
+    { return AttrNodes[Slot].second->end(); }
 
-  const_iterator begin(unsigned Idx) const
-    { return AttrNodes[Idx].second->begin(); }
-  const_iterator end(unsigned Idx) const
-    { return AttrNodes[Idx].second->end(); }
+  const_iterator begin(unsigned Slot) const
+    { return AttrNodes[Slot].second->begin(); }
+  const_iterator end(unsigned Slot) const
+    { return AttrNodes[Slot].second->end(); }
 
   void Profile(FoldingSetNodeID &ID) const {
     Profile(ID, AttrNodes);
@@ -270,7 +270,7 @@ public:
   }
 
   // FIXME: This atrocity is temporary.
-  uint64_t Raw(uint64_t Index) const;
+  uint64_t Raw(unsigned Index) const;
 };
 
 } // end llvm namespace
diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp
index 2d82891..4fe6f9d 100644
--- a/lib/IR/Attributes.cpp
+++ b/lib/IR/Attributes.cpp
@@ -195,6 +195,8 @@ std::string Attribute::getAsString(bool InAttrGrp) const {
     return "readnone";
   if (hasAttribute(Attribute::ReadOnly))
     return "readonly";
+  if (hasAttribute(Attribute::Returned))
+    return "returned";
   if (hasAttribute(Attribute::ReturnsTwice))
     return "returns_twice";
   if (hasAttribute(Attribute::SExt))
@@ -393,6 +395,7 @@ uint64_t AttributeImpl::getAttrMask(Attribute::AttrKind Val) {
   case Attribute::SanitizeThread:  return 1ULL << 36;
   case Attribute::SanitizeMemory:  return 1ULL << 37;
   case Attribute::NoBuiltin:       return 1ULL << 38;
+  case Attribute::Returned:        return 1ULL << 39;
   }
   llvm_unreachable("Unsupported attribute type");
 }
@@ -481,11 +484,12 @@ unsigned AttributeSetNode::getStackAlignment() const {
 }
 
 std::string AttributeSetNode::getAsString(bool InAttrGrp) const {
-  std::string Str = "";
+  std::string Str;
   for (SmallVectorImpl<Attribute>::const_iterator I = AttrList.begin(),
-         E = AttrList.end(); I != E; ) {
+         E = AttrList.end(); I != E; ++I) {
+    if (I != AttrList.begin())
+      Str += ' ';
     Str += I->getAsString(InAttrGrp);
-    if (++I != E) Str += " ";
   }
   return Str;
 }
@@ -494,7 +498,7 @@ std::string AttributeSetNode::getAsString(bool InAttrGrp) const {
 // AttributeSetImpl Definition
 //===----------------------------------------------------------------------===//
 
-uint64_t AttributeSetImpl::Raw(uint64_t Index) const {
+uint64_t AttributeSetImpl::Raw(unsigned Index) const {
   for (unsigned I = 0, E = getNumAttributes(); I != E; ++I) {
     if (getSlotIndex(I) != Index) continue;
     const AttributeSetNode *ASN = AttrNodes[I].second;
@@ -592,7 +596,7 @@ AttributeSet AttributeSet::get(LLVMContext &C,
   return getImpl(C, Attrs);
 }
 
-AttributeSet AttributeSet::get(LLVMContext &C, unsigned Idx, AttrBuilder &B) {
+AttributeSet AttributeSet::get(LLVMContext &C, unsigned Index, AttrBuilder &B) {
   if (!B.hasAttributes())
     return AttributeSet();
 
@@ -604,29 +608,29 @@ AttributeSet AttributeSet::get(LLVMContext &C, unsigned Idx, AttrBuilder &B) {
       continue;
 
     if (Kind == Attribute::Alignment)
-      Attrs.push_back(std::make_pair(Idx, Attribute::
+      Attrs.push_back(std::make_pair(Index, Attribute::
                                      getWithAlignment(C, B.getAlignment())));
     else if (Kind == Attribute::StackAlignment)
-      Attrs.push_back(std::make_pair(Idx, Attribute::
+      Attrs.push_back(std::make_pair(Index, Attribute::
                               getWithStackAlignment(C, B.getStackAlignment())));
     else
-      Attrs.push_back(std::make_pair(Idx, Attribute::get(C, Kind)));
+      Attrs.push_back(std::make_pair(Index, Attribute::get(C, Kind)));
   }
 
   // Add target-dependent (string) attributes.
   for (AttrBuilder::td_iterator I = B.td_begin(), E = B.td_end();
        I != E; ++I)
-    Attrs.push_back(std::make_pair(Idx, Attribute::get(C, I->first,I->second)));
+    Attrs.push_back(std::make_pair(Index, Attribute::get(C, I->first,I->second)));
 
   return get(C, Attrs);
 }
 
-AttributeSet AttributeSet::get(LLVMContext &C, unsigned Idx,
+AttributeSet AttributeSet::get(LLVMContext &C, unsigned Index,
                                ArrayRef<Attribute::AttrKind> Kind) {
   SmallVector<std::pair<unsigned, Attribute>, 8> Attrs;
   for (ArrayRef<Attribute::AttrKind>::iterator I = Kind.begin(),
          E = Kind.end(); I != E; ++I)
-    Attrs.push_back(std::make_pair(Idx, Attribute::get(C, *I)));
+    Attrs.push_back(std::make_pair(Index, Attribute::get(C, *I)));
   return get(C, Attrs);
 }
 
@@ -643,20 +647,20 @@ AttributeSet AttributeSet::get(LLVMContext &C, ArrayRef<AttributeSet> Attrs) {
   return getImpl(C, AttrNodeVec);
 }
 
-AttributeSet AttributeSet::addAttribute(LLVMContext &C, unsigned Idx,
+AttributeSet AttributeSet::addAttribute(LLVMContext &C, unsigned Index,
                                         Attribute::AttrKind Attr) const {
-  if (hasAttribute(Idx, Attr)) return *this;
-  return addAttributes(C, Idx, AttributeSet::get(C, Idx, Attr));
+  if (hasAttribute(Index, Attr)) return *this;
+  return addAttributes(C, Index, AttributeSet::get(C, Index, Attr));
 }
 
-AttributeSet AttributeSet::addAttribute(LLVMContext &C, unsigned Idx,
+AttributeSet AttributeSet::addAttribute(LLVMContext &C, unsigned Index,
                                         StringRef Kind) const {
   llvm::AttrBuilder B;
   B.addAttribute(Kind);
-  return addAttributes(C, Idx, AttributeSet::get(C, Idx, B));
+  return addAttributes(C, Index, AttributeSet::get(C, Index, B));
 }
 
-AttributeSet AttributeSet::addAttributes(LLVMContext &C, unsigned Idx,
+AttributeSet AttributeSet::addAttributes(LLVMContext &C, unsigned Index,
                                          AttributeSet Attrs) const {
   if (!pImpl) return Attrs;
   if (!Attrs.pImpl) return *this;
@@ -664,8 +668,8 @@ AttributeSet AttributeSet::addAttributes(LLVMContext &C, unsigned Idx,
 #ifndef NDEBUG
   // FIXME it is not obvious how this should work for alignment. For now, say
   // we can't change a known alignment.
-  unsigned OldAlign = getParamAlignment(Idx);
-  unsigned NewAlign = Attrs.getParamAlignment(Idx);
+  unsigned OldAlign = getParamAlignment(Index);
+  unsigned NewAlign = Attrs.getParamAlignment(Index);
   assert((!OldAlign || !NewAlign || OldAlign == NewAlign) &&
          "Attempt to change alignment!");
 #endif
@@ -676,8 +680,8 @@ AttributeSet AttributeSet::addAttributes(LLVMContext &C, unsigned Idx,
   AttributeSet AS;
   uint64_t LastIndex = 0;
   for (unsigned I = 0, E = NumAttrs; I != E; ++I) {
-    if (getSlotIndex(I) >= Idx) {
-      if (getSlotIndex(I) == Idx) AS = getSlotAttributes(LastIndex++);
+    if (getSlotIndex(I) >= Index) {
+      if (getSlotIndex(I) == Index) AS = getSlotAttributes(LastIndex++);
       break;
     }
     LastIndex = I + 1;
@@ -686,17 +690,17 @@ AttributeSet AttributeSet::addAttributes(LLVMContext &C, unsigned Idx,
 
   // Now add the attribute into the correct slot. There may already be an
   // AttributeSet there.
-  AttrBuilder B(AS, Idx);
+  AttrBuilder B(AS, Index);
 
   for (unsigned I = 0, E = Attrs.pImpl->getNumAttributes(); I != E; ++I)
-    if (Attrs.getSlotIndex(I) == Idx) {
+    if (Attrs.getSlotIndex(I) == Index) {
       for (AttributeSetImpl::const_iterator II = Attrs.pImpl->begin(I),
              IE = Attrs.pImpl->end(I); II != IE; ++II)
         B.addAttribute(*II);
       break;
     }
 
-  AttrSet.push_back(AttributeSet::get(C, Idx, B));
+  AttrSet.push_back(AttributeSet::get(C, Index, B));
 
   // Add the remaining attribute slots.
   for (unsigned I = LastIndex, E = NumAttrs; I < E; ++I)
@@ -705,13 +709,13 @@ AttributeSet AttributeSet::addAttributes(LLVMContext &C, unsigned Idx,
   return get(C, AttrSet);
 }
 
-AttributeSet AttributeSet::removeAttribute(LLVMContext &C, unsigned Idx,
+AttributeSet AttributeSet::removeAttribute(LLVMContext &C, unsigned Index,
                                            Attribute::AttrKind Attr) const {
-  if (!hasAttribute(Idx, Attr)) return *this;
-  return removeAttributes(C, Idx, AttributeSet::get(C, Idx, Attr));
+  if (!hasAttribute(Index, Attr)) return *this;
+  return removeAttributes(C, Index, AttributeSet::get(C, Index, Attr));
 }
 
-AttributeSet AttributeSet::removeAttributes(LLVMContext &C, unsigned Idx,
+AttributeSet AttributeSet::removeAttributes(LLVMContext &C, unsigned Index,
                                             AttributeSet Attrs) const {
   if (!pImpl) return AttributeSet();
   if (!Attrs.pImpl) return *this;
@@ -719,7 +723,7 @@ AttributeSet AttributeSet::removeAttributes(LLVMContext &C, unsigned Idx,
 #ifndef NDEBUG
   // FIXME it is not obvious how this should work for alignment.
   // For now, say we can't pass in alignment, which no current use does.
-  assert(!Attrs.hasAttribute(Idx, Attribute::Alignment) &&
+  assert(!Attrs.hasAttribute(Index, Attribute::Alignment) &&
          "Attempt to change alignment!");
 #endif
 
@@ -729,8 +733,8 @@ AttributeSet AttributeSet::removeAttributes(LLVMContext &C, unsigned Idx,
   AttributeSet AS;
   uint64_t LastIndex = 0;
   for (unsigned I = 0, E = NumAttrs; I != E; ++I) {
-    if (getSlotIndex(I) >= Idx) {
-      if (getSlotIndex(I) == Idx) AS = getSlotAttributes(LastIndex++);
+    if (getSlotIndex(I) >= Index) {
+      if (getSlotIndex(I) == Index) AS = getSlotAttributes(LastIndex++);
       break;
     }
     LastIndex = I + 1;
@@ -739,15 +743,15 @@ AttributeSet AttributeSet::removeAttributes(LLVMContext &C, unsigned Idx,
 
   // Now remove the attribute from the correct slot. There may already be an
   // AttributeSet there.
-  AttrBuilder B(AS, Idx);
+  AttrBuilder B(AS, Index);
 
   for (unsigned I = 0, E = Attrs.pImpl->getNumAttributes(); I != E; ++I)
-    if (Attrs.getSlotIndex(I) == Idx) {
-      B.removeAttributes(Attrs.pImpl->getSlotAttributes(I), Idx);
+    if (Attrs.getSlotIndex(I) == Index) {
+      B.removeAttributes(Attrs.pImpl->getSlotAttributes(I), Index);
       break;
     }
 
-  AttrSet.push_back(AttributeSet::get(C, Idx, B));
+  AttrSet.push_back(AttributeSet::get(C, Index, B));
 
   // Add the remaining attribute slots.
   for (unsigned I = LastIndex, E = NumAttrs; I < E; ++I)
@@ -764,11 +768,11 @@ LLVMContext &AttributeSet::getContext() const {
   return pImpl->getContext();
 }
 
-AttributeSet AttributeSet::getParamAttributes(unsigned Idx) const {
-  return pImpl && hasAttributes(Idx) ?
+AttributeSet AttributeSet::getParamAttributes(unsigned Index) const {
+  return pImpl && hasAttributes(Index) ?
     AttributeSet::get(pImpl->getContext(),
                       ArrayRef<std::pair<unsigned, AttributeSetNode*> >(
-                        std::make_pair(Idx, getAttributes(Idx)))) :
+                        std::make_pair(Index, getAttributes(Index)))) :
     AttributeSet();
 }
 
@@ -848,27 +852,27 @@ std::string AttributeSet::getAsString(unsigned Index,
 }
 
 /// \brief The attributes for the specified index are returned.
-AttributeSetNode *AttributeSet::getAttributes(unsigned Idx) const {
+AttributeSetNode *AttributeSet::getAttributes(unsigned Index) const {
   if (!pImpl) return 0;
 
   // Loop through to find the attribute node we want.
   for (unsigned I = 0, E = pImpl->getNumAttributes(); I != E; ++I)
-    if (pImpl->getSlotIndex(I) == Idx)
+    if (pImpl->getSlotIndex(I) == Index)
       return pImpl->getSlotNode(I);
 
   return 0;
 }
 
-AttributeSet::iterator AttributeSet::begin(unsigned Idx) const {
+AttributeSet::iterator AttributeSet::begin(unsigned Slot) const {
   if (!pImpl)
     return ArrayRef<Attribute>().begin();
-  return pImpl->begin(Idx);
+  return pImpl->begin(Slot);
 }
 
-AttributeSet::iterator AttributeSet::end(unsigned Idx) const {
+AttributeSet::iterator AttributeSet::end(unsigned Slot) const {
   if (!pImpl)
     return ArrayRef<Attribute>().end();
-  return pImpl->end(Idx);
+  return pImpl->end(Slot);
 }
 
 //===----------------------------------------------------------------------===//
@@ -882,7 +886,7 @@ unsigned AttributeSet::getNumSlots() const {
   return pImpl ? pImpl->getNumAttributes() : 0;
 }
 
-uint64_t AttributeSet::getSlotIndex(unsigned Slot) const {
+unsigned AttributeSet::getSlotIndex(unsigned Slot) const {
   assert(pImpl && Slot < pImpl->getNumAttributes() &&
          "Slot # out of range!");
   return pImpl->getSlotIndex(Slot);
@@ -919,13 +923,13 @@ void AttributeSet::dump() const {
 // AttrBuilder Method Implementations
 //===----------------------------------------------------------------------===//
 
-AttrBuilder::AttrBuilder(AttributeSet AS, unsigned Idx)
+AttrBuilder::AttrBuilder(AttributeSet AS, unsigned Index)
   : Attrs(0), Alignment(0), StackAlignment(0) {
   AttributeSetImpl *pImpl = AS.pImpl;
   if (!pImpl) return;
 
   for (unsigned I = 0, E = pImpl->getNumAttributes(); I != E; ++I) {
-    if (pImpl->getSlotIndex(I) != Idx) continue;
+    if (pImpl->getSlotIndex(I) != Index) continue;
 
     for (AttributeSetImpl::const_iterator II = pImpl->begin(I),
            IE = pImpl->end(I); II != IE; ++II)
@@ -982,16 +986,16 @@ AttrBuilder &AttrBuilder::removeAttribute(Attribute::AttrKind Val) {
 }
 
 AttrBuilder &AttrBuilder::removeAttributes(AttributeSet A, uint64_t Index) {
-  unsigned Idx = ~0U;
+  unsigned Slot = ~0U;
   for (unsigned I = 0, E = A.getNumSlots(); I != E; ++I)
     if (A.getSlotIndex(I) == Index) {
-      Idx = I;
+      Slot = I;
       break;
     }
 
-  assert(Idx != ~0U && "Couldn't find index in AttributeSet!");
+  assert(Slot != ~0U && "Couldn't find index in AttributeSet!");
 
-  for (AttributeSet::iterator I = A.begin(Idx), E = A.end(Idx); I != E; ++I) {
+  for (AttributeSet::iterator I = A.begin(Slot), E = A.end(Slot); I != E; ++I) {
     Attribute Attr = *I;
     if (Attr.isEnumAttribute() || Attr.isAlignAttribute()) {
       Attribute::AttrKind Kind = I->getKindAsEnum();
@@ -1069,16 +1073,16 @@ bool AttrBuilder::hasAttributes() const {
 }
 
 bool AttrBuilder::hasAttributes(AttributeSet A, uint64_t Index) const {
-  unsigned Idx = ~0U;
+  unsigned Slot = ~0U;
   for (unsigned I = 0, E = A.getNumSlots(); I != E; ++I)
     if (A.getSlotIndex(I) == Index) {
-      Idx = I;
+      Slot = I;
       break;
     }
 
-  assert(Idx != ~0U && "Couldn't find the index!");
+  assert(Slot != ~0U && "Couldn't find the index!");
 
-  for (AttributeSet::iterator I = A.begin(Idx), E = A.end(Idx);
+  for (AttributeSet::iterator I = A.begin(Slot), E = A.end(Slot);
        I != E; ++I) {
     Attribute Attr = *I;
     if (Attr.isEnumAttribute() || Attr.isAlignAttribute()) {
@@ -1109,33 +1113,6 @@ bool AttrBuilder::operator==(const AttrBuilder &B) {
   return Alignment == B.Alignment && StackAlignment == B.StackAlignment;
 }
 
-void AttrBuilder::removeFunctionOnlyAttrs() {
-  removeAttribute(Attribute::NoReturn)
-    .removeAttribute(Attribute::NoUnwind)
-    .removeAttribute(Attribute::ReadNone)
-    .removeAttribute(Attribute::ReadOnly)
-    .removeAttribute(Attribute::NoInline)
-    .removeAttribute(Attribute::AlwaysInline)
-    .removeAttribute(Attribute::OptimizeForSize)
-    .removeAttribute(Attribute::StackProtect)
-    .removeAttribute(Attribute::StackProtectReq)
-    .removeAttribute(Attribute::StackProtectStrong)
-    .removeAttribute(Attribute::NoRedZone)
-    .removeAttribute(Attribute::NoImplicitFloat)
-    .removeAttribute(Attribute::Naked)
-    .removeAttribute(Attribute::InlineHint)
-    .removeAttribute(Attribute::StackAlignment)
-    .removeAttribute(Attribute::UWTable)
-    .removeAttribute(Attribute::NonLazyBind)
-    .removeAttribute(Attribute::ReturnsTwice)
-    .removeAttribute(Attribute::SanitizeAddress)
-    .removeAttribute(Attribute::SanitizeThread)
-    .removeAttribute(Attribute::SanitizeMemory)
-    .removeAttribute(Attribute::MinSize)
-    .removeAttribute(Attribute::NoDuplicate)
-    .removeAttribute(Attribute::NoBuiltin);
-}
-
 AttrBuilder &AttrBuilder::addRawValue(uint64_t Val) {
   // FIXME: Remove this in 4.0.
   if (!Val) return *this;
diff --git a/lib/IR/Constants.cpp b/lib/IR/Constants.cpp
index 1abb656..2c6971c 100644
--- a/lib/IR/Constants.cpp
+++ b/lib/IR/Constants.cpp
@@ -237,18 +237,21 @@ void Constant::destroyConstantImpl() {
   delete this;
 }
 
-/// canTrap - Return true if evaluation of this constant could trap.  This is
-/// true for things like constant expressions that could divide by zero.
-bool Constant::canTrap() const {
-  assert(getType()->isFirstClassType() && "Cannot evaluate aggregate vals!");
+static bool canTrapImpl(const Constant *C,
+                        SmallPtrSet<const ConstantExpr *, 4> &NonTrappingOps) {
+  assert(C->getType()->isFirstClassType() && "Cannot evaluate aggregate vals!");
   // The only thing that could possibly trap are constant exprs.
-  const ConstantExpr *CE = dyn_cast<ConstantExpr>(this);
-  if (!CE) return false;
+  const ConstantExpr *CE = dyn_cast<ConstantExpr>(C);
+  if (!CE)
+    return false;
 
   // ConstantExpr traps if any operands can trap.
-  for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
-    if (CE->getOperand(i)->canTrap())
-      return true;
+  for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) {
+    if (ConstantExpr *Op = dyn_cast<ConstantExpr>(CE->getOperand(i))) {
+      if (NonTrappingOps.insert(Op) && canTrapImpl(Op, NonTrappingOps))
+        return true;
+    }
+  }
 
   // Otherwise, only specific operations can trap.
   switch (CE->getOpcode()) {
@@ -267,6 +270,13 @@ bool Constant::canTrap() const {
   }
 }
 
+/// canTrap - Return true if evaluation of this constant could trap.  This is
+/// true for things like constant expressions that could divide by zero.
+bool Constant::canTrap() const {
+  SmallPtrSet<const ConstantExpr *, 4> NonTrappingOps;
+  return canTrapImpl(this, NonTrappingOps);
+}
+
 /// isThreadDependent - Return true if the value can vary between threads.
 bool Constant::isThreadDependent() const {
   SmallPtrSet<const Constant*, 64> Visited;
diff --git a/lib/IR/ConstantsContext.h b/lib/IR/ConstantsContext.h
index e995858..32bed95 100644
--- a/lib/IR/ConstantsContext.h
+++ b/lib/IR/ConstantsContext.h
@@ -318,7 +318,7 @@ struct ExprMapKeyType {
       ArrayRef<Constant*> ops,
       unsigned short flags = 0,
       unsigned short optionalflags = 0,
-      ArrayRef<unsigned> inds = ArrayRef<unsigned>())
+      ArrayRef<unsigned> inds = None)
         : opcode(opc), subclassoptionaldata(optionalflags), subclassdata(flags),
         operands(ops.begin(), ops.end()), indices(inds.begin(), inds.end()) {}
   uint8_t opcode;
diff --git a/lib/IR/Core.cpp b/lib/IR/Core.cpp
index 983b49c..889d574 100644
--- a/lib/IR/Core.cpp
+++ b/lib/IR/Core.cpp
@@ -21,7 +21,9 @@
 #include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/InlineAsm.h"
 #include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
 #include "llvm/PassManager.h"
 #include "llvm/Support/CallSite.h"
 #include "llvm/Support/Debug.h"
@@ -1301,6 +1303,53 @@ void LLVMSetGlobalConstant(LLVMValueRef GlobalVar, LLVMBool IsConstant) {
   unwrap<GlobalVariable>(GlobalVar)->setConstant(IsConstant != 0);
 }
 
+LLVMThreadLocalMode LLVMGetThreadLocalMode(LLVMValueRef GlobalVar) {
+  switch (unwrap<GlobalVariable>(GlobalVar)->getThreadLocalMode()) {
+  case GlobalVariable::NotThreadLocal:
+    return LLVMNotThreadLocal;
+  case GlobalVariable::GeneralDynamicTLSModel:
+    return LLVMGeneralDynamicTLSModel;
+  case GlobalVariable::LocalDynamicTLSModel:
+    return LLVMLocalDynamicTLSModel;
+  case GlobalVariable::InitialExecTLSModel:
+    return LLVMInitialExecTLSModel;
+  case GlobalVariable::LocalExecTLSModel:
+    return LLVMLocalExecTLSModel;
+  }
+
+  llvm_unreachable("Invalid GlobalVariable thread local mode");
+}
+
+void LLVMSetThreadLocalMode(LLVMValueRef GlobalVar, LLVMThreadLocalMode Mode) {
+  GlobalVariable *GV = unwrap<GlobalVariable>(GlobalVar);
+
+  switch (Mode) {
+  case LLVMNotThreadLocal:
+    GV->setThreadLocalMode(GlobalVariable::NotThreadLocal);
+    break;
+  case LLVMGeneralDynamicTLSModel:
+    GV->setThreadLocalMode(GlobalVariable::GeneralDynamicTLSModel);
+    break;
+  case LLVMLocalDynamicTLSModel:
+    GV->setThreadLocalMode(GlobalVariable::LocalDynamicTLSModel);
+    break;
+  case LLVMInitialExecTLSModel:
+    GV->setThreadLocalMode(GlobalVariable::InitialExecTLSModel);
+    break;
+  case LLVMLocalExecTLSModel:
+    GV->setThreadLocalMode(GlobalVariable::LocalExecTLSModel);
+    break;
+  }
+}
+
+LLVMBool LLVMIsExternallyInitialized(LLVMValueRef GlobalVar) {
+  return unwrap<GlobalVariable>(GlobalVar)->isExternallyInitialized();
+}
+
+void LLVMSetExternallyInitialized(LLVMValueRef GlobalVar, LLVMBool IsExtInit) {
+  unwrap<GlobalVariable>(GlobalVar)->setExternallyInitialized(IsExtInit);
+}
+
 /*--.. Operations on aliases ......................................--*/
 
 LLVMValueRef LLVMAddAlias(LLVMModuleRef M, LLVMTypeRef Ty, LLVMValueRef Aliasee,
@@ -1396,6 +1445,18 @@ void LLVMAddFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA) {
   Func->setAttributes(PALnew);
 }
 
+void LLVMAddTargetDependentFunctionAttr(LLVMValueRef Fn, const char *A,
+                                        const char *V) {
+  Function *Func = unwrap<Function>(Fn);
+  AttributeSet::AttrIndex Idx =
+    AttributeSet::AttrIndex(AttributeSet::FunctionIndex);
+  AttrBuilder B;
+
+  B.addAttribute(A, V);
+  AttributeSet Set = AttributeSet::get(Func->getContext(), Idx, B);
+  Func->addAttributes(Idx, Set);
+}
+
 void LLVMRemoveFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA) {
   Function *Func = unwrap<Function>(Fn);
   const AttributeSet PAL = Func->getAttributes();
@@ -2331,6 +2392,42 @@ LLVMValueRef LLVMBuildPtrDiff(LLVMBuilderRef B, LLVMValueRef LHS,
   return wrap(unwrap(B)->CreatePtrDiff(unwrap(LHS), unwrap(RHS), Name));
 }
 
+LLVMValueRef LLVMBuildAtomicRMW(LLVMBuilderRef B,LLVMAtomicRMWBinOp op, 
+                               LLVMValueRef PTR, LLVMValueRef Val, 
+                               LLVMAtomicOrdering ordering, 
+                               LLVMBool singleThread) {
+  AtomicRMWInst::BinOp intop;
+  switch (op) {
+    case LLVMAtomicRMWBinOpXchg: intop = AtomicRMWInst::Xchg; break;
+    case LLVMAtomicRMWBinOpAdd: intop = AtomicRMWInst::Add; break;
+    case LLVMAtomicRMWBinOpSub: intop = AtomicRMWInst::Sub; break;
+    case LLVMAtomicRMWBinOpAnd: intop = AtomicRMWInst::And; break;
+    case LLVMAtomicRMWBinOpNand: intop = AtomicRMWInst::Nand; break;
+    case LLVMAtomicRMWBinOpOr: intop = AtomicRMWInst::Or; break;
+    case LLVMAtomicRMWBinOpXor: intop = AtomicRMWInst::Xor; break;
+    case LLVMAtomicRMWBinOpMax: intop = AtomicRMWInst::Max; break;
+    case LLVMAtomicRMWBinOpMin: intop = AtomicRMWInst::Min; break;
+    case LLVMAtomicRMWBinOpUMax: intop = AtomicRMWInst::UMax; break;
+    case LLVMAtomicRMWBinOpUMin: intop = AtomicRMWInst::UMin; break;
+  }
+  AtomicOrdering intordering;
+  switch (ordering) {
+    case LLVMAtomicOrderingNotAtomic: intordering = NotAtomic; break;
+    case LLVMAtomicOrderingUnordered: intordering = Unordered; break;
+    case LLVMAtomicOrderingMonotonic: intordering = Monotonic; break;
+    case LLVMAtomicOrderingAcquire: intordering = Acquire; break;
+    case LLVMAtomicOrderingRelease: intordering = Release; break;
+    case LLVMAtomicOrderingAcquireRelease: 
+      intordering = AcquireRelease; 
+      break;
+    case LLVMAtomicOrderingSequentiallyConsistent: 
+      intordering = SequentiallyConsistent; 
+      break;
+  }
+  return wrap(unwrap(B)->CreateAtomicRMW(intop, unwrap(PTR), unwrap(Val), 
+    intordering, singleThread ? SingleThread : CrossThread));
+}
+
 
 /*===-- Module providers --------------------------------------------------===*/
 
@@ -2397,6 +2494,13 @@ LLVMMemoryBufferRef LLVMCreateMemoryBufferWithMemoryRangeCopy(
       StringRef(BufferName)));
 }
 
+const char *LLVMGetBufferStart(LLVMMemoryBufferRef MemBuf) {
+  return unwrap(MemBuf)->getBufferStart();
+}
+
+size_t LLVMGetBufferSize(LLVMMemoryBufferRef MemBuf) {
+  return unwrap(MemBuf)->getBufferSize();
+}
 
 void LLVMDisposeMemoryBuffer(LLVMMemoryBufferRef MemBuf) {
   delete unwrap(MemBuf);
diff --git a/lib/IR/DIBuilder.cpp b/lib/IR/DIBuilder.cpp
index 9d6e840..0980e80 100644
--- a/lib/IR/DIBuilder.cpp
+++ b/lib/IR/DIBuilder.cpp
@@ -61,6 +61,9 @@ void DIBuilder::finalize() {
 
   DIArray GVs = getOrCreateArray(AllGVs);
   DIType(TempGVs).replaceAllUsesWith(GVs);
+
+  DIArray IMs = getOrCreateArray(AllImportedModules);
+  DIType(TempImportedModules).replaceAllUsesWith(IMs);
 }
 
 /// getNonCompileUnitScope - If N is compile unit return NULL otherwise return
@@ -101,6 +104,8 @@ void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename,
 
   TempGVs = MDNode::getTemporary(VMContext, TElts);
 
+  TempImportedModules = MDNode::getTemporary(VMContext, TElts);
+
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_compile_unit),
     createFilePathPair(VMContext, Filename, Directory),
@@ -113,6 +118,7 @@ void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename,
     TempRetainTypes,
     TempSubprograms,
     TempGVs,
+    TempImportedModules,
     MDString::get(VMContext, SplitName)
   };
   TheCU = DICompileUnit(MDNode::get(VMContext, Elts));
@@ -122,6 +128,21 @@ void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename,
   NMD->addOperand(TheCU);
 }
 
+DIImportedModule DIBuilder::createImportedModule(DIScope Context,
+                                                 DINameSpace NS,
+                                                 unsigned Line) {
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_imported_module),
+    Context,
+    NS,
+    ConstantInt::get(Type::getInt32Ty(VMContext), Line),
+  };
+  DIImportedModule M(MDNode::get(VMContext, Elts));
+  assert(M.Verify() && "Imported module should be valid");
+  AllImportedModules.push_back(M);
+  return M;
+}
+
 /// createFile - Create a file descriptor to hold debugging information
 /// for a file.
 DIFile DIBuilder::createFile(StringRef Filename, StringRef Directory) {
@@ -225,7 +246,8 @@ DIBuilder::createPointerType(DIType PointeeTy, uint64_t SizeInBits,
   return DIDerivedType(MDNode::get(VMContext, Elts));
 }
 
-DIDerivedType DIBuilder::createMemberPointerType(DIType PointeeTy, DIType Base) {
+DIDerivedType DIBuilder::createMemberPointerType(DIType PointeeTy,
+                                                 DIType Base) {
   // Pointer types are encoded in DIDerivedType format.
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_ptr_to_member_type),
@@ -427,7 +449,7 @@ DIType DIBuilder::createObjCIVar(StringRef Name,
 DIObjCProperty DIBuilder::createObjCProperty(StringRef Name,
                                              DIFile File, unsigned LineNumber,
                                              StringRef GetterName,
-                                             StringRef SetterName, 
+                                             StringRef SetterName,
                                              unsigned PropertyAttributes,
                                              DIType Ty) {
   Value *Elts[] = {
@@ -601,7 +623,7 @@ DIBuilder::createSubroutineType(DIFile File, DIArray ParameterTypes) {
 DICompositeType DIBuilder::createEnumerationType(
     DIDescriptor Scope, StringRef Name, DIFile File, unsigned LineNumber,
     uint64_t SizeInBits, uint64_t AlignInBits, DIArray Elements,
-    DIType ClassType) {
+    DIType UnderlyingType) {
   // TAG_enumeration_type is encoded in DICompositeType format.
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_enumeration_type),
@@ -613,7 +635,7 @@ DICompositeType DIBuilder::createEnumerationType(
     ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
     ConstantInt::get(Type::getInt32Ty(VMContext), 0),
     ConstantInt::get(Type::getInt32Ty(VMContext), 0),
-    ClassType,
+    UnderlyingType,
     Elements,
     ConstantInt::get(Type::getInt32Ty(VMContext), 0),
     Constant::getNullValue(Type::getInt32Ty(VMContext))
diff --git a/lib/IR/DataLayout.cpp b/lib/IR/DataLayout.cpp
index ecd5216..5658f56 100644
--- a/lib/IR/DataLayout.cpp
+++ b/lib/IR/DataLayout.cpp
@@ -41,7 +41,7 @@ char DataLayout::ID = 0;
 // Support for StructLayout
 //===----------------------------------------------------------------------===//
 
-StructLayout::StructLayout(StructType *ST, const DataLayout &TD) {
+StructLayout::StructLayout(StructType *ST, const DataLayout &DL) {
   assert(!ST->isOpaque() && "Cannot get layout of opaque structs");
   StructAlignment = 0;
   StructSize = 0;
@@ -50,7 +50,7 @@ StructLayout::StructLayout(StructType *ST, const DataLayout &TD) {
   // Loop over each of the elements, placing them in memory.
   for (unsigned i = 0, e = NumElements; i != e; ++i) {
     Type *Ty = ST->getElementType(i);
-    unsigned TyAlign = ST->isPacked() ? 1 : TD.getABITypeAlignment(Ty);
+    unsigned TyAlign = ST->isPacked() ? 1 : DL.getABITypeAlignment(Ty);
 
     // Add padding if necessary to align the data element properly.
     if ((StructSize & (TyAlign-1)) != 0)
@@ -60,7 +60,7 @@ StructLayout::StructLayout(StructType *ST, const DataLayout &TD) {
     StructAlignment = std::max(TyAlign, StructAlignment);
 
     MemberOffsets[i] = StructSize;
-    StructSize += TD.getTypeAllocSize(Ty); // Consume space for this data item
+    StructSize += DL.getTypeAllocSize(Ty); // Consume space for this data item
   }
 
   // Empty structures have alignment of 1 byte.
diff --git a/lib/IR/DebugInfo.cpp b/lib/IR/DebugInfo.cpp
index 0ffe99d..ec83dca 100644
--- a/lib/IR/DebugInfo.cpp
+++ b/lib/IR/DebugInfo.cpp
@@ -64,7 +64,8 @@ bool DIDescriptor::Verify() const {
           DISubrange(DbgNode).Verify() || DIEnumerator(DbgNode).Verify() ||
           DIObjCProperty(DbgNode).Verify() ||
           DITemplateTypeParameter(DbgNode).Verify() ||
-          DITemplateValueParameter(DbgNode).Verify());
+          DITemplateValueParameter(DbgNode).Verify() ||
+          DIImportedModule(DbgNode).Verify());
 }
 
 static Value *getField(const MDNode *DbgNode, unsigned Elt) {
@@ -336,6 +337,12 @@ bool DIDescriptor::isEnumerator() const {
 bool DIDescriptor::isObjCProperty() const {
   return DbgNode && getTag() == dwarf::DW_TAG_APPLE_property;
 }
+
+/// \brief Return true if the specified tag is DW_TAG_imported_module.
+bool DIDescriptor::isImportedModule() const {
+  return DbgNode && getTag() == dwarf::DW_TAG_imported_module;
+}
+
 //===----------------------------------------------------------------------===//
 // Simple Descriptor Constructors and other Methods
 //===----------------------------------------------------------------------===//
@@ -418,7 +425,7 @@ bool DICompileUnit::Verify() const {
   if (N.empty())
     return false;
   // It is possible that directory and produce string is empty.
-  return DbgNode->getNumOperands() == 12;
+  return DbgNode->getNumOperands() == 13;
 }
 
 /// Verify - Verify that an ObjC property is well formed.
@@ -580,6 +587,11 @@ bool DITemplateValueParameter::Verify() const {
   return isTemplateValueParameter() && DbgNode->getNumOperands() == 8;
 }
 
+/// \brief Verify that the imported module descriptor is well formed.
+bool DIImportedModule::Verify() const {
+  return isImportedModule() && DbgNode->getNumOperands() == 4;
+}
+
 /// getOriginalTypeSize - If this type is derived from a base type then
 /// return base type size.
 uint64_t DIDerivedType::getOriginalTypeSize() const {
@@ -694,7 +706,7 @@ StringRef DIScope::getDirectory() const {
 }
 
 DIArray DICompileUnit::getEnumTypes() const {
-  if (!DbgNode || DbgNode->getNumOperands() < 12)
+  if (!DbgNode || DbgNode->getNumOperands() < 13)
     return DIArray();
 
   if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(7)))
@@ -703,7 +715,7 @@ DIArray DICompileUnit::getEnumTypes() const {
 }
 
 DIArray DICompileUnit::getRetainedTypes() const {
-  if (!DbgNode || DbgNode->getNumOperands() < 12)
+  if (!DbgNode || DbgNode->getNumOperands() < 13)
     return DIArray();
 
   if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(8)))
@@ -712,7 +724,7 @@ DIArray DICompileUnit::getRetainedTypes() const {
 }
 
 DIArray DICompileUnit::getSubprograms() const {
-  if (!DbgNode || DbgNode->getNumOperands() < 12)
+  if (!DbgNode || DbgNode->getNumOperands() < 13)
     return DIArray();
 
   if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(9)))
@@ -722,7 +734,7 @@ DIArray DICompileUnit::getSubprograms() const {
 
 
 DIArray DICompileUnit::getGlobalVariables() const {
-  if (!DbgNode || DbgNode->getNumOperands() < 12)
+  if (!DbgNode || DbgNode->getNumOperands() < 13)
     return DIArray();
 
   if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(10)))
@@ -730,6 +742,15 @@ DIArray DICompileUnit::getGlobalVariables() const {
   return DIArray();
 }
 
+DIArray DICompileUnit::getImportedModules() const {
+  if (!DbgNode || DbgNode->getNumOperands() < 13)
+    return DIArray();
+
+  if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(11)))
+    return DIArray(N);
+  return DIArray();
+}
+
 /// fixupObjcLikeName - Replace contains special characters used
 /// in a typical Objective-C names with '.' in a given string.
 static void fixupObjcLikeName(StringRef Str, SmallVectorImpl<char> &Out) {
@@ -1054,8 +1075,13 @@ void DIScope::printInternal(raw_ostream &OS) const {
 
 void DICompileUnit::printInternal(raw_ostream &OS) const {
   DIScope::printInternal(OS);
-  if (const char *Lang = dwarf::LanguageString(getLanguage()))
-    OS << " [" << Lang << ']';
+  OS << " [";
+  unsigned Lang = getLanguage();
+  if (const char *LangStr = dwarf::LanguageString(Lang))
+    OS << LangStr;
+  else
+    (OS << "lang 0x").write_hex(Lang);
+  OS << ']';
 }
 
 void DIEnumerator::printInternal(raw_ostream &OS) const {
diff --git a/lib/IR/Function.cpp b/lib/IR/Function.cpp
index 1e72b90..7f7efab 100644
--- a/lib/IR/Function.cpp
+++ b/lib/IR/Function.cpp
@@ -124,6 +124,13 @@ bool Argument::hasStructRetAttr() const {
     hasAttribute(1, Attribute::StructRet);
 }
 
+/// hasReturnedAttr - Return true if this argument has the returned attribute on
+/// it in its containing function.
+bool Argument::hasReturnedAttr() const {
+  return getParent()->getAttributes().
+    hasAttribute(getArgNo()+1, Attribute::Returned);
+}
+
 /// addAttr - Add attributes to an argument.
 void Argument::addAttr(AttributeSet AS) {
   assert(AS.getNumSlots() <= 1 &&
diff --git a/lib/IR/Metadata.cpp b/lib/IR/Metadata.cpp
index 0228aeb..6a6b7af 100644
--- a/lib/IR/Metadata.cpp
+++ b/lib/IR/Metadata.cpp
@@ -403,42 +403,6 @@ void MDNode::replaceOperand(MDNodeOperand *Op, Value *To) {
   }
 }
 
-MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) {
-  if (!A || !B)
-    return NULL;
-
-  if (A == B)
-    return A;
-
-  SmallVector<MDNode *, 4> PathA;
-  MDNode *T = A;
-  while (T) {
-    PathA.push_back(T);
-    T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1)) : 0;
-  }
-
-  SmallVector<MDNode *, 4> PathB;
-  T = B;
-  while (T) {
-    PathB.push_back(T);
-    T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1)) : 0;
-  }
-
-  int IA = PathA.size() - 1;
-  int IB = PathB.size() - 1;
-
-  MDNode *Ret = 0;
-  while (IA >= 0 && IB >=0) {
-    if (PathA[IA] == PathB[IB])
-      Ret = PathA[IA];
-    else
-      break;
-    --IA;
-    --IB;
-  }
-  return Ret;
-}
-
 MDNode *MDNode::getMostGenericFPMath(MDNode *A, MDNode *B) {
   if (!A || !B)
     return NULL;
diff --git a/lib/IR/PassManager.cpp b/lib/IR/PassManager.cpp
index 3c968aa..387094a 100644
--- a/lib/IR/PassManager.cpp
+++ b/lib/IR/PassManager.cpp
@@ -42,14 +42,14 @@ namespace llvm {
 
 // Different debug levels that can be enabled...
 enum PassDebugLevel {
-  None, Arguments, Structure, Executions, Details
+  Disabled, Arguments, Structure, Executions, Details
 };
 
 static cl::opt<enum PassDebugLevel>
 PassDebugging("debug-pass", cl::Hidden,
                   cl::desc("Print PassManager debugging information"),
                   cl::values(
-  clEnumVal(None      , "disable debug output"),
+  clEnumVal(Disabled  , "disable debug output"),
   clEnumVal(Arguments , "print pass arguments to pass to 'opt'"),
   clEnumVal(Structure , "print pass structure before run()"),
   clEnumVal(Executions, "print pass name before it is executed"),
diff --git a/lib/IR/Type.cpp b/lib/IR/Type.cpp
index 1e6a51a..46c61fc 100644
--- a/lib/IR/Type.cpp
+++ b/lib/IR/Type.cpp
@@ -380,7 +380,7 @@ FunctionType *FunctionType::get(Type *ReturnType,
 }
 
 FunctionType *FunctionType::get(Type *Result, bool isVarArg) {
-  return get(Result, ArrayRef<Type *>(), isVarArg);
+  return get(Result, None, isVarArg);
 }
 
 /// isValidReturnType - Return true if the specified type is valid as a return
@@ -499,7 +499,7 @@ StructType *StructType::create(LLVMContext &Context, StringRef Name) {
 }
 
 StructType *StructType::get(LLVMContext &Context, bool isPacked) {
-  return get(Context, llvm::ArrayRef<Type*>(), isPacked);
+  return get(Context, None, isPacked);
 }
 
 StructType *StructType::get(Type *type, ...) {
diff --git a/lib/IR/Value.cpp b/lib/IR/Value.cpp
index adc702e..89a3c05 100644
--- a/lib/IR/Value.cpp
+++ b/lib/IR/Value.cpp
@@ -118,7 +118,7 @@ bool Value::isUsedInBasicBlock(const BasicBlock *BB) const {
   for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
     if (std::find(I->op_begin(), I->op_end(), this) != I->op_end())
       return true;
-    if (MaxBlockSize-- == 0) // If the block is larger fall back to use_iterator
+    if (--MaxBlockSize == 0) // If the block is larger fall back to use_iterator
       break;
   }
 
@@ -333,6 +333,7 @@ namespace {
 // Various metrics for how much to strip off of pointers.
 enum PointerStripKind {
   PSK_ZeroIndices,
+  PSK_ZeroIndicesAndAliases,
   PSK_InBoundsConstantIndices,
   PSK_InBounds
 };
@@ -350,6 +351,7 @@ static Value *stripPointerCastsAndOffsets(Value *V) {
   do {
     if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
       switch (StripKind) {
+      case PSK_ZeroIndicesAndAliases:
       case PSK_ZeroIndices:
         if (!GEP->hasAllZeroIndices())
           return V;
@@ -367,7 +369,7 @@ static Value *stripPointerCastsAndOffsets(Value *V) {
     } else if (Operator::getOpcode(V) == Instruction::BitCast) {
       V = cast<Operator>(V)->getOperand(0);
     } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
-      if (GA->mayBeOverridden())
+      if (StripKind == PSK_ZeroIndices || GA->mayBeOverridden())
         return V;
       V = GA->getAliasee();
     } else {
@@ -381,6 +383,10 @@ static Value *stripPointerCastsAndOffsets(Value *V) {
 } // namespace
 
 Value *Value::stripPointerCasts() {
+  return stripPointerCastsAndOffsets<PSK_ZeroIndicesAndAliases>(this);
+}
+
+Value *Value::stripPointerCastsNoFollowAliases() {
   return stripPointerCastsAndOffsets<PSK_ZeroIndices>(this);
 }
 
diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp
index 8bfbb32..d106173 100644
--- a/lib/IR/Verifier.cpp
+++ b/lib/IR/Verifier.cpp
@@ -301,9 +301,12 @@ namespace {
     bool VerifyIntrinsicType(Type *Ty,
                              ArrayRef<Intrinsic::IITDescriptor> &Infos,
                              SmallVectorImpl<Type*> &ArgTys);
-    void VerifyParameterAttrs(AttributeSet Attrs, uint64_t Idx, Type *Ty,
+    bool VerifyAttributeCount(AttributeSet Attrs, unsigned Params);
+    void VerifyAttributeTypes(AttributeSet Attrs, unsigned Idx,
+                              bool isFunction, const Value *V);
+    void VerifyParameterAttrs(AttributeSet Attrs, unsigned Idx, Type *Ty,
                               bool isReturnValue, const Value *V);
-    void VerifyFunctionAttrs(FunctionType *FT, const AttributeSet &Attrs,
+    void VerifyFunctionAttrs(FunctionType *FT, AttributeSet Attrs,
                              const Value *V);
 
     void WriteValue(const Value *V) {
@@ -446,6 +449,30 @@ void Verifier::visitGlobalVariable(GlobalVariable &GV) {
     }
   }
 
+  if (GV.hasName() && (GV.getName() == "llvm.used" ||
+                       GV.getName() == "llvm.compiler_used")) {
+    Assert1(!GV.hasInitializer() || GV.hasAppendingLinkage(),
+            "invalid linkage for intrinsic global variable", &GV);
+    Type *GVType = GV.getType()->getElementType();
+    if (ArrayType *ATy = dyn_cast<ArrayType>(GVType)) {
+      PointerType *PTy = dyn_cast<PointerType>(ATy->getElementType());
+      Assert1(PTy, "wrong type for intrinsic global variable", &GV);
+      if (GV.hasInitializer()) {
+        Constant *Init = GV.getInitializer();
+        ConstantArray *InitArray = dyn_cast<ConstantArray>(Init);
+        Assert1(InitArray, "wrong initalizer for intrinsic global variable",
+                Init);
+        for (unsigned i = 0, e = InitArray->getNumOperands(); i != e; ++i) {
+          Value *V = Init->getOperand(i)->stripPointerCasts();
+          // stripPointerCasts strips aliases, so we only need to check for
+          // variables and functions.
+          Assert1(isa<GlobalVariable>(V) || isa<Function>(V),
+                  "invalid llvm.used member", V);
+        }
+      }
+    }
+  }
+
   visitGlobalValue(GV);
 }
 
@@ -626,44 +653,83 @@ void Verifier::visitModuleFlag(MDNode *Op, DenseMap<MDString*, MDNode*>&SeenIDs,
   }
 }
 
+// VerifyAttributeTypes - Check that each enum attribute in the slot holding
+// index Idx is legal for its position: function-only attributes are accepted
+// only when isFunction is true, and every other enum attribute only when it is
+// false.  String attributes are skipped.  Reports the first violation through
+// CheckFailed (printing V) and stops.
+void Verifier::VerifyAttributeTypes(AttributeSet Attrs, unsigned Idx,
+                                    bool isFunction, const Value* V) {
+  // Locate the slot that stores the attributes for index Idx.
+  unsigned Slot = ~0U;
+  for (unsigned I = 0, E = Attrs.getNumSlots(); I != E; ++I)
+    if (Attrs.getSlotIndex(I) == Idx) {
+      Slot = I;
+      break;
+    }
+
+  assert(Slot != ~0U && "Attribute set inconsistency!");
+
+  for (AttributeSet::iterator I = Attrs.begin(Slot), E = Attrs.end(Slot);
+         I != E; ++I) {
+    if (I->isStringAttribute())
+      continue;
+
+    if (I->getKindAsEnum() == Attribute::NoReturn ||
+        I->getKindAsEnum() == Attribute::NoUnwind ||
+        I->getKindAsEnum() == Attribute::ReadNone ||
+        I->getKindAsEnum() == Attribute::ReadOnly ||
+        I->getKindAsEnum() == Attribute::NoInline ||
+        I->getKindAsEnum() == Attribute::AlwaysInline ||
+        I->getKindAsEnum() == Attribute::OptimizeForSize ||
+        I->getKindAsEnum() == Attribute::StackProtect ||
+        I->getKindAsEnum() == Attribute::StackProtectReq ||
+        I->getKindAsEnum() == Attribute::StackProtectStrong ||
+        I->getKindAsEnum() == Attribute::NoRedZone ||
+        I->getKindAsEnum() == Attribute::NoImplicitFloat ||
+        I->getKindAsEnum() == Attribute::Naked ||
+        I->getKindAsEnum() == Attribute::InlineHint ||
+        I->getKindAsEnum() == Attribute::StackAlignment ||
+        I->getKindAsEnum() == Attribute::UWTable ||
+        I->getKindAsEnum() == Attribute::NonLazyBind ||
+        I->getKindAsEnum() == Attribute::ReturnsTwice ||
+        I->getKindAsEnum() == Attribute::SanitizeAddress ||
+        I->getKindAsEnum() == Attribute::SanitizeThread ||
+        I->getKindAsEnum() == Attribute::SanitizeMemory ||
+        I->getKindAsEnum() == Attribute::MinSize ||
+        I->getKindAsEnum() == Attribute::NoDuplicate ||
+        I->getKindAsEnum() == Attribute::NoBuiltin) {
+      // Stop only after reporting an error; a legal function attribute must
+      // not end the scan, or later attributes in the slot go unchecked.
+      if (!isFunction) {
+        CheckFailed("Attribute '" + I->getKindAsString() +
+                    "' only applies to functions!", V);
+        return;
+      }
+    } else if (isFunction) {
+      CheckFailed("Attribute '" + I->getKindAsString() +
+                  "' does not apply to functions!", V);
+      return;
+    }
+  }
+}
+
 // VerifyParameterAttrs - Check the given attributes for an argument or return
 // value of the specified type.  The value V is printed in error messages.
-void Verifier::VerifyParameterAttrs(AttributeSet Attrs, uint64_t Idx, Type *Ty,
+void Verifier::VerifyParameterAttrs(AttributeSet Attrs, unsigned Idx, Type *Ty,
                                     bool isReturnValue, const Value *V) {
   if (!Attrs.hasAttributes(Idx))
     return;
 
-  Assert1(!Attrs.hasAttribute(Idx, Attribute::NoReturn) &&
-          !Attrs.hasAttribute(Idx, Attribute::NoUnwind) &&
-          !Attrs.hasAttribute(Idx, Attribute::ReadNone) &&
-          !Attrs.hasAttribute(Idx, Attribute::ReadOnly) &&
-          !Attrs.hasAttribute(Idx, Attribute::NoInline) &&
-          !Attrs.hasAttribute(Idx, Attribute::AlwaysInline) &&
-          !Attrs.hasAttribute(Idx, Attribute::OptimizeForSize) &&
-          !Attrs.hasAttribute(Idx, Attribute::StackProtect) &&
-          !Attrs.hasAttribute(Idx, Attribute::StackProtectReq) &&
-          !Attrs.hasAttribute(Idx, Attribute::NoRedZone) &&
-          !Attrs.hasAttribute(Idx, Attribute::NoImplicitFloat) &&
-          !Attrs.hasAttribute(Idx, Attribute::Naked) &&
-          !Attrs.hasAttribute(Idx, Attribute::InlineHint) &&
-          !Attrs.hasAttribute(Idx, Attribute::StackAlignment) &&
-          !Attrs.hasAttribute(Idx, Attribute::UWTable) &&
-          !Attrs.hasAttribute(Idx, Attribute::NonLazyBind) &&
-          !Attrs.hasAttribute(Idx, Attribute::ReturnsTwice) &&
-          !Attrs.hasAttribute(Idx, Attribute::SanitizeAddress) &&
-          !Attrs.hasAttribute(Idx, Attribute::SanitizeThread) &&
-          !Attrs.hasAttribute(Idx, Attribute::SanitizeMemory) &&
-          !Attrs.hasAttribute(Idx, Attribute::MinSize) &&
-          !Attrs.hasAttribute(Idx, Attribute::NoBuiltin),
-          "Some attributes in '" + Attrs.getAsString(Idx) +
-          "' only apply to functions!", V);
+  VerifyAttributeTypes(Attrs, Idx, false, V);
 
   if (isReturnValue)
     Assert1(!Attrs.hasAttribute(Idx, Attribute::ByVal) &&
             !Attrs.hasAttribute(Idx, Attribute::Nest) &&
             !Attrs.hasAttribute(Idx, Attribute::StructRet) &&
-            !Attrs.hasAttribute(Idx, Attribute::NoCapture),
-            "Attribute 'byval', 'nest', 'sret', and 'nocapture' "
+            !Attrs.hasAttribute(Idx, Attribute::NoCapture) &&
+            !Attrs.hasAttribute(Idx, Attribute::Returned),
+            "Attribute 'byval', 'nest', 'sret', 'nocapture', and 'returned' "
             "do not apply to return values!", V);
 
   // Check for mutually incompatible attributes.
@@ -683,6 +740,10 @@ void Verifier::VerifyParameterAttrs(AttributeSet Attrs, uint64_t Idx, Type *Ty,
              Attrs.hasAttribute(Idx, Attribute::InReg))), "Attributes "
           "'byval, nest, and inreg' are incompatible!", V);
 
+  Assert1(!(Attrs.hasAttribute(Idx, Attribute::StructRet) &&
+            Attrs.hasAttribute(Idx, Attribute::Returned)), "Attributes "
+          "'sret and returned' are incompatible!", V);
+
   Assert1(!(Attrs.hasAttribute(Idx, Attribute::ZExt) &&
             Attrs.hasAttribute(Idx, Attribute::SExt)), "Attributes "
           "'zeroext and signext' are incompatible!", V);
@@ -712,81 +773,51 @@ void Verifier::VerifyParameterAttrs(AttributeSet Attrs, uint64_t Idx, Type *Ty,
 
 // VerifyFunctionAttrs - Check parameter attributes against a function type.
 // The value V is printed in error messages.
-void Verifier::VerifyFunctionAttrs(FunctionType *FT,
-                                   const AttributeSet &Attrs,
+void Verifier::VerifyFunctionAttrs(FunctionType *FT, AttributeSet Attrs,
                                    const Value *V) {
   if (Attrs.isEmpty())
     return;
 
   bool SawNest = false;
+  bool SawReturned = false;
 
   for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) {
-    unsigned Index = Attrs.getSlotIndex(i);
+    unsigned Idx = Attrs.getSlotIndex(i);
 
     Type *Ty;
-    if (Index == 0)
+    if (Idx == 0)
       Ty = FT->getReturnType();
-    else if (Index-1 < FT->getNumParams())
-      Ty = FT->getParamType(Index-1);
+    else if (Idx-1 < FT->getNumParams())
+      Ty = FT->getParamType(Idx-1);
     else
       break;  // VarArgs attributes, verified elsewhere.
 
-    VerifyParameterAttrs(Attrs, Index, Ty, Index == 0, V);
+    VerifyParameterAttrs(Attrs, Idx, Ty, Idx == 0, V);
 
-    if (Attrs.hasAttribute(i, Attribute::Nest)) {
+    if (Idx == 0)
+      continue;
+
+    if (Attrs.hasAttribute(Idx, Attribute::Nest)) {
       Assert1(!SawNest, "More than one parameter has attribute nest!", V);
       SawNest = true;
     }
 
-    if (Attrs.hasAttribute(Index, Attribute::StructRet))
-      Assert1(Index == 1, "Attribute sret is not on first parameter!", V);
+    if (Attrs.hasAttribute(Idx, Attribute::Returned)) {
+      Assert1(!SawReturned, "More than one parameter has attribute returned!",
+              V);
+      Assert1(Ty->canLosslesslyBitCastTo(FT->getReturnType()), "Incompatible "
+              "argument and return types for 'returned' attribute", V);
+      SawReturned = true;
+    }
+
+    if (Attrs.hasAttribute(Idx, Attribute::StructRet))
+      Assert1(Idx == 1, "Attribute sret is not on first parameter!", V);
   }
 
   if (!Attrs.hasAttributes(AttributeSet::FunctionIndex))
     return;
 
-  AttrBuilder NotFn(Attrs, AttributeSet::FunctionIndex);
-  NotFn.removeFunctionOnlyAttrs();
-  Assert1(NotFn.empty(), "Attributes '" +
-          AttributeSet::get(V->getContext(),
-                            AttributeSet::FunctionIndex,
-                            NotFn).getAsString(AttributeSet::FunctionIndex) +
-          "' do not apply to the function!", V);
-
-  // Check for mutually incompatible attributes.
-  Assert1(!((Attrs.hasAttribute(AttributeSet::FunctionIndex,
-                                Attribute::ByVal) &&
-             Attrs.hasAttribute(AttributeSet::FunctionIndex,
-                                Attribute::Nest)) ||
-            (Attrs.hasAttribute(AttributeSet::FunctionIndex,
-                                Attribute::ByVal) &&
-             Attrs.hasAttribute(AttributeSet::FunctionIndex,
-                                Attribute::StructRet)) ||
-            (Attrs.hasAttribute(AttributeSet::FunctionIndex,
-                                Attribute::Nest) &&
-             Attrs.hasAttribute(AttributeSet::FunctionIndex,
-                                Attribute::StructRet))),
-          "Attributes 'byval, nest, and sret' are incompatible!", V);
-
-  Assert1(!((Attrs.hasAttribute(AttributeSet::FunctionIndex,
-                                Attribute::ByVal) &&
-             Attrs.hasAttribute(AttributeSet::FunctionIndex,
-                                Attribute::Nest)) ||
-            (Attrs.hasAttribute(AttributeSet::FunctionIndex,
-                                Attribute::ByVal) &&
-             Attrs.hasAttribute(AttributeSet::FunctionIndex,
-                                Attribute::InReg)) ||
-            (Attrs.hasAttribute(AttributeSet::FunctionIndex,
-                                Attribute::Nest) &&
-             Attrs.hasAttribute(AttributeSet::FunctionIndex,
-                                Attribute::InReg))),
-          "Attributes 'byval, nest, and inreg' are incompatible!", V);
-
-  Assert1(!(Attrs.hasAttribute(AttributeSet::FunctionIndex,
-                               Attribute::ZExt) &&
-            Attrs.hasAttribute(AttributeSet::FunctionIndex,
-                               Attribute::SExt)),
-          "Attributes 'zeroext and signext' are incompatible!", V);
+  VerifyAttributeTypes(Attrs, AttributeSet::FunctionIndex, true, V);
 
   Assert1(!(Attrs.hasAttribute(AttributeSet::FunctionIndex,
                                Attribute::ReadNone) &&
@@ -801,7 +832,7 @@ void Verifier::VerifyFunctionAttrs(FunctionType *FT,
           "Attributes 'noinline and alwaysinline' are incompatible!", V);
 }
 
-static bool VerifyAttributeCount(const AttributeSet &Attrs, unsigned Params) {
+bool Verifier::VerifyAttributeCount(AttributeSet Attrs, unsigned Params) {
   if (Attrs.getNumSlots() == 0)
     return true;
 
@@ -837,7 +868,7 @@ void Verifier::visitFunction(Function &F) {
   Assert1(!F.hasStructRetAttr() || F.getReturnType()->isVoidTy(),
           "Invalid struct return type!", &F);
 
-  const AttributeSet &Attrs = F.getAttributes();
+  AttributeSet Attrs = F.getAttributes();
 
   Assert1(VerifyAttributeCount(Attrs, FT->getNumParams()),
           "Attribute after last parameter!", &F);
@@ -1350,7 +1381,7 @@ void Verifier::VerifyCallSite(CallSite CS) {
             "Call parameter type does not match function signature!",
             CS.getArgument(i), FTy->getParamType(i), I);
 
-  const AttributeSet &Attrs = CS.getAttributes();
+  AttributeSet Attrs = CS.getAttributes();
 
   Assert1(VerifyAttributeCount(Attrs, CS.arg_size()),
           "Attribute after last parameter!", I);
@@ -1358,15 +1389,41 @@ void Verifier::VerifyCallSite(CallSite CS) {
   // Verify call attributes.
   VerifyFunctionAttrs(FTy, Attrs, I);
 
-  if (FTy->isVarArg())
+  if (FTy->isVarArg()) {
+    // FIXME? is 'nest' even legal here?
+    bool SawNest = false;
+    bool SawReturned = false;
+
+    for (unsigned Idx = 1; Idx < 1 + FTy->getNumParams(); ++Idx) {
+      if (Attrs.hasAttribute(Idx, Attribute::Nest))
+        SawNest = true;
+      if (Attrs.hasAttribute(Idx, Attribute::Returned))
+        SawReturned = true;
+    }
+
     // Check attributes on the varargs part.
     for (unsigned Idx = 1 + FTy->getNumParams(); Idx <= CS.arg_size(); ++Idx) {
-      VerifyParameterAttrs(Attrs, Idx, CS.getArgument(Idx-1)->getType(),
-                           false, I);
+      Type *Ty = CS.getArgument(Idx-1)->getType(); 
+      VerifyParameterAttrs(Attrs, Idx, Ty, false, I);
+      
+      if (Attrs.hasAttribute(Idx, Attribute::Nest)) {
+        Assert1(!SawNest, "More than one parameter has attribute nest!", I);
+        SawNest = true;
+      }
+
+      if (Attrs.hasAttribute(Idx, Attribute::Returned)) {
+        Assert1(!SawReturned, "More than one parameter has attribute returned!",
+                I);
+        Assert1(Ty->canLosslesslyBitCastTo(FTy->getReturnType()),
+                "Incompatible argument and return types for 'returned' "
+                "attribute", I);
+        SawReturned = true;
+      }
 
       Assert1(!Attrs.hasAttribute(Idx, Attribute::StructRet),
               "Attribute 'sret' cannot be used for vararg call arguments!", I);
     }
+  }
 
   // Verify that there's no metadata unless it's a direct call to an intrinsic.
   if (CS.getCalledFunction() == 0 ||
diff --git a/lib/Linker/CMakeLists.txt b/lib/Linker/CMakeLists.txt
index 28f1262..221b55a 100644
--- a/lib/Linker/CMakeLists.txt
+++ b/lib/Linker/CMakeLists.txt
@@ -1,4 +1,3 @@
 add_llvm_library(LLVMLinker
   LinkModules.cpp
-  Linker.cpp
   )
diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp
index 74cbdad..d2e13c9 100644
--- a/lib/Linker/LinkModules.cpp
+++ b/lib/Linker/LinkModules.cpp
@@ -13,21 +13,15 @@
 
 #include "llvm/Linker.h"
 #include "llvm-c/Linker.h"
-#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/IR/Constants.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Instructions.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/TypeFinder.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Transforms/Utils/ValueMapper.h"
-#include <cctype>
 using namespace llvm;
 
 //===----------------------------------------------------------------------===//
@@ -35,6 +29,8 @@ using namespace llvm;
 //===----------------------------------------------------------------------===//
 
 namespace {
+  typedef SmallPtrSet<StructType*, 32> TypeSet;
+
 class TypeMapTy : public ValueMapTypeRemapper {
   /// MappedTypes - This is a mapping from a source type to a destination type
   /// to use.
@@ -55,6 +51,9 @@ class TypeMapTy : public ValueMapTypeRemapper {
   SmallPtrSet<StructType*, 16> DstResolvedOpaqueTypes;
 
 public:
+  TypeMapTy(TypeSet &Set) : DstStructTypesSet(Set) {}
+
+  TypeSet &DstStructTypesSet;
   /// addTypeMapping - Indicate that the specified type in the destination
   /// module is conceptually equivalent to the specified type in the source
   /// module.
@@ -331,13 +330,20 @@ Type *TypeMapTy::getImpl(Type *Ty) {
   StructType *STy = cast<StructType>(Ty);
   
   // If the type is opaque, we can just use it directly.
-  if (STy->isOpaque())
+  if (STy->isOpaque()) {
+    // A named structure type from src module is used. Add it to the Set of
+    // identified structs in the destination module.
+    DstStructTypesSet.insert(STy);
     return *Entry = STy;
+  }
   
   // Otherwise we create a new type and resolve its body later.  This will be
   // resolved by the top level of get().
   SrcDefinitionsToResolve.push_back(STy);
   StructType *DTy = StructType::create(STy->getContext());
+  // A new identified structure type was created. Add it to the set of
+  // identified structs in the destination module.
+  DstStructTypesSet.insert(DTy);
   DstResolvedOpaqueTypes.insert(DTy);
   return *Entry = DTy;
 }
@@ -379,8 +385,8 @@ namespace {
   public:
     std::string ErrorMsg;
     
-    ModuleLinker(Module *dstM, Module *srcM, unsigned mode)
-      : DstM(dstM), SrcM(srcM), Mode(mode) { }
+    ModuleLinker(Module *dstM, TypeSet &Set, Module *srcM, unsigned mode)
+      : DstM(dstM), SrcM(srcM), TypeMap(Set), Mode(mode) { }
     
     bool run();
     
@@ -594,11 +600,6 @@ void ModuleLinker::computeTypeMapping() {
   SmallPtrSet<StructType*, 32> SrcStructTypesSet(SrcStructTypes.begin(),
                                                  SrcStructTypes.end());
 
-  TypeFinder DstStructTypes;
-  DstStructTypes.run(*DstM, true);
-  SmallPtrSet<StructType*, 32> DstStructTypesSet(DstStructTypes.begin(),
-                                                 DstStructTypes.end());
-
   for (unsigned i = 0, e = SrcStructTypes.size(); i != e; ++i) {
     StructType *ST = SrcStructTypes[i];
     if (!ST->hasName()) continue;
@@ -629,7 +630,7 @@ void ModuleLinker::computeTypeMapping() {
       // we prefer to take the '%C' version. So we are then left with both
       // '%C.1' and '%C' being used for the same types. This leads to some
       // variables using one type and some using the other.
-      if (!SrcStructTypesSet.count(DST) && DstStructTypesSet.count(DST))
+      if (!SrcStructTypesSet.count(DST) && TypeMap.DstStructTypesSet.count(DST))
         TypeMap.addTypeMapping(DST, ST);
   }
 
@@ -1287,6 +1288,28 @@ bool ModuleLinker::run() {
   return false;
 }
 
+/// Linker - Use M as the composite (destination) module and record the struct
+/// types that TypeFinder reports for it in IdentifiedStructTypes.
+Linker::Linker(Module *M) : Composite(M) {
+  TypeFinder DstTypes;
+  DstTypes.run(*M, true);
+  IdentifiedStructTypes.insert(DstTypes.begin(), DstTypes.end());
+}
+
+/// ~Linker - The destructor body is empty.
+Linker::~Linker() {}
+
+/// linkInModule - Run a ModuleLinker that links Src into the composite module
+/// using Mode.  Returns true when the linker reports an error, copying its
+/// message into *ErrorMsg if ErrorMsg is non-null; returns false otherwise.
+bool Linker::linkInModule(Module *Src, unsigned Mode, std::string *ErrorMsg) {
+  ModuleLinker ML(Composite, IdentifiedStructTypes, Src, Mode);
+  const bool Failed = ML.run();
+  if (Failed && ErrorMsg)
+    *ErrorMsg = ML.ErrorMsg;
+  return Failed;
+}
+
 //===----------------------------------------------------------------------===//
 // LinkModules entrypoint.
 //===----------------------------------------------------------------------===//
@@ -1298,13 +1318,8 @@ bool ModuleLinker::run() {
 /// and shouldn't be relied on to be consistent.
 bool Linker::LinkModules(Module *Dest, Module *Src, unsigned Mode, 
                          std::string *ErrorMsg) {
-  ModuleLinker TheLinker(Dest, Src, Mode);
-  if (TheLinker.run()) {
-    if (ErrorMsg) *ErrorMsg = TheLinker.ErrorMsg;
-    return true;
-  }
-
-  return false;
+  Linker L(Dest);
+  return L.linkInModule(Src, Mode, ErrorMsg);
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/Linker/Linker.cpp b/lib/Linker/Linker.cpp
deleted file mode 100644
index 74d24f2..0000000
--- a/lib/Linker/Linker.cpp
+++ /dev/null
@@ -1,70 +0,0 @@
-//===- lib/Linker/Linker.cpp - Basic Linker functionality  ----------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains basic Linker functionality that all usages will need.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Linker.h"
-#include "llvm/Bitcode/ReaderWriter.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/system_error.h"
-using namespace llvm;
-
-Linker::Linker(StringRef progname, StringRef modname,
-               LLVMContext& C, unsigned flags):
-  Context(C),
-  Composite(new Module(modname, C)),
-  Flags(flags),
-  Error(),
-  ProgramName(progname) { }
-
-Linker::Linker(StringRef progname, Module* aModule, unsigned flags) :
-  Context(aModule->getContext()),
-  Composite(aModule),
-  Flags(flags),
-  Error(),
-  ProgramName(progname) { }
-
-Linker::~Linker() {
-  delete Composite;
-}
-
-bool
-Linker::error(StringRef message) {
-  Error = message;
-  if (!(Flags&QuietErrors))
-    errs() << ProgramName << ": error: " << message << "\n";
-  return true;
-}
-
-bool
-Linker::warning(StringRef message) {
-  Error = message;
-  if (!(Flags&QuietWarnings))
-    errs() << ProgramName << ": warning: " << message << "\n";
-  return false;
-}
-
-void
-Linker::verbose(StringRef message) {
-  if (Flags&Verbose)
-    errs() << "  " << message << "\n";
-}
-
-Module*
-Linker::releaseModule() {
-  Module* result = Composite;
-  Error.clear();
-  Composite = 0;
-  Flags = 0;
-  return result;
-}
diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp
index 51bb435..9e60884 100644
--- a/lib/MC/MCAsmInfo.cpp
+++ b/lib/MC/MCAsmInfo.cpp
@@ -87,10 +87,10 @@ MCAsmInfo::MCAsmInfo() {
   SupportsDebugInformation = false;
   ExceptionsType = ExceptionHandling::None;
   DwarfUsesInlineInfoSection = false;
-  DwarfSectionOffsetDirective = 0;
   DwarfUsesRelocationsAcrossSections = true;
   DwarfRegNumForCFI = false;
   HasMicrosoftFastStdCallMangling = false;
+  NeedsDwarfSectionOffsetDirective = false;
 }
 
 MCAsmInfo::~MCAsmInfo() {
diff --git a/lib/MC/MCAsmInfoCOFF.cpp b/lib/MC/MCAsmInfoCOFF.cpp
index fd79193..33350d9 100644
--- a/lib/MC/MCAsmInfoCOFF.cpp
+++ b/lib/MC/MCAsmInfoCOFF.cpp
@@ -36,8 +36,8 @@ MCAsmInfoCOFF::MCAsmInfoCOFF() {
   // Set up DWARF directives
   HasLEB128 = true;  // Target asm supports leb128 directives (little-endian)
   SupportsDebugInformation = true;
-  DwarfSectionOffsetDirective = "\t.secrel32\t";
   HasMicrosoftFastStdCallMangling = true;
+  NeedsDwarfSectionOffsetDirective = true;
 }
 
 void MCAsmInfoMicrosoft::anchor() { }
diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp
index 35613b4..9e86785 100644
--- a/lib/MC/MCAsmStreamer.cpp
+++ b/lib/MC/MCAsmStreamer.cpp
@@ -124,19 +124,15 @@ public:
   /// @name MCStreamer Interface
   /// @{
 
-  virtual void ChangeSection(const MCSection *Section);
+  virtual void ChangeSection(const MCSection *Section,
+                             const MCExpr *Subsection);
 
   virtual void InitSections() {
     InitToTextSection();
   }
 
   virtual void InitToTextSection() {
-    // FIXME, this is MachO specific, but the testsuite
-    // expects this.
-    SwitchSection(getContext().getMachOSection(
-                                      "__TEXT", "__text",
-                                      MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
-                                      0, SectionKind::getText()));
+    SwitchSection(getContext().getObjectFileInfo()->getTextSection());
   }
 
   virtual void EmitLabel(MCSymbol *Symbol);
@@ -333,9 +329,10 @@ static inline int64_t truncateToSize(int64_t Value, unsigned Bytes) {
   return Value & ((uint64_t) (int64_t) -1 >> (64 - Bytes * 8));
 }
 
-void MCAsmStreamer::ChangeSection(const MCSection *Section) {
+void MCAsmStreamer::ChangeSection(const MCSection *Section,
+                                  const MCExpr *Subsection) {
   assert(Section && "Cannot switch to a null section!");
-  Section->PrintSwitchToSection(MAI, OS);
+  Section->PrintSwitchToSection(MAI, OS, Subsection);
 }
 
 void MCAsmStreamer::EmitEHSymAttributes(const MCSymbol *Symbol,
@@ -642,7 +639,8 @@ static void PrintQuotedString(StringRef Data, raw_ostream &OS) {
 
 
 void MCAsmStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
-  assert(getCurrentSection() && "Cannot emit contents before setting section!");
+  assert(getCurrentSection().first &&
+         "Cannot emit contents before setting section!");
   if (Data.empty()) return;
 
   if (Data.size() == 1) {
@@ -673,7 +671,8 @@ void MCAsmStreamer::EmitIntValue(uint64_t Value, unsigned Size,
 
 void MCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
                                   unsigned AddrSpace) {
-  assert(getCurrentSection() && "Cannot emit contents before setting section!");
+  assert(getCurrentSection().first &&
+         "Cannot emit contents before setting section!");
   const char *Directive = 0;
   switch (Size) {
   default: break;
@@ -1368,7 +1367,8 @@ void MCAsmStreamer::EmitTCEntry(const MCSymbol &S) {
 }
 
 void MCAsmStreamer::EmitInstruction(const MCInst &Inst) {
-  assert(getCurrentSection() && "Cannot emit contents before setting section!");
+  assert(getCurrentSection().first &&
+         "Cannot emit contents before setting section!");
 
   // Show the encoding in a comment if we have a code emitter.
   if (Emitter)
diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp
index 1829266..fb5ab28 100644
--- a/lib/MC/MCAssembler.cpp
+++ b/lib/MC/MCAssembler.cpp
@@ -243,6 +243,48 @@ MCSectionData::MCSectionData(const MCSection &_Section, MCAssembler *A)
     A->getSectionList().push_back(this);
 }
 
+/// getSubsectionInsertionPoint - Return the position in this section's
+/// fragment list at which new fragments for Subsection are to be inserted.
+/// SubsectionFragmentMap is searched with std::lower_bound, so it is expected
+/// to be kept sorted by subsection number.  The first request for a non-zero
+/// subsection creates an MCDataFragment for it and records it in the map.
+MCSectionData::iterator
+MCSectionData::getSubsectionInsertionPoint(unsigned Subsection) {
+  typedef SmallVectorImpl<std::pair<unsigned, MCFragment *> >::iterator
+    MapIter;
+
+  // No subsections recorded and none requested: use the end of the list.
+  if (Subsection == 0 && SubsectionFragmentMap.empty())
+    return end();
+
+  MapIter Next =
+    std::lower_bound(SubsectionFragmentMap.begin(), SubsectionFragmentMap.end(),
+                     std::make_pair(Subsection, (MCFragment *)0));
+
+  // When Subsection already has an entry, step over it so that Next refers to
+  // the entry after it (or to the end of the map).
+  const bool Known =
+    Next != SubsectionFragmentMap.end() && Next->first == Subsection;
+  if (Known)
+    ++Next;
+
+  // Insert in front of the following entry's fragment, or at the list end.
+  iterator IP = end();
+  if (Next != SubsectionFragmentMap.end())
+    IP = Next->second;
+
+  if (Known || Subsection == 0)
+    return IP;
+
+  // The GNU as documentation claims that subsections have an alignment of 4,
+  // although this appears not to be the case.
+  MCFragment *F = new MCDataFragment();
+  SubsectionFragmentMap.insert(Next, std::make_pair(Subsection, F));
+  getFragmentList().insert(IP, F);
+  F->setParent(this);
+  return IP;
+}
+
 /* *** */
 
 MCSymbolData::MCSymbolData() : Symbol(0) {}
diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp
index 0f8f074..18982e9 100644
--- a/lib/MC/MCDwarf.cpp
+++ b/lib/MC/MCDwarf.cpp
@@ -197,6 +197,8 @@ static inline void EmitDwarfLineTable(MCStreamer *MCOS,
   // actually a DW_LNE_end_sequence.
 
   // Switch to the section to be able to create a symbol at its end.
+  // TODO: keep track of the last subsection so that this symbol appears in the
+  // correct place.
   MCOS->SwitchSection(Section);
 
   MCContext &context = MCOS->getContext();
@@ -787,7 +789,7 @@ void MCGenDwarfLabelEntry::Make(MCSymbol *Symbol, MCStreamer *MCOS,
   if (Symbol->isTemporary())
     return;
   MCContext &context = MCOS->getContext();
-  if (context.getGenDwarfSection() != MCOS->getCurrentSection())
+  if (context.getGenDwarfSection() != MCOS->getCurrentSection().first)
     return;
 
   // The dwarf label's name does not have the symbol name's leading
@@ -899,7 +901,7 @@ namespace {
     /// EmitCompactUnwind - Emit the unwind information in a compact way. If
     /// we're successful, return 'true'. Otherwise, return 'false' and it will
     /// emit the normal CIE and FDE.
-    bool EmitCompactUnwind(MCStreamer &streamer,
+    void EmitCompactUnwind(MCStreamer &streamer,
                            const MCDwarfFrameInfo &frame);
 
     const MCSymbol &EmitCIE(MCStreamer &streamer,
@@ -1139,7 +1141,7 @@ void FrameEmitterImpl::EmitCFIInstructions(MCStreamer &streamer,
 /// EmitCompactUnwind - Emit the unwind information in a compact way. If we're
 /// successful, return 'true'. Otherwise, return 'false' and it will emit the
 /// normal CIE and FDE.
-bool FrameEmitterImpl::EmitCompactUnwind(MCStreamer &Streamer,
+void FrameEmitterImpl::EmitCompactUnwind(MCStreamer &Streamer,
                                          const MCDwarfFrameInfo &Frame) {
   MCContext &Context = Streamer.getContext();
   const MCObjectFileInfo *MOFI = Context.getObjectFileInfo();
@@ -1168,14 +1170,13 @@ bool FrameEmitterImpl::EmitCompactUnwind(MCStreamer &Streamer,
   //   .quad except_tab1
 
   uint32_t Encoding = Frame.CompactUnwindEncoding;
-  if (!Encoding) return false;
+  if (!Encoding) return;
+  bool DwarfEHFrameOnly = (Encoding == MOFI->getCompactUnwindDwarfEHFrameOnly());
 
   // The encoding needs to know we have an LSDA.
-  if (Frame.Lsda)
+  if (!DwarfEHFrameOnly && Frame.Lsda)
     Encoding |= 0x40000000;
 
-  Streamer.SwitchSection(MOFI->getCompactUnwindSection());
-
   // Range Start
   unsigned FDEEncoding = MOFI->getFDEEncoding(UsingCFI);
   unsigned Size = getSizeForEncoding(Streamer, FDEEncoding);
@@ -1194,11 +1195,10 @@ bool FrameEmitterImpl::EmitCompactUnwind(MCStreamer &Streamer,
                                       Twine::utohexstr(Encoding));
   Streamer.EmitIntValue(Encoding, Size);
 
-
   // Personality Function
   Size = getSizeForEncoding(Streamer, dwarf::DW_EH_PE_absptr);
   if (VerboseAsm) Streamer.AddComment("Personality Function");
-  if (Frame.Personality)
+  if (!DwarfEHFrameOnly && Frame.Personality)
     Streamer.EmitSymbolValue(Frame.Personality, Size);
   else
     Streamer.EmitIntValue(0, Size); // No personality fn
@@ -1206,12 +1206,10 @@ bool FrameEmitterImpl::EmitCompactUnwind(MCStreamer &Streamer,
   // LSDA
   Size = getSizeForEncoding(Streamer, Frame.LsdaEncoding);
   if (VerboseAsm) Streamer.AddComment("LSDA");
-  if (Frame.Lsda)
+  if (!DwarfEHFrameOnly && Frame.Lsda)
     Streamer.EmitSymbolValue(Frame.Lsda, Size);
   else
     Streamer.EmitIntValue(0, Size); // No LSDA
-
-  return true;
 }
 
 const MCSymbol &FrameEmitterImpl::EmitCIE(MCStreamer &streamer,
@@ -1421,7 +1419,6 @@ MCSymbol *FrameEmitterImpl::EmitFDE(MCStreamer &streamer,
   }
 
   // Call Frame Instructions
-
   EmitCFIInstructions(streamer, frame.Instructions, frame.Begin);
 
   // Padding
@@ -1482,12 +1479,23 @@ void MCDwarfFrameEmitter::Emit(MCStreamer &Streamer,
   ArrayRef<MCDwarfFrameInfo> FrameArray = Streamer.getFrameInfos();
 
   // Emit the compact unwind info if available.
-  if (IsEH && MOFI->getCompactUnwindSection())
-    for (unsigned i = 0, n = Streamer.getNumFrameInfos(); i < n; ++i) {
-      const MCDwarfFrameInfo &Frame = Streamer.getFrameInfo(i);
-      if (Frame.CompactUnwindEncoding)
+  if (IsEH && MOFI->getCompactUnwindSection()) {
+    unsigned NumFrameInfos = Streamer.getNumFrameInfos();
+    bool SectionEmitted = false;
+
+    if (NumFrameInfos) {
+      for (unsigned i = 0; i < NumFrameInfos; ++i) {
+        const MCDwarfFrameInfo &Frame = Streamer.getFrameInfo(i);
+        if (Frame.CompactUnwindEncoding == 0) continue;
+        if (!SectionEmitted) {
+          Streamer.SwitchSection(MOFI->getCompactUnwindSection());
+          Streamer.EmitValueToAlignment(Context.getAsmInfo().getPointerSize());
+          SectionEmitted = true;
+        }
         Emitter.EmitCompactUnwind(Streamer, Frame);
+      }
     }
+  }
 
   const MCSection &Section = IsEH ? *MOFI->getEHFrameSection() :
                                     *MOFI->getDwarfFrameSection();
diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp
index 7f5f1b6..116f86f 100644
--- a/lib/MC/MCELFStreamer.cpp
+++ b/lib/MC/MCELFStreamer.cpp
@@ -13,6 +13,7 @@
 
 #include "llvm/MC/MCELFStreamer.h"
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCCodeEmitter.h"
 #include "llvm/MC/MCContext.h"
@@ -108,14 +109,15 @@ void MCELFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
   llvm_unreachable("invalid assembler flag!");
 }
 
-void MCELFStreamer::ChangeSection(const MCSection *Section) {
+void MCELFStreamer::ChangeSection(const MCSection *Section,
+                                  const MCExpr *Subsection) {
   MCSectionData *CurSection = getCurrentSectionData();
   if (CurSection && CurSection->isBundleLocked())
     report_fatal_error("Unterminated .bundle_lock when changing a section");
   const MCSymbol *Grp = static_cast<const MCSectionELF *>(Section)->getGroup();
   if (Grp)
     getAssembler().getOrCreateSymbolData(*Grp);
-  this->MCObjectStreamer::ChangeSection(Section);
+  this->MCObjectStreamer::ChangeSection(Section, Subsection);
 }
 
 void MCELFStreamer::EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) {
@@ -126,6 +128,26 @@ void MCELFStreamer::EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) {
   Alias->setVariableValue(Value);
 }
 
+// Pick the ELF symbol type to keep when a symbol is given a second .type.
+// GNU as appears to resolve such conflicts in a similar way.  T1 is the type
+// already recorded and T2 the newly requested one; the greater of the two wins
+// under the ordering
+//  STT_NOTYPE < STT_OBJECT < STT_FUNC < STT_GNU_IFUNC < STT_TLS < anything else
+// If neither T1 < T2 nor T2 < T1 according to this ordering, use T2 (the user
+// provided type).
+static unsigned CombineSymbolTypes(unsigned T1, unsigned T2) {
+  static const unsigned TypeOrdering[] = {ELF::STT_NOTYPE, ELF::STT_OBJECT,
+                                          ELF::STT_FUNC, ELF::STT_GNU_IFUNC,
+                                          ELF::STT_TLS};
+  for (unsigned i = 0; i != array_lengthof(TypeOrdering); ++i) {
+    if (T1 == TypeOrdering[i])
+      return T2;
+    if (T2 == TypeOrdering[i])
+      return T1;
+  }
+  return T2;
+}
+
 void MCELFStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
                                           MCSymbolAttr Attribute) {
   // Indirect symbols are handled differently, to match how 'as' handles
@@ -187,27 +209,34 @@ void MCELFStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
     break;
 
   case MCSA_ELF_TypeFunction:
-    MCELF::SetType(SD, ELF::STT_FUNC);
+    MCELF::SetType(SD, CombineSymbolTypes(MCELF::GetType(SD),
+                                          ELF::STT_FUNC));
     break;
 
   case MCSA_ELF_TypeIndFunction:
-    MCELF::SetType(SD, ELF::STT_GNU_IFUNC);
+    MCELF::SetType(SD, CombineSymbolTypes(MCELF::GetType(SD),
+                                          ELF::STT_GNU_IFUNC));
     break;
 
   case MCSA_ELF_TypeObject:
-    MCELF::SetType(SD, ELF::STT_OBJECT);
+    MCELF::SetType(SD, CombineSymbolTypes(MCELF::GetType(SD),
+                                          ELF::STT_OBJECT));
     break;
 
   case MCSA_ELF_TypeTLS:
-    MCELF::SetType(SD, ELF::STT_TLS);
+    MCELF::SetType(SD, CombineSymbolTypes(MCELF::GetType(SD),
+                                          ELF::STT_TLS));
     break;
 
   case MCSA_ELF_TypeCommon:
-    MCELF::SetType(SD, ELF::STT_COMMON);
+    // TODO: Emit these as a common symbol.
+    MCELF::SetType(SD, CombineSymbolTypes(MCELF::GetType(SD),
+                                          ELF::STT_OBJECT));
     break;
 
   case MCSA_ELF_TypeNoType:
-    MCELF::SetType(SD, ELF::STT_NOTYPE);
+    MCELF::SetType(SD, CombineSymbolTypes(MCELF::GetType(SD),
+                                          ELF::STT_NOTYPE));
     break;
 
   case MCSA_Protected:
@@ -290,7 +319,7 @@ void MCELFStreamer::EmitValueToAlignment(unsigned ByteAlignment,
 // entry in the module's symbol table (the first being the null symbol).
 void MCELFStreamer::EmitFileDirective(StringRef Filename) {
   MCSymbol *Symbol = getAssembler().getContext().GetOrCreateSymbol(Filename);
-  Symbol->setSection(*getCurrentSection());
+  Symbol->setSection(*getCurrentSection().first);
   Symbol->setAbsolute();
 
   MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
@@ -406,11 +435,13 @@ void MCELFStreamer::EmitInstToData(const MCInst &Inst) {
       // Optimize memory usage by emitting the instruction to a
       // MCCompactEncodedInstFragment when not in a bundle-locked group and
       // there are no fixups registered.
-      MCCompactEncodedInstFragment *CEIF = new MCCompactEncodedInstFragment(SD);
+      MCCompactEncodedInstFragment *CEIF = new MCCompactEncodedInstFragment();
+      insert(CEIF);
       CEIF->getContents().append(Code.begin(), Code.end());
       return;
     } else {
-      DF = new MCDataFragment(SD);
+      DF = new MCDataFragment();
+      insert(DF);
       if (SD->getBundleLockState() == MCSectionData::BundleLockedAlignToEnd) {
         // If this is a new fragment created for a bundle-locked group, and the
         // group was marked as "align_to_end", set a flag in the fragment.
diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp
index cd4d144..06bc72f 100644
--- a/lib/MC/MCExpr.cpp
+++ b/lib/MC/MCExpr.cpp
@@ -250,6 +250,7 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
   case VK_Mips_GOT_LO16: return "GOT_LO16";
   case VK_Mips_CALL_HI16: return "CALL_HI16";
   case VK_Mips_CALL_LO16: return "CALL_LO16";
+  case VK_COFF_IMGREL32: return "IMGREL32";
   }
   llvm_unreachable("Invalid variant kind");
 }
@@ -285,6 +286,44 @@ MCSymbolRefExpr::getVariantKindForName(StringRef Name) {
     .Case("dtpoff", VK_DTPOFF)
     .Case("TLVP", VK_TLVP)
     .Case("tlvp", VK_TLVP)
+    .Case("IMGREL", VK_COFF_IMGREL32)
+    .Case("imgrel", VK_COFF_IMGREL32)
+    .Case("SECREL32", VK_SECREL)
+    .Case("secrel32", VK_SECREL)
+    .Case("HA", VK_PPC_GAS_HA16)
+    .Case("ha", VK_PPC_GAS_HA16)
+    .Case("L", VK_PPC_GAS_LO16)
+    .Case("l", VK_PPC_GAS_LO16)
+    .Case("TOCBASE", VK_PPC_TOC)
+    .Case("tocbase", VK_PPC_TOC)
+    .Case("TOC", VK_PPC_TOC_ENTRY)
+    .Case("toc", VK_PPC_TOC_ENTRY)
+    .Case("TOC@HA", VK_PPC_TOC16_HA)
+    .Case("toc@ha", VK_PPC_TOC16_HA)
+    .Case("TOC@L", VK_PPC_TOC16_LO)
+    .Case("toc@l", VK_PPC_TOC16_LO)
+    .Case("TLS", VK_PPC_TLS)
+    .Case("tls", VK_PPC_TLS)
+    .Case("TPREL@HA", VK_PPC_TPREL16_HA)
+    .Case("tprel@ha", VK_PPC_TPREL16_HA)
+    .Case("TPREL@L", VK_PPC_TPREL16_LO)
+    .Case("tprel@l", VK_PPC_TPREL16_LO)
+    .Case("DTPREL@HA", VK_PPC_DTPREL16_HA)
+    .Case("dtprel@ha", VK_PPC_DTPREL16_HA)
+    .Case("DTPREL@L", VK_PPC_DTPREL16_LO)
+    .Case("dtprel@l", VK_PPC_DTPREL16_LO)
+    .Case("GOT@TPREL@HA", VK_PPC_GOT_TPREL16_HA)
+    .Case("got@tprel@ha", VK_PPC_GOT_TPREL16_HA)
+    .Case("GOT@TPREL@L", VK_PPC_GOT_TPREL16_LO)
+    .Case("got@tprel@l", VK_PPC_GOT_TPREL16_LO)
+    .Case("GOT@TLSGD@HA", VK_PPC_GOT_TLSGD16_HA)
+    .Case("got@tlsgd@ha", VK_PPC_GOT_TLSGD16_HA)
+    .Case("GOT@TLSGD@L", VK_PPC_GOT_TLSGD16_LO)
+    .Case("got@tlsgd@l", VK_PPC_GOT_TLSGD16_LO)
+    .Case("GOT@TLSLD@HA", VK_PPC_GOT_TLSLD16_HA)
+    .Case("got@tlsld@ha", VK_PPC_GOT_TLSLD16_HA)
+    .Case("GOT@TLSLD@L", VK_PPC_GOT_TLSLD16_LO)
+    .Case("got@tlsld@l", VK_PPC_GOT_TLSLD16_LO)
     .Default(VK_Invalid);
 }
 
diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp
index 7d08d0e..e08b01b 100644
--- a/lib/MC/MCMachOStreamer.cpp
+++ b/lib/MC/MCMachOStreamer.cpp
@@ -122,11 +122,11 @@ void MCMachOStreamer::EmitLabel(MCSymbol *Symbol) {
   assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
 
   // isSymbolLinkerVisible uses the section.
-  Symbol->setSection(*getCurrentSection());
+  Symbol->setSection(*getCurrentSection().first);
   // We have to create a new fragment if this is an atom defining symbol,
   // fragments cannot span atoms.
   if (getAssembler().isSymbolLinkerVisible(*Symbol))
-    new MCDataFragment(getCurrentSectionData());
+    insert(new MCDataFragment());
 
   MCObjectStreamer::EmitLabel(Symbol);
 
diff --git a/lib/MC/MCNullStreamer.cpp b/lib/MC/MCNullStreamer.cpp
index c872b22..659706a 100644
--- a/lib/MC/MCNullStreamer.cpp
+++ b/lib/MC/MCNullStreamer.cpp
@@ -30,13 +30,14 @@ namespace {
     virtual void InitSections() {
     }
 
-    virtual void ChangeSection(const MCSection *Section) {
+    virtual void ChangeSection(const MCSection *Section,
+                               const MCExpr *Subsection) {
     }
 
     virtual void EmitLabel(MCSymbol *Symbol) {
       assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
-      assert(getCurrentSection() && "Cannot emit before setting section!");
-      Symbol->setSection(*getCurrentSection());
+      assert(getCurrentSection().first &&"Cannot emit before setting section!");
+      Symbol->setSection(*getCurrentSection().first);
     }
     virtual void EmitDebugLabel(MCSymbol *Symbol) {
       EmitLabel(Symbol);
diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp
index d19e79a..96b62f1 100644
--- a/lib/MC/MCObjectFileInfo.cpp
+++ b/lib/MC/MCObjectFileInfo.cpp
@@ -145,12 +145,16 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) {
   LSDASection = Ctx->getMachOSection("__TEXT", "__gcc_except_tab", 0,
                                      SectionKind::getReadOnlyWithRel());
 
-  if (T.isMacOSX() && !T.isMacOSXVersionLT(10, 6))
+  if (T.isMacOSX() && !T.isMacOSXVersionLT(10, 6)) {
     CompactUnwindSection =
       Ctx->getMachOSection("__LD", "__compact_unwind",
                            MCSectionMachO::S_ATTR_DEBUG,
                            SectionKind::getReadOnly());
 
+    if (T.getArch() == Triple::x86_64 || T.getArch() == Triple::x86)
+      CompactUnwindDwarfEHFrameOnly = 0x04000000;
+  }
+
   // Debug Information.
   DwarfAccelNamesSection =
     Ctx->getMachOSection("__DWARF", "__apple_names",
@@ -291,6 +295,22 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
     FDEEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8;
     TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
       dwarf::DW_EH_PE_udata8;
+  } else if (T.getArch() == Triple::systemz) {
+    // All currently-defined code models guarantee that 4-byte PC-relative
+    // values will be in range.
+    if (RelocM == Reloc::PIC_) {
+      PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+        dwarf::DW_EH_PE_sdata4;
+      LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+      FDEEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+      TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+        dwarf::DW_EH_PE_sdata4;
+    } else {
+      PersonalityEncoding = dwarf::DW_EH_PE_absptr;
+      LSDAEncoding = dwarf::DW_EH_PE_absptr;
+      FDEEncoding = dwarf::DW_EH_PE_absptr;
+      TTypeEncoding = dwarf::DW_EH_PE_absptr;
+    }
   }
 
   // Solaris requires different flags for .eh_frame to seemingly every other
@@ -629,6 +649,8 @@ void MCObjectFileInfo::InitMCObjectFileInfo(StringRef TT, Reloc::Model relocm,
   PersonalityEncoding = LSDAEncoding = FDEEncoding = FDECFIEncoding =
     TTypeEncoding = dwarf::DW_EH_PE_absptr;
 
+  CompactUnwindDwarfEHFrameOnly = 0;
+
   EHFrameSection = 0;             // Created on demand.
   CompactUnwindSection = 0;       // Used only by selected targets.
   DwarfAccelNamesSection = 0;     // Used only by selected targets.
diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp
index 0d2ce83..d21ce8d 100644
--- a/lib/MC/MCObjectStreamer.cpp
+++ b/lib/MC/MCObjectStreamer.cpp
@@ -8,6 +8,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/MC/MCAsmBackend.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCAssembler.h"
@@ -45,14 +46,15 @@ void MCObjectStreamer::reset() {
   if (Assembler)
     Assembler->reset();
   CurSectionData = 0;
+  CurInsertionPoint = MCSectionData::iterator();
   MCStreamer::reset();
 }
 
 MCFragment *MCObjectStreamer::getCurrentFragment() const {
   assert(getCurrentSectionData() && "No current section!");
 
-  if (!getCurrentSectionData()->empty())
-    return &getCurrentSectionData()->getFragmentList().back();
+  if (CurInsertionPoint != getCurrentSectionData()->getFragmentList().begin())
+    return prior(CurInsertionPoint);
 
   return 0;
 }
@@ -61,8 +63,10 @@ MCDataFragment *MCObjectStreamer::getOrCreateDataFragment() const {
   MCDataFragment *F = dyn_cast_or_null<MCDataFragment>(getCurrentFragment());
   // When bundling is enabled, we don't want to add data to a fragment that
   // already has instructions (see MCELFStreamer::EmitInstToData for details)
-  if (!F || (Assembler->isBundlingEnabled() && F->hasInstructions()))
-    F = new MCDataFragment(getCurrentSectionData());
+  if (!F || (Assembler->isBundlingEnabled() && F->hasInstructions())) {
+    F = new MCDataFragment();
+    insert(F);
+  }
   return F;
 }
 
@@ -145,7 +149,7 @@ void MCObjectStreamer::EmitULEB128Value(const MCExpr *Value) {
     return;
   }
   Value = ForceExpAbs(Value);
-  new MCLEBFragment(*Value, false, getCurrentSectionData());
+  insert(new MCLEBFragment(*Value, false));
 }
 
 void MCObjectStreamer::EmitSLEB128Value(const MCExpr *Value) {
@@ -155,7 +159,7 @@ void MCObjectStreamer::EmitSLEB128Value(const MCExpr *Value) {
     return;
   }
   Value = ForceExpAbs(Value);
-  new MCLEBFragment(*Value, true, getCurrentSectionData());
+  insert(new MCLEBFragment(*Value, true));
 }
 
 void MCObjectStreamer::EmitWeakReference(MCSymbol *Alias,
@@ -163,10 +167,20 @@ void MCObjectStreamer::EmitWeakReference(MCSymbol *Alias,
   report_fatal_error("This file format doesn't support weak aliases.");
 }
 
-void MCObjectStreamer::ChangeSection(const MCSection *Section) {
+void MCObjectStreamer::ChangeSection(const MCSection *Section,
+                                     const MCExpr *Subsection) {
   assert(Section && "Cannot switch to a null section!");
 
   CurSectionData = &getAssembler().getOrCreateSectionData(*Section);
+
+  int64_t IntSubsection = 0;
+  if (Subsection &&
+      !Subsection->EvaluateAsAbsolute(IntSubsection, getAssembler()))
+    report_fatal_error("Cannot evaluate subsection number");
+  if (IntSubsection < 0 || IntSubsection > 8192)
+    report_fatal_error("Subsection number out of range");
+  CurInsertionPoint =
+    CurSectionData->getSubsectionInsertionPoint(unsigned(IntSubsection));
 }
 
 void MCObjectStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
@@ -185,7 +199,7 @@ void MCObjectStreamer::EmitInstruction(const MCInst &Inst) {
 
   // Now that a machine instruction has been assembled into this section, make
   // a line entry for any .loc directive that has been seen.
-  MCLineEntry::Make(this, getCurrentSection());
+  MCLineEntry::Make(this, getCurrentSection().first);
 
   // If this instruction doesn't need relaxation, just emit it as data.
   MCAssembler &Assembler = getAssembler();
@@ -216,8 +230,8 @@ void MCObjectStreamer::EmitInstruction(const MCInst &Inst) {
 void MCObjectStreamer::EmitInstToFragment(const MCInst &Inst) {
   // Always create a new, separate fragment here, because its size can change
   // during relaxation.
-  MCRelaxableFragment *IF =
-    new MCRelaxableFragment(Inst, getCurrentSectionData());
+  MCRelaxableFragment *IF = new MCRelaxableFragment(Inst);
+  insert(IF);
 
   SmallString<128> Code;
   raw_svector_ostream VecOS(Code);
@@ -258,7 +272,7 @@ void MCObjectStreamer::EmitDwarfAdvanceLineAddr(int64_t LineDelta,
     return;
   }
   AddrDelta = ForceExpAbs(AddrDelta);
-  new MCDwarfLineAddrFragment(LineDelta, *AddrDelta, getCurrentSectionData());
+  insert(new MCDwarfLineAddrFragment(LineDelta, *AddrDelta));
 }
 
 void MCObjectStreamer::EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel,
@@ -270,7 +284,7 @@ void MCObjectStreamer::EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel,
     return;
   }
   AddrDelta = ForceExpAbs(AddrDelta);
-  new MCDwarfCallFrameFragment(*AddrDelta, getCurrentSectionData());
+  insert(new MCDwarfCallFrameFragment(*AddrDelta));
 }
 
 void MCObjectStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
@@ -284,8 +298,7 @@ void MCObjectStreamer::EmitValueToAlignment(unsigned ByteAlignment,
                                             unsigned MaxBytesToEmit) {
   if (MaxBytesToEmit == 0)
     MaxBytesToEmit = ByteAlignment;
-  new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit,
-                      getCurrentSectionData());
+  insert(new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit));
 
   // Update the maximum alignment on the current section if necessary.
   if (ByteAlignment > getCurrentSectionData()->getAlignment())
@@ -302,7 +315,7 @@ bool MCObjectStreamer::EmitValueToOffset(const MCExpr *Offset,
                                          unsigned char Value) {
   int64_t Res;
   if (Offset->EvaluateAsAbsolute(Res, getAssembler())) {
-    new MCOrgFragment(*Offset, Value, getCurrentSectionData());
+    insert(new MCOrgFragment(*Offset, Value));
     return false;
   }
 
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index 804734c..edefdb4 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -201,9 +201,9 @@ public:
   }
 
   virtual bool Warning(SMLoc L, const Twine &Msg,
-                       ArrayRef<SMRange> Ranges = ArrayRef<SMRange>());
+                       ArrayRef<SMRange> Ranges = None);
   virtual bool Error(SMLoc L, const Twine &Msg,
-                     ArrayRef<SMRange> Ranges = ArrayRef<SMRange>());
+                     ArrayRef<SMRange> Ranges = None);
 
   virtual const AsmToken &Lex();
 
@@ -221,6 +221,7 @@ public:
 
   bool parseExpression(const MCExpr *&Res);
   virtual bool parseExpression(const MCExpr *&Res, SMLoc &EndLoc);
+  virtual bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc);
   virtual bool parseParenExpression(const MCExpr *&Res, SMLoc &EndLoc);
   virtual bool parseAbsoluteExpression(int64_t &Res);
 
@@ -285,7 +286,7 @@ private:
 
   void PrintMacroInstantiations();
   void PrintMessage(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Msg,
-                    ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) const {
+                    ArrayRef<SMRange> Ranges = None) const {
     SrcMgr.PrintMessage(Loc, Kind, Msg, Ranges);
   }
   static void DiagHandler(const SMDiagnostic &Diag, void *Context);
@@ -601,7 +602,7 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
   // If we are generating dwarf for assembly source files save the initial text
   // section and generate a .file directive.
   if (getContext().getGenDwarfForAssembly()) {
-    getContext().setGenDwarfSection(getStreamer().getCurrentSection());
+    getContext().setGenDwarfSection(getStreamer().getCurrentSection().first);
     MCSymbol *SectionStartSym = getContext().CreateTempSymbol();
     getStreamer().EmitLabel(SectionStartSym);
     getContext().setGenDwarfSectionStartSym(SectionStartSym);
@@ -666,7 +667,7 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
 }
 
 void AsmParser::checkForValidSection() {
-  if (!ParsingInlineAsm && !getStreamer().getCurrentSection()) {
+  if (!ParsingInlineAsm && !getStreamer().getCurrentSection().first) {
     TokError("expected section directive before assembly directive");
     Out.InitToTextSection();
   }
@@ -869,6 +870,10 @@ bool AsmParser::parseExpression(const MCExpr *&Res) {
   return parseExpression(Res, EndLoc);
 }
 
+bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
+  return ParsePrimaryExpr(Res, EndLoc);
+}
+
 const MCExpr *
 AsmParser::ApplyModifierToExpr(const MCExpr *E,
                                MCSymbolRefExpr::VariantKind Variant) {
@@ -1087,7 +1092,7 @@ bool AsmParser::ParseBinOpRHS(unsigned Precedence, const MCExpr *&Res,
     MCBinaryExpr::Opcode Dummy;
     unsigned NextTokPrec = getBinOpPrecedence(Lexer.getKind(), Dummy);
     if (TokPrec < NextTokPrec) {
-      if (ParseBinOpRHS(Precedence+1, RHS, EndLoc)) return true;
+      if (ParseBinOpRHS(TokPrec+1, RHS, EndLoc)) return true;
     }
 
     // Merge LHS and RHS according to operator.
@@ -1488,7 +1493,8 @@ bool AsmParser::ParseStatement(ParseStatementInfo &Info) {
   // section is the initial text section then generate a .loc directive for
   // the instruction.
   if (!HadError && getContext().getGenDwarfForAssembly() &&
-      getContext().getGenDwarfSection() == getStreamer().getCurrentSection()) {
+      getContext().getGenDwarfSection() ==
+      getStreamer().getCurrentSection().first) {
 
     unsigned Line = SrcMgr.FindLineNumber(IDLoc, CurBuffer);
 
@@ -1978,7 +1984,6 @@ static bool IsUsedIn(const MCSymbol *Sym, const MCExpr *Value) {
   case MCExpr::Binary: {
     const MCBinaryExpr *BE = static_cast<const MCBinaryExpr*>(Value);
     return IsUsedIn(Sym, BE->getLHS()) || IsUsedIn(Sym, BE->getRHS());
-    break;
   }
   case MCExpr::Target:
   case MCExpr::Constant:
@@ -2479,7 +2484,7 @@ bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) {
 
   // Check whether we should use optimal code alignment for this .align
   // directive.
-  bool UseCodeAlign = getStreamer().getCurrentSection()->UseCodeAlign();
+  bool UseCodeAlign = getStreamer().getCurrentSection().first->UseCodeAlign();
   if ((!HasFillExpr || Lexer.getMAI().getTextAlignFillValue() == FillExpr) &&
       ValueSize == 1 && UseCodeAlign) {
     getStreamer().EmitCodeAlignment(Alignment, MaxBytesToFill);
@@ -2631,12 +2636,10 @@ bool AsmParser::ParseDirectiveLoc() {
             Flags |= DWARF2_FLAG_IS_STMT;
           else
             return Error(Loc, "is_stmt value not 0 or 1");
-        }
-        else {
+        } else {
           return Error(Loc, "is_stmt value not the constant value of 0 or 1");
         }
-      }
-      else if (Name == "isa") {
+      } else if (Name == "isa") {
         Loc = getTok().getLoc();
         const MCExpr *Value;
         if (parseExpression(Value))
@@ -2647,16 +2650,13 @@ bool AsmParser::ParseDirectiveLoc() {
           if (Value < 0)
             return Error(Loc, "isa number less than zero");
           Isa = Value;
-        }
-        else {
+        } else {
           return Error(Loc, "isa number not a constant value");
         }
-      }
-      else if (Name == "discriminator") {
+      } else if (Name == "discriminator") {
         if (parseAbsoluteExpression(Discriminator))
           return true;
-      }
-      else {
+      } else {
         return Error(Loc, "unknown sub-directive in '.loc' directive");
       }
 
@@ -3615,18 +3615,17 @@ bool AsmParser::ParseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined) {
 bool AsmParser::ParseDirectiveElseIf(SMLoc DirectiveLoc) {
   if (TheCondState.TheCond != AsmCond::IfCond &&
       TheCondState.TheCond != AsmCond::ElseIfCond)
-      Error(DirectiveLoc, "Encountered a .elseif that doesn't follow a .if or "
-                          " an .elseif");
+    Error(DirectiveLoc, "Encountered a .elseif that doesn't follow a .if or "
+                        " an .elseif");
   TheCondState.TheCond = AsmCond::ElseIfCond;
 
   bool LastIgnoreState = false;
   if (!TheCondStack.empty())
-      LastIgnoreState = TheCondStack.back().Ignore;
+    LastIgnoreState = TheCondStack.back().Ignore;
   if (LastIgnoreState || TheCondState.CondMet) {
     TheCondState.Ignore = true;
     eatToEndOfStatement();
-  }
-  else {
+  } else {
     int64_t ExprValue;
     if (parseAbsoluteExpression(ExprValue))
       return true;
@@ -3652,8 +3651,8 @@ bool AsmParser::ParseDirectiveElse(SMLoc DirectiveLoc) {
 
   if (TheCondState.TheCond != AsmCond::IfCond &&
       TheCondState.TheCond != AsmCond::ElseIfCond)
-      Error(DirectiveLoc, "Encountered a .else that doesn't follow a .if or an "
-                          ".elseif");
+    Error(DirectiveLoc, "Encountered a .else that doesn't follow a .if or an "
+                        ".elseif");
   TheCondState.TheCond = AsmCond::ElseCond;
   bool LastIgnoreState = false;
   if (!TheCondStack.empty())
@@ -4046,19 +4045,17 @@ static int RewritesSort(const void *A, const void *B) {
   if (AsmRewriteB->Loc.getPointer() < AsmRewriteA->Loc.getPointer())
     return 1;
 
-  // It's possible to have a SizeDirective rewrite and an Input/Output rewrite
-  // to the same location.  Make sure the SizeDirective rewrite is performed
-  // first.  This also ensure the sort algorithm is stable.
-  if (AsmRewriteA->Kind == AOK_SizeDirective) {
-    assert ((AsmRewriteB->Kind == AOK_Input || AsmRewriteB->Kind == AOK_Output) &&
-            "Expected an Input/Output rewrite!");
+  // It's possible to have a SizeDirective, Imm/ImmPrefix and an Input/Output
+  // rewrite to the same location.  Make sure the SizeDirective rewrite is
+  // performed first, then the Imm/ImmPrefix and finally the Input/Output.  This
+  // ensures the sort algorithm is stable.
+  if (AsmRewritePrecedence [AsmRewriteA->Kind] >
+      AsmRewritePrecedence [AsmRewriteB->Kind])
     return -1;
-  }
-  if (AsmRewriteB->Kind == AOK_SizeDirective) {
-    assert ((AsmRewriteA->Kind == AOK_Input || AsmRewriteA->Kind == AOK_Output) &&
-            "Expected an Input/Output rewrite!");
+
+  if (AsmRewritePrecedence [AsmRewriteA->Kind] <
+      AsmRewritePrecedence [AsmRewriteB->Kind])
     return 1;
-  }
   llvm_unreachable ("Unstable rewrite sort.");
 }
 
@@ -4118,28 +4115,27 @@ AsmParser::parseMSInlineAsm(void *AsmLoc, std::string &AsmString,
       }
 
       // Expr/Input or Output.
-      bool IsVarDecl;
-      unsigned Length, Size, Type;
-      void *OpDecl = SI.LookupInlineAsmIdentifier(Operand->getName(), AsmLoc,
-                                                  Length, Size, Type,
-                                                  IsVarDecl);
+      StringRef SymName = Operand->getSymName();
+      if (SymName.empty())
+        continue;
+
+      void *OpDecl = Operand->getOpDecl();
       if (!OpDecl)
         continue;
 
       bool isOutput = (i == 1) && Desc.mayStore();
+      SMLoc Start = SMLoc::getFromPointer(SymName.data());
       if (isOutput) {
         ++InputIdx;
         OutputDecls.push_back(OpDecl);
         OutputDeclsAddressOf.push_back(Operand->needAddressOf());
         OutputConstraints.push_back('=' + Operand->getConstraint().str());
-        AsmStrRewrites.push_back(AsmRewrite(AOK_Output, Operand->getStartLoc(),
-                                            Operand->getNameLen()));
+        AsmStrRewrites.push_back(AsmRewrite(AOK_Output, Start, SymName.size()));
       } else {
         InputDecls.push_back(OpDecl);
         InputDeclsAddressOf.push_back(Operand->needAddressOf());
         InputConstraints.push_back(Operand->getConstraint().str());
-        AsmStrRewrites.push_back(AsmRewrite(AOK_Input, Operand->getStartLoc(),
-                                            Operand->getNameLen()));
+        AsmStrRewrites.push_back(AsmRewrite(AOK_Input, Start, SymName.size()));
       }
     }
   }
@@ -4182,20 +4178,17 @@ AsmParser::parseMSInlineAsm(void *AsmLoc, std::string &AsmString,
   for (SmallVectorImpl<AsmRewrite>::iterator I = AsmStrRewrites.begin(),
                                              E = AsmStrRewrites.end();
        I != E; ++I) {
+    AsmRewriteKind Kind = (*I).Kind;
+    if (Kind == AOK_Delete)
+      continue;
+
     const char *Loc = (*I).Loc.getPointer();
     assert(Loc >= AsmStart && "Expected Loc to be at or after Start!");
 
-    unsigned AdditionalSkip = 0;
-    AsmRewriteKind Kind = (*I).Kind;
-
     // Emit everything up to the immediate/expression.
     unsigned Len = Loc - AsmStart;
-    if (Len) {
-      // For Input/Output operands we need to remove the brackets, if present.
-      if ((Kind == AOK_Input || Kind == AOK_Output) && Loc[-1] == '[')
-        --Len;
+    if (Len)
       OS << StringRef(AsmStart, Len);
-    }
 
     // Skip the original expression.
     if (Kind == AOK_Skip) {
@@ -4203,6 +4196,7 @@ AsmParser::parseMSInlineAsm(void *AsmLoc, std::string &AsmString,
       continue;
     }
 
+    unsigned AdditionalSkip = 0;
     // Rewrite expressions in $N notation.
     switch (Kind) {
     default: break;
@@ -4249,11 +4243,6 @@ AsmParser::parseMSInlineAsm(void *AsmLoc, std::string &AsmString,
 
     // Skip the original expression.
     AsmStart = Loc + (*I).Len + AdditionalSkip;
-
-    // For Input/Output operands we need to remove the brackets, if present.
-    if ((Kind == AOK_Input || Kind == AOK_Output) && AsmStart != AsmEnd &&
-        *AsmStart == ']')
-      ++AsmStart;
   }
 
   // Emit the remainder of the asm string.
diff --git a/lib/MC/MCParser/DarwinAsmParser.cpp b/lib/MC/MCParser/DarwinAsmParser.cpp
index 6d6409f..7eb8b74 100644
--- a/lib/MC/MCParser/DarwinAsmParser.cpp
+++ b/lib/MC/MCParser/DarwinAsmParser.cpp
@@ -566,10 +566,10 @@ bool DarwinAsmParser::ParseDirectivePopSection(StringRef, SMLoc) {
 /// ParseDirectivePrevious:
 ///   ::= .previous
 bool DarwinAsmParser::ParseDirectivePrevious(StringRef DirName, SMLoc) {
-  const MCSection *PreviousSection = getStreamer().getPreviousSection();
-  if (PreviousSection == NULL)
+  MCSectionSubPair PreviousSection = getStreamer().getPreviousSection();
+  if (PreviousSection.first == NULL)
       return TokError(".previous without corresponding .section");
-  getStreamer().SwitchSection(PreviousSection);
+  getStreamer().SwitchSection(PreviousSection.first, PreviousSection.second);
   return false;
 }
 
diff --git a/lib/MC/MCParser/ELFAsmParser.cpp b/lib/MC/MCParser/ELFAsmParser.cpp
index 4c45e08..3134fc3 100644
--- a/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/lib/MC/MCParser/ELFAsmParser.cpp
@@ -76,6 +76,7 @@ public:
       &ELFAsmParser::ParseDirectiveSymbolAttribute>(".internal");
     addDirectiveHandler<
       &ELFAsmParser::ParseDirectiveSymbolAttribute>(".hidden");
+    addDirectiveHandler<&ELFAsmParser::ParseDirectiveSubsection>(".subsection");
   }
 
   // FIXME: Part of this logic is duplicated in the MCELFStreamer. What is
@@ -147,9 +148,11 @@ public:
   bool ParseDirectiveVersion(StringRef, SMLoc);
   bool ParseDirectiveWeakref(StringRef, SMLoc);
   bool ParseDirectiveSymbolAttribute(StringRef, SMLoc);
+  bool ParseDirectiveSubsection(StringRef, SMLoc);
 
 private:
   bool ParseSectionName(StringRef &SectionName);
+  bool ParseSectionArguments(bool IsPush);
 };
 
 }
@@ -191,12 +194,15 @@ bool ELFAsmParser::ParseDirectiveSymbolAttribute(StringRef Directive, SMLoc) {
 
 bool ELFAsmParser::ParseSectionSwitch(StringRef Section, unsigned Type,
                                       unsigned Flags, SectionKind Kind) {
-  if (getLexer().isNot(AsmToken::EndOfStatement))
-    return TokError("unexpected token in section switching directive");
-  Lex();
+  const MCExpr *Subsection = 0;
+  if (getLexer().isNot(AsmToken::EndOfStatement)) {
+    if (getParser().parseExpression(Subsection))
+      return true;
+  }
 
   getStreamer().SwitchSection(getContext().getELFSection(
-                                Section, Type, Flags, Kind));
+                                Section, Type, Flags, Kind),
+                              Subsection);
 
   return false;
 }
@@ -316,7 +322,7 @@ static int parseSectionFlags(StringRef flagsStr) {
 bool ELFAsmParser::ParseDirectivePushSection(StringRef s, SMLoc loc) {
   getStreamer().PushSection();
 
-  if (ParseDirectiveSection(s, loc)) {
+  if (ParseSectionArguments(/*IsPush=*/true)) {
     getStreamer().PopSection();
     return true;
   }
@@ -332,6 +338,10 @@ bool ELFAsmParser::ParseDirectivePopSection(StringRef, SMLoc) {
 
 // FIXME: This is a work in progress.
 bool ELFAsmParser::ParseDirectiveSection(StringRef, SMLoc) {
+  return ParseSectionArguments(/*IsPush=*/false);
+}
+
+bool ELFAsmParser::ParseSectionArguments(bool IsPush) {
   StringRef SectionName;
 
   if (ParseSectionName(SectionName))
@@ -341,6 +351,7 @@ bool ELFAsmParser::ParseDirectiveSection(StringRef, SMLoc) {
   int64_t Size = 0;
   StringRef GroupName;
   unsigned Flags = 0;
+  const MCExpr *Subsection = 0;
 
   // Set the defaults first.
   if (SectionName == ".fini" || SectionName == ".init" ||
@@ -352,6 +363,14 @@ bool ELFAsmParser::ParseDirectiveSection(StringRef, SMLoc) {
   if (getLexer().is(AsmToken::Comma)) {
     Lex();
 
+    if (IsPush && getLexer().isNot(AsmToken::String)) {
+      if (getParser().parseExpression(Subsection))
+        return true;
+      if (getLexer().isNot(AsmToken::Comma))
+        goto EndStmt;
+      Lex();
+    }
+   
     if (getLexer().isNot(AsmToken::String))
       return TokError("expected string in directive");
 
@@ -408,6 +427,7 @@ bool ELFAsmParser::ParseDirectiveSection(StringRef, SMLoc) {
     }
   }
 
+EndStmt:
   if (getLexer().isNot(AsmToken::EndOfStatement))
     return TokError("unexpected token in directive");
 
@@ -444,15 +464,16 @@ bool ELFAsmParser::ParseDirectiveSection(StringRef, SMLoc) {
   SectionKind Kind = computeSectionKind(Flags);
   getStreamer().SwitchSection(getContext().getELFSection(SectionName, Type,
                                                          Flags, Kind, Size,
-                                                         GroupName));
+                                                         GroupName),
+                              Subsection);
   return false;
 }
 
 bool ELFAsmParser::ParseDirectivePrevious(StringRef DirName, SMLoc) {
-  const MCSection *PreviousSection = getStreamer().getPreviousSection();
-  if (PreviousSection == NULL)
+  MCSectionSubPair PreviousSection = getStreamer().getPreviousSection();
+  if (PreviousSection.first == NULL)
       return TokError(".previous without corresponding .section");
-  getStreamer().SwitchSection(PreviousSection);
+  getStreamer().SwitchSection(PreviousSection.first, PreviousSection.second);
 
   return false;
 }
@@ -613,6 +634,20 @@ bool ELFAsmParser::ParseDirectiveWeakref(StringRef, SMLoc) {
   return false;
 }
 
+bool ELFAsmParser::ParseDirectiveSubsection(StringRef, SMLoc) {
+  const MCExpr *Subsection = 0;
+  if (getLexer().isNot(AsmToken::EndOfStatement)) {
+    if (getParser().parseExpression(Subsection))
+     return true;
+  }
+
+  if (getLexer().isNot(AsmToken::EndOfStatement))
+    return TokError("unexpected token in directive");
+
+  getStreamer().SubSection(Subsection);
+  return false;
+}
+
 namespace llvm {
 
 MCAsmParserExtension *createELFAsmParser() {
diff --git a/lib/MC/MCPureStreamer.cpp b/lib/MC/MCPureStreamer.cpp
index 0e04c55..8ae724f 100644
--- a/lib/MC/MCPureStreamer.cpp
+++ b/lib/MC/MCPureStreamer.cpp
@@ -12,9 +12,8 @@
 #include "llvm/MC/MCCodeEmitter.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCObjectFileInfo.h"
 #include "llvm/MC/MCObjectStreamer.h"
-// FIXME: Remove this.
-#include "llvm/MC/MCSectionMachO.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/ErrorHandling.h"
 
@@ -113,25 +112,22 @@ void MCPureStreamer::InitSections() {
 }
 
 void MCPureStreamer::InitToTextSection() {
-  // FIMXE: To what!?
-  SwitchSection(getContext().getMachOSection("__TEXT", "__text",
-                                    MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
-                                    0, SectionKind::getText()));
+  SwitchSection(getContext().getObjectFileInfo()->getTextSection());
 }
 
 void MCPureStreamer::EmitLabel(MCSymbol *Symbol) {
   assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
   assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
-  assert(getCurrentSection() && "Cannot emit before setting section!");
+  assert(getCurrentSection().first && "Cannot emit before setting section!");
 
-  Symbol->setSection(*getCurrentSection());
+  Symbol->setSection(*getCurrentSection().first);
 
   MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
 
   // We have to create a new fragment if this is an atom defining symbol,
   // fragments cannot span atoms.
   if (getAssembler().isSymbolLinkerVisible(SD.getSymbol()))
-    new MCDataFragment(getCurrentSectionData());
+    insert(new MCDataFragment());
 
   // FIXME: This is wasteful, we don't necessarily need to create a data
   // fragment. Instead, we should mark the symbol as pointing into the data
@@ -166,8 +162,7 @@ void MCPureStreamer::EmitValueToAlignment(unsigned ByteAlignment,
   // MCObjectStreamer.
   if (MaxBytesToEmit == 0)
     MaxBytesToEmit = ByteAlignment;
-  new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit,
-                      getCurrentSectionData());
+  insert(new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit));
 
   // Update the maximum alignment on the current section if necessary.
   if (ByteAlignment > getCurrentSectionData()->getAlignment())
@@ -180,8 +175,8 @@ void MCPureStreamer::EmitCodeAlignment(unsigned ByteAlignment,
   // MCObjectStreamer.
   if (MaxBytesToEmit == 0)
     MaxBytesToEmit = ByteAlignment;
-  MCAlignFragment *F = new MCAlignFragment(ByteAlignment, 0, 1, MaxBytesToEmit,
-                                           getCurrentSectionData());
+  MCAlignFragment *F = new MCAlignFragment(ByteAlignment, 0, 1, MaxBytesToEmit);
+  insert(F);
   F->setEmitNops(true);
 
   // Update the maximum alignment on the current section if necessary.
@@ -191,13 +186,13 @@ void MCPureStreamer::EmitCodeAlignment(unsigned ByteAlignment,
 
 bool MCPureStreamer::EmitValueToOffset(const MCExpr *Offset,
                                        unsigned char Value) {
-  new MCOrgFragment(*Offset, Value, getCurrentSectionData());
+  insert(new MCOrgFragment(*Offset, Value));
   return false;
 }
 
 void MCPureStreamer::EmitInstToFragment(const MCInst &Inst) {
-  MCRelaxableFragment *IF =
-    new MCRelaxableFragment(Inst, getCurrentSectionData());
+  MCRelaxableFragment *IF = new MCRelaxableFragment(Inst);
+  insert(IF);
 
   // Add the fixups and data.
   //
diff --git a/lib/MC/MCSectionCOFF.cpp b/lib/MC/MCSectionCOFF.cpp
index aac9377..6cedf06 100644
--- a/lib/MC/MCSectionCOFF.cpp
+++ b/lib/MC/MCSectionCOFF.cpp
@@ -29,7 +29,8 @@ bool MCSectionCOFF::ShouldOmitSectionDirective(StringRef Name,
 }
 
 void MCSectionCOFF::PrintSwitchToSection(const MCAsmInfo &MAI,
-                                         raw_ostream &OS) const {
+                                         raw_ostream &OS,
+                                         const MCExpr *Subsection) const {
 
   // standard sections don't require the '.section'
   if (ShouldOmitSectionDirective(SectionName, MAI)) {
diff --git a/lib/MC/MCSectionELF.cpp b/lib/MC/MCSectionELF.cpp
index 0775cfa..bf1a984 100644
--- a/lib/MC/MCSectionELF.cpp
+++ b/lib/MC/MCSectionELF.cpp
@@ -10,6 +10,7 @@
 #include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/ELF.h"
 #include "llvm/Support/raw_ostream.h"
@@ -32,10 +33,14 @@ bool MCSectionELF::ShouldOmitSectionDirective(StringRef Name,
 }
 
 void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI,
-                                        raw_ostream &OS) const {
+                                        raw_ostream &OS,
+                                        const MCExpr *Subsection) const {
 
   if (ShouldOmitSectionDirective(SectionName, MAI)) {
-    OS << '\t' << getSectionName() << '\n';
+    OS << '\t' << getSectionName();
+    if (Subsection)
+      OS << '\t' << *Subsection;
+    OS << '\n';
     return;
   }
 
@@ -129,6 +134,9 @@ void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI,
   if (Flags & ELF::SHF_GROUP)
     OS << "," << Group->getName() << ",comdat";
   OS << '\n';
+
+  if (Subsection)
+    OS << "\t.subsection\t" << *Subsection << '\n';
 }
 
 bool MCSectionELF::UseCodeAlign() const {
diff --git a/lib/MC/MCSectionMachO.cpp b/lib/MC/MCSectionMachO.cpp
index fc32315..8704513 100644
--- a/lib/MC/MCSectionMachO.cpp
+++ b/lib/MC/MCSectionMachO.cpp
@@ -91,7 +91,8 @@ MCSectionMachO::MCSectionMachO(StringRef Segment, StringRef Section,
 }
 
 void MCSectionMachO::PrintSwitchToSection(const MCAsmInfo &MAI,
-                                          raw_ostream &OS) const {
+                                          raw_ostream &OS,
+                                          const MCExpr *Subsection) const {
   OS << "\t.section\t" << getSegmentName() << ',' << getSectionName();
 
   // Get the section type and attributes.
diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp
index d02e553..8f1895e 100644
--- a/lib/MC/MCStreamer.cpp
+++ b/lib/MC/MCStreamer.cpp
@@ -24,8 +24,7 @@ using namespace llvm;
 MCStreamer::MCStreamer(StreamerKind Kind, MCContext &Ctx)
     : Kind(Kind), Context(Ctx), EmitEHFrame(true), EmitDebugFrame(false),
       CurrentW64UnwindInfo(0), LastSymbol(0), AutoInitSections(false) {
-  const MCSection *section = 0;
-  SectionStack.push_back(std::make_pair(section, section));
+  SectionStack.push_back(std::pair<MCSectionSubPair, MCSectionSubPair>());
 }
 
 MCStreamer::~MCStreamer() {
@@ -36,13 +35,13 @@ MCStreamer::~MCStreamer() {
 void MCStreamer::reset() {
   for (unsigned i = 0; i < getNumW64UnwindInfos(); ++i)
     delete W64UnwindInfos[i];
+  W64UnwindInfos.clear();
   EmitEHFrame = true;
   EmitDebugFrame = false;
   CurrentW64UnwindInfo = 0;
   LastSymbol = 0;
-  const MCSection *section = 0;
   SectionStack.clear();
-  SectionStack.push_back(std::make_pair(section, section));
+  SectionStack.push_back(std::pair<MCSectionSubPair, MCSectionSubPair>());
 }
 
 const MCExpr *MCStreamer::BuildSymbolDiff(MCContext &Context,
@@ -188,15 +187,15 @@ void MCStreamer::EmitEHSymAttributes(const MCSymbol *Symbol,
 
 void MCStreamer::EmitLabel(MCSymbol *Symbol) {
   assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
-  assert(getCurrentSection() && "Cannot emit before setting section!");
-  Symbol->setSection(*getCurrentSection());
+  assert(getCurrentSection().first && "Cannot emit before setting section!");
+  Symbol->setSection(*getCurrentSection().first);
   LastSymbol = Symbol;
 }
 
 void MCStreamer::EmitDebugLabel(MCSymbol *Symbol) {
   assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
-  assert(getCurrentSection() && "Cannot emit before setting section!");
-  Symbol->setSection(*getCurrentSection());
+  assert(getCurrentSection().first && "Cannot emit before setting section!");
+  Symbol->setSection(*getCurrentSection().first);
   LastSymbol = Symbol;
 }
 
diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp
index 6dffed7..518b59e 100644
--- a/lib/MC/WinCOFFObjectWriter.cpp
+++ b/lib/MC/WinCOFFObjectWriter.cpp
@@ -147,8 +147,7 @@ public:
   object_t *createCOFFEntity(StringRef Name, list_t &List);
 
   void DefineSection(MCSectionData const &SectionData);
-  void DefineSymbol(MCSymbol const &Symbol,
-                    MCSymbolData const &SymbolData,
+  void DefineSymbol(MCSymbolData const &SymbolData,
                     MCAssembler &Assembler);
 
   void MakeSymbolReal(COFFSymbol &S, size_t Index);
@@ -410,25 +409,23 @@ void WinCOFFObjectWriter::DefineSection(MCSectionData const &SectionData) {
 
 /// This function takes a section data object from the assembler
 /// and creates the associated COFF symbol staging object.
-void WinCOFFObjectWriter::DefineSymbol(MCSymbol const &Symbol,
-                                       MCSymbolData const &SymbolData,
+void WinCOFFObjectWriter::DefineSymbol(MCSymbolData const &SymbolData,
                                        MCAssembler &Assembler) {
+  MCSymbol const &Symbol = SymbolData.getSymbol();
   COFFSymbol *coff_symbol = GetOrCreateCOFFSymbol(&Symbol);
-
-  coff_symbol->Data.Type         = (SymbolData.getFlags() & 0x0000FFFF) >>  0;
-  coff_symbol->Data.StorageClass = (SymbolData.getFlags() & 0x00FF0000) >> 16;
+  SymbolMap[&Symbol] = coff_symbol;
 
   if (SymbolData.getFlags() & COFF::SF_WeakExternal) {
     coff_symbol->Data.StorageClass = COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL;
 
     if (Symbol.isVariable()) {
-      coff_symbol->Data.StorageClass = COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL;
+      const MCSymbolRefExpr *SymRef =
+        dyn_cast<MCSymbolRefExpr>(Symbol.getVariableValue());
 
-      // FIXME: This assert message isn't very good.
-      assert(Symbol.getVariableValue()->getKind() == MCExpr::SymbolRef &&
-              "Value must be a SymbolRef!");
+      if (!SymRef)
+        report_fatal_error("Weak externals may only alias symbols");
 
-      coff_symbol->Other = GetOrCreateCOFFSymbol(&Symbol);
+      coff_symbol->Other = GetOrCreateCOFFSymbol(&SymRef->getSymbol());
     } else {
       std::string WeakName = std::string(".weak.")
                            +  Symbol.getName().str()
@@ -448,23 +445,29 @@ void WinCOFFObjectWriter::DefineSymbol(MCSymbol const &Symbol,
     coff_symbol->Aux[0].Aux.WeakExternal.TagIndex = 0;
     coff_symbol->Aux[0].Aux.WeakExternal.Characteristics =
       COFF::IMAGE_WEAK_EXTERN_SEARCH_LIBRARY;
-  }
 
-  // If no storage class was specified in the streamer, define it here.
-  if (coff_symbol->Data.StorageClass == 0) {
-    bool external = SymbolData.isExternal() || (SymbolData.Fragment == NULL);
+    coff_symbol->MCData = &SymbolData;
+  } else {
+    const MCSymbolData &ResSymData =
+      Assembler.getSymbolData(Symbol.AliasedSymbol());
 
-    coff_symbol->Data.StorageClass =
-      external ? COFF::IMAGE_SYM_CLASS_EXTERNAL : COFF::IMAGE_SYM_CLASS_STATIC;
-  }
+    coff_symbol->Data.Type         = (ResSymData.getFlags() & 0x0000FFFF) >>  0;
+    coff_symbol->Data.StorageClass = (ResSymData.getFlags() & 0x00FF0000) >> 16;
 
-  if (SymbolData.Fragment != NULL)
-    coff_symbol->Section =
-      SectionMap[&SymbolData.Fragment->getParent()->getSection()];
+    // If no storage class was specified in the streamer, define it here.
+    if (coff_symbol->Data.StorageClass == 0) {
+      bool external = ResSymData.isExternal() || (ResSymData.Fragment == NULL);
 
-  // Bind internal COFF symbol to MC symbol.
-  coff_symbol->MCData = &SymbolData;
-  SymbolMap[&Symbol] = coff_symbol;
+      coff_symbol->Data.StorageClass =
+       external ? COFF::IMAGE_SYM_CLASS_EXTERNAL : COFF::IMAGE_SYM_CLASS_STATIC;
+    }
+
+    if (ResSymData.Fragment != NULL)
+      coff_symbol->Section =
+        SectionMap[&ResSymData.Fragment->getParent()->getSection()];
+
+    coff_symbol->MCData = &ResSymData;
+  }
 }
 
 /// making a section real involves assigned it a number and putting
@@ -620,9 +623,7 @@ void WinCOFFObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm,
   for (MCAssembler::const_symbol_iterator i = Asm.symbol_begin(),
                                           e = Asm.symbol_end(); i != e; i++) {
     if (ExportSymbol(*i, Asm)) {
-      const MCSymbol &Alias = i->getSymbol();
-      const MCSymbol &Symbol = Alias.AliasedSymbol();
-      DefineSymbol(Alias, Asm.getSymbolData(Symbol), Asm);
+      DefineSymbol(*i, Asm);
     }
   }
 }
@@ -689,13 +690,8 @@ void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm,
   ++Reloc.Symb->Relocations;
 
   Reloc.Data.VirtualAddress += Fixup.getOffset();
-
-  unsigned FixupKind = Fixup.getKind();
-
-  if (CrossSection)
-    FixupKind = FK_PCRel_4;
-
-  Reloc.Data.Type = TargetObjectWriter->getRelocType(FixupKind);
+  Reloc.Data.Type = TargetObjectWriter->getRelocType(Target, Fixup,
+                                                     CrossSection);
 
   // FIXME: Can anyone explain what this does other than adjust for the size
   // of the offset?
diff --git a/lib/Object/CMakeLists.txt b/lib/Object/CMakeLists.txt
index c20fc0c..4ed129f 100644
--- a/lib/Object/CMakeLists.txt
+++ b/lib/Object/CMakeLists.txt
@@ -4,7 +4,6 @@ add_llvm_library(LLVMObject
   COFFObjectFile.cpp
   ELFObjectFile.cpp
   Error.cpp
-  MachOObject.cpp
   MachOObjectFile.cpp
   Object.cpp
   ObjectFile.cpp
diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp
index ca90e0e..70fec32 100644
--- a/lib/Object/COFFObjectFile.cpp
+++ b/lib/Object/COFFObjectFile.cpp
@@ -429,7 +429,7 @@ relocation_iterator COFFObjectFile::getSectionRelEnd(DataRefImpl Sec) const {
 }
 
 COFFObjectFile::COFFObjectFile(MemoryBuffer *Object, error_code &ec)
-  : ObjectFile(Binary::ID_COFF, Object, ec)
+  : ObjectFile(Binary::ID_COFF, Object)
   , Header(0)
   , SectionTable(0)
   , SymbolTable(0)
@@ -705,8 +705,7 @@ error_code COFFObjectFile::getRelocationNext(DataRefImpl Rel,
 }
 error_code COFFObjectFile::getRelocationAddress(DataRefImpl Rel,
                                                 uint64_t &Res) const {
-  Res = toRel(Rel)->VirtualAddress;
-  return object_error::success;
+  report_fatal_error("getRelocationAddress not implemented in COFFObjectFile");
 }
 error_code COFFObjectFile::getRelocationOffset(DataRefImpl Rel,
                                                uint64_t &Res) const {
diff --git a/lib/Object/MachOObject.cpp b/lib/Object/MachOObject.cpp
deleted file mode 100644
index c9c341a..0000000
--- a/lib/Object/MachOObject.cpp
+++ /dev/null
@@ -1,422 +0,0 @@
-//===- MachOObject.cpp - Mach-O Object File Wrapper -----------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Object/MachOObject.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/DataExtractor.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/Host.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/SwapByteOrder.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-using namespace llvm::object;
-
-/* Translation Utilities */
-
-template<typename T>
-static void SwapValue(T &Value) {
-  Value = sys::SwapByteOrder(Value);
-}
-
-template<typename T>
-static void SwapStruct(T &Value);
-
-template<typename T>
-static void ReadInMemoryStruct(const MachOObject &MOO,
-                               StringRef Buffer, uint64_t Base,
-                               InMemoryStruct<T> &Res) {
-  typedef T struct_type;
-  uint64_t Size = sizeof(struct_type);
-
-  // Check that the buffer contains the expected data.
-  if (Base + Size >  Buffer.size()) {
-    Res = 0;
-    return;
-  }
-
-  // Check whether we can return a direct pointer.
-  struct_type *Ptr = reinterpret_cast<struct_type *>(
-                       const_cast<char *>(Buffer.data() + Base));
-  if (!MOO.isSwappedEndian()) {
-    Res = Ptr;
-    return;
-  }
-
-  // Otherwise, copy the struct and translate the values.
-  Res = *Ptr;
-  SwapStruct(*Res);
-}
-
-/* *** */
-
-MachOObject::MachOObject(MemoryBuffer *Buffer_, bool IsLittleEndian_,
-                         bool Is64Bit_)
-  : Buffer(Buffer_), IsLittleEndian(IsLittleEndian_), Is64Bit(Is64Bit_),
-    IsSwappedEndian(IsLittleEndian != sys::isLittleEndianHost()),
-    HasStringTable(false), LoadCommands(0), NumLoadedCommands(0) {
-  // Load the common header.
-  memcpy(&Header, Buffer->getBuffer().data(), sizeof(Header));
-  if (IsSwappedEndian) {
-    SwapValue(Header.Magic);
-    SwapValue(Header.CPUType);
-    SwapValue(Header.CPUSubtype);
-    SwapValue(Header.FileType);
-    SwapValue(Header.NumLoadCommands);
-    SwapValue(Header.SizeOfLoadCommands);
-    SwapValue(Header.Flags);
-  }
-
-  if (is64Bit()) {
-    memcpy(&Header64Ext, Buffer->getBuffer().data() + sizeof(Header),
-           sizeof(Header64Ext));
-    if (IsSwappedEndian) {
-      SwapValue(Header64Ext.Reserved);
-    }
-  }
-
-  // Create the load command array if sane.
-  if (getHeader().NumLoadCommands < (1 << 20))
-    LoadCommands = new LoadCommandInfo[getHeader().NumLoadCommands];
-}
-
-MachOObject::~MachOObject() {
-  delete [] LoadCommands;
-}
-
-MachOObject *MachOObject::LoadFromBuffer(MemoryBuffer *Buffer,
-                                         std::string *ErrorStr) {
-  // First, check the magic value and initialize the basic object info.
-  bool IsLittleEndian = false, Is64Bit = false;
-  StringRef Magic = Buffer->getBuffer().slice(0, 4);
-  if (Magic == "\xFE\xED\xFA\xCE") {
-  }  else if (Magic == "\xCE\xFA\xED\xFE") {
-    IsLittleEndian = true;
-  } else if (Magic == "\xFE\xED\xFA\xCF") {
-    Is64Bit = true;
-  } else if (Magic == "\xCF\xFA\xED\xFE") {
-    IsLittleEndian = true;
-    Is64Bit = true;
-  } else {
-    if (ErrorStr) *ErrorStr = "not a Mach object file (invalid magic)";
-    return 0;
-  }
-
-  // Ensure that the at least the full header is present.
-  unsigned HeaderSize = Is64Bit ? macho::Header64Size : macho::Header32Size;
-  if (Buffer->getBufferSize() < HeaderSize) {
-    if (ErrorStr) *ErrorStr = "not a Mach object file (invalid header)";
-    return 0;
-  }
-
-  OwningPtr<MachOObject> Object(new MachOObject(Buffer, IsLittleEndian,
-                                                Is64Bit));
-
-  // Check for bogus number of load commands.
-  if (Object->getHeader().NumLoadCommands >= (1 << 20)) {
-    if (ErrorStr) *ErrorStr = "not a Mach object file (unreasonable header)";
-    return 0;
-  }
-
-  if (ErrorStr) *ErrorStr = "";
-  return Object.take();
-}
-
-StringRef MachOObject::getData(size_t Offset, size_t Size) const {
-  return Buffer->getBuffer().substr(Offset,Size);
-}
-
-void MachOObject::RegisterStringTable(macho::SymtabLoadCommand &SLC) {
-  HasStringTable = true;
-  StringTable = Buffer->getBuffer().substr(SLC.StringTableOffset,
-                                           SLC.StringTableSize);
-}
-
-const MachOObject::LoadCommandInfo &
-MachOObject::getLoadCommandInfo(unsigned Index) const {
-  assert(Index < getHeader().NumLoadCommands && "Invalid index!");
-
-  // Load the command, if necessary.
-  if (Index >= NumLoadedCommands) {
-    uint64_t Offset;
-    if (Index == 0) {
-      Offset = getHeaderSize();
-    } else {
-      const LoadCommandInfo &Prev = getLoadCommandInfo(Index - 1);
-      Offset = Prev.Offset + Prev.Command.Size;
-    }
-
-    LoadCommandInfo &Info = LoadCommands[Index];
-    memcpy(&Info.Command, Buffer->getBuffer().data() + Offset,
-           sizeof(macho::LoadCommand));
-    if (IsSwappedEndian) {
-      SwapValue(Info.Command.Type);
-      SwapValue(Info.Command.Size);
-    }
-    Info.Offset = Offset;
-    NumLoadedCommands = Index + 1;
-  }
-
-  return LoadCommands[Index];
-}
-
-template<>
-void SwapStruct(macho::SegmentLoadCommand &Value) {
-  SwapValue(Value.Type);
-  SwapValue(Value.Size);
-  SwapValue(Value.VMAddress);
-  SwapValue(Value.VMSize);
-  SwapValue(Value.FileOffset);
-  SwapValue(Value.FileSize);
-  SwapValue(Value.MaxVMProtection);
-  SwapValue(Value.InitialVMProtection);
-  SwapValue(Value.NumSections);
-  SwapValue(Value.Flags);
-}
-void MachOObject::ReadSegmentLoadCommand(const LoadCommandInfo &LCI,
-                         InMemoryStruct<macho::SegmentLoadCommand> &Res) const {
-  ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res);
-}
-
-template<>
-void SwapStruct(macho::Segment64LoadCommand &Value) {
-  SwapValue(Value.Type);
-  SwapValue(Value.Size);
-  SwapValue(Value.VMAddress);
-  SwapValue(Value.VMSize);
-  SwapValue(Value.FileOffset);
-  SwapValue(Value.FileSize);
-  SwapValue(Value.MaxVMProtection);
-  SwapValue(Value.InitialVMProtection);
-  SwapValue(Value.NumSections);
-  SwapValue(Value.Flags);
-}
-void MachOObject::ReadSegment64LoadCommand(const LoadCommandInfo &LCI,
-                       InMemoryStruct<macho::Segment64LoadCommand> &Res) const {
-  ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res);
-}
-
-template<>
-void SwapStruct(macho::SymtabLoadCommand &Value) {
-  SwapValue(Value.Type);
-  SwapValue(Value.Size);
-  SwapValue(Value.SymbolTableOffset);
-  SwapValue(Value.NumSymbolTableEntries);
-  SwapValue(Value.StringTableOffset);
-  SwapValue(Value.StringTableSize);
-}
-void MachOObject::ReadSymtabLoadCommand(const LoadCommandInfo &LCI,
-                          InMemoryStruct<macho::SymtabLoadCommand> &Res) const {
-  ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res);
-}
-
-template<>
-void SwapStruct(macho::DysymtabLoadCommand &Value) {
-  SwapValue(Value.Type);
-  SwapValue(Value.Size);
-  SwapValue(Value.LocalSymbolsIndex);
-  SwapValue(Value.NumLocalSymbols);
-  SwapValue(Value.ExternalSymbolsIndex);
-  SwapValue(Value.NumExternalSymbols);
-  SwapValue(Value.UndefinedSymbolsIndex);
-  SwapValue(Value.NumUndefinedSymbols);
-  SwapValue(Value.TOCOffset);
-  SwapValue(Value.NumTOCEntries);
-  SwapValue(Value.ModuleTableOffset);
-  SwapValue(Value.NumModuleTableEntries);
-  SwapValue(Value.ReferenceSymbolTableOffset);
-  SwapValue(Value.NumReferencedSymbolTableEntries);
-  SwapValue(Value.IndirectSymbolTableOffset);
-  SwapValue(Value.NumIndirectSymbolTableEntries);
-  SwapValue(Value.ExternalRelocationTableOffset);
-  SwapValue(Value.NumExternalRelocationTableEntries);
-  SwapValue(Value.LocalRelocationTableOffset);
-  SwapValue(Value.NumLocalRelocationTableEntries);
-}
-void MachOObject::ReadDysymtabLoadCommand(const LoadCommandInfo &LCI,
-                        InMemoryStruct<macho::DysymtabLoadCommand> &Res) const {
-  ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res);
-}
-
-template<>
-void SwapStruct(macho::LinkeditDataLoadCommand &Value) {
-  SwapValue(Value.Type);
-  SwapValue(Value.Size);
-  SwapValue(Value.DataOffset);
-  SwapValue(Value.DataSize);
-}
-void MachOObject::ReadLinkeditDataLoadCommand(const LoadCommandInfo &LCI,
-                    InMemoryStruct<macho::LinkeditDataLoadCommand> &Res) const {
-  ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res);
-}
-
-template<>
-void SwapStruct(macho::LinkerOptionsLoadCommand &Value) {
-  SwapValue(Value.Type);
-  SwapValue(Value.Size);
-  SwapValue(Value.Count);
-}
-void MachOObject::ReadLinkerOptionsLoadCommand(const LoadCommandInfo &LCI,
-                   InMemoryStruct<macho::LinkerOptionsLoadCommand> &Res) const {
-  ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res);
-}
-
-template<>
-void SwapStruct(macho::IndirectSymbolTableEntry &Value) {
-  SwapValue(Value.Index);
-}
-void
-MachOObject::ReadIndirectSymbolTableEntry(const macho::DysymtabLoadCommand &DLC,
-                                          unsigned Index,
-                   InMemoryStruct<macho::IndirectSymbolTableEntry> &Res) const {
-  uint64_t Offset = (DLC.IndirectSymbolTableOffset +
-                     Index * sizeof(macho::IndirectSymbolTableEntry));
-  ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res);
-}
-
-
-template<>
-void SwapStruct(macho::Section &Value) {
-  SwapValue(Value.Address);
-  SwapValue(Value.Size);
-  SwapValue(Value.Offset);
-  SwapValue(Value.Align);
-  SwapValue(Value.RelocationTableOffset);
-  SwapValue(Value.NumRelocationTableEntries);
-  SwapValue(Value.Flags);
-  SwapValue(Value.Reserved1);
-  SwapValue(Value.Reserved2);
-}
-void MachOObject::ReadSection(const LoadCommandInfo &LCI,
-                              unsigned Index,
-                              InMemoryStruct<macho::Section> &Res) const {
-  assert(LCI.Command.Type == macho::LCT_Segment &&
-         "Unexpected load command info!");
-  uint64_t Offset = (LCI.Offset + sizeof(macho::SegmentLoadCommand) +
-                     Index * sizeof(macho::Section));
-  ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res);
-}
-
-template<>
-void SwapStruct(macho::Section64 &Value) {
-  SwapValue(Value.Address);
-  SwapValue(Value.Size);
-  SwapValue(Value.Offset);
-  SwapValue(Value.Align);
-  SwapValue(Value.RelocationTableOffset);
-  SwapValue(Value.NumRelocationTableEntries);
-  SwapValue(Value.Flags);
-  SwapValue(Value.Reserved1);
-  SwapValue(Value.Reserved2);
-  SwapValue(Value.Reserved3);
-}
-void MachOObject::ReadSection64(const LoadCommandInfo &LCI,
-                                unsigned Index,
-                                InMemoryStruct<macho::Section64> &Res) const {
-  assert(LCI.Command.Type == macho::LCT_Segment64 &&
-         "Unexpected load command info!");
-  uint64_t Offset = (LCI.Offset + sizeof(macho::Segment64LoadCommand) +
-                     Index * sizeof(macho::Section64));
-  ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res);
-}
-
-template<>
-void SwapStruct(macho::RelocationEntry &Value) {
-  SwapValue(Value.Word0);
-  SwapValue(Value.Word1);
-}
-void MachOObject::ReadRelocationEntry(uint64_t RelocationTableOffset,
-                                      unsigned Index,
-                            InMemoryStruct<macho::RelocationEntry> &Res) const {
-  uint64_t Offset = (RelocationTableOffset +
-                     Index * sizeof(macho::RelocationEntry));
-  ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res);
-}
-
-template<>
-void SwapStruct(macho::SymbolTableEntry &Value) {
-  SwapValue(Value.StringIndex);
-  SwapValue(Value.Flags);
-  SwapValue(Value.Value);
-}
-void MachOObject::ReadSymbolTableEntry(uint64_t SymbolTableOffset,
-                                       unsigned Index,
-                           InMemoryStruct<macho::SymbolTableEntry> &Res) const {
-  uint64_t Offset = (SymbolTableOffset +
-                     Index * sizeof(macho::SymbolTableEntry));
-  ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res);
-}
-
-template<>
-void SwapStruct(macho::Symbol64TableEntry &Value) {
-  SwapValue(Value.StringIndex);
-  SwapValue(Value.Flags);
-  SwapValue(Value.Value);
-}
-void MachOObject::ReadSymbol64TableEntry(uint64_t SymbolTableOffset,
-                                       unsigned Index,
-                         InMemoryStruct<macho::Symbol64TableEntry> &Res) const {
-  uint64_t Offset = (SymbolTableOffset +
-                     Index * sizeof(macho::Symbol64TableEntry));
-  ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res);
-}
-
-template<>
-void SwapStruct(macho::DataInCodeTableEntry &Value) {
-  SwapValue(Value.Offset);
-  SwapValue(Value.Length);
-  SwapValue(Value.Kind);
-}
-void MachOObject::ReadDataInCodeTableEntry(uint64_t TableOffset,
-                                           unsigned Index,
-                       InMemoryStruct<macho::DataInCodeTableEntry> &Res) const {
-  uint64_t Offset = (TableOffset +
-                     Index * sizeof(macho::DataInCodeTableEntry));
-  ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res);
-}
-
-void MachOObject::ReadULEB128s(uint64_t Index,
-                               SmallVectorImpl<uint64_t> &Out) const {
-  DataExtractor extractor(Buffer->getBuffer(), true, 0);
-
-  uint32_t offset = Index;
-  uint64_t data = 0;
-  while (uint64_t delta = extractor.getULEB128(&offset)) {
-    data += delta;
-    Out.push_back(data);
-  }
-}
-
-/* ** */
-// Object Dumping Facilities
-void MachOObject::dump() const { print(dbgs()); dbgs() << '\n'; }
-void MachOObject::dumpHeader() const { printHeader(dbgs()); dbgs() << '\n'; }
-
-void MachOObject::printHeader(raw_ostream &O) const {
-  O << "('cputype', " << Header.CPUType << ")\n";
-  O << "('cpusubtype', " << Header.CPUSubtype << ")\n";
-  O << "('filetype', " << Header.FileType << ")\n";
-  O << "('num_load_commands', " << Header.NumLoadCommands << ")\n";
-  O << "('load_commands_size', " << Header.SizeOfLoadCommands << ")\n";
-  O << "('flag', " << Header.Flags << ")\n";
-
-  // Print extended header if 64-bit.
-  if (is64Bit())
-    O << "('reserved', " << Header64Ext.Reserved << ")\n";
-}
-
-void MachOObject::print(raw_ostream &O) const {
-  O << "Header:\n";
-  printHeader(O);
-  O << "Load Commands:\n";
-
-  O << "Buffer:\n";
-}
diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp
index 6501df9..dfd8d3d 100644
--- a/lib/Object/MachOObjectFile.cpp
+++ b/lib/Object/MachOObjectFile.cpp
@@ -15,7 +15,9 @@
 #include "llvm/Object/MachO.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/Object/MachOFormat.h"
+#include "llvm/Support/DataExtractor.h"
 #include "llvm/Support/Format.h"
+#include "llvm/Support/Host.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include <cctype>
 #include <cstring>
@@ -27,236 +29,560 @@ using namespace object;
 namespace llvm {
 namespace object {
 
-MachOObjectFile::MachOObjectFile(MemoryBuffer *Object, MachOObject *MOO,
-                                 error_code &ec)
-    : ObjectFile(Binary::ID_MachO, Object, ec),
-      MachOObj(MOO),
-      RegisteredStringTable(std::numeric_limits<uint32_t>::max()) {
-  DataRefImpl DRI;
-  moveToNextSection(DRI);
-  uint32_t LoadCommandCount = MachOObj->getHeader().NumLoadCommands;
-  while (DRI.d.a < LoadCommandCount) {
-    Sections.push_back(DRI);
-    DRI.d.b++;
-    moveToNextSection(DRI);
+struct SymbolTableEntryBase {
+  uint32_t StringIndex;
+  uint8_t Type;
+  uint8_t SectionIndex;
+  uint16_t Flags;
+};
+
+struct SectionBase {
+  char Name[16];
+  char SegmentName[16];
+};
+
+template<typename T>
+static void SwapValue(T &Value) {
+  Value = sys::SwapByteOrder(Value);
+}
+
+template<typename T>
+static void SwapStruct(T &Value);
+
+template<>
+void SwapStruct(macho::RelocationEntry &H) {
+  SwapValue(H.Word0);
+  SwapValue(H.Word1);
+}
+
+template<>
+void SwapStruct(macho::LoadCommand &L) {
+  SwapValue(L.Type);
+  SwapValue(L.Size);
+}
+
+template<>
+void SwapStruct(SymbolTableEntryBase &S) {
+  SwapValue(S.StringIndex);
+  SwapValue(S.Flags);
+}
+
+template<>
+void SwapStruct(macho::Section &S) {
+  SwapValue(S.Address);
+  SwapValue(S.Size);
+  SwapValue(S.Offset);
+  SwapValue(S.Align);
+  SwapValue(S.RelocationTableOffset);
+  SwapValue(S.NumRelocationTableEntries);
+  SwapValue(S.Flags);
+  SwapValue(S.Reserved1);
+  SwapValue(S.Reserved2);
+}
+
+template<>
+void SwapStruct(macho::Section64 &S) {
+  SwapValue(S.Address);
+  SwapValue(S.Size);
+  SwapValue(S.Offset);
+  SwapValue(S.Align);
+  SwapValue(S.RelocationTableOffset);
+  SwapValue(S.NumRelocationTableEntries);
+  SwapValue(S.Flags);
+  SwapValue(S.Reserved1);
+  SwapValue(S.Reserved2);
+  SwapValue(S.Reserved3);
+}
+
+template<>
+void SwapStruct(macho::SymbolTableEntry &S) {
+  SwapValue(S.StringIndex);
+  SwapValue(S.Flags);
+  SwapValue(S.Value);
+}
+
+template<>
+void SwapStruct(macho::Symbol64TableEntry &S) {
+  SwapValue(S.StringIndex);
+  SwapValue(S.Flags);
+  SwapValue(S.Value);
+}
+
+template<>
+void SwapStruct(macho::Header &H) {
+  SwapValue(H.Magic);
+  SwapValue(H.CPUType);
+  SwapValue(H.CPUSubtype);
+  SwapValue(H.FileType);
+  SwapValue(H.NumLoadCommands);
+  SwapValue(H.SizeOfLoadCommands);
+  SwapValue(H.Flags);
+}
+
+template<>
+void SwapStruct(macho::Header64Ext &E) {
+  SwapValue(E.Reserved);
+}
+
+template<>
+void SwapStruct(macho::SymtabLoadCommand &C) {
+  SwapValue(C.Type);
+  SwapValue(C.Size);
+  SwapValue(C.SymbolTableOffset);
+  SwapValue(C.NumSymbolTableEntries);
+  SwapValue(C.StringTableOffset);
+  SwapValue(C.StringTableSize);
+}
+
+template<>
+void SwapStruct(macho::DysymtabLoadCommand &C) {
+  SwapValue(C.Type);
+  SwapValue(C.Size);
+  SwapValue(C.LocalSymbolsIndex);
+  SwapValue(C.NumLocalSymbols);
+  SwapValue(C.ExternalSymbolsIndex);
+  SwapValue(C.NumExternalSymbols);
+  SwapValue(C.UndefinedSymbolsIndex);
+  SwapValue(C.NumUndefinedSymbols);
+  SwapValue(C.TOCOffset);
+  SwapValue(C.NumTOCEntries);
+  SwapValue(C.ModuleTableOffset);
+  SwapValue(C.NumModuleTableEntries);
+  SwapValue(C.ReferenceSymbolTableOffset);
+  SwapValue(C.NumReferencedSymbolTableEntries);
+  SwapValue(C.IndirectSymbolTableOffset);
+  SwapValue(C.NumIndirectSymbolTableEntries);
+  SwapValue(C.ExternalRelocationTableOffset);
+  SwapValue(C.NumExternalRelocationTableEntries);
+  SwapValue(C.LocalRelocationTableOffset);
+  SwapValue(C.NumLocalRelocationTableEntries);
+}
+
+template<>
+void SwapStruct(macho::LinkeditDataLoadCommand &C) {
+  SwapValue(C.Type);
+  SwapValue(C.Size);
+  SwapValue(C.DataOffset);
+  SwapValue(C.DataSize);
+}
+
+template<>
+void SwapStruct(macho::SegmentLoadCommand &C) {
+  SwapValue(C.Type);
+  SwapValue(C.Size);
+  SwapValue(C.VMAddress);
+  SwapValue(C.VMSize);
+  SwapValue(C.FileOffset);
+  SwapValue(C.FileSize);
+  SwapValue(C.MaxVMProtection);
+  SwapValue(C.InitialVMProtection);
+  SwapValue(C.NumSections);
+  SwapValue(C.Flags);
+}
+
+template<>
+void SwapStruct(macho::Segment64LoadCommand &C) {
+  SwapValue(C.Type);
+  SwapValue(C.Size);
+  SwapValue(C.VMAddress);
+  SwapValue(C.VMSize);
+  SwapValue(C.FileOffset);
+  SwapValue(C.FileSize);
+  SwapValue(C.MaxVMProtection);
+  SwapValue(C.InitialVMProtection);
+  SwapValue(C.NumSections);
+  SwapValue(C.Flags);
+}
+
+template<>
+void SwapStruct(macho::IndirectSymbolTableEntry &C) {
+  SwapValue(C.Index);
+}
+
+template<>
+void SwapStruct(macho::LinkerOptionsLoadCommand &C) {
+  SwapValue(C.Type);
+  SwapValue(C.Size);
+  SwapValue(C.Count);
+}
+
+template<>
+void SwapStruct(macho::DataInCodeTableEntry &C) {
+  SwapValue(C.Offset);
+  SwapValue(C.Length);
+  SwapValue(C.Kind);
+}
+
+template<typename T>
+T getStruct(const MachOObjectFile *O, const char *P) {
+  T Cmd;
+  memcpy(&Cmd, P, sizeof(T));
+  if (O->isLittleEndian() != sys::IsLittleEndianHost)
+    SwapStruct(Cmd);
+  return Cmd;
+}
+
+static uint32_t
+getSegmentLoadCommandNumSections(const MachOObjectFile *O,
+                                 const MachOObjectFile::LoadCommandInfo &L) {
+  if (O->is64Bit()) {
+    macho::Segment64LoadCommand S = O->getSegment64LoadCommand(L);
+    return S.NumSections;
   }
+  macho::SegmentLoadCommand S = O->getSegmentLoadCommand(L);
+  return S.NumSections;
 }
 
+static const char *
+getSectionPtr(const MachOObjectFile *O, MachOObjectFile::LoadCommandInfo L,
+              unsigned Sec) {
+  uintptr_t CommandAddr = reinterpret_cast<uintptr_t>(L.Ptr);
 
-ObjectFile *ObjectFile::createMachOObjectFile(MemoryBuffer *Buffer) {
+  bool Is64 = O->is64Bit();
+  unsigned SegmentLoadSize = Is64 ? sizeof(macho::Segment64LoadCommand) :
+                                    sizeof(macho::SegmentLoadCommand);
+  unsigned SectionSize = Is64 ? sizeof(macho::Section64) :
+                                sizeof(macho::Section);
+  // Section Sec is the Sec'th fixed-size record after the segment command.
+  uintptr_t SectionAddr = CommandAddr + SegmentLoadSize + Sec * SectionSize;
+  return reinterpret_cast<const char*>(SectionAddr);
+}
+
+static const char *getPtr(const MachOObjectFile *O, size_t Offset) {
+  return O->getData().substr(Offset, 1).data();
+}
+
+static SymbolTableEntryBase
+getSymbolTableEntryBase(const MachOObjectFile *O, DataRefImpl DRI) {
+  const char *P = reinterpret_cast<const char *>(DRI.p);
+  return getStruct<SymbolTableEntryBase>(O, P);
+}
+
+static StringRef parseSegmentOrSectionName(const char *P) {
+  if (P[15] == 0)
+    // Null terminated.
+    return P;
+  // Not null terminated, so this is a 16 char string.
+  return StringRef(P, 16);
+}
+
+// Helper to advance a section or symbol iterator multiple increments at a time.
+template<class T>
+static error_code advance(T &it, size_t Val) {
   error_code ec;
-  std::string Err;
-  MachOObject *MachOObj = MachOObject::LoadFromBuffer(Buffer, &Err);
-  if (!MachOObj)
-    return NULL;
-  // MachOObject takes ownership of the Buffer we passed to it, and
-  // MachOObjectFile does, too, so we need to make sure they don't get the
-  // same object. A MemoryBuffer is cheap (it's just a reference to memory,
-  // not a copy of the memory itself), so just make a new copy here for
-  // the MachOObjectFile.
-  MemoryBuffer *NewBuffer =
-    MemoryBuffer::getMemBuffer(Buffer->getBuffer(),
-                               Buffer->getBufferIdentifier(), false);
-  return new MachOObjectFile(NewBuffer, MachOObj, ec);
-}
-
-/*===-- Symbols -----------------------------------------------------------===*/
-
-void MachOObjectFile::moveToNextSymbol(DataRefImpl &DRI) const {
-  uint32_t LoadCommandCount = MachOObj->getHeader().NumLoadCommands;
-  while (DRI.d.a < LoadCommandCount) {
-    LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
-    if (LCI.Command.Type == macho::LCT_Symtab) {
-      InMemoryStruct<macho::SymtabLoadCommand> SymtabLoadCmd;
-      MachOObj->ReadSymtabLoadCommand(LCI, SymtabLoadCmd);
-      if (DRI.d.b < SymtabLoadCmd->NumSymbolTableEntries)
-        return;
+  // Stop at the first failure so a later increment cannot mask the error.
+  while (Val-- && !ec)
+    it.increment(ec);
+  return ec;
+}
+
+template<class T>
+static void advanceTo(T &it, size_t Val) {
+  if (error_code ec = advance(it, Val))
+    report_fatal_error(ec.message());
+}
+
+static unsigned getCPUType(const MachOObjectFile *O) {
+  return O->getHeader().CPUType;
+}
+
+// Prints the target of relocation RE to fmt: the symbol or section name it
+// resolves to, or the raw hex value for an unmatched scattered relocation.
+static void printRelocationTargetName(const MachOObjectFile *O,
+                                      const macho::RelocationEntry &RE,
+                                      raw_string_ostream &fmt) {
+  bool IsScattered = O->isRelocationScattered(RE);
+
+  // Target of a scattered relocation is an address.  In the interest of
+  // generating pretty output, scan through the symbol table looking for a
+  // symbol that aligns with that address.  If we find one, print it.
+  // Otherwise, we just print the hex address of the target.
+  if (IsScattered) {
+    uint32_t Val = O->getPlainRelocationSymbolNum(RE);
+
+    error_code ec;
+    for (symbol_iterator SI = O->begin_symbols(), SE = O->end_symbols();
+         SI != SE; SI.increment(ec)) {
+      if (ec) report_fatal_error(ec.message());
+      uint64_t Addr;
+      StringRef Name;
+
+      if ((ec = SI->getAddress(Addr)))
+        report_fatal_error(ec.message());
+      if (Addr != Val) continue;
+      if ((ec = SI->getName(Name)))
+        report_fatal_error(ec.message());
+      fmt << Name;
+      return;
+    }
+
+    // If we couldn't find a symbol that this relocation refers to, try
+    // to find a section beginning instead.
+    for (section_iterator SI = O->begin_sections(), SE = O->end_sections();
+         SI != SE; SI.increment(ec)) {
+      if (ec) report_fatal_error(ec.message());
+      uint64_t Addr;
+      StringRef Name;
+
+      if ((ec = SI->getAddress(Addr)))
+        report_fatal_error(ec.message());
+      if (Addr != Val) continue;
+      if ((ec = SI->getName(Name)))
+        report_fatal_error(ec.message());
+      fmt << Name;
+      return;
     }
 
-    DRI.d.a++;
-    DRI.d.b = 0;
+    fmt << format("0x%x", Val);
+    return;
   }
-}
 
-void MachOObjectFile::getSymbolTableEntry(DataRefImpl DRI,
-    InMemoryStruct<macho::SymbolTableEntry> &Res) const {
-  InMemoryStruct<macho::SymtabLoadCommand> SymtabLoadCmd;
-  LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
-  MachOObj->ReadSymtabLoadCommand(LCI, SymtabLoadCmd);
+  StringRef S;
+  bool isExtern = O->getPlainRelocationExternal(RE);
+  uint64_t Val = O->getPlainRelocationSymbolNum(RE); // Index, not address.
 
-  if (RegisteredStringTable != DRI.d.a) {
-    MachOObj->RegisterStringTable(*SymtabLoadCmd);
-    RegisteredStringTable = DRI.d.a;
+  if (isExtern) {
+    symbol_iterator SI = O->begin_symbols();
+    advanceTo(SI, Val);
+    SI->getName(S);
+  } else {
+    section_iterator SI = O->begin_sections();
+    advanceTo(SI, Val ? Val - 1 : 0); // Section ordinals are 1-based.
+    SI->getName(S);
   }
 
-  MachOObj->ReadSymbolTableEntry(SymtabLoadCmd->SymbolTableOffset, DRI.d.b,
-                                 Res);
+  fmt << S;
+}
+
+static uint32_t getPlainRelocationAddress(const macho::RelocationEntry &RE) {
+  return RE.Word0;
+}
+
+static unsigned
+getScatteredRelocationAddress(const macho::RelocationEntry &RE) {
+  return RE.Word0 & 0xffffff;
+}
+
+static bool getPlainRelocationPCRel(const MachOObjectFile *O,
+                                    const macho::RelocationEntry &RE) {
+  if (O->isLittleEndian())
+    return (RE.Word1 >> 24) & 1;
+  return (RE.Word1 >> 7) & 1;
+}
+
+static bool
+getScatteredRelocationPCRel(const MachOObjectFile *O,
+                            const macho::RelocationEntry &RE) {
+  return (RE.Word0 >> 30) & 1;
+}
+
+static unsigned getPlainRelocationLength(const MachOObjectFile *O,
+                                         const macho::RelocationEntry &RE) {
+  if (O->isLittleEndian())
+    return (RE.Word1 >> 25) & 3;
+  return (RE.Word1 >> 5) & 3;
+}
+
+static unsigned
+getScatteredRelocationLength(const macho::RelocationEntry &RE) {
+  return (RE.Word0 >> 28) & 3;
 }
 
-void MachOObjectFile::getSymbol64TableEntry(DataRefImpl DRI,
-    InMemoryStruct<macho::Symbol64TableEntry> &Res) const {
-  InMemoryStruct<macho::SymtabLoadCommand> SymtabLoadCmd;
-  LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
-  MachOObj->ReadSymtabLoadCommand(LCI, SymtabLoadCmd);
+static unsigned getPlainRelocationType(const MachOObjectFile *O,
+                                       const macho::RelocationEntry &RE) {
+  if (O->isLittleEndian())
+    return RE.Word1 >> 28;
+  return RE.Word1 & 0xf;
+}
 
-  if (RegisteredStringTable != DRI.d.a) {
-    MachOObj->RegisterStringTable(*SymtabLoadCmd);
-    RegisteredStringTable = DRI.d.a;
+static unsigned getScatteredRelocationType(const macho::RelocationEntry &RE) {
+  return (RE.Word0 >> 24) & 0xf;
+}
+
+static uint32_t getSectionFlags(const MachOObjectFile *O,
+                                DataRefImpl Sec) {
+  if (O->is64Bit()) {
+    macho::Section64 Sect = O->getSection64(Sec);
+    return Sect.Flags;
   }
+  macho::Section Sect = O->getSection(Sec);
+  return Sect.Flags;
+}
+
+MachOObjectFile::MachOObjectFile(MemoryBuffer *Object,
+                                 bool IsLittleEndian, bool Is64bits,
+                                 error_code &ec)
+    : ObjectFile(getMachOType(IsLittleEndian, Is64bits), Object),
+      SymtabLoadCmd(NULL), DysymtabLoadCmd(NULL) {
+  uint32_t LoadCommandCount = this->getHeader().NumLoadCommands;
+  // Nothing to scan; also keeps "LoadCommandCount - 1" below from wrapping.
+  if (LoadCommandCount == 0) return;
+
+  macho::LoadCommandType SegmentLoadType = is64Bit() ?
+    macho::LCT_Segment64 : macho::LCT_Segment;
+  MachOObjectFile::LoadCommandInfo Load = getFirstLoadCommandInfo();
+  for (unsigned I = 0; ; ++I) {
+    if (Load.C.Type == macho::LCT_Symtab) {
+      assert(!SymtabLoadCmd && "Multiple symbol tables");
+      SymtabLoadCmd = Load.Ptr;
+    } else if (Load.C.Type == macho::LCT_Dysymtab) {
+      assert(!DysymtabLoadCmd && "Multiple dynamic symbol tables");
+      DysymtabLoadCmd = Load.Ptr;
+    } else if (Load.C.Type == SegmentLoadType) {
+      uint32_t NumSections = getSegmentLoadCommandNumSections(this, Load);
+      for (unsigned J = 0; J < NumSections; ++J) {
+        const char *Sec = getSectionPtr(this, Load, J);
+        Sections.push_back(Sec);
+      }
+    }
 
-  MachOObj->ReadSymbol64TableEntry(SymtabLoadCmd->SymbolTableOffset, DRI.d.b,
-                                   Res);
+    if (I == LoadCommandCount - 1) break; // Last command: nothing follows.
+    Load = getNextLoadCommandInfo(Load);
+  }
 }
 
+error_code MachOObjectFile::getSymbolNext(DataRefImpl Symb,
+                                          SymbolRef &Res) const {
+  unsigned SymbolTableEntrySize = is64Bit() ?
+    sizeof(macho::Symbol64TableEntry) :
+    sizeof(macho::SymbolTableEntry);
+  Symb.p += SymbolTableEntrySize;
+  Res = SymbolRef(Symb, this);
+  return object_error::success;
+}
 
-error_code MachOObjectFile::getSymbolNext(DataRefImpl DRI,
-                                          SymbolRef &Result) const {
-  DRI.d.b++;
-  moveToNextSymbol(DRI);
-  Result = SymbolRef(DRI, this);
+error_code MachOObjectFile::getSymbolName(DataRefImpl Symb,
+                                          StringRef &Res) const {
+  StringRef Strings = getStringTableData();
+  uint32_t Index = getSymbolTableEntryBase(this, Symb).StringIndex;
+  if (Index >= Strings.size()) return object_error::parse_failed; // Corrupt.
+  Res = StringRef(Strings.data() + Index);
   return object_error::success;
 }
 
-error_code MachOObjectFile::getSymbolName(DataRefImpl DRI,
-                                          StringRef &Result) const {
-  if (MachOObj->is64Bit()) {
-    InMemoryStruct<macho::Symbol64TableEntry> Entry;
-    getSymbol64TableEntry(DRI, Entry);
-    Result = MachOObj->getStringAtIndex(Entry->StringIndex);
+error_code MachOObjectFile::getSymbolAddress(DataRefImpl Symb,
+                                             uint64_t &Res) const {
+  if (is64Bit()) {
+    macho::Symbol64TableEntry Entry = getSymbol64TableEntry(Symb);
+    Res = Entry.Value;
   } else {
-    InMemoryStruct<macho::SymbolTableEntry> Entry;
-    getSymbolTableEntry(DRI, Entry);
-    Result = MachOObj->getStringAtIndex(Entry->StringIndex);
+    macho::SymbolTableEntry Entry = getSymbolTableEntry(Symb);
+    Res = Entry.Value;
   }
   return object_error::success;
 }
 
-error_code MachOObjectFile::getSymbolFileOffset(DataRefImpl DRI,
-                                                uint64_t &Result) const {
-  if (MachOObj->is64Bit()) {
-    InMemoryStruct<macho::Symbol64TableEntry> Entry;
-    getSymbol64TableEntry(DRI, Entry);
-    Result = Entry->Value;
-    if (Entry->SectionIndex) {
-      InMemoryStruct<macho::Section64> Section;
-      getSection64(Sections[Entry->SectionIndex-1], Section);
-      Result += Section->Offset - Section->Address;
-    }
-  } else {
-    InMemoryStruct<macho::SymbolTableEntry> Entry;
-    getSymbolTableEntry(DRI, Entry);
-    Result = Entry->Value;
-    if (Entry->SectionIndex) {
-      InMemoryStruct<macho::Section> Section;
-      getSection(Sections[Entry->SectionIndex-1], Section);
-      Result += Section->Offset - Section->Address;
+error_code
+MachOObjectFile::getSymbolFileOffset(DataRefImpl Symb,
+                                     uint64_t &Res) const {
+  SymbolTableEntryBase Entry = getSymbolTableEntryBase(this, Symb);
+  getSymbolAddress(Symb, Res);
+  if (Entry.SectionIndex) {
+    uint64_t Delta;
+    DataRefImpl SecRel;
+    SecRel.d.a = Entry.SectionIndex-1;
+    if (is64Bit()) {
+      macho::Section64 Sec = getSection64(SecRel);
+      Delta = Sec.Offset - Sec.Address;
+    } else {
+      macho::Section Sec = getSection(SecRel);
+      Delta = Sec.Offset - Sec.Address;
     }
+    // Rebase the address by the section's file-offset/address difference.
+    Res += Delta;
   }
 
   return object_error::success;
 }
 
-error_code MachOObjectFile::getSymbolAddress(DataRefImpl DRI,
-                                             uint64_t &Result) const {
-  if (MachOObj->is64Bit()) {
-    InMemoryStruct<macho::Symbol64TableEntry> Entry;
-    getSymbol64TableEntry(DRI, Entry);
-    Result = Entry->Value;
+error_code MachOObjectFile::getSymbolAlignment(DataRefImpl DRI,
+                                               uint32_t &Result) const {
+  uint32_t flags;
+  this->getSymbolFlags(DRI, flags);
+  if (flags & SymbolRef::SF_Common) {
+    SymbolTableEntryBase Entry = getSymbolTableEntryBase(this, DRI);
+    Result = 1 << MachO::GET_COMM_ALIGN(Entry.Flags);
   } else {
-    InMemoryStruct<macho::SymbolTableEntry> Entry;
-    getSymbolTableEntry(DRI, Entry);
-    Result = Entry->Value;
+    Result = 0;
   }
   return object_error::success;
 }
 
 error_code MachOObjectFile::getSymbolSize(DataRefImpl DRI,
                                           uint64_t &Result) const {
-  uint32_t LoadCommandCount = MachOObj->getHeader().NumLoadCommands;
   uint64_t BeginOffset;
   uint64_t EndOffset = 0;
   uint8_t SectionIndex;
-  if (MachOObj->is64Bit()) {
-    InMemoryStruct<macho::Symbol64TableEntry> Entry;
-    getSymbol64TableEntry(DRI, Entry);
-    BeginOffset = Entry->Value;
-    SectionIndex = Entry->SectionIndex;
-    if (!SectionIndex) {
-      uint32_t flags = SymbolRef::SF_None;
-      getSymbolFlags(DRI, flags);
-      if (flags & SymbolRef::SF_Common)
-        Result = Entry->Value;
-      else
-        Result = UnknownAddressOrSize;
-      return object_error::success;
-    }
-    // Unfortunately symbols are unsorted so we need to touch all
-    // symbols from load command
-    DRI.d.b = 0;
-    uint32_t Command = DRI.d.a;
-    while (Command == DRI.d.a) {
-      moveToNextSymbol(DRI);
-      if (DRI.d.a < LoadCommandCount) {
-        getSymbol64TableEntry(DRI, Entry);
-        if (Entry->SectionIndex == SectionIndex && Entry->Value > BeginOffset)
-          if (!EndOffset || Entry->Value < EndOffset)
-            EndOffset = Entry->Value;
-      }
-      DRI.d.b++;
-    }
-  } else {
-    InMemoryStruct<macho::SymbolTableEntry> Entry;
-    getSymbolTableEntry(DRI, Entry);
-    BeginOffset = Entry->Value;
-    SectionIndex = Entry->SectionIndex;
-    if (!SectionIndex) {
-      uint32_t flags = SymbolRef::SF_None;
-      getSymbolFlags(DRI, flags);
-      if (flags & SymbolRef::SF_Common)
-        Result = Entry->Value;
-      else
-        Result = UnknownAddressOrSize;
-      return object_error::success;
-    }
-    // Unfortunately symbols are unsorted so we need to touch all
-    // symbols from load command
-    DRI.d.b = 0;
-    uint32_t Command = DRI.d.a;
-    while (Command == DRI.d.a) {
-      moveToNextSymbol(DRI);
-      if (DRI.d.a < LoadCommandCount) {
-        getSymbolTableEntry(DRI, Entry);
-        if (Entry->SectionIndex == SectionIndex && Entry->Value > BeginOffset)
-          if (!EndOffset || Entry->Value < EndOffset)
-            EndOffset = Entry->Value;
-      }
-      DRI.d.b++;
-    }
+
+  SymbolTableEntryBase Entry = getSymbolTableEntryBase(this, DRI);
+  uint64_t Value;
+  getSymbolAddress(DRI, Value);
+
+  BeginOffset = Value;
+
+  SectionIndex = Entry.SectionIndex;
+  if (!SectionIndex) {
+    uint32_t flags = SymbolRef::SF_None;
+    this->getSymbolFlags(DRI, flags);
+    if (flags & SymbolRef::SF_Common)
+      Result = Value;
+    else
+      Result = UnknownAddressOrSize;
+    return object_error::success;
+  }
+  // Unfortunately symbols are unsorted so we need to touch all
+  // symbols from load command
+  error_code ec;
+  for (symbol_iterator I = begin_symbols(), E = end_symbols(); I != E;
+       I.increment(ec)) {
+    DataRefImpl DRI = I->getRawDataRefImpl();
+    Entry = getSymbolTableEntryBase(this, DRI);
+    getSymbolAddress(DRI, Value);
+    if (Entry.SectionIndex == SectionIndex && Value > BeginOffset)
+      if (!EndOffset || Value < EndOffset)
+        EndOffset = Value;
   }
   if (!EndOffset) {
     uint64_t Size;
-    getSectionSize(Sections[SectionIndex-1], Size);
-    getSectionAddress(Sections[SectionIndex-1], EndOffset);
+    DataRefImpl Sec;
+    Sec.d.a = SectionIndex-1;
+    getSectionSize(Sec, Size);
+    getSectionAddress(Sec, EndOffset);
     EndOffset += Size;
   }
   Result = EndOffset - BeginOffset;
   return object_error::success;
 }
 
-error_code MachOObjectFile::getSymbolNMTypeChar(DataRefImpl DRI,
-                                                char &Result) const {
-  uint8_t Type, Flags;
-  if (MachOObj->is64Bit()) {
-    InMemoryStruct<macho::Symbol64TableEntry> Entry;
-    getSymbol64TableEntry(DRI, Entry);
-    Type = Entry->Type;
-    Flags = Entry->Flags;
-  } else {
-    InMemoryStruct<macho::SymbolTableEntry> Entry;
-    getSymbolTableEntry(DRI, Entry);
-    Type = Entry->Type;
-    Flags = Entry->Flags;
+error_code MachOObjectFile::getSymbolType(DataRefImpl Symb,
+                                          SymbolRef::Type &Res) const {
+  SymbolTableEntryBase Entry = getSymbolTableEntryBase(this, Symb);
+  uint8_t n_type = Entry.Type;
+
+  Res = SymbolRef::ST_Other;
+
+  // If this is a STAB debugging symbol, we can do nothing more.
+  if (n_type & MachO::NlistMaskStab) {
+    Res = SymbolRef::ST_Debug;
+    return object_error::success;
+  }
+
+  switch (n_type & MachO::NlistMaskType) {
+    case MachO::NListTypeUndefined :
+      Res = SymbolRef::ST_Unknown;
+      break;
+    case MachO::NListTypeSection :
+      Res = SymbolRef::ST_Function;
+      break;
   }
+  return object_error::success;
+}
+
+error_code MachOObjectFile::getSymbolNMTypeChar(DataRefImpl Symb,
+                                                char &Res) const {
+  SymbolTableEntryBase Entry = getSymbolTableEntryBase(this, Symb);
+  uint8_t Type = Entry.Type;
+  uint16_t Flags = Entry.Flags;
 
   char Char;
   switch (Type & macho::STF_TypeMask) {
@@ -274,25 +600,16 @@ error_code MachOObjectFile::getSymbolNMTypeChar(DataRefImpl DRI,
 
   if (Flags & (macho::STF_External | macho::STF_PrivateExtern))
     Char = toupper(static_cast<unsigned char>(Char));
-  Result = Char;
+  Res = Char;
   return object_error::success;
 }
 
 error_code MachOObjectFile::getSymbolFlags(DataRefImpl DRI,
                                            uint32_t &Result) const {
-  uint16_t MachOFlags;
-  uint8_t MachOType;
-  if (MachOObj->is64Bit()) {
-    InMemoryStruct<macho::Symbol64TableEntry> Entry;
-    getSymbol64TableEntry(DRI, Entry);
-    MachOFlags = Entry->Flags;
-    MachOType = Entry->Type;
-  } else {
-    InMemoryStruct<macho::SymbolTableEntry> Entry;
-    getSymbolTableEntry(DRI, Entry);
-    MachOFlags = Entry->Flags;
-    MachOType = Entry->Type;
-  }
+  SymbolTableEntryBase Entry = getSymbolTableEntryBase(this, DRI);
+
+  uint8_t MachOType = Entry.Type;
+  uint16_t MachOFlags = Entry.Flags;
 
   // TODO: Correctly set SF_ThreadLocal
   Result = SymbolRef::SF_None;
@@ -305,8 +622,12 @@ error_code MachOObjectFile::getSymbolFlags(DataRefImpl DRI,
 
   if (MachOType & MachO::NlistMaskExternal) {
     Result |= SymbolRef::SF_Global;
-    if ((MachOType & MachO::NlistMaskType) == MachO::NListTypeUndefined)
-      Result |= SymbolRef::SF_Common;
+    if ((MachOType & MachO::NlistMaskType) == MachO::NListTypeUndefined) {
+      uint64_t Value;
+      getSymbolAddress(DRI, Value);
+      if (Value)
+        Result |= SymbolRef::SF_Common;
+    }
   }
 
   if (MachOFlags & (MachO::NListDescWeakRef | MachO::NListDescWeakDef))
@@ -318,55 +639,20 @@ error_code MachOObjectFile::getSymbolFlags(DataRefImpl DRI,
   return object_error::success;
 }
 
-error_code MachOObjectFile::getSymbolSection(DataRefImpl Symb,
-                                             section_iterator &Res) const {
-  uint8_t index;
-  if (MachOObj->is64Bit()) {
-    InMemoryStruct<macho::Symbol64TableEntry> Entry;
-    getSymbol64TableEntry(Symb, Entry);
-    index = Entry->SectionIndex;
-  } else {
-    InMemoryStruct<macho::SymbolTableEntry> Entry;
-    getSymbolTableEntry(Symb, Entry);
-    index = Entry->SectionIndex;
-  }
+error_code
+MachOObjectFile::getSymbolSection(DataRefImpl Symb,
+                                  section_iterator &Res) const {
+  SymbolTableEntryBase Entry = getSymbolTableEntryBase(this, Symb);
+  uint8_t index = Entry.SectionIndex;
 
-  if (index == 0)
+  if (index == 0) {
     Res = end_sections();
-  else
-    Res = section_iterator(SectionRef(Sections[index-1], this));
-
-  return object_error::success;
-}
-
-error_code MachOObjectFile::getSymbolType(DataRefImpl Symb,
-                                          SymbolRef::Type &Res) const {
-  uint8_t n_type;
-  if (MachOObj->is64Bit()) {
-    InMemoryStruct<macho::Symbol64TableEntry> Entry;
-    getSymbol64TableEntry(Symb, Entry);
-    n_type = Entry->Type;
   } else {
-    InMemoryStruct<macho::SymbolTableEntry> Entry;
-    getSymbolTableEntry(Symb, Entry);
-    n_type = Entry->Type;
-  }
-  Res = SymbolRef::ST_Other;
-
-  // If this is a STAB debugging symbol, we can do nothing more.
-  if (n_type & MachO::NlistMaskStab) {
-    Res = SymbolRef::ST_Debug;
-    return object_error::success;
+    DataRefImpl DRI;
+    DRI.d.a = index - 1;
+    Res = section_iterator(SectionRef(DRI, this));
   }
 
-  switch (n_type & MachO::NlistMaskType) {
-    case MachO::NListTypeUndefined :
-      Res = SymbolRef::ST_Unknown;
-      break;
-    case MachO::NListTypeSection :
-      Res = SymbolRef::ST_Function;
-      break;
-  }
   return object_error::success;
 }
 
@@ -375,242 +661,101 @@ error_code MachOObjectFile::getSymbolValue(DataRefImpl Symb,
   report_fatal_error("getSymbolValue unimplemented in MachOObjectFile");
 }
 
-symbol_iterator MachOObjectFile::begin_symbols() const {
-  // DRI.d.a = segment number; DRI.d.b = symbol index.
-  DataRefImpl DRI;
-  moveToNextSymbol(DRI);
-  return symbol_iterator(SymbolRef(DRI, this));
-}
-
-symbol_iterator MachOObjectFile::end_symbols() const {
-  DataRefImpl DRI;
-  DRI.d.a = MachOObj->getHeader().NumLoadCommands;
-  return symbol_iterator(SymbolRef(DRI, this));
-}
-
-symbol_iterator MachOObjectFile::begin_dynamic_symbols() const {
-  // TODO: implement
-  report_fatal_error("Dynamic symbols unimplemented in MachOObjectFile");
-}
-
-symbol_iterator MachOObjectFile::end_dynamic_symbols() const {
-  // TODO: implement
-  report_fatal_error("Dynamic symbols unimplemented in MachOObjectFile");
-}
-
-library_iterator MachOObjectFile::begin_libraries_needed() const {
-  // TODO: implement
-  report_fatal_error("Needed libraries unimplemented in MachOObjectFile");
-}
-
-library_iterator MachOObjectFile::end_libraries_needed() const {
-  // TODO: implement
-  report_fatal_error("Needed libraries unimplemented in MachOObjectFile");
-}
-
-StringRef MachOObjectFile::getLoadName() const {
-  // TODO: Implement
-  report_fatal_error("get_load_name() unimplemented in MachOObjectFile");
-}
-
-/*===-- Sections ----------------------------------------------------------===*/
-
-void MachOObjectFile::moveToNextSection(DataRefImpl &DRI) const {
-  uint32_t LoadCommandCount = MachOObj->getHeader().NumLoadCommands;
-  while (DRI.d.a < LoadCommandCount) {
-    LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
-    if (LCI.Command.Type == macho::LCT_Segment) {
-      InMemoryStruct<macho::SegmentLoadCommand> SegmentLoadCmd;
-      MachOObj->ReadSegmentLoadCommand(LCI, SegmentLoadCmd);
-      if (DRI.d.b < SegmentLoadCmd->NumSections)
-        return;
-    } else if (LCI.Command.Type == macho::LCT_Segment64) {
-      InMemoryStruct<macho::Segment64LoadCommand> Segment64LoadCmd;
-      MachOObj->ReadSegment64LoadCommand(LCI, Segment64LoadCmd);
-      if (DRI.d.b < Segment64LoadCmd->NumSections)
-        return;
-    }
-
-    DRI.d.a++;
-    DRI.d.b = 0;
-  }
-}
-
-error_code MachOObjectFile::getSectionNext(DataRefImpl DRI,
-                                           SectionRef &Result) const {
-  DRI.d.b++;
-  moveToNextSection(DRI);
-  Result = SectionRef(DRI, this);
+error_code
+MachOObjectFile::getSectionNext(DataRefImpl Sec, SectionRef &Res) const {
+  ++Sec.d.a; // Sections are addressed by the index kept in d.a.
+  Res = SectionRef(Sec, this);
   return object_error::success;
 }
 
-void
-MachOObjectFile::getSection(DataRefImpl DRI,
-                            InMemoryStruct<macho::Section> &Res) const {
-  LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
-  MachOObj->ReadSection(LCI, DRI.d.b, Res);
-}
-
-std::size_t MachOObjectFile::getSectionIndex(DataRefImpl Sec) const {
-  SectionList::const_iterator loc =
-    std::find(Sections.begin(), Sections.end(), Sec);
-  assert(loc != Sections.end() && "Sec is not a valid section!");
-  return std::distance(Sections.begin(), loc);
-}
-
-void
-MachOObjectFile::getSection64(DataRefImpl DRI,
-                            InMemoryStruct<macho::Section64> &Res) const {
-  LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
-  MachOObj->ReadSection64(LCI, DRI.d.b, Res);
-}
-
-static bool is64BitLoadCommand(const MachOObject *MachOObj, DataRefImpl DRI) {
-  LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
-  if (LCI.Command.Type == macho::LCT_Segment64)
-    return true;
-  assert(LCI.Command.Type == macho::LCT_Segment && "Unexpected Type.");
-  return false;
-}
-
-static StringRef parseSegmentOrSectionName(const char *P) {
-  if (P[15] == 0)
-    // Null terminated.
-    return P;
-  // Not null terminated, so this is a 16 char string.
-  return StringRef(P, 16);
-}
-
-error_code MachOObjectFile::getSectionName(DataRefImpl DRI,
-                                           StringRef &Result) const {
-  if (is64BitLoadCommand(MachOObj.get(), DRI)) {
-    LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
-    unsigned SectionOffset = LCI.Offset + sizeof(macho::Segment64LoadCommand) +
-      DRI.d.b * sizeof(macho::Section64);
-    StringRef Data = MachOObj->getData(SectionOffset, sizeof(macho::Section64));
-    const macho::Section64 *sec =
-      reinterpret_cast<const macho::Section64*>(Data.data());
-    Result = parseSegmentOrSectionName(sec->Name);
-  } else {
-    LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
-    unsigned SectionOffset = LCI.Offset + sizeof(macho::SegmentLoadCommand) +
-      DRI.d.b * sizeof(macho::Section);
-    StringRef Data = MachOObj->getData(SectionOffset, sizeof(macho::Section));
-    const macho::Section *sec =
-      reinterpret_cast<const macho::Section*>(Data.data());
-    Result = parseSegmentOrSectionName(sec->Name);
-  }
+// Parse the name returned by getSectionRawName() into Result.
+error_code MachOObjectFile::getSectionName(DataRefImpl Sec,
+                                           StringRef &Result) const {
+  Result = parseSegmentOrSectionName(getSectionRawName(Sec).data());
   return object_error::success;
 }
 
-error_code MachOObjectFile::getSectionFinalSegmentName(DataRefImpl Sec,
-                                                       StringRef &Res) const {
-  if (is64BitLoadCommand(MachOObj.get(), Sec)) {
-    LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(Sec.d.a);
-    unsigned SectionOffset = LCI.Offset + sizeof(macho::Segment64LoadCommand) +
-      Sec.d.b * sizeof(macho::Section64);
-    StringRef Data = MachOObj->getData(SectionOffset, sizeof(macho::Section64));
-    const macho::Section64 *sec =
-      reinterpret_cast<const macho::Section64*>(Data.data());
-    Res = parseSegmentOrSectionName(sec->SegmentName);
+// Store the Address field of the section's header in Res.
+error_code MachOObjectFile::getSectionAddress(DataRefImpl Sec,
+                                              uint64_t &Res) const {
+  if (is64Bit()) {
+    Res = getSection64(Sec).Address;
   } else {
-    LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(Sec.d.a);
-    unsigned SectionOffset = LCI.Offset + sizeof(macho::SegmentLoadCommand) +
-      Sec.d.b * sizeof(macho::Section);
-    StringRef Data = MachOObj->getData(SectionOffset, sizeof(macho::Section));
-    const macho::Section *sec =
-      reinterpret_cast<const macho::Section*>(Data.data());
-    Res = parseSegmentOrSectionName(sec->SegmentName);
+    // Otherwise the header is a macho::Section rather than a Section64.
+    Res = getSection(Sec).Address;
   }
   return object_error::success;
 }
 
-error_code MachOObjectFile::getSectionAddress(DataRefImpl DRI,
-                                              uint64_t &Result) const {
-  if (is64BitLoadCommand(MachOObj.get(), DRI)) {
-    InMemoryStruct<macho::Section64> Sect;
-    getSection64(DRI, Sect);
-    Result = Sect->Address;
+// Store the Size field of the section's header in Res.
+error_code MachOObjectFile::getSectionSize(DataRefImpl Sec,
+                                           uint64_t &Res) const {
+  if (is64Bit()) {
+    Res = getSection64(Sec).Size;
   } else {
-    InMemoryStruct<macho::Section> Sect;
-    getSection(DRI, Sect);
-    Result = Sect->Address;
+    // Otherwise the header is a macho::Section rather than a Section64.
+    Res = getSection(Sec).Size;
   }
-  return object_error::success;
-}
 
-error_code MachOObjectFile::getSectionSize(DataRefImpl DRI,
-                                           uint64_t &Result) const {
-  if (is64BitLoadCommand(MachOObj.get(), DRI)) {
-    InMemoryStruct<macho::Section64> Sect;
-    getSection64(DRI, Sect);
-    Result = Sect->Size;
-  } else {
-    InMemoryStruct<macho::Section> Sect;
-    getSection(DRI, Sect);
-    Result = Sect->Size;
-  }
   return object_error::success;
 }
 
-error_code MachOObjectFile::getSectionContents(DataRefImpl DRI,
-                                               StringRef &Result) const {
-  if (is64BitLoadCommand(MachOObj.get(), DRI)) {
-    InMemoryStruct<macho::Section64> Sect;
-    getSection64(DRI, Sect);
-    Result = MachOObj->getData(Sect->Offset, Sect->Size);
+// Set Res to the Size bytes of file data starting at the section's Offset.
+error_code MachOObjectFile::getSectionContents(DataRefImpl Sec,
+                                               StringRef &Res) const {
+  uint64_t Size;
+  uint32_t Offset;
+  if (is64Bit()) {
+    macho::Section64 Hdr = getSection64(Sec);
+    Size = Hdr.Size;
+    Offset = Hdr.Offset;
   } else {
-    InMemoryStruct<macho::Section> Sect;
-    getSection(DRI, Sect);
-    Result = MachOObj->getData(Sect->Offset, Sect->Size);
+    macho::Section Hdr = getSection(Sec);
+    Size = Hdr.Size;
+    Offset = Hdr.Offset;
   }
+
+  Res = getData().substr(Offset, Size);
   return object_error::success;
 }
 
-error_code MachOObjectFile::getSectionAlignment(DataRefImpl DRI,
-                                                uint64_t &Result) const {
-  if (is64BitLoadCommand(MachOObj.get(), DRI)) {
-    InMemoryStruct<macho::Section64> Sect;
-    getSection64(DRI, Sect);
-    Result = uint64_t(1) << Sect->Align;
+// Report the section's alignment in bytes; the header stores it as log2.
+error_code
+MachOObjectFile::getSectionAlignment(DataRefImpl Sec, uint64_t &Res) const {
+  uint32_t Align;
+  if (is64Bit()) {
+    Align = getSection64(Sec).Align;
   } else {
-    InMemoryStruct<macho::Section> Sect;
-    getSection(DRI, Sect);
-    Result = uint64_t(1) << Sect->Align;
+    Align = getSection(Sec).Align;
   }
+  if (Align >= 64) // Corrupt exponent: shifting by >= 64 bits is undefined.
+    return object_error::parse_failed;
+  Res = uint64_t(1) << Align;
   return object_error::success;
 }
 
-error_code MachOObjectFile::isSectionText(DataRefImpl DRI,
-                                          bool &Result) const {
-  if (is64BitLoadCommand(MachOObj.get(), DRI)) {
-    InMemoryStruct<macho::Section64> Sect;
-    getSection64(DRI, Sect);
-    Result = Sect->Flags & macho::SF_PureInstructions;
-  } else {
-    InMemoryStruct<macho::Section> Sect;
-    getSection(DRI, Sect);
-    Result = Sect->Flags & macho::SF_PureInstructions;
-  }
+// A section counts as text when its SF_PureInstructions flag is set.
+error_code MachOObjectFile::isSectionText(DataRefImpl Sec, bool &Res) const {
+  const uint32_t SectFlags = getSectionFlags(this, Sec);
+  Res = (SectFlags & macho::SF_PureInstructions) != 0;
   return object_error::success;
 }
 
-error_code MachOObjectFile::isSectionData(DataRefImpl DRI,
-                                          bool &Result) const {
+error_code MachOObjectFile::isSectionData(DataRefImpl DRI, bool &Result) const {
   // FIXME: Unimplemented.
   Result = false;
   return object_error::success;
 }
 
-error_code MachOObjectFile::isSectionBSS(DataRefImpl DRI,
-                                         bool &Result) const {
+error_code MachOObjectFile::isSectionBSS(DataRefImpl DRI, bool &Result) const {
   // FIXME: Unimplemented.
   Result = false;
   return object_error::success;
 }
 
-error_code MachOObjectFile::isSectionRequiredForExecution(DataRefImpl Sec,
-                                                          bool &Result) const {
+error_code
+MachOObjectFile::isSectionRequiredForExecution(DataRefImpl Sec,
+                                               bool &Result) const {
   // FIXME: Unimplemented.
   Result = true;
   return object_error::success;
@@ -623,22 +768,12 @@ error_code MachOObjectFile::isSectionVirtual(DataRefImpl Sec,
   return object_error::success;
 }
 
-error_code MachOObjectFile::isSectionZeroInit(DataRefImpl DRI,
-                                              bool &Result) const {
-  if (MachOObj->is64Bit()) {
-    InMemoryStruct<macho::Section64> Sect;
-    getSection64(DRI, Sect);
-    unsigned SectionType = Sect->Flags & MachO::SectionFlagMaskSectionType;
-    Result = (SectionType == MachO::SectionTypeZeroFill ||
-              SectionType == MachO::SectionTypeZeroFillLarge);
-  } else {
-    InMemoryStruct<macho::Section> Sect;
-    getSection(DRI, Sect);
-    unsigned SectionType = Sect->Flags & MachO::SectionFlagMaskSectionType;
-    Result = (SectionType == MachO::SectionTypeZeroFill ||
-              SectionType == MachO::SectionTypeZeroFillLarge);
-  }
-
+// Res is true for the two zero-fill section types and false otherwise.
+error_code MachOObjectFile::isSectionZeroInit(DataRefImpl Sec,
+                                              bool &Res) const {
+  unsigned Ty = getSectionFlags(this, Sec) & MachO::SectionFlagMaskSectionType;
+  Res = Ty == MachO::SectionTypeZeroFill ||
+        Ty == MachO::SectionTypeZeroFillLarge;
   return object_error::success;
 }
 
@@ -653,11 +788,11 @@ error_code MachOObjectFile::isSectionReadOnlyData(DataRefImpl Sec,
   return object_error::success;
 }
 
-error_code MachOObjectFile::sectionContainsSymbol(DataRefImpl Sec,
-                                                  DataRefImpl Symb,
-                                                  bool &Result) const {
+error_code
+MachOObjectFile::sectionContainsSymbol(DataRefImpl Sec, DataRefImpl Symb,
+                                       bool &Result) const {
   SymbolRef::Type ST;
-  getSymbolType(Symb, ST);
+  this->getSymbolType(Symb, ST);
   if (ST == SymbolRef::ST_Unknown) {
     Result = false;
     return object_error::success;
@@ -668,164 +803,107 @@ error_code MachOObjectFile::sectionContainsSymbol(DataRefImpl Sec,
   getSectionSize(Sec, SectEnd);
   SectEnd += SectBegin;
 
-  if (MachOObj->is64Bit()) {
-    InMemoryStruct<macho::Symbol64TableEntry> Entry;
-    getSymbol64TableEntry(Symb, Entry);
-    uint64_t SymAddr= Entry->Value;
-    Result = (SymAddr >= SectBegin) && (SymAddr < SectEnd);
-  } else {
-    InMemoryStruct<macho::SymbolTableEntry> Entry;
-    getSymbolTableEntry(Symb, Entry);
-    uint64_t SymAddr= Entry->Value;
-    Result = (SymAddr >= SectBegin) && (SymAddr < SectEnd);
-  }
+  uint64_t SymAddr;
+  getSymbolAddress(Symb, SymAddr);
+  Result = (SymAddr >= SectBegin) && (SymAddr < SectEnd);
 
   return object_error::success;
 }
 
 relocation_iterator MachOObjectFile::getSectionRelBegin(DataRefImpl Sec) const {
-  DataRefImpl ret;
-  ret.d.b = getSectionIndex(Sec);
-  return relocation_iterator(RelocationRef(ret, this));
-}
-relocation_iterator MachOObjectFile::getSectionRelEnd(DataRefImpl Sec) const {
-  uint32_t last_reloc;
-  if (is64BitLoadCommand(MachOObj.get(), Sec)) {
-    InMemoryStruct<macho::Section64> Sect;
-    getSection64(Sec, Sect);
-    last_reloc = Sect->NumRelocationTableEntries;
+  // The begin iterator carries the address of the first entry of the
+  // section's relocation table.
+  uint32_t TableOffset;
+  if (is64Bit()) {
+    TableOffset = getSection64(Sec).RelocationTableOffset;
   } else {
-    InMemoryStruct<macho::Section> Sect;
-    getSection(Sec, Sect);
-    last_reloc = Sect->NumRelocationTableEntries;
+    TableOffset = getSection(Sec).RelocationTableOffset;
   }
-  DataRefImpl ret;
-  ret.d.a = last_reloc;
-  ret.d.b = getSectionIndex(Sec);
-  return relocation_iterator(RelocationRef(ret, this));
-}
-
-section_iterator MachOObjectFile::begin_sections() const {
-  DataRefImpl DRI;
-  moveToNextSection(DRI);
-  return section_iterator(SectionRef(DRI, this));
-}
 
-section_iterator MachOObjectFile::end_sections() const {
-  DataRefImpl DRI;
-  DRI.d.a = MachOObj->getHeader().NumLoadCommands;
-  return section_iterator(SectionRef(DRI, this));
+  DataRefImpl First;
+  First.p = reinterpret_cast<uintptr_t>(getPtr(this, TableOffset));
+  return relocation_iterator(RelocationRef(First, this));
 }
 
-/*===-- Relocations -------------------------------------------------------===*/
-
-void MachOObjectFile::
-getRelocation(DataRefImpl Rel,
-              InMemoryStruct<macho::RelocationEntry> &Res) const {
-  uint32_t relOffset;
-  if (MachOObj->is64Bit()) {
-    InMemoryStruct<macho::Section64> Sect;
-    getSection64(Sections[Rel.d.b], Sect);
-    relOffset = Sect->RelocationTableOffset;
+// Build the past-the-end relocation iterator for a section: the address one
+// entry beyond the last record of its relocation table.
+relocation_iterator MachOObjectFile::getSectionRelEnd(DataRefImpl Sec) const {
+  uint32_t Count;
+  uint32_t TableOffset;
+  if (is64Bit()) {
+    macho::Section64 Hdr = getSection64(Sec);
+    Count = Hdr.NumRelocationTableEntries;
+    TableOffset = Hdr.RelocationTableOffset;
   } else {
-    InMemoryStruct<macho::Section> Sect;
-    getSection(Sections[Rel.d.b], Sect);
-    relOffset = Sect->RelocationTableOffset;
+    macho::Section Hdr = getSection(Sec);
+    Count = Hdr.NumRelocationTableEntries;
+    TableOffset = Hdr.RelocationTableOffset;
   }
-  MachOObj->ReadRelocationEntry(relOffset, Rel.d.a, Res);
+
+  typedef const macho::RelocationEntry *EntryPtr;
+  EntryPtr First = reinterpret_cast<EntryPtr>(getPtr(this, TableOffset));
+  DataRefImpl End;
+  End.p = reinterpret_cast<uintptr_t>(First + Count);
+  return relocation_iterator(RelocationRef(End, this));
 }
+
 error_code MachOObjectFile::getRelocationNext(DataRefImpl Rel,
                                               RelocationRef &Res) const {
-  ++Rel.d.a;
+  // Relocation iterators hold the address of a macho::RelocationEntry in p,
+  // so moving to the next record is a byte offset of one entry.
+  Rel.p += sizeof(macho::RelocationEntry);
   Res = RelocationRef(Rel, this);
   return object_error::success;
 }
-error_code MachOObjectFile::getRelocationAddress(DataRefImpl Rel,
-                                                 uint64_t &Res) const {
-  const uint8_t* sectAddress = 0;
-  if (MachOObj->is64Bit()) {
-    InMemoryStruct<macho::Section64> Sect;
-    getSection64(Sections[Rel.d.b], Sect);
-    sectAddress += Sect->Address;
-  } else {
-    InMemoryStruct<macho::Section> Sect;
-    getSection(Sections[Rel.d.b], Sect);
-    sectAddress += Sect->Address;
-  }
-  InMemoryStruct<macho::RelocationEntry> RE;
-  getRelocation(Rel, RE);
-
-  unsigned Arch = getArch();
-  bool isScattered = (Arch != Triple::x86_64) &&
-                     (RE->Word0 & macho::RF_Scattered);
-  uint64_t RelAddr = 0;
-  if (isScattered)
-    RelAddr = RE->Word0 & 0xFFFFFF;
-  else
-    RelAddr = RE->Word0;
 
-  Res = reinterpret_cast<uintptr_t>(sectAddress + RelAddr);
-  return object_error::success;
+error_code
+MachOObjectFile::getRelocationAddress(DataRefImpl Rel, uint64_t &Res) const {
+  report_fatal_error("getRelocationAddress not implemented in MachOObjectFile");
 }
+
 error_code MachOObjectFile::getRelocationOffset(DataRefImpl Rel,
                                                 uint64_t &Res) const {
-  InMemoryStruct<macho::RelocationEntry> RE;
-  getRelocation(Rel, RE);
-
-  unsigned Arch = getArch();
-  bool isScattered = (Arch != Triple::x86_64) &&
-                     (RE->Word0 & macho::RF_Scattered);
-  if (isScattered)
-    Res = RE->Word0 & 0xFFFFFF;
-  else
-    Res = RE->Word0;
+  macho::RelocationEntry RE = getRelocation(Rel);
+  Res = getAnyRelocationAddress(RE);
   return object_error::success;
 }
-error_code MachOObjectFile::getRelocationSymbol(DataRefImpl Rel,
-                                                SymbolRef &Res) const {
-  InMemoryStruct<macho::RelocationEntry> RE;
-  getRelocation(Rel, RE);
-  uint32_t SymbolIdx = RE->Word1 & 0xffffff;
-  bool isExtern = (RE->Word1 >> 27) & 1;
 
-  DataRefImpl Sym;
-  moveToNextSymbol(Sym);
-  if (isExtern) {
-    for (unsigned i = 0; i < SymbolIdx; i++) {
-      Sym.d.b++;
-      moveToNextSymbol(Sym);
-      assert(Sym.d.a < MachOObj->getHeader().NumLoadCommands &&
-             "Relocation symbol index out of range!");
-    }
+// Set Res to Rel's symbol, or to end_symbols() for non-external relocations.
+error_code
+MachOObjectFile::getRelocationSymbol(DataRefImpl Rel, SymbolRef &Res) const {
+  macho::RelocationEntry RE = getRelocation(Rel);
+  uint32_t SymbolIdx = getPlainRelocationSymbolNum(RE);
+  if (!getPlainRelocationExternal(RE)) {
+    Res = *end_symbols();
+    return object_error::success;
   }
+
+  macho::SymtabLoadCommand S = getSymtabLoadCommand();
+  // Keep the entry size 64 bits wide so the offset below cannot wrap.
+  uint64_t EntrySize = is64Bit() ? sizeof(macho::Symbol64TableEntry)
+                                 : sizeof(macho::SymbolTableEntry);
+  uint64_t Offset = S.SymbolTableOffset + SymbolIdx * EntrySize;
+  DataRefImpl Sym;
+  Sym.p = reinterpret_cast<uintptr_t>(getPtr(this, Offset));
   Res = SymbolRef(Sym, this);
   return object_error::success;
 }
+
 error_code MachOObjectFile::getRelocationType(DataRefImpl Rel,
                                               uint64_t &Res) const {
-  InMemoryStruct<macho::RelocationEntry> RE;
-  getRelocation(Rel, RE);
-  Res = RE->Word0;
-  Res <<= 32;
-  Res |= RE->Word1;
+  macho::RelocationEntry RE = getRelocation(Rel);
+  Res = getAnyRelocationType(RE);
   return object_error::success;
 }
-error_code MachOObjectFile::getRelocationTypeName(DataRefImpl Rel,
-                                          SmallVectorImpl<char> &Result) const {
-  // TODO: Support scattered relocations.
-  StringRef res;
-  InMemoryStruct<macho::RelocationEntry> RE;
-  getRelocation(Rel, RE);
 
-  unsigned Arch = getArch();
-  bool isScattered = (Arch != Triple::x86_64) &&
-                     (RE->Word0 & macho::RF_Scattered);
+error_code
+MachOObjectFile::getRelocationTypeName(DataRefImpl Rel,
+                                       SmallVectorImpl<char> &Result) const {
+  StringRef res;
+  uint64_t RType;
+  getRelocationType(Rel, RType);
 
-  unsigned r_type;
-  if (isScattered)
-    r_type = (RE->Word0 >> 24) & 0xF;
-  else
-    r_type = (RE->Word1 >> 28) & 0xF;
+  unsigned Arch = this->getArch();
 
   switch (Arch) {
     case Triple::x86: {
@@ -837,10 +915,10 @@ error_code MachOObjectFile::getRelocationTypeName(DataRefImpl Rel,
         "GENERIC_RELOC_LOCAL_SECTDIFF",
         "GENERIC_RELOC_TLV" };
 
-      if (r_type > 6)
+      if (RType > 6)
         res = "Unknown";
       else
-        res = Table[r_type];
+        res = Table[RType];
       break;
     }
     case Triple::x86_64: {
@@ -856,10 +934,10 @@ error_code MachOObjectFile::getRelocationTypeName(DataRefImpl Rel,
         "X86_64_RELOC_SIGNED_4",
         "X86_64_RELOC_TLV" };
 
-      if (r_type > 9)
+      if (RType > 9)
         res = "Unknown";
       else
-        res = Table[r_type];
+        res = Table[RType];
       break;
     }
     case Triple::arm: {
@@ -875,10 +953,10 @@ error_code MachOObjectFile::getRelocationTypeName(DataRefImpl Rel,
         "ARM_RELOC_HALF",
         "ARM_RELOC_HALF_SECTDIFF" };
 
-      if (r_type > 9)
+      if (RType > 9)
         res = "Unknown";
       else
-        res = Table[r_type];
+        res = Table[RType];
       break;
     }
     case Triple::ppc: {
@@ -900,7 +978,7 @@ error_code MachOObjectFile::getRelocationTypeName(DataRefImpl Rel,
         "PPC_RELOC_LO14_SECTDIFF",
         "PPC_RELOC_LOCAL_SECTDIFF" };
 
-      res = Table[r_type];
+      res = Table[RType];
       break;
     }
     case Triple::UnknownArch:
@@ -910,193 +988,79 @@ error_code MachOObjectFile::getRelocationTypeName(DataRefImpl Rel,
   Result.append(res.begin(), res.end());
   return object_error::success;
 }
+
 error_code MachOObjectFile::getRelocationAdditionalInfo(DataRefImpl Rel,
                                                         int64_t &Res) const {
-  InMemoryStruct<macho::RelocationEntry> RE;
-  getRelocation(Rel, RE);
-  bool isExtern = (RE->Word1 >> 27) & 1;
   Res = 0;
-  if (!isExtern) {
-    const uint8_t* sectAddress = base();
-    if (MachOObj->is64Bit()) {
-      InMemoryStruct<macho::Section64> Sect;
-      getSection64(Sections[Rel.d.b], Sect);
-      sectAddress += Sect->Offset;
-    } else {
-      InMemoryStruct<macho::Section> Sect;
-      getSection(Sections[Rel.d.b], Sect);
-      sectAddress += Sect->Offset;
-    }
-    Res = reinterpret_cast<uintptr_t>(sectAddress);
-  }
   return object_error::success;
 }
 
-// Helper to advance a section or symbol iterator multiple increments at a time.
-template<class T>
-error_code advance(T &it, size_t Val) {
-  error_code ec;
-  while (Val--) {
-    it.increment(ec);
-  }
-  return ec;
-}
-
-template<class T>
-void advanceTo(T &it, size_t Val) {
-  if (error_code ec = advance(it, Val))
-    report_fatal_error(ec.message());
-}
-
-void MachOObjectFile::printRelocationTargetName(
-                                     InMemoryStruct<macho::RelocationEntry>& RE,
-                                     raw_string_ostream &fmt) const {
-  unsigned Arch = getArch();
-  bool isScattered = (Arch != Triple::x86_64) &&
-                     (RE->Word0 & macho::RF_Scattered);
-
-  // Target of a scattered relocation is an address.  In the interest of
-  // generating pretty output, scan through the symbol table looking for a
-  // symbol that aligns with that address.  If we find one, print it.
-  // Otherwise, we just print the hex address of the target.
-  if (isScattered) {
-    uint32_t Val = RE->Word1;
-
-    error_code ec;
-    for (symbol_iterator SI = begin_symbols(), SE = end_symbols(); SI != SE;
-        SI.increment(ec)) {
-      if (ec) report_fatal_error(ec.message());
-
-      uint64_t Addr;
-      StringRef Name;
-
-      if ((ec = SI->getAddress(Addr)))
-        report_fatal_error(ec.message());
-      if (Addr != Val) continue;
-      if ((ec = SI->getName(Name)))
-        report_fatal_error(ec.message());
-      fmt << Name;
-      return;
-    }
-
-    // If we couldn't find a symbol that this relocation refers to, try
-    // to find a section beginning instead.
-    for (section_iterator SI = begin_sections(), SE = end_sections(); SI != SE;
-         SI.increment(ec)) {
-      if (ec) report_fatal_error(ec.message());
-
-      uint64_t Addr;
-      StringRef Name;
-
-      if ((ec = SI->getAddress(Addr)))
-        report_fatal_error(ec.message());
-      if (Addr != Val) continue;
-      if ((ec = SI->getName(Name)))
-        report_fatal_error(ec.message());
-      fmt << Name;
-      return;
-    }
-
-    fmt << format("0x%x", Val);
-    return;
-  }
-
-  StringRef S;
-  bool isExtern = (RE->Word1 >> 27) & 1;
-  uint32_t Val = RE->Word1 & 0xFFFFFF;
-
-  if (isExtern) {
-    symbol_iterator SI = begin_symbols();
-    advanceTo(SI, Val);
-    SI->getName(S);
-  } else {
-    section_iterator SI = begin_sections();
-    advanceTo(SI, Val);
-    SI->getName(S);
-  }
-
-  fmt << S;
-}
-
-error_code MachOObjectFile::getRelocationValueString(DataRefImpl Rel,
+error_code
+MachOObjectFile::getRelocationValueString(DataRefImpl Rel,
                                           SmallVectorImpl<char> &Result) const {
-  InMemoryStruct<macho::RelocationEntry> RE;
-  getRelocation(Rel, RE);
+  macho::RelocationEntry RE = getRelocation(Rel);
 
-  unsigned Arch = getArch();
-  bool isScattered = (Arch != Triple::x86_64) &&
-                     (RE->Word0 & macho::RF_Scattered);
+  unsigned Arch = this->getArch();
 
   std::string fmtbuf;
   raw_string_ostream fmt(fmtbuf);
-
-  unsigned Type;
-  if (isScattered)
-    Type = (RE->Word0 >> 24) & 0xF;
-  else
-    Type = (RE->Word1 >> 28) & 0xF;
-
-  bool isPCRel;
-  if (isScattered)
-    isPCRel = ((RE->Word0 >> 30) & 1);
-  else
-    isPCRel = ((RE->Word1 >> 24) & 1);
+  unsigned Type = this->getAnyRelocationType(RE);
+  bool IsPCRel = this->getAnyRelocationPCRel(RE);
 
   // Determine any addends that should be displayed with the relocation.
   // These require decoding the relocation type, which is triple-specific.
 
   // X86_64 has entirely custom relocation types.
   if (Arch == Triple::x86_64) {
-    bool isPCRel = ((RE->Word1 >> 24) & 1);
+    bool isPCRel = getAnyRelocationPCRel(RE);
 
     switch (Type) {
       case macho::RIT_X86_64_GOTLoad:   // X86_64_RELOC_GOT_LOAD
       case macho::RIT_X86_64_GOT: {     // X86_64_RELOC_GOT
-        printRelocationTargetName(RE, fmt);
+        printRelocationTargetName(this, RE, fmt);
         fmt << "@GOT";
         if (isPCRel) fmt << "PCREL";
         break;
       }
       case macho::RIT_X86_64_Subtractor: { // X86_64_RELOC_SUBTRACTOR
-        InMemoryStruct<macho::RelocationEntry> RENext;
         DataRefImpl RelNext = Rel;
         RelNext.d.a++;
-        getRelocation(RelNext, RENext);
+        macho::RelocationEntry RENext = getRelocation(RelNext);
 
         // X86_64_SUBTRACTOR must be followed by a relocation of type
         // X86_64_RELOC_UNSIGNED.
         // NOTE: Scattered relocations don't exist on x86_64.
-        unsigned RType = (RENext->Word1 >> 28) & 0xF;
+        unsigned RType = getAnyRelocationType(RENext);
         if (RType != 0)
           report_fatal_error("Expected X86_64_RELOC_UNSIGNED after "
                              "X86_64_RELOC_SUBTRACTOR.");
 
         // The X86_64_RELOC_UNSIGNED contains the minuend symbol,
         // X86_64_SUBTRACTOR contains to the subtrahend.
-        printRelocationTargetName(RENext, fmt);
+        printRelocationTargetName(this, RENext, fmt);
         fmt << "-";
-        printRelocationTargetName(RE, fmt);
+        printRelocationTargetName(this, RE, fmt);
         break;
       }
       case macho::RIT_X86_64_TLV:
-        printRelocationTargetName(RE, fmt);
+        printRelocationTargetName(this, RE, fmt);
         fmt << "@TLV";
         if (isPCRel) fmt << "P";
         break;
       case macho::RIT_X86_64_Signed1: // X86_64_RELOC_SIGNED1
-        printRelocationTargetName(RE, fmt);
+        printRelocationTargetName(this, RE, fmt);
         fmt << "-1";
         break;
       case macho::RIT_X86_64_Signed2: // X86_64_RELOC_SIGNED2
-        printRelocationTargetName(RE, fmt);
+        printRelocationTargetName(this, RE, fmt);
         fmt << "-2";
         break;
       case macho::RIT_X86_64_Signed4: // X86_64_RELOC_SIGNED4
-        printRelocationTargetName(RE, fmt);
+        printRelocationTargetName(this, RE, fmt);
         fmt << "-4";
         break;
       default:
-        printRelocationTargetName(RE, fmt);
+        printRelocationTargetName(this, RE, fmt);
         break;
     }
   // X86 and ARM share some relocation types in common.
@@ -1106,27 +1070,21 @@ error_code MachOObjectFile::getRelocationValueString(DataRefImpl Rel,
       case macho::RIT_Pair: // GENERIC_RELOC_PAIR - prints no info
         return object_error::success;
       case macho::RIT_Difference: { // GENERIC_RELOC_SECTDIFF
-        InMemoryStruct<macho::RelocationEntry> RENext;
         DataRefImpl RelNext = Rel;
         RelNext.d.a++;
-        getRelocation(RelNext, RENext);
+        macho::RelocationEntry RENext = getRelocation(RelNext);
 
         // X86 sect diff's must be followed by a relocation of type
         // GENERIC_RELOC_PAIR.
-        bool isNextScattered = (Arch != Triple::x86_64) &&
-                               (RENext->Word0 & macho::RF_Scattered);
-        unsigned RType;
-        if (isNextScattered)
-          RType = (RENext->Word0 >> 24) & 0xF;
-        else
-          RType = (RENext->Word1 >> 28) & 0xF;
+        unsigned RType = getAnyRelocationType(RENext);
+
         if (RType != 1)
           report_fatal_error("Expected GENERIC_RELOC_PAIR after "
                              "GENERIC_RELOC_SECTDIFF.");
 
-        printRelocationTargetName(RE, fmt);
+        printRelocationTargetName(this, RE, fmt);
         fmt << "-";
-        printRelocationTargetName(RENext, fmt);
+        printRelocationTargetName(this, RENext, fmt);
         break;
       }
     }
@@ -1136,37 +1094,30 @@ error_code MachOObjectFile::getRelocationValueString(DataRefImpl Rel,
       // handled in the generic code.
       switch (Type) {
         case macho::RIT_Generic_LocalDifference:{// GENERIC_RELOC_LOCAL_SECTDIFF
-          InMemoryStruct<macho::RelocationEntry> RENext;
           DataRefImpl RelNext = Rel;
           RelNext.d.a++;
-          getRelocation(RelNext, RENext);
+          macho::RelocationEntry RENext = getRelocation(RelNext);
 
           // X86 sect diff's must be followed by a relocation of type
           // GENERIC_RELOC_PAIR.
-          bool isNextScattered = (Arch != Triple::x86_64) &&
-                               (RENext->Word0 & macho::RF_Scattered);
-          unsigned RType;
-          if (isNextScattered)
-            RType = (RENext->Word0 >> 24) & 0xF;
-          else
-            RType = (RENext->Word1 >> 28) & 0xF;
+          unsigned RType = getAnyRelocationType(RENext);
           if (RType != 1)
             report_fatal_error("Expected GENERIC_RELOC_PAIR after "
                                "GENERIC_RELOC_LOCAL_SECTDIFF.");
 
-          printRelocationTargetName(RE, fmt);
+          printRelocationTargetName(this, RE, fmt);
           fmt << "-";
-          printRelocationTargetName(RENext, fmt);
+          printRelocationTargetName(this, RENext, fmt);
           break;
         }
         case macho::RIT_Generic_TLV: {
-          printRelocationTargetName(RE, fmt);
+          printRelocationTargetName(this, RE, fmt);
           fmt << "@TLV";
-          if (isPCRel) fmt << "P";
+          if (IsPCRel) fmt << "P";
           break;
         }
         default:
-          printRelocationTargetName(RE, fmt);
+          printRelocationTargetName(this, RE, fmt);
       }
     } else { // ARM-specific relocations
       switch (Type) {
@@ -1174,33 +1125,21 @@ error_code MachOObjectFile::getRelocationValueString(DataRefImpl Rel,
         case macho::RIT_ARM_HalfDifference: { // ARM_RELOC_HALF_SECTDIFF
           // Half relocations steal a bit from the length field to encode
           // whether this is an upper16 or a lower16 relocation.
-          bool isUpper;
-          if (isScattered)
-            isUpper = (RE->Word0 >> 28) & 1;
-          else
-            isUpper = (RE->Word1 >> 25) & 1;
+          bool isUpper = getAnyRelocationLength(RE) >> 1;
 
           if (isUpper)
             fmt << ":upper16:(";
           else
             fmt << ":lower16:(";
-          printRelocationTargetName(RE, fmt);
+          printRelocationTargetName(this, RE, fmt);
 
-          InMemoryStruct<macho::RelocationEntry> RENext;
           DataRefImpl RelNext = Rel;
           RelNext.d.a++;
-          getRelocation(RelNext, RENext);
+          macho::RelocationEntry RENext = getRelocation(RelNext);
 
           // ARM half relocs must be followed by a relocation of type
           // ARM_RELOC_PAIR.
-          bool isNextScattered = (Arch != Triple::x86_64) &&
-                                 (RENext->Word0 & macho::RF_Scattered);
-          unsigned RType;
-          if (isNextScattered)
-            RType = (RENext->Word0 >> 24) & 0xF;
-          else
-            RType = (RENext->Word1 >> 28) & 0xF;
-
+          unsigned RType = getAnyRelocationType(RENext);
           if (RType != 1)
             report_fatal_error("Expected ARM_RELOC_PAIR after "
                                "GENERIC_RELOC_HALF");
@@ -1214,38 +1153,30 @@ error_code MachOObjectFile::getRelocationValueString(DataRefImpl Rel,
           // symbol/section pointer of the follow-on relocation.
           if (Type == macho::RIT_ARM_HalfDifference) {
             fmt << "-";
-            printRelocationTargetName(RENext, fmt);
+            printRelocationTargetName(this, RENext, fmt);
           }
 
           fmt << ")";
           break;
         }
         default: {
-          printRelocationTargetName(RE, fmt);
+          printRelocationTargetName(this, RE, fmt);
         }
       }
     }
   } else
-    printRelocationTargetName(RE, fmt);
+    printRelocationTargetName(this, RE, fmt);
 
   fmt.flush();
   Result.append(fmtbuf.begin(), fmtbuf.end());
   return object_error::success;
 }
 
-error_code MachOObjectFile::getRelocationHidden(DataRefImpl Rel,
-                                                bool &Result) const {
-  InMemoryStruct<macho::RelocationEntry> RE;
-  getRelocation(Rel, RE);
-
+error_code
+MachOObjectFile::getRelocationHidden(DataRefImpl Rel, bool &Result) const {
   unsigned Arch = getArch();
-  bool isScattered = (Arch != Triple::x86_64) &&
-                     (RE->Word0 & macho::RF_Scattered);
-  unsigned Type;
-  if (isScattered)
-    Type = (RE->Word0 >> 24) & 0xF;
-  else
-    Type = (RE->Word1 >> 28) & 0xF;
+  uint64_t Type;
+  getRelocationType(Rel, Type);
 
   Result = false;
 
@@ -1259,12 +1190,10 @@ error_code MachOObjectFile::getRelocationHidden(DataRefImpl Rel,
     if (Type == macho::RIT_X86_64_Unsigned && Rel.d.a > 0) {
       DataRefImpl RelPrev = Rel;
       RelPrev.d.a--;
-      InMemoryStruct<macho::RelocationEntry> REPrev;
-      getRelocation(RelPrev, REPrev);
-
-      unsigned PrevType = (REPrev->Word1 >> 28) & 0xF;
-
-      if (PrevType == macho::RIT_X86_64_Subtractor) Result = true;
+      uint64_t PrevType;
+      getRelocationType(RelPrev, PrevType);
+      if (PrevType == macho::RIT_X86_64_Subtractor)
+        Result = true;
     }
   }
 
@@ -1281,16 +1210,70 @@ error_code MachOObjectFile::getLibraryPath(DataRefImpl LibData,
   report_fatal_error("Needed libraries unimplemented in MachOObjectFile");
 }
 
+symbol_iterator MachOObjectFile::begin_symbols() const {
+  DataRefImpl DRI;
+  if (!SymtabLoadCmd)
+    return symbol_iterator(SymbolRef(DRI, this));
+
+  macho::SymtabLoadCommand Symtab = getSymtabLoadCommand();
+  DRI.p = reinterpret_cast<uintptr_t>(getPtr(this, Symtab.SymbolTableOffset));
+  return symbol_iterator(SymbolRef(DRI, this));
+}
+
+symbol_iterator MachOObjectFile::end_symbols() const {
+  DataRefImpl DRI;
+  if (!SymtabLoadCmd)
+    return symbol_iterator(SymbolRef(DRI, this));
+
+  macho::SymtabLoadCommand Symtab = getSymtabLoadCommand();
+  unsigned SymbolTableEntrySize = is64Bit() ?
+    sizeof(macho::Symbol64TableEntry) :
+    sizeof(macho::SymbolTableEntry);
+  unsigned Offset = Symtab.SymbolTableOffset +
+    Symtab.NumSymbolTableEntries * SymbolTableEntrySize;
+  DRI.p = reinterpret_cast<uintptr_t>(getPtr(this, Offset));
+  return symbol_iterator(SymbolRef(DRI, this));
+}
+
+symbol_iterator MachOObjectFile::begin_dynamic_symbols() const {
+  // TODO: implement
+  report_fatal_error("Dynamic symbols unimplemented in MachOObjectFile");
+}
+
+symbol_iterator MachOObjectFile::end_dynamic_symbols() const {
+  // TODO: implement
+  report_fatal_error("Dynamic symbols unimplemented in MachOObjectFile");
+}
+
+section_iterator MachOObjectFile::begin_sections() const {
+  DataRefImpl DRI;
+  return section_iterator(SectionRef(DRI, this));
+}
+
+section_iterator MachOObjectFile::end_sections() const {
+  DataRefImpl DRI;
+  DRI.d.a = Sections.size();
+  return section_iterator(SectionRef(DRI, this));
+}
 
-/*===-- Miscellaneous -----------------------------------------------------===*/
+library_iterator MachOObjectFile::begin_libraries_needed() const {
+  // TODO: implement
+  report_fatal_error("Needed libraries unimplemented in MachOObjectFile");
+}
+
+library_iterator MachOObjectFile::end_libraries_needed() const {
+  // TODO: implement
+  report_fatal_error("Needed libraries unimplemented in MachOObjectFile");
+}
 
 uint8_t MachOObjectFile::getBytesInAddress() const {
-  return MachOObj->is64Bit() ? 8 : 4;
+  return is64Bit() ? 8 : 4;
 }
 
 StringRef MachOObjectFile::getFileFormatName() const {
-  if (!MachOObj->is64Bit()) {
-    switch (MachOObj->getHeader().CPUType) {
+  unsigned CPUType = getCPUType(this);
+  if (!is64Bit()) {
+    switch (CPUType) {
     case llvm::MachO::CPUTypeI386:
       return "Mach-O 32-bit i386";
     case llvm::MachO::CPUTypeARM:
@@ -1298,18 +1281,18 @@ StringRef MachOObjectFile::getFileFormatName() const {
     case llvm::MachO::CPUTypePowerPC:
       return "Mach-O 32-bit ppc";
     default:
-      assert((MachOObj->getHeader().CPUType & llvm::MachO::CPUArchABI64) == 0 &&
+      assert((CPUType & llvm::MachO::CPUArchABI64) == 0 &&
              "64-bit object file when we're not 64-bit?");
       return "Mach-O 32-bit unknown";
     }
   }
 
   // Make sure the cpu type has the correct mask.
-  assert((MachOObj->getHeader().CPUType & llvm::MachO::CPUArchABI64)
+  assert((CPUType & llvm::MachO::CPUArchABI64)
 	 == llvm::MachO::CPUArchABI64 &&
 	 "32-bit object file when we're 64-bit?");
 
-  switch (MachOObj->getHeader().CPUType) {
+  switch (CPUType) {
   case llvm::MachO::CPUTypeX86_64:
     return "Mach-O 64-bit x86-64";
   case llvm::MachO::CPUTypePowerPC64:
@@ -1320,7 +1303,7 @@ StringRef MachOObjectFile::getFileFormatName() const {
 }
 
 unsigned MachOObjectFile::getArch() const {
-  switch (MachOObj->getHeader().CPUType) {
+  switch (getCPUType(this)) {
   case llvm::MachO::CPUTypeI386:
     return Triple::x86;
   case llvm::MachO::CPUTypeX86_64:
@@ -1336,5 +1319,260 @@ unsigned MachOObjectFile::getArch() const {
   }
 }
 
+StringRef MachOObjectFile::getLoadName() const {
+  // TODO: Implement
+  report_fatal_error("get_load_name() unimplemented in MachOObjectFile");
+}
+
+relocation_iterator MachOObjectFile::getSectionRelBegin(unsigned Index) const {
+  DataRefImpl DRI;
+  DRI.d.a = Index;
+  return getSectionRelBegin(DRI);
+}
+
+relocation_iterator MachOObjectFile::getSectionRelEnd(unsigned Index) const {
+  DataRefImpl DRI;
+  DRI.d.a = Index;
+  return getSectionRelEnd(DRI);
+}
+
+StringRef
+MachOObjectFile::getSectionFinalSegmentName(DataRefImpl Sec) const {
+  ArrayRef<char> Raw = getSectionRawFinalSegmentName(Sec);
+  return parseSegmentOrSectionName(Raw.data());
+}
+
+ArrayRef<char>
+MachOObjectFile::getSectionRawName(DataRefImpl Sec) const {
+  const SectionBase *Base =
+    reinterpret_cast<const SectionBase*>(Sections[Sec.d.a]);
+  return ArrayRef<char>(Base->Name);
+}
+
+ArrayRef<char>
+MachOObjectFile::getSectionRawFinalSegmentName(DataRefImpl Sec) const {
+  const SectionBase *Base =
+    reinterpret_cast<const SectionBase*>(Sections[Sec.d.a]);
+  return ArrayRef<char>(Base->SegmentName);
+}
+
+bool
+MachOObjectFile::isRelocationScattered(const macho::RelocationEntry &RE)
+  const {
+  if (getCPUType(this) == llvm::MachO::CPUTypeX86_64)
+    return false;
+  return getPlainRelocationAddress(RE) & macho::RF_Scattered;
+}
+
+unsigned MachOObjectFile::getPlainRelocationSymbolNum(const macho::RelocationEntry &RE) const {
+  if (isLittleEndian())
+    return RE.Word1 & 0xffffff;
+  return RE.Word1 >> 8;
+}
+
+bool MachOObjectFile::getPlainRelocationExternal(const macho::RelocationEntry &RE) const {
+  if (isLittleEndian())
+    return (RE.Word1 >> 27) & 1;
+  return (RE.Word1 >> 4) & 1;
+}
+
+bool
+MachOObjectFile::getScatteredRelocationScattered(const macho::RelocationEntry &RE) const {
+  return RE.Word0 >> 31;
+}
+
+uint32_t
+MachOObjectFile::getScatteredRelocationValue(const macho::RelocationEntry &RE) const {
+  return RE.Word1;
+}
+
+unsigned
+MachOObjectFile::getAnyRelocationAddress(const macho::RelocationEntry &RE) const {
+  if (isRelocationScattered(RE))
+    return getScatteredRelocationAddress(RE);
+  return getPlainRelocationAddress(RE);
+}
+
+unsigned
+MachOObjectFile::getAnyRelocationPCRel(const macho::RelocationEntry &RE) const {
+  if (isRelocationScattered(RE))
+    return getScatteredRelocationPCRel(this, RE);
+  return getPlainRelocationPCRel(this, RE);
+}
+
+unsigned
+MachOObjectFile::getAnyRelocationLength(const macho::RelocationEntry &RE) const {
+  if (isRelocationScattered(RE))
+    return getScatteredRelocationLength(RE);
+  return getPlainRelocationLength(this, RE);
+}
+
+unsigned
+MachOObjectFile::getAnyRelocationType(const macho::RelocationEntry &RE) const {
+  if (isRelocationScattered(RE))
+    return getScatteredRelocationType(RE);
+  return getPlainRelocationType(this, RE);
+}
+
+SectionRef
+MachOObjectFile::getRelocationSection(const macho::RelocationEntry &RE) const {
+  if (isRelocationScattered(RE) || getPlainRelocationExternal(RE))
+    return *end_sections();
+  unsigned SecNum = getPlainRelocationSymbolNum(RE) - 1;
+  DataRefImpl DRI;
+  DRI.d.a = SecNum;
+  return SectionRef(DRI, this);
+}
+
+MachOObjectFile::LoadCommandInfo
+MachOObjectFile::getFirstLoadCommandInfo() const {
+  MachOObjectFile::LoadCommandInfo Load;
+
+  unsigned HeaderSize = is64Bit() ? macho::Header64Size : macho::Header32Size;
+  Load.Ptr = getPtr(this, HeaderSize);
+  Load.C = getStruct<macho::LoadCommand>(this, Load.Ptr);
+  return Load;
+}
+
+MachOObjectFile::LoadCommandInfo
+MachOObjectFile::getNextLoadCommandInfo(const LoadCommandInfo &L) const {
+  MachOObjectFile::LoadCommandInfo Next;
+  Next.Ptr = L.Ptr + L.C.Size;
+  Next.C = getStruct<macho::LoadCommand>(this, Next.Ptr);
+  return Next;
+}
+
+macho::Section MachOObjectFile::getSection(DataRefImpl DRI) const {
+  return getStruct<macho::Section>(this, Sections[DRI.d.a]);
+}
+
+macho::Section64 MachOObjectFile::getSection64(DataRefImpl DRI) const {
+  return getStruct<macho::Section64>(this, Sections[DRI.d.a]);
+}
+
+macho::Section MachOObjectFile::getSection(const LoadCommandInfo &L,
+                                           unsigned Index) const {
+  const char *Sec = getSectionPtr(this, L, Index);
+  return getStruct<macho::Section>(this, Sec);
+}
+
+macho::Section64 MachOObjectFile::getSection64(const LoadCommandInfo &L,
+                                               unsigned Index) const {
+  const char *Sec = getSectionPtr(this, L, Index);
+  return getStruct<macho::Section64>(this, Sec);
+}
+
+macho::SymbolTableEntry
+MachOObjectFile::getSymbolTableEntry(DataRefImpl DRI) const {
+  const char *P = reinterpret_cast<const char *>(DRI.p);
+  return getStruct<macho::SymbolTableEntry>(this, P);
+}
+
+macho::Symbol64TableEntry
+MachOObjectFile::getSymbol64TableEntry(DataRefImpl DRI) const {
+  const char *P = reinterpret_cast<const char *>(DRI.p);
+  return getStruct<macho::Symbol64TableEntry>(this, P);
+}
+
+macho::LinkeditDataLoadCommand
+MachOObjectFile::getLinkeditDataLoadCommand(const MachOObjectFile::LoadCommandInfo &L) const {
+  return getStruct<macho::LinkeditDataLoadCommand>(this, L.Ptr);
+}
+
+macho::SegmentLoadCommand
+MachOObjectFile::getSegmentLoadCommand(const LoadCommandInfo &L) const {
+  return getStruct<macho::SegmentLoadCommand>(this, L.Ptr);
+}
+
+macho::Segment64LoadCommand
+MachOObjectFile::getSegment64LoadCommand(const LoadCommandInfo &L) const {
+  return getStruct<macho::Segment64LoadCommand>(this, L.Ptr);
+}
+
+macho::LinkerOptionsLoadCommand
+MachOObjectFile::getLinkerOptionsLoadCommand(const LoadCommandInfo &L) const {
+  return getStruct<macho::LinkerOptionsLoadCommand>(this, L.Ptr);
+}
+
+macho::RelocationEntry
+MachOObjectFile::getRelocation(DataRefImpl Rel) const {
+  const char *P = reinterpret_cast<const char *>(Rel.p);
+  return getStruct<macho::RelocationEntry>(this, P);
+}
+
+macho::Header MachOObjectFile::getHeader() const {
+  return getStruct<macho::Header>(this, getPtr(this, 0));
+}
+
+macho::Header64Ext MachOObjectFile::getHeader64Ext() const {
+  return
+    getStruct<macho::Header64Ext>(this, getPtr(this, sizeof(macho::Header)));
+}
+
+macho::IndirectSymbolTableEntry MachOObjectFile::getIndirectSymbolTableEntry(
+                                          const macho::DysymtabLoadCommand &DLC,
+                                          unsigned Index) const {
+  uint64_t Offset = DLC.IndirectSymbolTableOffset +
+    Index * sizeof(macho::IndirectSymbolTableEntry);
+  return getStruct<macho::IndirectSymbolTableEntry>(this, getPtr(this, Offset));
+}
+
+macho::DataInCodeTableEntry
+MachOObjectFile::getDataInCodeTableEntry(uint32_t DataOffset,
+                                         unsigned Index) const {
+  uint64_t Offset = DataOffset + Index * sizeof(macho::DataInCodeTableEntry);
+  return getStruct<macho::DataInCodeTableEntry>(this, getPtr(this, Offset));
+}
+
+macho::SymtabLoadCommand MachOObjectFile::getSymtabLoadCommand() const {
+  return getStruct<macho::SymtabLoadCommand>(this, SymtabLoadCmd);
+}
+
+macho::DysymtabLoadCommand MachOObjectFile::getDysymtabLoadCommand() const {
+  return getStruct<macho::DysymtabLoadCommand>(this, DysymtabLoadCmd);
+}
+
+StringRef MachOObjectFile::getStringTableData() const {
+  macho::SymtabLoadCommand S = getSymtabLoadCommand();
+  return getData().substr(S.StringTableOffset, S.StringTableSize);
+}
+
+bool MachOObjectFile::is64Bit() const {
+  return getType() == getMachOType(false, true) ||
+    getType() == getMachOType(true, true);
+}
+
+void MachOObjectFile::ReadULEB128s(uint64_t Index,
+                                   SmallVectorImpl<uint64_t> &Out) const {
+  DataExtractor extractor(ObjectFile::getData(), true, 0);
+
+  uint32_t offset = Index;
+  uint64_t data = 0;
+  while (uint64_t delta = extractor.getULEB128(&offset)) {
+    data += delta;
+    Out.push_back(data);
+  }
+}
+
+ObjectFile *ObjectFile::createMachOObjectFile(MemoryBuffer *Buffer) {
+  StringRef Magic = Buffer->getBuffer().slice(0, 4); // Selects the variant.
+  error_code ec;
+  ObjectFile *Ret;
+  if (Magic == "\xFE\xED\xFA\xCE")
+    Ret = new MachOObjectFile(Buffer, false, false, ec);
+  else if (Magic == "\xCE\xFA\xED\xFE")
+    Ret = new MachOObjectFile(Buffer, true, false, ec);
+  else if (Magic == "\xFE\xED\xFA\xCF")
+    Ret = new MachOObjectFile(Buffer, false, true, ec);
+  else if (Magic == "\xCF\xFA\xED\xFE")
+    Ret = new MachOObjectFile(Buffer, true, true, ec);
+  else
+    return NULL; // None of the four recognized magic values.
+  if (!ec)
+    return Ret;
+  delete Ret; // Construction failed: free the object instead of leaking it.
+  return NULL;
+}
+
 } // end namespace object
 } // end namespace llvm
diff --git a/lib/Object/Object.cpp b/lib/Object/Object.cpp
index f061ea7..3e2c78e 100644
--- a/lib/Object/Object.cpp
+++ b/lib/Object/Object.cpp
@@ -12,12 +12,51 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/Object/ObjectFile.h"
 #include "llvm-c/Object.h"
 
 using namespace llvm;
 using namespace object;
 
+inline ObjectFile *unwrap(LLVMObjectFileRef OF) {
+  return reinterpret_cast<ObjectFile*>(OF);
+}
+
+inline LLVMObjectFileRef wrap(const ObjectFile *OF) {
+  return reinterpret_cast<LLVMObjectFileRef>(const_cast<ObjectFile*>(OF));
+}
+
+inline section_iterator *unwrap(LLVMSectionIteratorRef SI) {
+  return reinterpret_cast<section_iterator*>(SI);
+}
+
+inline LLVMSectionIteratorRef
+wrap(const section_iterator *SI) {
+  return reinterpret_cast<LLVMSectionIteratorRef>
+    (const_cast<section_iterator*>(SI));
+}
+
+inline symbol_iterator *unwrap(LLVMSymbolIteratorRef SI) {
+  return reinterpret_cast<symbol_iterator*>(SI);
+}
+
+inline LLVMSymbolIteratorRef
+wrap(const symbol_iterator *SI) {
+  return reinterpret_cast<LLVMSymbolIteratorRef>
+    (const_cast<symbol_iterator*>(SI));
+}
+
+inline relocation_iterator *unwrap(LLVMRelocationIteratorRef SI) {
+  return reinterpret_cast<relocation_iterator*>(SI);
+}
+
+inline LLVMRelocationIteratorRef
+wrap(const relocation_iterator *SI) {
+  return reinterpret_cast<LLVMRelocationIteratorRef>
+    (const_cast<relocation_iterator*>(SI));
+}
+
 // ObjectFile creation
 LLVMObjectFileRef LLVMCreateObjectFile(LLVMMemoryBufferRef MemBuf) {
   return wrap(ObjectFile::createObjectFile(unwrap(MemBuf)));
diff --git a/lib/Object/ObjectFile.cpp b/lib/Object/ObjectFile.cpp
index 860c87b..77fd995 100644
--- a/lib/Object/ObjectFile.cpp
+++ b/lib/Object/ObjectFile.cpp
@@ -23,10 +23,16 @@ using namespace object;
 
 void ObjectFile::anchor() { }
 
-ObjectFile::ObjectFile(unsigned int Type, MemoryBuffer *source, error_code &ec)
+ObjectFile::ObjectFile(unsigned int Type, MemoryBuffer *source)
   : Binary(Type, source) {
 }
 
+error_code ObjectFile::getSymbolAlignment(DataRefImpl DRI,
+                                          uint32_t &Result) const {
+  Result = 0;
+  return object_error::success;
+}
+
 ObjectFile *ObjectFile::createObjectFile(MemoryBuffer *Object) {
   if (!Object || Object->getBufferSize() < 64)
     return 0;
diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt
index 3746a81..01565c5 100644
--- a/lib/Support/CMakeLists.txt
+++ b/lib/Support/CMakeLists.txt
@@ -7,6 +7,7 @@ add_llvm_library(LLVMSupport
   BranchProbability.cpp
   circular_raw_ostream.cpp
   CommandLine.cpp
+  Compression.cpp
   ConstantRange.cpp
   ConvertUTF.c
   ConvertUTFWrapper.cpp
diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp
index 560d7eb..18d3db5 100644
--- a/lib/Support/CommandLine.cpp
+++ b/lib/Support/CommandLine.cpp
@@ -33,6 +33,7 @@
 #include "llvm/Support/system_error.h"
 #include <cerrno>
 #include <cstdlib>
+#include <map>
 using namespace llvm;
 using namespace cl;
 
@@ -106,6 +107,17 @@ void Option::addArgument() {
   MarkOptionsChanged();
 }
 
+// This collects the different option categories that have been registered.
+typedef SmallPtrSet<OptionCategory*,16> OptionCatSet;
+static ManagedStatic<OptionCatSet> RegisteredOptionCategories;
+
+// Initialise the general option category.
+OptionCategory llvm::cl::GeneralCategory("General options");
+
+// Record this category in the set of registered option categories.
+void OptionCategory::registerCategory() {
+  RegisteredOptionCategories->insert(this);
+}
 
 //===----------------------------------------------------------------------===//
 // Basic, shared command line option processing machinery.
@@ -1222,11 +1234,20 @@ sortOpts(StringMap<Option*> &OptMap,
 namespace {
 
 class HelpPrinter {
+protected:
   const bool ShowHidden;
+  typedef SmallVector<std::pair<const char *, Option*>,128> StrOptionPairVector;
+  // Print the options. Opts is assumed to be alphabetically sorted.
+  virtual void printOptions(StrOptionPairVector &Opts, size_t MaxArgLen) {
+    for (size_t i = 0, e = Opts.size(); i != e; ++i)
+      Opts[i].second->printOptionInfo(MaxArgLen);
+  }
 
 public:
   explicit HelpPrinter(bool showHidden) : ShowHidden(showHidden) {}
+  virtual ~HelpPrinter() {}
 
+  // Invoke the printer.
   void operator=(bool Value) {
     if (Value == false) return;
 
@@ -1236,7 +1257,7 @@ public:
     StringMap<Option*> OptMap;
     GetOptionInfo(PositionalOpts, SinkOpts, OptMap);
 
-    SmallVector<std::pair<const char *, Option*>, 128> Opts;
+    StrOptionPairVector Opts;
     sortOpts(OptMap, Opts, ShowHidden);
 
     if (ProgramOverview)
@@ -1267,12 +1288,12 @@ public:
       MaxArgLen = std::max(MaxArgLen, Opts[i].second->getOptionWidth());
 
     outs() << "OPTIONS:\n";
-    for (size_t i = 0, e = Opts.size(); i != e; ++i)
-      Opts[i].second->printOptionInfo(MaxArgLen);
+    printOptions(Opts, MaxArgLen);
 
     // Print any extra help the user has declared.
     for (std::vector<const char *>::iterator I = MoreHelp->begin(),
-          E = MoreHelp->end(); I != E; ++I)
+                                             E = MoreHelp->end();
+         I != E; ++I)
       outs() << *I;
     MoreHelp->clear();
 
@@ -1280,21 +1301,152 @@ public:
     exit(1);
   }
 };
+
+class CategorizedHelpPrinter : public HelpPrinter {
+public:
+  explicit CategorizedHelpPrinter(bool showHidden) : HelpPrinter(showHidden) {}
+
+  // Helper function for printOptions().
+  // It shall return true if A's name should be lexicographically
+  // ordered before B's name. It returns false otherwise.
+  static bool OptionCategoryCompare(OptionCategory *A, OptionCategory *B) {
+    int Length = strcmp(A->getName(), B->getName());
+    assert(Length != 0 && "Duplicate option categories");
+    return Length < 0;
+  }
+
+  // Make sure we inherit our base class's operator=()
+  using HelpPrinter::operator= ;
+
+protected:
+  virtual void printOptions(StrOptionPairVector &Opts, size_t MaxArgLen) {
+    std::vector<OptionCategory *> SortedCategories;
+    std::map<OptionCategory *, std::vector<Option *> > CategorizedOptions;
+
+    // Collect registered option categories into vector in preparation for
+    // sorting.
+    for (OptionCatSet::const_iterator I = RegisteredOptionCategories->begin(),
+                                      E = RegisteredOptionCategories->end();
+         I != E; ++I)
+      SortedCategories.push_back(*I);
+
+    // Sort the different option categories alphabetically.
+    assert(SortedCategories.size() > 0 && "No option categories registered!");
+    std::sort(SortedCategories.begin(), SortedCategories.end(),
+              OptionCategoryCompare);
+
+    // Create map to empty vectors.
+    for (std::vector<OptionCategory *>::const_iterator
+             I = SortedCategories.begin(),
+             E = SortedCategories.end();
+         I != E; ++I)
+      CategorizedOptions[*I] = std::vector<Option *>();
+
+    // Walk through pre-sorted options and assign into categories.
+    // Because the options are already alphabetically sorted the
+    // options within categories will also be alphabetically sorted.
+    for (size_t I = 0, E = Opts.size(); I != E; ++I) {
+      Option *Opt = Opts[I].second;
+      assert(CategorizedOptions.count(Opt->Category) > 0 &&
+             "Option has an unregistered category");
+      CategorizedOptions[Opt->Category].push_back(Opt);
+    }
+
+    // Now do printing.
+    for (std::vector<OptionCategory *>::const_iterator
+             Category = SortedCategories.begin(),
+             E = SortedCategories.end();
+         Category != E; ++Category) {
+      // Hide empty categories for -help, but show for -help-hidden.
+      bool IsEmptyCategory = CategorizedOptions[*Category].size() == 0;
+      if (!ShowHidden && IsEmptyCategory)
+        continue;
+
+      // Print category information.
+      outs() << "\n";
+      outs() << (*Category)->getName() << ":\n";
+
+      // Check if description is set.
+      if ((*Category)->getDescription() != 0)
+        outs() << (*Category)->getDescription() << "\n\n";
+      else
+        outs() << "\n";
+
+      // When using -help-hidden explicitly state if the category has no
+      // options associated with it.
+      if (IsEmptyCategory) {
+        outs() << "  This option category has no options.\n";
+        continue;
+      }
+      // Loop over the options in the category and print.
+      for (std::vector<Option *>::const_iterator
+               Opt = CategorizedOptions[*Category].begin(),
+               E = CategorizedOptions[*Category].end();
+           Opt != E; ++Opt)
+        (*Opt)->printOptionInfo(MaxArgLen);
+    }
+  }
+};
+
+// This wraps the Uncategorizing and Categorizing printers and decides
+// at run time which should be invoked.
+class HelpPrinterWrapper {
+private:
+  HelpPrinter &UncategorizedPrinter;
+  CategorizedHelpPrinter &CategorizedPrinter;
+
+public:
+  explicit HelpPrinterWrapper(HelpPrinter &UncategorizedPrinter,
+                              CategorizedHelpPrinter &CategorizedPrinter) :
+    UncategorizedPrinter(UncategorizedPrinter),
+    CategorizedPrinter(CategorizedPrinter) { }
+
+  // Invoke the printer.
+  void operator=(bool Value);
+};
+
 } // End anonymous namespace
 
-// Define the two HelpPrinter instances that are used to print out help, or
-// help-hidden...
-//
-static HelpPrinter NormalPrinter(false);
-static HelpPrinter HiddenPrinter(true);
+// Declare the four HelpPrinter instances that are used to print out help, or
+// help-hidden as an uncategorized list or in categories.
+static HelpPrinter UncategorizedNormalPrinter(false);
+static HelpPrinter UncategorizedHiddenPrinter(true);
+static CategorizedHelpPrinter CategorizedNormalPrinter(false);
+static CategorizedHelpPrinter CategorizedHiddenPrinter(true);
+
 
+// Declare HelpPrinter wrappers that will decide whether or not to invoke
+// a categorizing help printer
+static HelpPrinterWrapper WrappedNormalPrinter(UncategorizedNormalPrinter,
+                                               CategorizedNormalPrinter);
+static HelpPrinterWrapper WrappedHiddenPrinter(UncategorizedHiddenPrinter,
+                                               CategorizedHiddenPrinter);
+
+// Define uncategorized help printers.
+// -help-list is hidden by default because if Option categories are being used
+// then -help behaves the same as -help-list.
 static cl::opt<HelpPrinter, true, parser<bool> >
-HOp("help", cl::desc("Display available options (-help-hidden for more)"),
-    cl::location(NormalPrinter), cl::ValueDisallowed);
+HLOp("help-list",
+     cl::desc("Display list of available options (-help-list-hidden for more)"),
+     cl::location(UncategorizedNormalPrinter), cl::Hidden, cl::ValueDisallowed);
 
 static cl::opt<HelpPrinter, true, parser<bool> >
+HLHOp("help-list-hidden",
+     cl::desc("Display list of all available options"),
+     cl::location(UncategorizedHiddenPrinter), cl::Hidden, cl::ValueDisallowed);
+
+// Define uncategorized/categorized help printers. These printers change their
+// behaviour at runtime depending on whether one or more Option categories have
+// been declared.
+static cl::opt<HelpPrinterWrapper, true, parser<bool> >
+HOp("help", cl::desc("Display available options (-help-hidden for more)"),
+    cl::location(WrappedNormalPrinter), cl::ValueDisallowed);
+
+static cl::opt<HelpPrinterWrapper, true, parser<bool> >
 HHOp("help-hidden", cl::desc("Display all available options"),
-     cl::location(HiddenPrinter), cl::Hidden, cl::ValueDisallowed);
+     cl::location(WrappedHiddenPrinter), cl::Hidden, cl::ValueDisallowed);
+
+
 
 static cl::opt<bool>
 PrintOptions("print-options",
@@ -1306,6 +1458,24 @@ PrintAllOptions("print-all-options",
                 cl::desc("Print all option values after command line parsing"),
                 cl::Hidden, cl::init(false));
 
+void HelpPrinterWrapper::operator=(bool Value) {
+  if (!Value)
+    return;
+
+  // With at most one registered option category there is nothing to group,
+  // so the plain, uncategorized listing is used.
+  if (RegisteredOptionCategories->size() <= 1) {
+    UncategorizedPrinter = true; // Invoke uncategorized printer
+    return;
+  }
+
+  // Categorized help is about to be shown, so unhide the -help-list option:
+  // the user can then still ask for uncategorized output if they want it.
+  HLOp.setHiddenFlag(NotHidden);
+
+  CategorizedPrinter = true; // Invoke categorized printer
+}
+
 // Print the value of each option.
 void cl::PrintOptionValues() {
   if (!PrintOptions && !PrintAllOptions) return;
@@ -1393,14 +1563,22 @@ VersOp("version", cl::desc("Display the version of this program"),
     cl::location(VersionPrinterInstance), cl::ValueDisallowed);
 
 // Utility function for printing the help message.
-void cl::PrintHelpMessage() {
-  // This looks weird, but it actually prints the help message. The
-  // NormalPrinter variable is a HelpPrinter and the help gets printed when
-  // its operator= is invoked. That's because the "normal" usages of the
-  // help printer is to be assigned true/false depending on whether the
-  // -help option was given or not. Since we're circumventing that we have
-  // to make it look like -help was given, so we assign true.
-  NormalPrinter = true;
+void cl::PrintHelpMessage(bool Hidden, bool Categorized) {
+  // Assigning true to a printer is what actually prints the help: the output
+  // is produced by the printer's operator=, since in normal use a printer is
+  // the storage location of a help option and is assigned when the option
+  // is parsed. Assigning true here makes it look as if the option was given.
+  if (Categorized) {
+    if (Hidden)
+      CategorizedHiddenPrinter = true;
+    else
+      CategorizedNormalPrinter = true;
+  } else {
+    if (Hidden)
+      UncategorizedHiddenPrinter = true;
+    else
+      UncategorizedNormalPrinter = true;
+  }
+}
 
 /// Utility function for printing version number.
@@ -1418,3 +1596,13 @@ void cl::AddExtraVersionPrinter(void (*func)()) {
 
   ExtraVersionPrinters->push_back(func);
 }
+
+// Collects the registered options into Map, which must be empty on entry.
+void cl::getRegisteredOptions(StringMap<Option*> &Map) {
+  assert(Map.size() == 0 && "StringMap must be empty");
+  // GetOptionInfo() also fills in positional and sink option lists; only Map
+  // is of interest here, so those two results are simply discarded.
+  SmallVector<Option*, 4> IgnoredPositional;
+  SmallVector<Option*, 4> IgnoredSink;
+  GetOptionInfo(IgnoredPositional, IgnoredSink, Map);
+}
diff --git a/lib/Support/Compression.cpp b/lib/Support/Compression.cpp
new file mode 100644
index 0000000..fd8a874
--- /dev/null
+++ b/lib/Support/Compression.cpp
@@ -0,0 +1,97 @@
+//===--- Compression.cpp - Compression implementation ---------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file implements compression functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Compression.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Config/config.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MemoryBuffer.h"
+#if LLVM_ENABLE_ZLIB == 1 && HAVE_ZLIB_H
+#include <zlib.h>
+#endif
+
+using namespace llvm;
+
+#if LLVM_ENABLE_ZLIB == 1 && HAVE_LIBZ
+static int encodeZlibCompressionLevel(zlib::CompressionLevel Level) {
+  switch (Level) { // Translate the enum into zlib's numeric level.
+  case zlib::NoCompression:        return Z_NO_COMPRESSION;
+  case zlib::BestSpeedCompression: return Z_BEST_SPEED;
+  case zlib::DefaultCompression:   return Z_DEFAULT_COMPRESSION;
+  case zlib::BestSizeCompression:  return Z_BEST_COMPRESSION;
+  }
+  llvm_unreachable("Invalid zlib::CompressionLevel!");
+}
+
+static zlib::Status encodeZlibReturnValue(int ReturnValue) {
+  // Translate a zlib result code into the matching zlib::Status.  Any code
+  // not listed here is treated as unreachable.
+  if (ReturnValue == Z_OK)           return zlib::StatusOK;
+  if (ReturnValue == Z_MEM_ERROR)    return zlib::StatusOutOfMemory;
+  if (ReturnValue == Z_BUF_ERROR)    return zlib::StatusBufferTooShort;
+  if (ReturnValue == Z_STREAM_ERROR) return zlib::StatusInvalidArg;
+  if (ReturnValue == Z_DATA_ERROR)   return zlib::StatusInvalidData;
+  llvm_unreachable("unknown zlib return status!");
+}
+
+bool zlib::isAvailable() { return true; } // This branch is built with zlib.
+zlib::Status zlib::compress(StringRef InputBuffer,
+                            OwningPtr<MemoryBuffer> &CompressedBuffer,
+                            CompressionLevel Level) {
+  // Compress into a worst-case-sized scratch buffer; copy out on success.
+  unsigned long OutSize = ::compressBound(InputBuffer.size());
+  OwningArrayPtr<char> Scratch(new char[OutSize]);
+  Status Res = encodeZlibReturnValue(::compress2(
+      (Bytef *)Scratch.get(), &OutSize, (const Bytef *)InputBuffer.data(),
+      InputBuffer.size(), encodeZlibCompressionLevel(Level)));
+  if (Res != StatusOK)
+    return Res;
+  CompressedBuffer.reset(
+      MemoryBuffer::getMemBufferCopy(StringRef(Scratch.get(), OutSize)));
+  // Tell MSan that memory initialized by zlib is valid.
+  __msan_unpoison(CompressedBuffer->getBufferStart(), OutSize);
+  return Res;
+}
+
+zlib::Status zlib::uncompress(StringRef InputBuffer,
+                              OwningPtr<MemoryBuffer> &UncompressedBuffer,
+                              size_t UncompressedSize) {
+  OwningArrayPtr<char> TmpBuffer(new char[UncompressedSize]);
+  uLongf DestLen = UncompressedSize; // zlib's length type, not size_t.
+  Status Res = encodeZlibReturnValue(::uncompress(
+      (Bytef *)TmpBuffer.get(), &DestLen, (const Bytef *)InputBuffer.data(),
+      InputBuffer.size()));
+  if (Res == StatusOK) {
+    UncompressedBuffer.reset(
+        MemoryBuffer::getMemBufferCopy(StringRef(TmpBuffer.get(), DestLen)));
+    // Tell MSan that memory initialized by zlib is valid.
+    __msan_unpoison(UncompressedBuffer->getBufferStart(), DestLen);
+  }
+  return Res;
+}
+#else
+bool zlib::isAvailable() { return false; } // Built without zlib support.
+zlib::Status zlib::compress(StringRef InputBuffer,
+                            OwningPtr<MemoryBuffer> &CompressedBuffer,
+                            CompressionLevel Level) {
+  return zlib::StatusUnsupported; // No zlib: CompressedBuffer is untouched.
+}
+zlib::Status zlib::uncompress(StringRef InputBuffer,
+                              OwningPtr<MemoryBuffer> &UncompressedBuffer,
+                              size_t UncompressedSize) {
+  return zlib::StatusUnsupported; // No zlib: UncompressedBuffer is untouched.
+}
+#endif
+
diff --git a/lib/Support/DataExtractor.cpp b/lib/Support/DataExtractor.cpp
index 3d5cce0..a564d21 100644
--- a/lib/Support/DataExtractor.cpp
+++ b/lib/Support/DataExtractor.cpp
@@ -20,7 +20,7 @@ static T getU(uint32_t *offset_ptr, const DataExtractor *de,
   uint32_t offset = *offset_ptr;
   if (de->isValidOffsetForDataOfSize(offset, sizeof(val))) {
     std::memcpy(&val, &Data[offset], sizeof(val));
-    if (sys::isLittleEndianHost() != isLittleEndian)
+    if (sys::IsLittleEndianHost != isLittleEndian)
       val = sys::SwapByteOrder(val);
 
     // Advance the offset
diff --git a/lib/Support/FoldingSet.cpp b/lib/Support/FoldingSet.cpp
index 36e33b5..145f12d 100644
--- a/lib/Support/FoldingSet.cpp
+++ b/lib/Support/FoldingSet.cpp
@@ -101,7 +101,7 @@ void FoldingSetNodeID::AddString(StringRef String) {
     // Otherwise do it the hard way.
     // To be compatible with above bulk transfer, we need to take endianness
     // into account.
-    if (sys::isBigEndianHost()) {
+    if (sys::IsBigEndianHost) {
       for (Pos += 4; Pos <= Size; Pos += 4) {
         unsigned V = ((unsigned char)String[Pos - 4] << 24) |
                      ((unsigned char)String[Pos - 3] << 16) |
@@ -110,7 +110,7 @@ void FoldingSetNodeID::AddString(StringRef String) {
         Bits.push_back(V);
       }
     } else {
-      assert(sys::isLittleEndianHost() && "Unexpected host endianness");
+      assert(sys::IsLittleEndianHost && "Unexpected host endianness");
       for (Pos += 4; Pos <= Size; Pos += 4) {
         unsigned V = ((unsigned char)String[Pos - 1] << 24) |
                      ((unsigned char)String[Pos - 2] << 16) |
diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp
index 73d98d1..a7c7a95 100644
--- a/lib/Support/Host.cpp
+++ b/lib/Support/Host.cpp
@@ -112,19 +112,19 @@ static bool GetX86CpuIDAndInfo(unsigned value, unsigned *rEAX,
 #endif
 }
 
-static bool OSHasAVXSupport() {
-#if defined(__GNUC__)
-  // Check xgetbv; this uses a .byte sequence instead of the instruction 
-  // directly because older assemblers do not include support for xgetbv and 
-  // there is no easy way to conditionally compile based on the assembler used.
-  int rEAX, rEDX;
-  __asm__ (".byte 0x0f, 0x01, 0xd0" : "=a" (rEAX), "=d" (rEDX) : "c" (0));
-#elif defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 160040219
-  unsigned long long rEAX = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
-#else
-  int rEAX = 0; // Ensures we return false
-#endif
-  return (rEAX & 6) == 6;
+static bool OSHasAVXSupport() {
+#if defined(__GNUC__)
+  // Check xgetbv; this uses a .byte sequence instead of the instruction
+  // directly because older assemblers do not include support for xgetbv and
+  // there is no easy way to conditionally compile based on the assembler used.
+  int rEAX, rEDX; // rEDX is written by the asm below but never read.
+  __asm__ (".byte 0x0f, 0x01, 0xd0" : "=a" (rEAX), "=d" (rEDX) : "c" (0));
+#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
+  unsigned long long rEAX = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
+#else
+  int rEAX = 0; // Ensures we return false
+#endif
+  return (rEAX & 6) == 6; // True only when bits 1 and 2 are both set.
 }
 
 static void DetectX86FamilyModel(unsigned EAX, unsigned &Family,
@@ -355,10 +355,15 @@ std::string sys::getHostCPUName() {
       case 20:
         return "btver1";
       case 21:
-        if (Model <= 15)
-          return "bdver1";
-        else if (Model <= 31)
+        if (!HasAVX) // If the OS doesn't support AVX provide a sane fallback.
+          return "btver1";
+        if (Model > 15 && Model <= 31)
           return "bdver2";
+        return "bdver1";
+      case 22:
+        if (!HasAVX) // If the OS doesn't support AVX provide a sane fallback.
+          return "btver1";
+        return "btver2";
     default:
       return "generic";
     }
@@ -608,7 +613,7 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features){
 #endif
 
 std::string sys::getProcessTriple() {
-  Triple PT(LLVM_HOSTTRIPLE);
+  Triple PT(LLVM_HOST_TRIPLE);
 
   if (sizeof(void *) == 8 && PT.isArch32Bit())
     PT = PT.get64BitArchVariant();
diff --git a/lib/Support/LockFileManager.cpp b/lib/Support/LockFileManager.cpp
index 92d8b83..2917e27 100644
--- a/lib/Support/LockFileManager.cpp
+++ b/lib/Support/LockFileManager.cpp
@@ -174,8 +174,8 @@ void LockFileManager::waitForUnlock() {
   Interval.tv_sec = 0;
   Interval.tv_nsec = 1000000;
 #endif
-  // Don't wait more than an hour for the file to appear.
-  const unsigned MaxSeconds = 3600;
+  // Don't wait more than five minutes for the file to appear.
+  unsigned MaxSeconds = 300;
   bool LockFileGone = false;
   do {
     // Sleep for the designated interval, to allow the owning process time to
@@ -187,21 +187,48 @@ void LockFileManager::waitForUnlock() {
 #else
     nanosleep(&Interval, NULL);
 #endif
-    // If the lock file no longer exists, wait for the actual file.
     bool Exists = false;
+    bool LockFileJustDisappeared = false;
+
+    // If the lock file is still expected to be there, check whether it still
+    // is.
     if (!LockFileGone) {
       if (!sys::fs::exists(LockFileName.str(), Exists) && !Exists) {
         LockFileGone = true;
+        LockFileJustDisappeared = true;
         Exists = false;
       }
     }
+
+    // If the lock file is no longer there, check if the original file is
+    // available now.
     if (LockFileGone) {
-      if (!sys::fs::exists(FileName.str(), Exists) && Exists)
+      if (!sys::fs::exists(FileName.str(), Exists) && Exists) {
         return;
+      }
+
+      // The lock file is gone, so now we're waiting for the original file to
+      // show up. If this just happened, reset our waiting intervals and keep
+      // waiting.
+      if (LockFileJustDisappeared) {
+        MaxSeconds = 5;
+
+#if LLVM_ON_WIN32
+        Interval = 1;
+#else
+        Interval.tv_sec = 0;
+        Interval.tv_nsec = 1000000;
+#endif
+        continue;
+      }
     }
 
-    if (!processStillExecuting((*Owner).first, (*Owner).second))
+    // If we're looking for the lock file to disappear, but the process
+    // owning the lock died without cleaning up, just bail out.
+    if (!LockFileGone &&
+        !processStillExecuting((*Owner).first, (*Owner).second)) {
       return;
+    }
 
     // Exponentially increase the time we wait for the lock to be removed.
 #if LLVM_ON_WIN32
diff --git a/lib/Support/PathV2.cpp b/lib/Support/PathV2.cpp
index 58a6ea7..ac53a9e9 100644
--- a/lib/Support/PathV2.cpp
+++ b/lib/Support/PathV2.cpp
@@ -789,8 +789,11 @@ file_magic identify_magic(StringRef magic) {
 
     case '\177':
       if (magic[1] == 'E' && magic[2] == 'L' && magic[3] == 'F') {
-        if (magic.size() >= 18 && magic[17] == 0)
-          switch (magic[16]) {
+        // Don't read the data-encoding byte magic[5] from a too-short buffer.
+        bool Data2MSB = magic.size() >= 18 && magic[5] == 2;
+        unsigned high = Data2MSB ? 16 : 17, low = Data2MSB ? 17 : 16;
+        if (magic.size() >= 18 && magic[high] == 0)
+          switch (magic[low]) {
             default: break;
             case 1: return file_magic::elf_relocatable;
             case 2: return file_magic::elf_executable;
diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
index d2508ac..412e34c 100644
--- a/lib/Support/Triple.cpp
+++ b/lib/Support/Triple.cpp
@@ -32,6 +32,7 @@ const char *Triple::getArchTypeName(ArchType Kind) {
   case r600:    return "r600";
   case sparc:   return "sparc";
   case sparcv9: return "sparcv9";
+  case systemz: return "s390x";
   case tce:     return "tce";
   case thumb:   return "thumb";
   case x86:     return "i386";
@@ -76,6 +77,8 @@ const char *Triple::getArchTypePrefix(ArchType Kind) {
   case sparcv9:
   case sparc:   return "sparc";
 
+  case systemz: return "systemz";
+
   case x86:
   case x86_64:  return "x86";
 
@@ -170,6 +173,7 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
     .Case("hexagon", hexagon)
     .Case("sparc", sparc)
     .Case("sparcv9", sparcv9)
+    .Case("systemz", systemz)
     .Case("tce", tce)
     .Case("thumb", thumb)
     .Case("x86", x86)
@@ -233,6 +237,7 @@ static Triple::ArchType parseArch(StringRef ArchName) {
     .Case("mips64el", Triple::mips64el)
     .Case("r600", Triple::r600)
     .Case("hexagon", Triple::hexagon)
+    .Case("s390x", Triple::systemz)
     .Case("sparc", Triple::sparc)
     .Case("sparcv9", Triple::sparcv9)
     .Case("tce", Triple::tce)
@@ -687,6 +692,7 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) {
   case llvm::Triple::nvptx64:
   case llvm::Triple::ppc64:
   case llvm::Triple::sparcv9:
+  case llvm::Triple::systemz:
   case llvm::Triple::x86_64:
   case llvm::Triple::spir64:
     return 64;
@@ -712,6 +718,7 @@ Triple Triple::get32BitArchVariant() const {
   case Triple::UnknownArch:
   case Triple::aarch64:
   case Triple::msp430:
+  case Triple::systemz:
     T.setArch(UnknownArch);
     break;
 
@@ -769,6 +776,7 @@ Triple Triple::get64BitArchVariant() const {
   case Triple::nvptx64:
   case Triple::ppc64:
   case Triple::sparcv9:
+  case Triple::systemz:
   case Triple::x86_64:
     // Already 64-bit.
     break;
diff --git a/lib/Support/Unix/Memory.inc b/lib/Support/Unix/Memory.inc
index e9b26bd..72a8af6 100644
--- a/lib/Support/Unix/Memory.inc
+++ b/lib/Support/Unix/Memory.inc
@@ -325,7 +325,7 @@ void Memory::InvalidateInstructionCache(const void *Addr,
   for (intptr_t Line = StartLine; Line < EndLine; Line += LineSize)
     asm volatile("icbi 0, %0" : : "r"(Line));
   asm volatile("isync");
-#  elif defined(__arm__) && defined(__GNUC__)
+#  elif (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__)
   // FIXME: Can we safely always call this for __GNUC__ everywhere?
   const char *Start = static_cast<const char *>(Addr);
   const char *End = Start + Len;
diff --git a/lib/Support/Unix/PathV2.inc b/lib/Support/Unix/PathV2.inc
index a3dfd4b..7e0aead 100644
--- a/lib/Support/Unix/PathV2.inc
+++ b/lib/Support/Unix/PathV2.inc
@@ -430,9 +430,7 @@ rety_open_create:
     if (SavedErrno == errc::file_exists)
       goto retry_random_path;
     // If path prefix doesn't exist, try to create it.
-    if (SavedErrno == errc::no_such_file_or_directory &&
-        !exists(path::parent_path(RandomPath)) &&
-        !TriedToCreateParent) {
+    if (SavedErrno == errc::no_such_file_or_directory && !TriedToCreateParent) {
       TriedToCreateParent = true;
       StringRef p(RandomPath);
       SmallString<64> dir_to_create;
diff --git a/lib/Support/Unix/Program.inc b/lib/Support/Unix/Program.inc
index 117151c..aa03d48 100644
--- a/lib/Support/Unix/Program.inc
+++ b/lib/Support/Unix/Program.inc
@@ -32,6 +32,9 @@
 #if HAVE_FCNTL_H
 #include <fcntl.h>
 #endif
+#if HAVE_UNISTD_H
+#include <unistd.h>
+#endif
 #ifdef HAVE_POSIX_SPAWN
 #include <spawn.h>
 #if !defined(__APPLE__)
@@ -409,4 +412,25 @@ error_code Program::ChangeStderrToBinary(){
   return make_error_code(errc::success);
 }
 
+// Returns true when Args fits in half of ARG_MAX (or no limit is reported).
+bool llvm::sys::argumentsFitWithinSystemLimits(ArrayRef<const char*> Args) {
+  // Cached once; never modify it, or later calls would see a shrunken limit.
+  static const long ArgMax = sysconf(_SC_ARG_MAX);
+
+  // System says no practical limit.
+  if (ArgMax == -1)
+    return true;
+
+  // Conservatively account for space required by environment variables.
+  const size_t HalfArgMax = size_t(ArgMax / 2);
+  size_t ArgLength = 0;
+  for (ArrayRef<const char*>::iterator I = Args.begin(), E = Args.end();
+       I != E; ++I) {
+    ArgLength += strlen(*I) + 1; // +1 for the terminating NUL.
+    if (ArgLength > HalfArgMax)
+      return false;
+  }
+  return true;
+}
+
 }
diff --git a/lib/Support/Unix/Signals.inc b/lib/Support/Unix/Signals.inc
index 66338f1..64d1fc1 100644
--- a/lib/Support/Unix/Signals.inc
+++ b/lib/Support/Unix/Signals.inc
@@ -27,10 +27,12 @@
 #if HAVE_SYS_STAT_H
 #include <sys/stat.h>
 #endif
-#if HAVE_DLFCN_H && __GNUG__
-#include <dlfcn.h>
+#if HAVE_CXXABI_H
 #include <cxxabi.h>
 #endif
+#if HAVE_DLFCN_H
+#include <dlfcn.h>
+#endif
 #if HAVE_MACH_MACH_H
 #include <mach/mach.h>
 #endif
@@ -184,6 +186,15 @@ static RETSIGTYPE SignalHandler(int Sig) {
   // Otherwise if it is a fault (like SEGV) run any handler.
   for (unsigned i = 0, e = CallBacksToRun.size(); i != e; ++i)
     CallBacksToRun[i].first(CallBacksToRun[i].second);
+
+#ifdef __s390__
+  // On S/390, certain signals are delivered with PSW Address pointing to
+  // *after* the faulting instruction.  Simply returning from the signal
+  // handler would continue execution after that point, instead of
+  // re-raising the signal.  Raise the signal manually in those cases.
+  if (Sig == SIGILL || Sig == SIGFPE || Sig == SIGTRAP)
+    raise(Sig);
+#endif
 }
 
 void llvm::sys::RunInterruptHandlers() {
@@ -290,9 +301,13 @@ void llvm::sys::PrintStackTrace(FILE *FD) {
             (int)(sizeof(void*) * 2) + 2, (unsigned long)StackTrace[i]);
 
     if (dlinfo.dli_sname != NULL) {
-      int res;
       fputc(' ', FD);
+#  if HAVE_CXXABI_H
+      int res;
       char* d = abi::__cxa_demangle(dlinfo.dli_sname, NULL, NULL, &res);
+#  else
+      char* d = NULL;
+#  endif
       if (d == NULL) fputs(dlinfo.dli_sname, FD);
       else           fputs(d, FD);
       free(d);
diff --git a/lib/Support/Windows/Program.inc b/lib/Support/Windows/Program.inc
index 691d6d4..619ae5d 100644
--- a/lib/Support/Windows/Program.inc
+++ b/lib/Support/Windows/Program.inc
@@ -126,20 +126,58 @@ static bool ArgNeedsQuotes(const char *Str) {
   return Str[0] == '\0' || strpbrk(Str, "\t \"&\'()*<>\\`^|") != 0;
 }
 
+/// CountPrecedingBackslashes - Returns the number of backslashes preceding Cur
+/// in the C string Start.
+static unsigned int CountPrecedingBackslashes(const char *Start,
+                                              const char *Cur) {
+  unsigned int Count = 0;
+  // Test against Start before stepping back: never point before the string.
+  while (Cur != Start && Cur[-1] == '\\') {
+    ++Count;
+    --Cur;
+  }
+  return Count;
+}
+
+/// EscapePrecedingEscapes - Writes one extra backslash to Dst for each
+/// backslash that precedes Cur in the Start string and returns the advanced
+/// Dst.  The caller must ensure Dst has enough space.
+static char *EscapePrecedingEscapes(char *Dst, const char *Start,
+                                    const char *Cur) {
+  // Emit exactly as many backslashes as CountPrecedingBackslashes reports,
+  // counting down to zero.
+  for (unsigned Left = CountPrecedingBackslashes(Start, Cur); Left; --Left)
+    *Dst++ = '\\';
+  return Dst;
+}
 
 /// ArgLenWithQuotes - Check whether argument needs to be quoted when calling
 /// CreateProcess and returns length of quoted arg with escaped quotes
 static unsigned int ArgLenWithQuotes(const char *Str) {
-  unsigned int len = ArgNeedsQuotes(Str) ? 2 : 0;
+  const char *Start = Str;
+  bool Quoted = ArgNeedsQuotes(Str);
+  unsigned int len = Quoted ? 2 : 0;
 
   while (*Str != '\0') {
-    if (*Str == '\"')
-      ++len;
+    if (*Str == '\"') {
+      // We need to add a backslash, but ensure that it isn't escaped.
+      unsigned PrecedingEscapes = CountPrecedingBackslashes(Start, Str);
+      len += PrecedingEscapes + 1;
+    }
+    // Note that we *don't* need to escape runs of backslashes that don't
+    // precede a double quote!  See MSDN:
+    // http://msdn.microsoft.com/en-us/library/17w5ykft%28v=vs.85%29.aspx
 
     ++len;
     ++Str;
   }
 
+  if (Quoted) {
+    // Make sure the closing quote doesn't get escaped by a trailing backslash.
+    unsigned PrecedingEscapes = CountPrecedingBackslashes(Start, Str);
+    len += PrecedingEscapes + 1;
+  }
+
   return len;
 }
 
@@ -180,20 +218,27 @@ Program::Execute(const Path& path,
 
   for (unsigned i = 0; args[i]; i++) {
     const char *arg = args[i];
+    const char *start = arg;
 
     bool needsQuoting = ArgNeedsQuotes(arg);
     if (needsQuoting)
       *p++ = '"';
 
     while (*arg != '\0') {
-      if (*arg == '\"')
+      if (*arg == '\"') {
+        // Escape all preceding escapes (if any), and then escape the quote.
+        p = EscapePrecedingEscapes(p, start, arg);
         *p++ = '\\';
+      }
 
       *p++ = *arg++;
     }
 
-    if (needsQuoting)
+    if (needsQuoting) {
+      // Make sure our quote doesn't get escaped by a trailing backslash.
+      p = EscapePrecedingEscapes(p, start, arg);
       *p++ = '"';
+    }
     *p++ = ' ';
   }
 
@@ -396,4 +441,20 @@ error_code Program::ChangeStderrToBinary(){
   return make_error_code(errc::success);
 }
 
+bool llvm::sys::argumentsFitWithinSystemLimits(ArrayRef<const char*> Args) {
+  // CreateProcess documents this as the longest command line it accepts.
+  static const size_t MaxCommandStringLength = 32768;
+
+  size_t Needed = 0;
+  for (size_t Idx = 0, Count = Args.size(); Idx != Count; ++Idx) {
+    // Each argument costs its quoted/escaped length plus one more character:
+    // the separating space, or the terminating NUL after the final argument.
+    Needed += ArgLenWithQuotes(Args[Idx]) + 1;
+    if (Needed > MaxCommandStringLength)
+      return false;
+  }
+
+  return true;
+}
+
 }
diff --git a/lib/Support/Windows/Signals.inc b/lib/Support/Windows/Signals.inc
index 3dd6660..b18b4d1 100644
--- a/lib/Support/Windows/Signals.inc
+++ b/lib/Support/Windows/Signals.inc
@@ -178,6 +178,19 @@ namespace llvm {
 //===----------------------------------------------------------------------===//
 
 #ifdef _MSC_VER
+/// AvoidMessageBoxHook - CRT report hook that behaves as if the user had
+/// clicked "retry" in the "abort, retry, ignore" debug report dialog.  "retry"
+/// raises an exception which ultimately triggers our stack dumper.
+static int AvoidMessageBoxHook(int ReportType, char *Message, int *Return) {
+  // 1 is the retry code for the return value of _CrtDbgReport:
+  // http://msdn.microsoft.com/en-us/library/8hyw4sy7(v=vs.71).aspx
+  // This may also trigger just-in-time debugging via DebugBreak().
+  const int RetryCode = 1;
+  if (Return != 0)
+    *Return = RetryCode;
+  return TRUE; // Don't call _CrtDbgReport.
+}
+
 /// CRTReportHook - Function called on a CRT debugging event.
 static int CRTReportHook(int ReportType, char *Message, int *Return) {
   // Don't cause a DebugBreak() on return.
@@ -238,6 +251,15 @@ static void RegisterHandler() {
   OldFilter = SetUnhandledExceptionFilter(LLVMUnhandledExceptionFilter);
   SetConsoleCtrlHandler(LLVMConsoleCtrlHandler, TRUE);
 
+#ifdef _MSC_VER
+  const char *EnableMsgbox = getenv("LLVM_ENABLE_CRT_REPORT");
+  if (!EnableMsgbox || strcmp("0", EnableMsgbox) == 0) {
+    // Setting a report hook overrides the default behavior of popping an "abort,
+    // retry, or ignore" dialog.
+    _CrtSetReportHook(AvoidMessageBoxHook);
+  }
+#endif
+
   // Environment variable to disable any kind of crash dialog.
   if (getenv("LLVM_DISABLE_CRASH_REPORT")) {
 #ifdef _MSC_VER
diff --git a/lib/Support/YAMLParser.cpp b/lib/Support/YAMLParser.cpp
index 2cead20..213f5e1 100644
--- a/lib/Support/YAMLParser.cpp
+++ b/lib/Support/YAMLParser.cpp
@@ -260,7 +260,7 @@ public:
   Token getNext();
 
   void printError(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Message,
-                  ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) {
+                  ArrayRef<SMRange> Ranges = None) {
     SM.PrintMessage(Loc, Kind, Message, Ranges);
   }
 
diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp
index dc41f2f..daa7f1d 100644
--- a/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -367,9 +367,8 @@ AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
   // shoving a base register and an offset into the instruction then we may well
   // need to scavenge registers. We should either specifically add an
   // callee-save register for this purpose or allocate an extra spill slot.
-
   bool BigStack =
-    (RS && MFI->estimateStackSize(MF) >= TII.estimateRSStackLimit(MF))
+    MFI->estimateStackSize(MF) >= TII.estimateRSStackLimit(MF)
     || MFI->hasVarSizedObjects() // Access will be from X29: messes things up
     || (MFI->adjustsStack() && !hasReservedCallFrame(MF));
 
@@ -392,6 +391,8 @@ AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
   if (ExtraReg != 0) {
     MF.getRegInfo().setPhysRegUsed(ExtraReg);
   } else {
+    assert(RS && "Expect register scavenger to be available");
+
     // Create a stack slot for scavenging purposes. PrologEpilogInserter
     // helpfully places it near either SP or FP for us to avoid
     // infinitely-regression during scavenging.
diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 46b8221..102c71b 100644
--- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -70,6 +70,15 @@ public:
     return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
   }
 
+  /// Selector for pre-lowered address-reference nodes.  The fields are
+  /// already known to match, so N is passed through as Imm and a zero shift
+  /// operand is supplied for the MOVZ/MOVK instruction; always returns true.
+  bool SelectMOVWAddressRef(SDValue N, SDValue &Imm, SDValue &Shift) {
+    Imm = N; // The node itself serves as the immediate operand.
+    Shift = CurDAG->getTargetConstant(0, MVT::i32); // Shift is always 0.
+    return true;
+  }
+
   bool SelectFPZeroOperand(SDValue N, SDValue &Dummy);
 
   bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
@@ -88,6 +97,13 @@ public:
 
   bool SelectTSTBOperand(SDValue N, SDValue &FixedPos, unsigned RegWidth);
 
+  SDNode *SelectAtomic(SDNode *N, unsigned Op8, unsigned Op16, unsigned Op32,
+                       unsigned Op64);
+
+  /// Put the given constant into a pool and return a DAG which will give its
+  /// address.
+  SDValue getConstantPoolItemAddress(DebugLoc DL, const Constant *CV);
+
   SDNode *TrySelectToMoveImm(SDNode *N);
   SDNode *LowerToFPLitPool(SDNode *Node);
   SDNode *SelectToLitPool(SDNode *N);
@@ -224,12 +240,51 @@ SDNode *AArch64DAGToDAGISel::TrySelectToMoveImm(SDNode *Node) {
   return ResNode;
 }
 
+SDValue
+AArch64DAGToDAGISel::getConstantPoolItemAddress(DebugLoc DL,
+                                                const Constant *CV) {
+  // Builds the address of CV's constant-pool entry for the code model in use.
+  EVT PtrVT = TLI.getPointerTy();
+
+  switch (TLI.getTargetMachine().getCodeModel()) {
+  case CodeModel::Small: {
+    // WrapperSmall takes the plain reference, the MO_LO12 reference and the
+    // ABI alignment of the constant's type.
+    unsigned Alignment =
+        TLI.getDataLayout()->getABITypeAlignment(CV->getType());
+    return CurDAG->getNode(
+        AArch64ISD::WrapperSmall, DL, PtrVT,
+        CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_NO_FLAG),
+        CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_LO12),
+        CurDAG->getConstant(Alignment, MVT::i32));
+  }
+  case CodeModel::Large: {
+    // One MOVZ (MO_ABS_G3) followed by a MOVK for each of the G2, G1 and G0
+    // flags, every step feeding on the previous result with a shift of 0.
+    static const unsigned MovKFlags[] = {
+      AArch64II::MO_ABS_G2_NC, AArch64II::MO_ABS_G1_NC, AArch64II::MO_ABS_G0_NC
+    };
+    SDNode *LitAddr = CurDAG->getMachineNode(
+        AArch64::MOVZxii, DL, PtrVT,
+        CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G3),
+        CurDAG->getTargetConstant(0, MVT::i32));
+    for (unsigned i = 0; i != 3; ++i)
+      LitAddr = CurDAG->getMachineNode(
+          AArch64::MOVKxii, DL, PtrVT, SDValue(LitAddr, 0),
+          CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, MovKFlags[i]),
+          CurDAG->getTargetConstant(0, MVT::i32));
+    return SDValue(LitAddr, 0);
+  }
+  default:
+    llvm_unreachable("Only small and large code models supported now");
+  }
+}
+
 SDNode *AArch64DAGToDAGISel::SelectToLitPool(SDNode *Node) {
   DebugLoc DL = Node->getDebugLoc();
   uint64_t UnsignedVal = cast<ConstantSDNode>(Node)->getZExtValue();
   int64_t SignedVal = cast<ConstantSDNode>(Node)->getSExtValue();
   EVT DestType = Node->getValueType(0);
-  EVT PtrVT = TLI.getPointerTy();
 
   // Since we may end up loading a 64-bit constant from a 32-bit entry the
   // constant in the pool may have a different type to the eventual node.
@@ -256,14 +311,8 @@ SDNode *AArch64DAGToDAGISel::SelectToLitPool(SDNode *Node) {
   Constant *CV = ConstantInt::get(Type::getIntNTy(*CurDAG->getContext(),
                                                   MemType.getSizeInBits()),
                                   UnsignedVal);
-  SDValue PoolAddr;
+  SDValue PoolAddr = getConstantPoolItemAddress(DL, CV);
   unsigned Alignment = TLI.getDataLayout()->getABITypeAlignment(CV->getType());
-  PoolAddr = CurDAG->getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
-                             CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0,
-                                                         AArch64II::MO_NO_FLAG),
-                             CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0,
-                                                           AArch64II::MO_LO12),
-                             CurDAG->getConstant(Alignment, MVT::i32));
 
   return CurDAG->getExtLoad(Extension, DL, DestType, CurDAG->getEntryNode(),
                             PoolAddr,
@@ -276,20 +325,10 @@ SDNode *AArch64DAGToDAGISel::SelectToLitPool(SDNode *Node) {
 SDNode *AArch64DAGToDAGISel::LowerToFPLitPool(SDNode *Node) {
   DebugLoc DL = Node->getDebugLoc();
   const ConstantFP *FV = cast<ConstantFPSDNode>(Node)->getConstantFPValue();
-  EVT PtrVT = TLI.getPointerTy();
   EVT DestType = Node->getValueType(0);
 
   unsigned Alignment = TLI.getDataLayout()->getABITypeAlignment(FV->getType());
-  SDValue PoolAddr;
-
-  assert(TM.getCodeModel() == CodeModel::Small &&
-         "Only small code model supported");
-  PoolAddr = CurDAG->getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
-                             CurDAG->getTargetConstantPool(FV, PtrVT, 0, 0,
-                                                         AArch64II::MO_NO_FLAG),
-                             CurDAG->getTargetConstantPool(FV, PtrVT, 0, 0,
-                                                           AArch64II::MO_LO12),
-                             CurDAG->getConstant(Alignment, MVT::i32));
+  SDValue PoolAddr = getConstantPoolItemAddress(DL, FV);
 
   return CurDAG->getLoad(DestType, DL, CurDAG->getEntryNode(), PoolAddr,
                          MachinePointerInfo::getConstantPool(),
@@ -318,6 +357,38 @@ AArch64DAGToDAGISel::SelectTSTBOperand(SDValue N, SDValue &FixedPos,
   return true;
 }
 
+SDNode *AArch64DAGToDAGISel::SelectAtomic(SDNode *Node, unsigned Op8,
+                                          unsigned Op16, unsigned Op32,
+                                          unsigned Op64) {
+  // Selects Node to whichever of the four opcodes matches its memory width.
+  // The operands are carried over unchanged, except that the AtomicOrdering
+  // is appended as a constant and the chain is moved to the end.
+  AtomicSDNode *Atomic = cast<AtomicSDNode>(Node);
+  EVT MemVT = Atomic->getMemoryVT();
+
+  unsigned Opcode;
+  if (MemVT == MVT::i64)
+    Opcode = Op64;
+  else if (MemVT == MVT::i32)
+    Opcode = Op32;
+  else if (MemVT == MVT::i16)
+    Opcode = Op16;
+  else if (MemVT == MVT::i8)
+    Opcode = Op8;
+  else
+    llvm_unreachable("Unexpected atomic operation");
+
+  // Operand 0 is the chain; collect everything after it first.
+  SmallVector<SDValue, 4> NewOps;
+  for (unsigned Idx = 1, End = Atomic->getNumOperands(); Idx < End; ++Idx)
+    NewOps.push_back(Atomic->getOperand(Idx));
+  NewOps.push_back(CurDAG->getTargetConstant(Atomic->getOrdering(), MVT::i32));
+  NewOps.push_back(Atomic->getOperand(0));
+
+  return CurDAG->SelectNodeTo(Node, Opcode, Atomic->getValueType(0),
+                              MVT::Other, &NewOps[0], NewOps.size());
+}
+
 SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
   // Dump information about the Node being selected
   DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << "\n");
@@ -328,6 +399,78 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
   }
 
   switch (Node->getOpcode()) {
+  case ISD::ATOMIC_LOAD_ADD:
+    return SelectAtomic(Node,
+                        AArch64::ATOMIC_LOAD_ADD_I8,
+                        AArch64::ATOMIC_LOAD_ADD_I16,
+                        AArch64::ATOMIC_LOAD_ADD_I32,
+                        AArch64::ATOMIC_LOAD_ADD_I64);
+  case ISD::ATOMIC_LOAD_SUB:
+    return SelectAtomic(Node,
+                        AArch64::ATOMIC_LOAD_SUB_I8,
+                        AArch64::ATOMIC_LOAD_SUB_I16,
+                        AArch64::ATOMIC_LOAD_SUB_I32,
+                        AArch64::ATOMIC_LOAD_SUB_I64);
+  case ISD::ATOMIC_LOAD_AND:
+    return SelectAtomic(Node,
+                        AArch64::ATOMIC_LOAD_AND_I8,
+                        AArch64::ATOMIC_LOAD_AND_I16,
+                        AArch64::ATOMIC_LOAD_AND_I32,
+                        AArch64::ATOMIC_LOAD_AND_I64);
+  case ISD::ATOMIC_LOAD_OR:
+    return SelectAtomic(Node,
+                        AArch64::ATOMIC_LOAD_OR_I8,
+                        AArch64::ATOMIC_LOAD_OR_I16,
+                        AArch64::ATOMIC_LOAD_OR_I32,
+                        AArch64::ATOMIC_LOAD_OR_I64);
+  case ISD::ATOMIC_LOAD_XOR:
+    return SelectAtomic(Node,
+                        AArch64::ATOMIC_LOAD_XOR_I8,
+                        AArch64::ATOMIC_LOAD_XOR_I16,
+                        AArch64::ATOMIC_LOAD_XOR_I32,
+                        AArch64::ATOMIC_LOAD_XOR_I64);
+  case ISD::ATOMIC_LOAD_NAND:
+    return SelectAtomic(Node,
+                        AArch64::ATOMIC_LOAD_NAND_I8,
+                        AArch64::ATOMIC_LOAD_NAND_I16,
+                        AArch64::ATOMIC_LOAD_NAND_I32,
+                        AArch64::ATOMIC_LOAD_NAND_I64);
+  case ISD::ATOMIC_LOAD_MIN:
+    return SelectAtomic(Node,
+                        AArch64::ATOMIC_LOAD_MIN_I8,
+                        AArch64::ATOMIC_LOAD_MIN_I16,
+                        AArch64::ATOMIC_LOAD_MIN_I32,
+                        AArch64::ATOMIC_LOAD_MIN_I64);
+  case ISD::ATOMIC_LOAD_MAX:
+    return SelectAtomic(Node,
+                        AArch64::ATOMIC_LOAD_MAX_I8,
+                        AArch64::ATOMIC_LOAD_MAX_I16,
+                        AArch64::ATOMIC_LOAD_MAX_I32,
+                        AArch64::ATOMIC_LOAD_MAX_I64);
+  case ISD::ATOMIC_LOAD_UMIN:
+    return SelectAtomic(Node,
+                        AArch64::ATOMIC_LOAD_UMIN_I8,
+                        AArch64::ATOMIC_LOAD_UMIN_I16,
+                        AArch64::ATOMIC_LOAD_UMIN_I32,
+                        AArch64::ATOMIC_LOAD_UMIN_I64);
+  case ISD::ATOMIC_LOAD_UMAX:
+    return SelectAtomic(Node,
+                        AArch64::ATOMIC_LOAD_UMAX_I8,
+                        AArch64::ATOMIC_LOAD_UMAX_I16,
+                        AArch64::ATOMIC_LOAD_UMAX_I32,
+                        AArch64::ATOMIC_LOAD_UMAX_I64);
+  case ISD::ATOMIC_SWAP:
+    return SelectAtomic(Node,
+                        AArch64::ATOMIC_SWAP_I8,
+                        AArch64::ATOMIC_SWAP_I16,
+                        AArch64::ATOMIC_SWAP_I32,
+                        AArch64::ATOMIC_SWAP_I64);
+  case ISD::ATOMIC_CMP_SWAP:
+    return SelectAtomic(Node,
+                        AArch64::ATOMIC_CMP_SWAP_I8,
+                        AArch64::ATOMIC_CMP_SWAP_I16,
+                        AArch64::ATOMIC_CMP_SWAP_I32,
+                        AArch64::ATOMIC_CMP_SWAP_I64);
   case ISD::FrameIndex: {
     int FI = cast<FrameIndexSDNode>(Node)->getIndex();
     EVT PtrTy = TLI.getPointerTy();
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index e9f4497..56f6751 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -59,13 +59,6 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
 
   computeRegisterProperties();
 
-  // Some atomic operations can be folded into load-acquire or store-release
-  // instructions on AArch64. It's marginally simpler to let LLVM expand
-  // everything out to a barrier and then recombine the (few) barriers we can.
-  setInsertFencesForAtomic(true);
-  setTargetDAGCombine(ISD::ATOMIC_FENCE);
-  setTargetDAGCombine(ISD::ATOMIC_STORE);
-
   // We combine OR nodes for bitfield and NEON BSL operations.
   setTargetDAGCombine(ISD::OR);
 
@@ -275,27 +268,34 @@ EVT AArch64TargetLowering::getSetCCResultType(EVT VT) const {
   return VT.changeVectorElementTypeToInteger();
 }
 
-static void getExclusiveOperation(unsigned Size, unsigned &ldrOpc,
-                                  unsigned &strOpc) {
-  switch (Size) {
-  default: llvm_unreachable("unsupported size for atomic binary op!");
-  case 1:
-    ldrOpc = AArch64::LDXR_byte;
-    strOpc = AArch64::STXR_byte;
-    break;
-  case 2:
-    ldrOpc = AArch64::LDXR_hword;
-    strOpc = AArch64::STXR_hword;
-    break;
-  case 4:
-    ldrOpc = AArch64::LDXR_word;
-    strOpc = AArch64::STXR_word;
-    break;
-  case 8:
-    ldrOpc = AArch64::LDXR_dword;
-    strOpc = AArch64::STXR_dword;
-    break;
-  }
+// Pick the exclusive load/store opcodes for an atomic access of Size bytes,
+// using the acquire and/or release variants when the ordering Ord needs them.
+static void getExclusiveOperation(unsigned Size, AtomicOrdering Ord,
+                                  unsigned &LdrOpc, unsigned &StrOpc) {
+  // Read-only opcode tables, indexed by log2 of the access size in bytes.
+  static const unsigned LdBare[] = {AArch64::LDXR_byte, AArch64::LDXR_hword,
+                                    AArch64::LDXR_word, AArch64::LDXR_dword};
+  static const unsigned LdAcq[] = {AArch64::LDAXR_byte, AArch64::LDAXR_hword,
+                                   AArch64::LDAXR_word, AArch64::LDAXR_dword};
+  static const unsigned StBare[] = {AArch64::STXR_byte, AArch64::STXR_hword,
+                                    AArch64::STXR_word, AArch64::STXR_dword};
+  static const unsigned StRel[] = {AArch64::STLXR_byte, AArch64::STLXR_hword,
+                                   AArch64::STLXR_word, AArch64::STLXR_dword};
+
+  const unsigned *LoadOps, *StoreOps;
+  if (Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent)
+    LoadOps = LdAcq;
+  else
+    LoadOps = LdBare;
+  if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent)
+    StoreOps = StRel;
+  else
+    StoreOps = StBare;
+
+  assert(isPowerOf2_32(Size) && Size <= 8 &&
+         "unsupported size for atomic binary op!");
+  LdrOpc = LoadOps[Log2_32(Size)];
+  StrOpc = StoreOps[Log2_32(Size)];
 }
 
 MachineBasicBlock *
@@ -313,12 +313,13 @@ AArch64TargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
   unsigned dest = MI->getOperand(0).getReg();
   unsigned ptr = MI->getOperand(1).getReg();
   unsigned incr = MI->getOperand(2).getReg();
+  AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm());
   DebugLoc dl = MI->getDebugLoc();
 
   MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
 
   unsigned ldrOpc, strOpc;
-  getExclusiveOperation(Size, ldrOpc, strOpc);
+  getExclusiveOperation(Size, Ord, ldrOpc, strOpc);
 
   MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
   MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
@@ -397,6 +398,8 @@ AArch64TargetLowering::emitAtomicBinaryMinMax(MachineInstr *MI,
   unsigned dest = MI->getOperand(0).getReg();
   unsigned ptr = MI->getOperand(1).getReg();
   unsigned incr = MI->getOperand(2).getReg();
+  AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm());
+
   unsigned oldval = dest;
   DebugLoc dl = MI->getDebugLoc();
 
@@ -411,7 +414,7 @@ AArch64TargetLowering::emitAtomicBinaryMinMax(MachineInstr *MI,
   }
 
   unsigned ldrOpc, strOpc;
-  getExclusiveOperation(Size, ldrOpc, strOpc);
+  getExclusiveOperation(Size, Ord, ldrOpc, strOpc);
 
   MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
   MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
@@ -479,6 +482,7 @@ AArch64TargetLowering::emitAtomicCmpSwap(MachineInstr *MI,
   unsigned ptr     = MI->getOperand(1).getReg();
   unsigned oldval  = MI->getOperand(2).getReg();
   unsigned newval  = MI->getOperand(3).getReg();
+  AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(4).getImm());
   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
   DebugLoc dl = MI->getDebugLoc();
 
@@ -487,7 +491,7 @@ AArch64TargetLowering::emitAtomicCmpSwap(MachineInstr *MI,
   TRCsp = Size == 8 ? &AArch64::GPR64xspRegClass : &AArch64::GPR32wspRegClass;
 
   unsigned ldrOpc, strOpc;
-  getExclusiveOperation(Size, ldrOpc, strOpc);
+  getExclusiveOperation(Size, Ord, ldrOpc, strOpc);
 
   MachineFunction *MF = BB->getParent();
   const BasicBlock *LLVM_BB = BB->getBasicBlock();
@@ -777,6 +781,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case AArch64ISD::TC_RETURN:      return "AArch64ISD::TC_RETURN";
   case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER";
   case AArch64ISD::TLSDESCCALL:    return "AArch64ISD::TLSDESCCALL";
+  case AArch64ISD::WrapperLarge:   return "AArch64ISD::WrapperLarge";
   case AArch64ISD::WrapperSmall:   return "AArch64ISD::WrapperSmall";
 
   default:                       return NULL;
@@ -1662,17 +1667,26 @@ AArch64TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
   EVT PtrVT = getPointerTy();
   const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
 
-  assert(getTargetMachine().getCodeModel() == CodeModel::Small
-         && "Only small code model supported at the moment");
-
-  // The most efficient code is PC-relative anyway for the small memory model,
-  // so we don't need to worry about relocation model.
-  return DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
-                     DAG.getTargetBlockAddress(BA, PtrVT, 0,
-                                               AArch64II::MO_NO_FLAG),
-                     DAG.getTargetBlockAddress(BA, PtrVT, 0,
-                                               AArch64II::MO_LO12),
-                     DAG.getConstant(/*Alignment=*/ 4, MVT::i32));
+  switch(getTargetMachine().getCodeModel()) {
+  case CodeModel::Small:
+    // The most efficient code is PC-relative anyway for the small memory model,
+    // so we don't need to worry about relocation model.
+    return DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
+                       DAG.getTargetBlockAddress(BA, PtrVT, 0,
+                                                 AArch64II::MO_NO_FLAG),
+                       DAG.getTargetBlockAddress(BA, PtrVT, 0,
+                                                 AArch64II::MO_LO12),
+                       DAG.getConstant(/*Alignment=*/ 4, MVT::i32));
+  case CodeModel::Large:
+    return DAG.getNode(
+      AArch64ISD::WrapperLarge, DL, PtrVT,
+      DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G3),
+      DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G2_NC),
+      DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G1_NC),
+      DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G0_NC));
+  default:
+    llvm_unreachable("Only small and large code models supported now");
+  }
 }
 
 
@@ -1841,12 +1855,33 @@ AArch64TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
 }
 
 SDValue
-AArch64TargetLowering::LowerGlobalAddressELF(SDValue Op,
-                                             SelectionDAG &DAG) const {
-  // TableGen doesn't have easy access to the CodeModel or RelocationModel, so
-  // we make that distinction here.
+AArch64TargetLowering::LowerGlobalAddressELFLarge(SDValue Op,
+                                                  SelectionDAG &DAG) const {
+  assert(getTargetMachine().getCodeModel() == CodeModel::Large);
+  assert(getTargetMachine().getRelocationModel() == Reloc::Static);
+
+  EVT PtrVT = getPointerTy();
+  DebugLoc dl = Op.getDebugLoc();
+  const GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
+  const GlobalValue *GV = GN->getGlobal();
+
+  SDValue GlobalAddr = DAG.getNode(
+      AArch64ISD::WrapperLarge, dl, PtrVT,
+      DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G3),
+      DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G2_NC),
+      DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G1_NC),
+      DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G0_NC));
 
-  // We support the small memory model for now.
+  if (GN->getOffset() != 0)
+    return DAG.getNode(ISD::ADD, dl, PtrVT, GlobalAddr,
+                       DAG.getConstant(GN->getOffset(), PtrVT));
+
+  return GlobalAddr;
+}
+
+SDValue
+AArch64TargetLowering::LowerGlobalAddressELFSmall(SDValue Op,
+                                                  SelectionDAG &DAG) const {
   assert(getTargetMachine().getCodeModel() == CodeModel::Small);
 
   EVT PtrVT = getPointerTy();
@@ -1925,6 +1960,22 @@ AArch64TargetLowering::LowerGlobalAddressELF(SDValue Op,
   return GlobalRef;
 }
 
+SDValue
+AArch64TargetLowering::LowerGlobalAddressELF(SDValue Op,
+                                             SelectionDAG &DAG) const {
+  // TableGen patterns have no easy way to query the CodeModel or the
+  // RelocationModel, so the choice of lowering is made here in C++.
+  const CodeModel::Model CM = getTargetMachine().getCodeModel();
+
+  if (CM == CodeModel::Small)
+    return LowerGlobalAddressELFSmall(Op, DAG);
+
+  if (CM == CodeModel::Large)
+    return LowerGlobalAddressELFLarge(Op, DAG);
+
+  llvm_unreachable("Only small and large code models supported now");
+}
+
 SDValue AArch64TargetLowering::LowerTLSDescCall(SDValue SymAddr,
                                                 SDValue DescAddr,
                                                 DebugLoc DL,
@@ -1974,6 +2025,8 @@ AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op,
                                              SelectionDAG &DAG) const {
   assert(Subtarget->isTargetELF() &&
          "TLS not implemented for non-ELF targets");
+  assert(getTargetMachine().getCodeModel() == CodeModel::Small
+         && "TLS only supported in small memory model");
   const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
 
   TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
@@ -2082,14 +2135,27 @@ SDValue
 AArch64TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
   JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
   DebugLoc dl = JT->getDebugLoc();
+  EVT PtrVT = getPointerTy();
 
   // When compiling PIC, jump tables get put in the code section so a static
   // relocation-style is acceptable for both cases.
-  return DAG.getNode(AArch64ISD::WrapperSmall, dl, getPointerTy(),
-                     DAG.getTargetJumpTable(JT->getIndex(), getPointerTy()),
-                     DAG.getTargetJumpTable(JT->getIndex(), getPointerTy(),
-                                            AArch64II::MO_LO12),
-                     DAG.getConstant(1, MVT::i32));
+  switch (getTargetMachine().getCodeModel()) {
+  case CodeModel::Small:
+    return DAG.getNode(AArch64ISD::WrapperSmall, dl, PtrVT,
+                       DAG.getTargetJumpTable(JT->getIndex(), PtrVT),
+                       DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
+                                              AArch64II::MO_LO12),
+                       DAG.getConstant(1, MVT::i32));
+  case CodeModel::Large:
+    return DAG.getNode(
+      AArch64ISD::WrapperLarge, dl, PtrVT,
+      DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G3),
+      DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G2_NC),
+      DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G1_NC),
+      DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G0_NC));
+  default:
+    llvm_unreachable("Only small and large code models supported now");
+  }
 }
 
 // (SELECT_CC lhs, rhs, iftrue, iffalse, condcode)
@@ -2377,78 +2443,6 @@ static SDValue PerformANDCombine(SDNode *N,
                      DAG.getConstant(LSB + Width - 1, MVT::i64));
 }
 
-static SDValue PerformATOMIC_FENCECombine(SDNode *FenceNode,
-                                         TargetLowering::DAGCombinerInfo &DCI) {
-  // An atomic operation followed by an acquiring atomic fence can be reduced to
-  // an acquiring load. The atomic operation provides a convenient pointer to
-  // load from. If the original operation was a load anyway we can actually
-  // combine the two operations into an acquiring load.
-  SelectionDAG &DAG = DCI.DAG;
-  SDValue AtomicOp = FenceNode->getOperand(0);
-  AtomicSDNode *AtomicNode = dyn_cast<AtomicSDNode>(AtomicOp);
-
-  // A fence on its own can't be optimised
-  if (!AtomicNode)
-    return SDValue();
-
-  AtomicOrdering FenceOrder
-    = static_cast<AtomicOrdering>(FenceNode->getConstantOperandVal(1));
-  SynchronizationScope FenceScope
-    = static_cast<SynchronizationScope>(FenceNode->getConstantOperandVal(2));
-
-  if (FenceOrder != Acquire || FenceScope != AtomicNode->getSynchScope())
-    return SDValue();
-
-  // If the original operation was an ATOMIC_LOAD then we'll be replacing it, so
-  // the chain we use should be its input, otherwise we'll put our store after
-  // it so we use its output chain.
-  SDValue Chain = AtomicNode->getOpcode() == ISD::ATOMIC_LOAD ?
-    AtomicNode->getChain() : AtomicOp;
-
-  // We have an acquire fence with a handy atomic operation nearby, we can
-  // convert the fence into a load-acquire, discarding the result.
-  DebugLoc DL = FenceNode->getDebugLoc();
-  SDValue Op = DAG.getAtomic(ISD::ATOMIC_LOAD, DL, AtomicNode->getMemoryVT(),
-                             AtomicNode->getValueType(0),
-                             Chain,                  // Chain
-                             AtomicOp.getOperand(1), // Pointer
-                             AtomicNode->getMemOperand(), Acquire,
-                             FenceScope);
-
-  if (AtomicNode->getOpcode() == ISD::ATOMIC_LOAD)
-    DAG.ReplaceAllUsesWith(AtomicNode, Op.getNode());
-
-  return Op.getValue(1);
-}
-
-static SDValue PerformATOMIC_STORECombine(SDNode *N,
-                                         TargetLowering::DAGCombinerInfo &DCI) {
-  // A releasing atomic fence followed by an atomic store can be combined into a
-  // single store operation.
-  SelectionDAG &DAG = DCI.DAG;
-  AtomicSDNode *AtomicNode = cast<AtomicSDNode>(N);
-  SDValue FenceOp = AtomicNode->getOperand(0);
-
-  if (FenceOp.getOpcode() != ISD::ATOMIC_FENCE)
-    return SDValue();
-
-  AtomicOrdering FenceOrder
-    = static_cast<AtomicOrdering>(FenceOp->getConstantOperandVal(1));
-  SynchronizationScope FenceScope
-    = static_cast<SynchronizationScope>(FenceOp->getConstantOperandVal(2));
-
-  if (FenceOrder != Release || FenceScope != AtomicNode->getSynchScope())
-    return SDValue();
-
-  DebugLoc DL = AtomicNode->getDebugLoc();
-  return DAG.getAtomic(ISD::ATOMIC_STORE, DL, AtomicNode->getMemoryVT(),
-                       FenceOp.getOperand(0),  // Chain
-                       AtomicNode->getOperand(1),       // Pointer
-                       AtomicNode->getOperand(2),       // Value
-                       AtomicNode->getMemOperand(), Release,
-                       FenceScope);
-}
-
 /// For a true bitfield insert, the bits getting into that contiguous mask
 /// should come from the low part of an existing value: they must be formed from
 /// a compatible SHL operation (unless they're already low). This function
@@ -2804,8 +2798,6 @@ AArch64TargetLowering::PerformDAGCombine(SDNode *N,
   switch (N->getOpcode()) {
   default: break;
   case ISD::AND: return PerformANDCombine(N, DCI);
-  case ISD::ATOMIC_FENCE: return PerformATOMIC_FENCECombine(N, DCI);
-  case ISD::ATOMIC_STORE: return PerformATOMIC_STORECombine(N, DCI);
   case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
   case ISD::SRA: return PerformSRACombine(N, DCI);
   }
diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h
index 4960d28..d49b3ee 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -103,7 +103,12 @@ namespace AArch64ISD {
     UBFX,
 
     // Wraps an address which the ISelLowering phase has decided should be
-    // created using the small absolute memory model: i.e. adrp/add or
+    // created using the large memory model style: i.e. a sequence of four
+    // movz/movk instructions.
+    WrapperLarge,
+
+    // Wraps an address which the ISelLowering phase has decided should be
+    // created using the small memory model style: i.e. adrp/add or
     // adrp/mem-op. This exists to prevent bare TargetAddresses which may never
     // get selected.
     WrapperSmall
@@ -206,7 +211,11 @@ public:
   SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, bool IsSigned) const;
+
+  SDValue LowerGlobalAddressELFSmall(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerGlobalAddressELFLarge(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const;
+
   SDValue LowerTLSDescCall(SDValue SymAddr, SDValue DescAddr, DebugLoc DL,
                            SelectionDAG &DAG) const;
   SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td
index cb93471..9dd122f 100644
--- a/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/lib/Target/AArch64/AArch64InstrFormats.td
@@ -8,7 +8,7 @@
 //===----------------------------------------------------------------------===//
 // This file describes AArch64 instruction formats, down to the level of the
 // instruction's overall class.
-// ===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
 
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index 37be5e4..d2cfc7d 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -70,12 +70,20 @@ def A64cmn : PatFrag<(ops node:$lhs, node:$rhs),
 //       made for a variable/address at ISelLowering.
 //     + The output of ISelLowering should be selectable (hence the Wrapper,
 //       rather than a bare target opcode)
-def SDTAArch64Wrapper : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>,
-                                             SDTCisSameAs<1, 2>,
-                                             SDTCisVT<3, i32>,
-                                             SDTCisPtrTy<0>]>;
+def SDTAArch64WrapperLarge : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>,
+                                                  SDTCisSameAs<0, 2>,
+                                                  SDTCisSameAs<0, 3>,
+                                                  SDTCisSameAs<0, 4>,
+                                                  SDTCisPtrTy<0>]>;
 
-def A64WrapperSmall : SDNode<"AArch64ISD::WrapperSmall", SDTAArch64Wrapper>;
+def A64WrapperLarge :SDNode<"AArch64ISD::WrapperLarge", SDTAArch64WrapperLarge>;
+
+def SDTAArch64WrapperSmall : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>,
+                                                  SDTCisSameAs<1, 2>,
+                                                  SDTCisVT<3, i32>,
+                                                  SDTCisPtrTy<0>]>;
+
+def A64WrapperSmall :SDNode<"AArch64ISD::WrapperSmall", SDTAArch64WrapperSmall>;
 
 
 def SDTAArch64GOTLoad : SDTypeProfile<1, 1, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;
@@ -159,49 +167,55 @@ let Defs = [XSP], Uses = [XSP] in {
 // Atomic operation pseudo-instructions
 //===----------------------------------------------------------------------===//
 
-let usesCustomInserter = 1 in {
-multiclass AtomicSizes<string opname> {
-  def _I8 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$incr),
-          [(set i32:$dst, (!cast<SDNode>(opname # "_8") i64:$ptr, i32:$incr))]>;
-  def _I16 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$incr),
-         [(set i32:$dst, (!cast<SDNode>(opname # "_16") i64:$ptr, i32:$incr))]>;
-  def _I32 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$incr),
-         [(set i32:$dst, (!cast<SDNode>(opname # "_32") i64:$ptr, i32:$incr))]>;
-  def _I64 : PseudoInst<(outs GPR64:$dst), (ins GPR64:$ptr, GPR64:$incr),
-         [(set i64:$dst, (!cast<SDNode>(opname # "_64") i64:$ptr, i64:$incr))]>;
-}
-}
-
-defm ATOMIC_LOAD_ADD  : AtomicSizes<"atomic_load_add">;
-defm ATOMIC_LOAD_SUB  : AtomicSizes<"atomic_load_sub">;
-defm ATOMIC_LOAD_AND  : AtomicSizes<"atomic_load_and">;
-defm ATOMIC_LOAD_OR   : AtomicSizes<"atomic_load_or">;
-defm ATOMIC_LOAD_XOR  : AtomicSizes<"atomic_load_xor">;
-defm ATOMIC_LOAD_NAND : AtomicSizes<"atomic_load_nand">;
-defm ATOMIC_SWAP      : AtomicSizes<"atomic_swap">;
+// These get selected from C++ code as a pretty much direct translation from the
+// generic DAG nodes. The one exception is that the AtomicOrdering is added as
+// an operand so that the eventual lowering can make use of it and choose
+// acquire/release operations when required.
+
+let usesCustomInserter = 1, hasCtrlDep = 1, mayLoad = 1, mayStore = 1 in {
+multiclass AtomicSizes { // One pseudo per access width; see SelectAtomic.
+  def _I8 : PseudoInst<(outs GPR32:$dst), // i8 memory access
+                       (ins GPR64xsp:$ptr, GPR32:$incr, i32imm:$ordering), []>;
+  def _I16 : PseudoInst<(outs GPR32:$dst), // i16 memory access
+                        (ins GPR64xsp:$ptr, GPR32:$incr, i32imm:$ordering), []>;
+  def _I32 : PseudoInst<(outs GPR32:$dst), // i32 memory access
+                        (ins GPR64xsp:$ptr, GPR32:$incr, i32imm:$ordering), []>;
+  def _I64 : PseudoInst<(outs GPR64:$dst), // i64 memory access
+                        (ins GPR64xsp:$ptr, GPR64:$incr, i32imm:$ordering), []>;
+} // end multiclass AtomicSizes
+} // end let usesCustomInserter, hasCtrlDep, mayLoad, mayStore
+
+defm ATOMIC_LOAD_ADD  : AtomicSizes;
+defm ATOMIC_LOAD_SUB  : AtomicSizes;
+defm ATOMIC_LOAD_AND  : AtomicSizes;
+defm ATOMIC_LOAD_OR   : AtomicSizes;
+defm ATOMIC_LOAD_XOR  : AtomicSizes;
+defm ATOMIC_LOAD_NAND : AtomicSizes;
+defm ATOMIC_SWAP      : AtomicSizes;
 let Defs = [NZCV] in {
   // These operations need a CMP to calculate the correct value
-  defm ATOMIC_LOAD_MIN  : AtomicSizes<"atomic_load_min">;
-  defm ATOMIC_LOAD_MAX  : AtomicSizes<"atomic_load_max">;
-  defm ATOMIC_LOAD_UMIN : AtomicSizes<"atomic_load_umin">;
-  defm ATOMIC_LOAD_UMAX : AtomicSizes<"atomic_load_umax">;
-}
-
-let usesCustomInserter = 1, Defs = [NZCV] in {
-def ATOMIC_CMP_SWAP_I8
-  : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$old, GPR32:$new),
-            [(set i32:$dst, (atomic_cmp_swap_8 i64:$ptr, i32:$old, i32:$new))]>;
-def ATOMIC_CMP_SWAP_I16
-  : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$old, GPR32:$new),
-           [(set i32:$dst, (atomic_cmp_swap_16 i64:$ptr, i32:$old, i32:$new))]>;
-def ATOMIC_CMP_SWAP_I32
-  : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$old, GPR32:$new),
-           [(set i32:$dst, (atomic_cmp_swap_32 i64:$ptr, i32:$old, i32:$new))]>;
-def ATOMIC_CMP_SWAP_I64
-  : PseudoInst<(outs GPR64:$dst), (ins GPR64:$ptr, GPR64:$old, GPR64:$new),
-           [(set i64:$dst, (atomic_cmp_swap_64 i64:$ptr, i64:$old, i64:$new))]>;
+  defm ATOMIC_LOAD_MIN  : AtomicSizes;
+  defm ATOMIC_LOAD_MAX  : AtomicSizes;
+  defm ATOMIC_LOAD_UMIN : AtomicSizes;
+  defm ATOMIC_LOAD_UMAX : AtomicSizes;
+}
+
+class AtomicCmpSwap<RegisterClass GPRData> // GPRData: class of data regs
+  : PseudoInst<(outs GPRData:$dst),
+               (ins GPR64xsp:$ptr, GPRData:$old, GPRData:$new,
+                    i32imm:$ordering), []> { // AtomicOrdering, as an imm
+  let usesCustomInserter = 1; // Expanded in C++ (see emitAtomicCmpSwap).
+  let hasCtrlDep = 1;
+  let mayLoad = 1;
+  let mayStore = 1;
+  let Defs = [NZCV]; // NOTE(review): presumably set by a compare - confirm
 }
 
+def ATOMIC_CMP_SWAP_I8  : AtomicCmpSwap<GPR32>;
+def ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap<GPR32>;
+def ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap<GPR32>;
+def ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap<GPR64>;
+
 //===----------------------------------------------------------------------===//
 // Add-subtract (extended register) instructions
 //===----------------------------------------------------------------------===//
@@ -2579,7 +2593,8 @@ defm LDAR  : A64I_LRex<"ldar",  0b101>;
 
 class acquiring_load<PatFrag base>
   : PatFrag<(ops node:$ptr), (base node:$ptr), [{
-  return cast<AtomicSDNode>(N)->getOrdering() == Acquire;
+  AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
+  return Ordering == Acquire || Ordering == SequentiallyConsistent;
 }]>;
 
 def atomic_load_acquire_8  : acquiring_load<atomic_load_8>;
@@ -2610,7 +2625,8 @@ class A64I_SLexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
 
 class releasing_store<PatFrag base>
   : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{
-  return cast<AtomicSDNode>(N)->getOrdering() == Release;
+  AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
+  return Ordering == Release || Ordering == SequentiallyConsistent;
 }]>;
 
 def atomic_store_release_8  : releasing_store<atomic_store_8>;
@@ -3863,7 +3879,7 @@ multiclass movw_operands<string prefix, string instname, int width> {
     let DiagnosticType = "MOVWUImm16";
   }
 
-  def _imm : Operand<i32> {
+  def _imm : Operand<i64> {
     let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_imm_asmoperand");
     let PrintMethod = "printMoveWideImmOperand";
     let EncoderMethod = "getMoveWideImmOpValue";
@@ -3934,7 +3950,7 @@ multiclass movalias_operand<string prefix, string basename,
                                        # "A64Imms::" # immpredicate # ">";
   }
 
-  def _movimm : Operand<i32> {
+  def _movimm : Operand<i64> {
     let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_asmoperand");
 
     let MIOperandInfo = (ops uimm16:$UImm16, imm:$Shift);
@@ -3958,6 +3974,15 @@ def : movalias<MOVZxii, GPR64, movz64_movimm>;
 def : movalias<MOVNwii, GPR32, movn32_movimm>;
 def : movalias<MOVNxii, GPR64, movn64_movimm>;
 
+def movw_addressref : ComplexPattern<i64, 2, "SelectMOVWAddressRef">;
+
+def : Pat<(A64WrapperLarge movw_addressref:$G3, movw_addressref:$G2,
+                           movw_addressref:$G1, movw_addressref:$G0),
+          (MOVKxii (MOVKxii (MOVKxii (MOVZxii movw_addressref:$G3),
+                                     movw_addressref:$G2),
+                            movw_addressref:$G1),
+                   movw_addressref:$G0)>;
+
 //===----------------------------------------------------------------------===//
 // PC-relative addressing instructions
 //===----------------------------------------------------------------------===//
@@ -4454,8 +4479,6 @@ def : ADRP_ADD<A64WrapperSmall, tjumptable>;
 // GOT access patterns
 //===----------------------------------------------------------------------===//
 
-// FIXME: Wibble
-
 class GOTLoadSmall<SDNode addrfrag>
   : Pat<(A64GOTLoad (A64WrapperSmall addrfrag:$Hi, addrfrag:$Lo12, 8)),
         (LS64_LDR (ADRPxi addrfrag:$Hi), addrfrag:$Lo12)>;
diff --git a/lib/Target/AArch64/AArch64MCInstLower.cpp b/lib/Target/AArch64/AArch64MCInstLower.cpp
index c96bf85..3d22330 100644
--- a/lib/Target/AArch64/AArch64MCInstLower.cpp
+++ b/lib/Target/AArch64/AArch64MCInstLower.cpp
@@ -68,6 +68,18 @@ AArch64AsmPrinter::lowerSymbolOperand(const MachineOperand &MO,
   case AArch64II::MO_TPREL_G0_NC:
     Expr = AArch64MCExpr::CreateTPREL_G0_NC(Expr, OutContext);
     break;
+  case AArch64II::MO_ABS_G3:
+    Expr = AArch64MCExpr::CreateABS_G3(Expr, OutContext);
+    break;
+  case AArch64II::MO_ABS_G2_NC:
+    Expr = AArch64MCExpr::CreateABS_G2_NC(Expr, OutContext);
+    break;
+  case AArch64II::MO_ABS_G1_NC:
+    Expr = AArch64MCExpr::CreateABS_G1_NC(Expr, OutContext);
+    break;
+  case AArch64II::MO_ABS_G0_NC:
+    Expr = AArch64MCExpr::CreateABS_G0_NC(Expr, OutContext);
+    break;
   case AArch64II::MO_NO_FLAG:
     // Expr is already correct
     break;
diff --git a/lib/Target/AArch64/LLVMBuild.txt b/lib/Target/AArch64/LLVMBuild.txt
index 3b296fd..6e4ce8b 100644
--- a/lib/Target/AArch64/LLVMBuild.txt
+++ b/lib/Target/AArch64/LLVMBuild.txt
@@ -25,7 +25,7 @@ parent = Target
 has_asmparser = 1
 has_asmprinter = 1
 has_disassembler = 1
-;has_jit = 1
+has_jit = 1
 
 [component_1]
 type = Library
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
index b83577a..3b811df 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
@@ -63,14 +63,15 @@ public:
 
   ~AArch64ELFStreamer() {}
 
-  virtual void ChangeSection(const MCSection *Section) {
+  virtual void ChangeSection(const MCSection *Section,
+                             const MCExpr *Subsection) {
     // We have to keep track of the mapping symbol state of any sections we
     // use. Each one should start off as EMS_None, which is provided as the
     // default constructor by DenseMap::lookup.
-    LastMappingSymbols[getPreviousSection()] = LastEMS;
+    LastMappingSymbols[getPreviousSection().first] = LastEMS;
     LastEMS = LastMappingSymbols.lookup(Section);
 
-    MCELFStreamer::ChangeSection(Section);
+    MCELFStreamer::ChangeSection(Section, Subsection);
   }
 
   /// This function is the one used to emit instruction data into the ELF
@@ -129,7 +130,7 @@ private:
     MCELF::SetType(SD, ELF::STT_NOTYPE);
     MCELF::SetBinding(SD, ELF::STB_LOCAL);
     SD.setExternal(false);
-    Symbol->setSection(*getCurrentSection());
+    Symbol->setSection(*getCurrentSection().first);
 
     const MCExpr *Value = MCSymbolRefExpr::Create(Start, getContext());
     Symbol->setVariableValue(Value);
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
index c0e3b29..d9798ae 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
@@ -133,6 +133,28 @@ public:
     return Create(VK_AARCH64_TPREL_G0_NC, Expr, Ctx);
   }
 
+  // Absolute-address variants, one per 16-bit granule of the address; these
+  // back the AArch64II::MO_ABS_G* operand flags.
+  static const AArch64MCExpr *CreateABS_G3(const MCExpr *Expr,
+                                           MCContext &Ctx) {
+    return Create(VK_AARCH64_ABS_G3, Expr, Ctx);
+  }
+
+  static const AArch64MCExpr *CreateABS_G2_NC(const MCExpr *Expr,
+                                              MCContext &Ctx) {
+    return Create(VK_AARCH64_ABS_G2_NC, Expr, Ctx);
+  }
+
+  static const AArch64MCExpr *CreateABS_G1_NC(const MCExpr *Expr,
+                                              MCContext &Ctx) {
+    return Create(VK_AARCH64_ABS_G1_NC, Expr, Ctx);
+  }
+
+  static const AArch64MCExpr *CreateABS_G0_NC(const MCExpr *Expr,
+                                              MCContext &Ctx) {
+    return Create(VK_AARCH64_ABS_G0_NC, Expr, Ctx);
+  }
+
   /// @}
   /// @name Accessors
   /// @{
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
index 7960db0..819eead 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
@@ -81,6 +81,12 @@ static MCCodeGenInfo *createAArch64MCCodeGenInfo(StringRef TT, Reloc::Model RM,
 
   if (CM == CodeModel::Default)
     CM = CodeModel::Small;
+  else if (CM == CodeModel::JITDefault) {
+    // The default MCJIT memory managers make no guarantees about where they can
+    // find an executable page; JITed code needs to be able to refer to globals
+    // no matter how far away they are.
+    CM = CodeModel::Large;
+  }
 
   X->InitMCCodeGenInfo(RM, CM, OL);
   return X;
diff --git a/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp b/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp
index b8099cb..fc706a4 100644
--- a/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp
+++ b/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp
@@ -19,6 +19,6 @@ using namespace llvm;
 Target llvm::TheAArch64Target;
 
 extern "C" void LLVMInitializeAArch64TargetInfo() {
-  RegisterTarget<Triple::aarch64>
+  RegisterTarget<Triple::aarch64, /*HasJIT=*/true>
     X(TheAArch64Target, "aarch64", "AArch64");
 }
diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
index 1678559..bedccb5 100644
--- a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
+++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
@@ -981,8 +981,11 @@ bool A64Imms::isLogicalImm(unsigned RegWidth, uint64_t Imm, uint32_t &Bits) {
       Rotation = RepeatWidth - Rotation;
     }
 
-    uint64_t ReplicatedOnes = (ReplicatedMask >> Rotation)
-      | ((ReplicatedMask << (RepeatWidth - Rotation)) & RepeatMask);
+    uint64_t ReplicatedOnes = ReplicatedMask;
+    if (Rotation != 0 && Rotation != 64)
+      ReplicatedOnes = (ReplicatedMask >> Rotation)
+        | ((ReplicatedMask << (RepeatWidth - Rotation)) & RepeatMask);
+
     // Of course, they may not actually be ones, so we have to check that:
     if (!isMask_64(ReplicatedOnes))
       continue;
@@ -1051,13 +1054,14 @@ bool A64Imms::isLogicalImmBits(unsigned RegWidth, uint32_t Bits,
   int Rotation = (ImmR & (Width - 1));
   uint64_t Mask = (1ULL << Num1s) - 1;
   uint64_t WidthMask = Width == 64 ? -1 : (1ULL << Width) - 1;
-  Mask = (Mask >> Rotation)
-    | ((Mask << (Width - Rotation)) & WidthMask);
+  if (Rotation != 0 && Rotation != 64)
+    Mask = (Mask >> Rotation)
+      | ((Mask << (Width - Rotation)) & WidthMask);
 
-  Imm = 0;
-  for (unsigned i = 0; i < RegWidth / Width; ++i) {
-    Imm |= Mask;
+  Imm = Mask;
+  for (unsigned i = 1; i < RegWidth / Width; ++i) {
     Mask <<= Width;
+    Imm |= Mask;
   }
 
   return true;
diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index 1b773d6..9a1ca61 100644
--- a/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -1037,7 +1037,14 @@ namespace AArch64II {
 
     // MO_LO12 - On a symbol operand, this represents a relocation containing
     // lower 12 bits of the address. Used in add/sub/ldr/str.
-    MO_LO12
+    MO_LO12,
+
+    // MO_ABS_G* - Represent the 16-bit granules of an absolute reference using
+    // movz/movk instructions.
+    MO_ABS_G3,
+    MO_ABS_G2_NC,
+    MO_ABS_G1_NC,
+    MO_ABS_G0_NC
   };
 }
 
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index 6838084..2d747091 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -59,6 +59,8 @@ def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true",
                                          "FP compare + branch is slow">;
 def FeatureVFPOnlySP : SubtargetFeature<"fp-only-sp", "FPOnlySP", "true",
                           "Floating point unit supports single precision only">;
+def FeatureTrustZone : SubtargetFeature<"trustzone", "HasTrustZone", "true",
+                          "Enable support for TrustZone security extensions">;
 
 // Some processors have FP multiply-accumulate instructions that don't
 // play nicely with other VFP / NEON instructions, and it's generally better
@@ -144,29 +146,33 @@ include "ARMSchedule.td"
 def ProcA5      : SubtargetFeature<"a5", "ARMProcFamily", "CortexA5",
                                    "Cortex-A5 ARM processors",
                                    [FeatureSlowFPBrcc, FeatureHasSlowFPVMLx,
-                                    FeatureVMLxForwarding, FeatureT2XtPk]>;
+                                    FeatureVMLxForwarding, FeatureT2XtPk,
+                                    FeatureTrustZone]>;
 def ProcA8      : SubtargetFeature<"a8", "ARMProcFamily", "CortexA8",
                                    "Cortex-A8 ARM processors",
                                    [FeatureSlowFPBrcc, FeatureHasSlowFPVMLx,
-                                    FeatureVMLxForwarding, FeatureT2XtPk]>;
+                                    FeatureVMLxForwarding, FeatureT2XtPk,
+                                    FeatureTrustZone]>;
 def ProcA9      : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9",
                                    "Cortex-A9 ARM processors",
                                    [FeatureVMLxForwarding,
                                     FeatureT2XtPk, FeatureFP16,
-                                    FeatureAvoidPartialCPSR]>;
+                                    FeatureAvoidPartialCPSR,
+                                    FeatureTrustZone]>;
 def ProcSwift   : SubtargetFeature<"swift", "ARMProcFamily", "Swift",
                                    "Swift ARM processors",
                                    [FeatureNEONForFP, FeatureT2XtPk,
                                     FeatureVFP4, FeatureMP, FeatureHWDiv,
                                     FeatureHWDivARM, FeatureAvoidPartialCPSR,
                                     FeatureAvoidMOVsShOp,
-                                    FeatureHasSlowFPVMLx]>;
+                                    FeatureHasSlowFPVMLx, FeatureTrustZone]>;
 
 // FIXME: It has not been determined if A15 has these features.
 def ProcA15      : SubtargetFeature<"a15", "ARMProcFamily", "CortexA15",
                                    "Cortex-A15 ARM processors",
                                    [FeatureT2XtPk, FeatureFP16,
-                                    FeatureAvoidPartialCPSR]>;
+                                    FeatureAvoidPartialCPSR,
+                                    FeatureTrustZone]>;
 def ProcR5      : SubtargetFeature<"r5", "ARMProcFamily", "CortexR5",
                                    "Cortex-R5 ARM processors",
                                    [FeatureSlowFPBrcc, FeatureHWDivARM,
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 9e68ff4..6005054 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -283,14 +283,20 @@ ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
       return false;
     --I;
   }
-  if (!isUnpredicatedTerminator(I))
-    return false;
 
   // Get the last instruction in the block.
   MachineInstr *LastInst = I;
+  unsigned LastOpc = LastInst->getOpcode();
+
+  // Check for an indirect branch first: it must be reported as 'unanalyzable'
+  // even if it's predicated.
+  if (isIndirectBranchOpcode(LastOpc))
+    return true;
+
+  if (!isUnpredicatedTerminator(I))
+    return false;
 
   // If there is only one terminator instruction, process it.
-  unsigned LastOpc = LastInst->getOpcode();
   if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
     if (isUncondBranchOpcode(LastOpc)) {
       TBB = LastInst->getOperand(0).getMBB();
@@ -747,10 +753,10 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     Mov->addRegisterKilled(SrcReg, TRI);
 }
 
-static const
-MachineInstrBuilder &AddDReg(MachineInstrBuilder &MIB,
-                             unsigned Reg, unsigned SubIdx, unsigned State,
-                             const TargetRegisterInfo *TRI) {
+const MachineInstrBuilder &
+ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
+                          unsigned SubIdx, unsigned State,
+                          const TargetRegisterInfo *TRI) const {
   if (!SubIdx)
     return MIB.addReg(Reg, State);
 
@@ -795,12 +801,22 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                    .addReg(SrcReg, getKillRegState(isKill))
                    .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
       } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
-        MachineInstrBuilder MIB =
-          AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STMIA))
-                       .addFrameIndex(FI))
-                       .addMemOperand(MMO);
-          MIB = AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
-                AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
+        if (Subtarget.hasV5TEOps()) {
+          MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::STRD));
+          AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
+          AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
+          MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO);
+
+          AddDefaultPred(MIB);
+        } else {
+          // Fall back to the STM instruction, which has existed since the dawn
+          // of time.
+          MachineInstrBuilder MIB =
+            AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STMIA))
+                             .addFrameIndex(FI).addMemOperand(MMO));
+          AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
+          AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
+        }
       } else
         llvm_unreachable("Unknown reg class!");
       break;
@@ -948,7 +964,6 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
   DebugLoc DL;
   if (I != MBB.end()) DL = I->getDebugLoc();
   MachineFunction &MF = *MBB.getParent();
-  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   MachineFrameInfo &MFI = *MF.getFrameInfo();
   unsigned Align = MFI.getObjectAlignment(FI);
   MachineMemOperand *MMO =
@@ -975,12 +990,24 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
       AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
                    .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
     } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
-      unsigned LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA : ARM::LDMIA;
-      MachineInstrBuilder MIB =
-        AddDefaultPred(BuildMI(MBB, I, DL, get(LdmOpc))
-                    .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
-      MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
-      MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
+      MachineInstrBuilder MIB;
+
+      if (Subtarget.hasV5TEOps()) {
+        MIB = BuildMI(MBB, I, DL, get(ARM::LDRD));
+        AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
+        AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
+        MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO);
+
+        AddDefaultPred(MIB);
+      } else {
+        // Fall back to the LDM instruction, which has existed since the dawn
+        // of time.
+        MIB = AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDMIA))
+                                 .addFrameIndex(FI).addMemOperand(MMO));
+        MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
+        MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
+      }
+
       if (TargetRegisterInfo::isPhysicalRegister(DestReg))
         MIB.addReg(DestReg, RegState::ImplicitDefine);
     } else
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 7c107bb..2ef659c 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -141,6 +141,10 @@ public:
 
   MachineInstr *commuteInstruction(MachineInstr*, bool=false) const;
 
+  const MachineInstrBuilder &AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
+                                     unsigned SubIdx, unsigned State,
+                                     const TargetRegisterInfo *TRI) const;
+
   virtual bool produceSameValue(const MachineInstr *MI0,
                                 const MachineInstr *MI1,
                                 const MachineRegisterInfo *MRI) const;
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index b6b27f8..b0d34a7 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -75,6 +75,15 @@ ARMBaseRegisterInfo::getCallPreservedMask(CallingConv::ID) const {
 }
 
 const uint32_t*
+ARMBaseRegisterInfo::getThisReturnPreservedMask(CallingConv::ID) const {
+  // Use the iOS mask only for iOS targets that are not using the AAPCS ABI;
+  // everything else gets the AAPCS mask.
+  if (STI.isTargetIOS() && !STI.isAAPCS_ABI())
+    return CSR_iOS_ThisReturn_RegMask;
+  return CSR_AAPCS_ThisReturn_RegMask;
+}
+
+const uint32_t*
 ARMBaseRegisterInfo::getNoPreservedMask() const {
   return CSR_NoRegs_RegMask;
 }
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index 725033b..0679919 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -96,6 +96,7 @@ public:
   /// Code Generation virtual methods...
   const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
   const uint32_t *getCallPreservedMask(CallingConv::ID) const;
+  const uint32_t *getThisReturnPreservedMask(CallingConv::ID) const;
   const uint32_t *getNoPreservedMask() const;
 
   BitVector getReservedRegs(const MachineFunction &MF) const;
diff --git a/lib/Target/ARM/ARMCallingConv.h b/lib/Target/ARM/ARMCallingConv.h
index e6e8c3d..4f94ad2 100644
--- a/lib/Target/ARM/ARMCallingConv.h
+++ b/lib/Target/ARM/ARMCallingConv.h
@@ -74,9 +74,15 @@ static bool f64AssignAAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
   static const uint16_t HiRegList[] = { ARM::R0, ARM::R2 };
   static const uint16_t LoRegList[] = { ARM::R1, ARM::R3 };
   static const uint16_t ShadowRegList[] = { ARM::R0, ARM::R1 };
+  static const uint16_t GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
 
   unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList, 2);
   if (Reg == 0) {
+
+    // If only R3 was left unallocated, we must still waste it.
+    Reg = State.AllocateReg(GPRArgRegs, 4);
+    assert((!Reg || Reg == ARM::R3) && "Wrong GPRs usage for f64");
+
     // For the 2nd half of a v2f64, do not just fail.
     if (CanFail)
       return false;
diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td
index b378b96..8ff666e 100644
--- a/lib/Target/ARM/ARMCallingConv.td
+++ b/lib/Target/ARM/ARMCallingConv.td
@@ -111,8 +111,7 @@ def CC_ARM_AAPCS_Common : CallingConv<[
   // i64 is 8-aligned i32 here, so we may need to eat R1 as a pad register
   // (and the same is true for f64 if VFP is not enabled)
   CCIfType<[i32], CCIfAlign<"8", CCAssignToRegWithShadow<[R0, R2], [R0, R1]>>>,
-  CCIfType<[i32], CCIf<"State.getNextStackOffset() == 0 &&"
-                       "ArgFlags.getOrigAlign() != 8",
+  CCIfType<[i32], CCIf<"ArgFlags.getOrigAlign() != 8",
                        CCAssignToReg<[R0, R1, R2, R3]>>>,
 
   CCIfType<[i32], CCIfAlign<"8", CCAssignToStackWithShadow<4, 8, R3>>>,
@@ -195,10 +194,21 @@ def CSR_NoRegs : CalleeSavedRegs<(add)>;
 def CSR_AAPCS : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6, R5, R4,
                                      (sequence "D%u", 15, 8))>;
 
+// Constructors and destructors return 'this' in the ARM C++ ABI; since 'this'
+// and the pointer return value are both passed in R0 in these cases, this can
+// be partially modelled by treating R0 as a callee-saved register.
+// Only the resulting RegMask is used; the SaveList is ignored.
+def CSR_AAPCS_ThisReturn : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6,
+                                            R5, R4, (sequence "D%u", 15, 8),
+                                            R0)>;
+
 // iOS ABI deviates from ARM standard ABI. R9 is not a callee-saved register.
 // Also save R7-R4 first to match the stack frame fixed spill areas.
 def CSR_iOS : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS, R9))>;
 
+def CSR_iOS_ThisReturn : CalleeSavedRegs<(add LR, R7, R6, R5, R4,
+                                          (sub CSR_AAPCS_ThisReturn, R9))>;
+
 // GHC set of callee saved regs is empty as all those regs are
 // used for passing STG regs around
 // add is a workaround for not being able to compile empty list:
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 29fcd40..5d45f64 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -144,8 +144,8 @@ class ARMFastISel : public FastISel {
     virtual bool TargetSelectInstruction(const Instruction *I);
     virtual unsigned TargetMaterializeConstant(const Constant *C);
     virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI);
-    virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
-                               const LoadInst *LI);
+    virtual bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
+                                     const LoadInst *LI);
     virtual bool FastLowerArguments();
   private:
   #include "ARMGenFastISel.inc"
@@ -2605,7 +2605,7 @@ unsigned ARMFastISel::ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
 
   unsigned Opc;
   bool isBoolZext = false;
-  const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::i32);
+  const TargetRegisterClass *RC;
   switch (SrcVT.SimpleTy) {
   default: return 0;
   case MVT::i16:
@@ -2797,12 +2797,12 @@ bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
   return false;
 }
 
-/// TryToFoldLoad - The specified machine instr operand is a vreg, and that
+/// \brief The specified machine instr operand is a vreg, and that
 /// vreg is being provided by the specified load instruction.  If possible,
 /// try to fold the load as an operand to the instruction, returning true if
 /// successful.
-bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
-                                const LoadInst *LI) {
+bool ARMFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
+                                      const LoadInst *LI) {
   // Verify we have a legal type before going any further.
   MVT VT;
   if (!isLoadTypeLegal(LI->getType(), VT))
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index 7a02adf..483802b 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -141,7 +141,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
   assert(!AFI->isThumb1OnlyFunction() &&
          "This emitPrologue does not support Thumb1!");
   bool isARM = !AFI->isThumbFunction();
-  unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
   unsigned NumBytes = MFI->getStackSize();
   const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
   DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
@@ -159,8 +159,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
     return;
 
   // Allocate the vararg register save area. This is not counted in NumBytes.
-  if (VARegSaveSize)
-    emitSPUpdate(isARM, MBB, MBBI, dl, TII, -VARegSaveSize,
+  if (ArgRegsSaveSize)
+    emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize,
                  MachineInstr::FrameSetup);
 
   if (!AFI->hasStackFrame()) {
@@ -357,7 +357,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
          "This emitEpilogue does not support Thumb1!");
   bool isARM = !AFI->isThumbFunction();
 
-  unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
   int NumBytes = (int)MFI->getStackSize();
   unsigned FramePtr = RegInfo->getFrameRegister(MF);
 
@@ -471,8 +471,8 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
     MBBI = NewMI;
   }
 
-  if (VARegSaveSize)
-    emitSPUpdate(isARM, MBB, MBBI, dl, TII, VARegSaveSize);
+  if (ArgRegsSaveSize)
+    emitSPUpdate(isARM, MBB, MBBI, dl, TII, ArgRegsSaveSize);
 }
 
 /// getFrameIndexReference - Provide a base+offset reference to an FI slot for
@@ -1003,7 +1003,7 @@ bool ARMFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
 
   MachineFunction &MF = *MBB.getParent();
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-  bool isVarArg = AFI->getVarArgsRegSaveSize() > 0;
+  bool isVarArg = AFI->getArgRegsSaveSize() > 0;
   unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
 
   // The emitPopInst calls below do not insert reloads for the aligned DPRCS2
@@ -1174,7 +1174,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
 
   if (AFI->isThumb1OnlyFunction()) {
     // Spill LR if Thumb1 function uses variable length argument lists.
-    if (AFI->getVarArgsRegSaveSize() > 0)
+    if (AFI->getArgRegsSaveSize() > 0)
       MRI.setPhysRegUsed(ARM::LR);
 
     // Spill R4 if Thumb1 epilogue has to restore SP from FP. We don't know
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 2c51de2..9e1782e 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -1469,14 +1469,14 @@ SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDNode *N) {
       SDValue Ops[]= { Base, AMOpc, getAL(CurDAG),
                        CurDAG->getRegister(0, MVT::i32), Chain };
       return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32,
-                                    MVT::i32, MVT::Other, Ops, 5);
+                                    MVT::i32, MVT::Other, Ops);
     } else {
       SDValue Chain = LD->getChain();
       SDValue Base = LD->getBasePtr();
       SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG),
                        CurDAG->getRegister(0, MVT::i32), Chain };
       return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32,
-                                    MVT::i32, MVT::Other, Ops, 6);
+                                    MVT::i32, MVT::Other, Ops);
     }
   }
 
@@ -1525,7 +1525,7 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) {
     SDValue Ops[]= { Base, Offset, getAL(CurDAG),
                      CurDAG->getRegister(0, MVT::i32), Chain };
     return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32, MVT::i32,
-                                  MVT::Other, Ops, 5);
+                                  MVT::Other, Ops);
   }
 
   return NULL;
@@ -1539,7 +1539,7 @@ SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, MVT::i32);
   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, MVT::i32);
   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
-  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
+  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
 }
 
 /// \brief Form a D register from a pair of S registers.
@@ -1550,7 +1550,7 @@ SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32);
   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, MVT::i32);
   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
-  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
+  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
 }
 
 /// \brief Form a quad register from a pair of D registers.
@@ -1560,7 +1560,7 @@ SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
-  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
+  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
 }
 
 /// \brief Form 4 consecutive D registers from a pair of Q registers.
@@ -1570,7 +1570,7 @@ SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32);
   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, MVT::i32);
   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
-  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
+  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
 }
 
 /// \brief Form 4 consecutive S registers.
@@ -1585,7 +1585,7 @@ SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, MVT::i32);
   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
                                     V2, SubReg2, V3, SubReg3 };
-  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9);
+  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
 }
 
 /// \brief Form 4 consecutive D registers.
@@ -1599,7 +1599,7 @@ SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, MVT::i32);
   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
                                     V2, SubReg2, V3, SubReg3 };
-  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9);
+  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
 }
 
 /// \brief Form 4 consecutive Q registers.
@@ -1613,7 +1613,7 @@ SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, MVT::i32);
   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
                                     V2, SubReg2, V3, SubReg3 };
-  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9);
+  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
 }
 
 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
@@ -1761,7 +1761,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
     Ops.push_back(Pred);
     Ops.push_back(Reg0);
     Ops.push_back(Chain);
-    VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size());
+    VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
 
   } else {
     // Otherwise, quad registers are loaded with two separate instructions,
@@ -1774,7 +1774,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
       SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
     const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
     SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
-                                          ResTy, AddrTy, MVT::Other, OpsA, 7);
+                                          ResTy, AddrTy, MVT::Other, OpsA);
     Chain = SDValue(VLdA, 2);
 
     // Load the odd subregs.
@@ -1791,8 +1791,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
     Ops.push_back(Pred);
     Ops.push_back(Reg0);
     Ops.push_back(Chain);
-    VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
-                                 Ops.data(), Ops.size());
+    VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
   }
 
   // Transfer memoperands.
@@ -1913,8 +1912,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
     Ops.push_back(Pred);
     Ops.push_back(Reg0);
     Ops.push_back(Chain);
-    SDNode *VSt =
-      CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size());
+    SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
 
     // Transfer memoperands.
     cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1);
@@ -1939,7 +1937,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
   const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
   SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                         MemAddr.getValueType(),
-                                        MVT::Other, OpsA, 7);
+                                        MVT::Other, OpsA);
   cast<MachineSDNode>(VStA)->setMemRefs(MemOp, MemOp + 1);
   Chain = SDValue(VStA, 1);
 
@@ -1958,7 +1956,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
   Ops.push_back(Reg0);
   Ops.push_back(Chain);
   SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
-                                        Ops.data(), Ops.size());
+                                        Ops);
   cast<MachineSDNode>(VStB)->setMemRefs(MemOp, MemOp + 1);
   return VStB;
 }
@@ -2063,8 +2061,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
 
   unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                                   QOpcodes[OpcodeIndex]);
-  SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys,
-                                         Ops.data(), Ops.size());
+  SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
   cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1);
   if (!IsLoad)
     return VLdLn;
@@ -2150,8 +2147,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
   if (isUpdating)
     ResTys.push_back(MVT::i32);
   ResTys.push_back(MVT::Other);
-  SDNode *VLdDup =
-    CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size());
+  SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
   cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1);
   SuperReg = SDValue(VLdDup, 0);
 
@@ -2197,7 +2193,7 @@ SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
   Ops.push_back(N->getOperand(FirstTblReg + NumVecs));
   Ops.push_back(getAL(CurDAG)); // predicate
   Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register
-  return CurDAG->getMachineNode(Opc, dl, VT, Ops.data(), Ops.size());
+  return CurDAG->getMachineNode(Opc, dl, VT, Ops);
 }
 
 SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
@@ -2542,7 +2538,7 @@ SDNode *ARMDAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
   MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
   SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(),
                                            MVT::i32, MVT::i32, MVT::Other,
-                                           Ops.data() ,Ops.size());
+                                           Ops);
   cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1);
   return ResNode;
 }
@@ -2599,7 +2595,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
         SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
         SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() };
         ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
-                                         Ops, 4);
+                                         Ops);
       } else {
         SDValue Ops[] = {
           CPIdx,
@@ -2609,7 +2605,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
           CurDAG->getEntryNode()
         };
         ResNode=CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
-                                       Ops, 5);
+                                       Ops);
       }
       ReplaceUses(SDValue(N, 0), SDValue(ResNode, 0));
       return NULL;
@@ -2719,7 +2715,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
                                                   MVT::i32);
         SDValue Ops[] = { N0.getOperand(0), Imm16,
                           getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
-        return CurDAG->getMachineNode(Opc, dl, VT, Ops, 4);
+        return CurDAG->getMachineNode(Opc, dl, VT, Ops);
       }
     }
     break;
@@ -2733,16 +2729,15 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
       break;
     if (Subtarget->isThumb()) {
       SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
-                        getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
-                        CurDAG->getRegister(0, MVT::i32) };
-      return CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32,Ops,4);
+                        getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
+      return CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32, Ops);
     } else {
       SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
                         getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
                         CurDAG->getRegister(0, MVT::i32) };
       return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
                                     ARM::UMULL : ARM::UMULLv5,
-                                    dl, MVT::i32, MVT::i32, Ops, 5);
+                                    dl, MVT::i32, MVT::i32, Ops);
     }
   }
   case ISD::SMUL_LOHI: {
@@ -2751,14 +2746,14 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
     if (Subtarget->isThumb()) {
       SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
                         getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
-      return CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32,Ops,4);
+      return CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32, Ops);
     } else {
       SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
                         getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
                         CurDAG->getRegister(0, MVT::i32) };
       return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
                                     ARM::SMULL : ARM::SMULLv5,
-                                    dl, MVT::i32, MVT::i32, Ops, 5);
+                                    dl, MVT::i32, MVT::i32, Ops);
     }
   }
   case ARMISD::UMLAL:{
@@ -2766,7 +2761,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                         N->getOperand(3), getAL(CurDAG),
                         CurDAG->getRegister(0, MVT::i32)};
-      return CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops, 6);
+      return CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops);
     }else{
       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                         N->getOperand(3), getAL(CurDAG),
@@ -2774,7 +2769,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
                         CurDAG->getRegister(0, MVT::i32) };
       return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
                                       ARM::UMLAL : ARM::UMLALv5,
-                                      dl, MVT::i32, MVT::i32, Ops, 7);
+                                      dl, MVT::i32, MVT::i32, Ops);
     }
   }
   case ARMISD::SMLAL:{
@@ -2782,7 +2777,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                         N->getOperand(3), getAL(CurDAG),
                         CurDAG->getRegister(0, MVT::i32)};
-      return CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops, 6);
+      return CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops);
     }else{
       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                         N->getOperand(3), getAL(CurDAG),
@@ -2790,7 +2785,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
                         CurDAG->getRegister(0, MVT::i32) };
       return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
                                       ARM::SMLAL : ARM::SMLALv5,
-                                      dl, MVT::i32, MVT::i32, Ops, 7);
+                                      dl, MVT::i32, MVT::i32, Ops);
     }
   }
   case ISD::LOAD: {
@@ -2833,7 +2828,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
                                MVT::i32);
     SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
     SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
-                                             MVT::Glue, Ops, 5);
+                                             MVT::Glue, Ops);
     Chain = SDValue(ResNode, 0);
     if (N->getNumValues() == 2) {
       InFlag = SDValue(ResNode, 1);
@@ -2863,7 +2858,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
     SDValue Pred = getAL(CurDAG);
     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
-    return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4);
+    return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops);
   }
   case ARMISD::VUZP: {
     unsigned Opc = 0;
@@ -2883,7 +2878,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
     SDValue Pred = getAL(CurDAG);
     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
-    return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4);
+    return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops);
   }
   case ARMISD::VTRN: {
     unsigned Opc = 0;
@@ -2902,7 +2897,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
     SDValue Pred = getAL(CurDAG);
     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
-    return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4);
+    return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops);
   }
   case ARMISD::BUILD_VECTOR: {
     EVT VecVT = N->getValueType(0);
@@ -3147,8 +3142,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
       Ops.push_back(getAL(CurDAG));
       Ops.push_back(CurDAG->getRegister(0, MVT::i32));
       Ops.push_back(Chain);
-      SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops.data(),
-                                          Ops.size());
+      SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
       // Transfer memoperands.
       MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
       MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
@@ -3211,8 +3205,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
 
       unsigned NewOpc = isThumb ? ARM::t2STREXD : ARM::STREXD;
 
-      SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops.data(),
-                                          Ops.size());
+      SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
       // Transfer memoperands.
       MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
       MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
@@ -3398,7 +3391,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
     Ops.push_back(N->getOperand(1));
     Ops.push_back(getAL(CurDAG));                    // Predicate
     Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register
-    return CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops.data(), Ops.size());
+    return CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops);
   }
   case ARMISD::VTBL2: {
     DebugLoc dl = N->getDebugLoc();
@@ -3414,8 +3407,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
     Ops.push_back(N->getOperand(2));
     Ops.push_back(getAL(CurDAG));                    // Predicate
     Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register
-    return CurDAG->getMachineNode(ARM::VTBL2, dl, VT,
-                                  Ops.data(), Ops.size());
+    return CurDAG->getMachineNode(ARM::VTBL2, dl, VT, Ops);
   }
 
   case ISD::CONCAT_VECTORS:
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index bb26090..e49cfc4 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -729,7 +729,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
       (Subtarget->hasV6Ops() && !Subtarget->isThumb())) {
     // membarrier needs custom lowering; the rest are legal and handled
     // normally.
-    setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
     setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
     // Custom lowering for 64-bit ops
     setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i64, Custom);
@@ -747,7 +746,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
     setInsertFencesForAtomic(true);
   } else {
     // Set them all for expansion, which will force libcalls.
-    setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
     setOperationAction(ISD::ATOMIC_FENCE,   MVT::Other, Expand);
     setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i32, Expand);
     setOperationAction(ISD::ATOMIC_SWAP,      MVT::i32, Expand);
@@ -765,8 +763,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
     // Unordered/Monotonic case.
     setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);
     setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);
-    // Since the libcalls include locking, fold in the fences
-    setShouldFoldAtomicFences(true);
   }
 
   setOperationAction(ISD::PREFETCH,         MVT::Other, Custom);
@@ -1238,7 +1234,8 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
                                    CallingConv::ID CallConv, bool isVarArg,
                                    const SmallVectorImpl<ISD::InputArg> &Ins,
                                    DebugLoc dl, SelectionDAG &DAG,
-                                   SmallVectorImpl<SDValue> &InVals) const {
+                                   SmallVectorImpl<SDValue> &InVals,
+                                   bool isThisReturn, SDValue ThisVal) const {
 
   // Assign locations to each value returned by this call.
   SmallVector<CCValAssign, 16> RVLocs;
@@ -1252,6 +1249,15 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
   for (unsigned i = 0; i != RVLocs.size(); ++i) {
     CCValAssign VA = RVLocs[i];
 
+    // Pass 'this' value directly from the argument to return value, to avoid
+    // reg unit interference
+    if (i == 0 && isThisReturn) {
+      assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
+             "unexpected return calling convention register assignment");
+      InVals.push_back(ThisVal);
+      continue;
+    }
+
     SDValue Val;
     if (VA.needsCustom()) {
       // Handle f64 or half of a v2f64.
@@ -1363,21 +1369,22 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
   bool isVarArg                         = CLI.IsVarArg;
 
   MachineFunction &MF = DAG.getMachineFunction();
-  bool IsStructRet    = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
-  bool IsSibCall = false;
+  bool isStructRet    = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
+  bool isThisReturn   = false;
+  bool isSibCall      = false;
   // Disable tail calls if they're not supported.
   if (!EnableARMTailCalls && !Subtarget->supportsTailCall())
     isTailCall = false;
   if (isTailCall) {
     // Check if it's really possible to do a tail call.
     isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
-                    isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(),
+                    isVarArg, isStructRet, MF.getFunction()->hasStructRetAttr(),
                                                    Outs, OutVals, Ins, DAG);
     // We don't support GuaranteedTailCallOpt for ARM, only automatically
     // detected sibcalls.
     if (isTailCall) {
       ++NumTailCalls;
-      IsSibCall = true;
+      isSibCall = true;
     }
   }
 
@@ -1393,12 +1400,12 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
   unsigned NumBytes = CCInfo.getNextStackOffset();
 
   // For tail calls, memory operands are available in our caller's stack.
-  if (IsSibCall)
+  if (isSibCall)
     NumBytes = 0;
 
   // Adjust the stack pointer for the new arguments...
   // These operations are automatically eliminated by the prolog/epilog pass
-  if (!IsSibCall)
+  if (!isSibCall)
     Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
 
   SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
@@ -1460,6 +1467,13 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                          StackPtr, MemOpChains, Flags);
       }
     } else if (VA.isRegLoc()) {
+      if (realArgIdx == 0 && Flags.isReturned() && Outs[0].VT == MVT::i32) {
+        assert(VA.getLocVT() == MVT::i32 &&
+               "unexpected calling convention register assignment");
+        assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
+               "unexpected use of 'returned'");
+        isThisReturn = true;
+      }
       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
     } else if (isByVal) {
       assert(VA.isMemLoc());
@@ -1467,10 +1481,17 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
 
       // True if this byval aggregate will be split between registers
       // and memory.
-      if (CCInfo.isFirstByValRegValid()) {
+      unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
+      unsigned CurByValIdx = CCInfo.getInRegsParamsProceed();
+
+      if (CurByValIdx < ByValArgsCount) {
+
+        unsigned RegBegin, RegEnd;
+        CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
+
         EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
         unsigned int i, j;
-        for (i = 0, j = CCInfo.getFirstByValReg(); j < ARM::R4; i++, j++) {
+        for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
           SDValue Const = DAG.getConstant(4*i, MVT::i32);
           SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
           SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
@@ -1479,11 +1500,15 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
           MemOpChains.push_back(Load.getValue(1));
           RegsToPass.push_back(std::make_pair(j, Load));
         }
-        offset = ARM::R4 - CCInfo.getFirstByValReg();
-        CCInfo.clearFirstByValReg();
+
+        // If the parameter size exceeds the register area, the "offset" value
+        // helps us calculate the stack slot for the remaining part properly.
+        offset = RegEnd - RegBegin;
+
+        CCInfo.nextInRegsParam();
       }
 
-      if (Flags.getByValSize() - 4*offset > 0) {
+      if (Flags.getByValSize() > 4*offset) {
         unsigned LocMemOffset = VA.getLocMemOffset();
         SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset);
         SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,
@@ -1499,7 +1524,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
         MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
                                           Ops, array_lengthof(Ops)));
       }
-    } else if (!IsSibCall) {
+    } else if (!isSibCall) {
       assert(VA.isMemLoc());
 
       MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
@@ -1539,7 +1564,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                                RegsToPass[i].second, InFlag);
       InFlag = Chain.getValue(1);
     }
-    InFlag =SDValue();
+    InFlag = SDValue();
   }
 
   // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
@@ -1680,8 +1705,15 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                                   RegsToPass[i].second.getValueType()));
 
   // Add a register mask operand representing the call-preserved registers.
+  const uint32_t *Mask;
   const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
-  const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
+  const ARMBaseRegisterInfo *ARI = static_cast<const ARMBaseRegisterInfo*>(TRI);
+  if (isThisReturn)
+    // For 'this' returns, use the R0-preserving mask
+    Mask = ARI->getThisReturnPreservedMask(CallConv);
+  else
+    Mask = ARI->getCallPreservedMask(CallConv);
+
   assert(Mask && "Missing call preserved mask for calling convention");
   Ops.push_back(DAG.getRegisterMask(Mask));
 
@@ -1703,8 +1735,9 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
 
   // Handle result values, copying them out of physregs into vregs that we
   // return.
-  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins,
-                         dl, DAG, InVals);
+  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
+                         InVals, isThisReturn,
+                         isThisReturn ? OutVals[0] : SDValue());
 }
 
 /// HandleByVal - Every parameter *after* a byval parameter is passed
@@ -1718,8 +1751,24 @@ ARMTargetLowering::HandleByVal(
   assert((State->getCallOrPrologue() == Prologue ||
           State->getCallOrPrologue() == Call) &&
          "unhandled ParmContext");
-  if ((!State->isFirstByValRegValid()) &&
-      (ARM::R0 <= reg) && (reg <= ARM::R3)) {
+
+  // For in-prologue parameters handling, we also introduce stack offset
+  // for byval registers: see CallingConvLower.cpp, CCState::HandleByVal.
+  // This behaviour falls outside the AAPCS rules (5.5 Parameter Passing) for
+  // how NSAA should be evaluated (NSAA means "next stacked argument address").
+  // So: NextStackOffset = NSAAOffset + SizeOfByValParamsStoredInRegs.
+  // Then: NSAAOffset = NextStackOffset - SizeOfByValParamsStoredInRegs.
+  unsigned NSAAOffset = State->getNextStackOffset();
+  if (State->getCallOrPrologue() != Call) {
+    for (unsigned i = 0, e = State->getInRegsParamsCount(); i != e; ++i) {
+      unsigned RB, RE;
+      State->getInRegsParamInfo(i, RB, RE);
+      assert(NSAAOffset >= (RE-RB)*4 &&
+             "Stack offset for byval regs doesn't introduced anymore?");
+      NSAAOffset -= (RE-RB)*4;
+    }
+  }
+  if ((ARM::R0 <= reg) && (reg <= ARM::R3)) {
     if (Subtarget->isAAPCS_ABI() && Align > 4) {
       unsigned AlignInRegs = Align / 4;
       unsigned Waste = (ARM::R4 - reg) % AlignInRegs;
@@ -1727,22 +1776,45 @@ ARMTargetLowering::HandleByVal(
         reg = State->AllocateReg(GPRArgRegs, 4);
     }
     if (reg != 0) {
-      State->setFirstByValReg(reg);
+      unsigned excess = 4 * (ARM::R4 - reg);
+
+      // Special case: NSAA != SP and the parameter is larger than all of the
+      // remaining GPR regs. In that case we can't split the parameter; we must
+      // send it to the stack. We must also set NCRN to R4, so waste all
+      // remaining registers.
+      if (Subtarget->isAAPCS_ABI() && NSAAOffset != 0 && size > excess) {
+        while (State->AllocateReg(GPRArgRegs, 4))
+          ;
+        return;
+      }
+
+      // First register for byval parameter is the first register that wasn't
+      // allocated before this method call, so it would be "reg".
+      // If parameter is small enough to be saved in range [reg, r4), then
+      // the end (first after last) register would be reg + param-size-in-regs,
+      // else parameter would be split between registers and stack,
+      // end register would be r4 in this case.
+      unsigned ByValRegBegin = reg;
+      unsigned ByValRegEnd = (size < excess) ? reg + size/4 : ARM::R4;
+      State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
+      // Note, first register is allocated in the beginning of function already,
+      // allocate the remaining registers we need.
+      for (unsigned i = reg+1; i != ByValRegEnd; ++i)
+        State->AllocateReg(GPRArgRegs, 4);
       // At a call site, a byval parameter that is split between
       // registers and memory needs its size truncated here.  In a
       // function prologue, such byval parameters are reassembled in
       // memory, and are not truncated.
       if (State->getCallOrPrologue() == Call) {
-        unsigned excess = 4 * (ARM::R4 - reg);
-        assert(size >= excess && "expected larger existing stack allocation");
-        size -= excess;
+        // Make the remaining size equal to 0 when
+        // the whole structure may be stored in registers.
+        if (size < excess)
+          size = 0;
+        else
+          size -= excess;
       }
     }
   }
-  // Confiscate any remaining parameter registers to preclude their
-  // assignment to subsequent parameters.
-  while (State->AllocateReg(GPRArgRegs, 4))
-    ;
 }
 
 /// MatchingStackOffset - Return true if the given stack call argument is
@@ -1874,7 +1946,7 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
   // local frame.
   const ARMFunctionInfo *AFI_Caller = DAG.getMachineFunction().
                                       getInfo<ARMFunctionInfo>();
-  if (AFI_Caller->getVarArgsRegSaveSize())
+  if (AFI_Caller->getArgRegsSaveSize())
     return false;
 
   // If the callee takes no arguments then go on to check the results of the
@@ -2461,35 +2533,6 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
   }
 }
 
-static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG,
-                               const ARMSubtarget *Subtarget) {
-  DebugLoc dl = Op.getDebugLoc();
-  if (!Subtarget->hasDataBarrier()) {
-    // Some ARMv6 cpus can support data barriers with an mcr instruction.
-    // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
-    // here.
-    assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
-           "Unexpected ISD::MEMBARRIER encountered. Should be libcall!");
-    return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
-                       DAG.getConstant(0, MVT::i32));
-  }
-
-  SDValue Op5 = Op.getOperand(5);
-  bool isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue() != 0;
-  unsigned isLL = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
-  unsigned isLS = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
-  bool isOnlyStoreBarrier = (isLL == 0 && isLS == 0);
-
-  ARM_MB::MemBOpt DMBOpt;
-  if (isDeviceBarrier)
-    DMBOpt = isOnlyStoreBarrier ? ARM_MB::ST : ARM_MB::SY;
-  else
-    DMBOpt = isOnlyStoreBarrier ? ARM_MB::ISHST : ARM_MB::ISH;
-  return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0),
-                     DAG.getConstant(DMBOpt, MVT::i32));
-}
-
-
 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
                                  const ARMSubtarget *Subtarget) {
   // FIXME: handle "fence singlethread" more efficiently.
@@ -2586,12 +2629,16 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
 
 void
 ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
-                                  unsigned &VARegSize, unsigned &VARegSaveSize)
+                                  unsigned InRegsParamRecordIdx,
+                                  unsigned &ArgRegsSize,
+                                  unsigned &ArgRegsSaveSize)
   const {
   unsigned NumGPRs;
-  if (CCInfo.isFirstByValRegValid())
-    NumGPRs = ARM::R4 - CCInfo.getFirstByValReg();
-  else {
+  if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
+    unsigned RBegin, REnd;
+    CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
+    NumGPRs = REnd - RBegin;
+  } else {
     unsigned int firstUnalloced;
     firstUnalloced = CCInfo.getFirstUnallocated(GPRArgRegs,
                                                 sizeof(GPRArgRegs) /
@@ -2600,8 +2647,8 @@ ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
   }
 
   unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
-  VARegSize = NumGPRs * 4;
-  VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1);
+  ArgRegsSize = NumGPRs * 4;
+  ArgRegsSaveSize = (ArgRegsSize + Align - 1) & ~(Align - 1);
 }
 
 // The remaining GPRs hold either the beginning of variable-argument
@@ -2611,40 +2658,60 @@ ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
 // If this is a variadic function, the va_list pointer will begin with
 // these values; otherwise, this reassembles a (byval) structure that
 // was split between registers and memory.
-void
-ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
-                                        DebugLoc dl, SDValue &Chain,
-                                        const Value *OrigArg,
-                                        unsigned OffsetFromOrigArg,
-                                        unsigned ArgOffset,
-                                        bool ForceMutable) const {
+// Return: The frame index registers were stored into.
+int
+ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
+                                  DebugLoc dl, SDValue &Chain,
+                                  const Value *OrigArg,
+                                  unsigned InRegsParamRecordIdx,
+                                  unsigned OffsetFromOrigArg,
+                                  unsigned ArgOffset,
+                                  bool ForceMutable) const {
+
+  // Currently, two use-cases are possible:
+  // Case #1. Non var-args function, and we meet first byval parameter.
+  //          Setup first unallocated register as first byval register;
+  //          eat all remaining registers
+  //          (these two actions are performed by HandleByVal method).
+  //          Then, here, we initialize stack frame with
+  //          "store-reg" instructions.
+  // Case #2. Var-args function, that doesn't contain byval parameters.
+  //          The same: eat all remaining unallocated registers,
+  //          initialize stack frame.
+
   MachineFunction &MF = DAG.getMachineFunction();
   MachineFrameInfo *MFI = MF.getFrameInfo();
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-  unsigned firstRegToSaveIndex;
-  if (CCInfo.isFirstByValRegValid())
-    firstRegToSaveIndex = CCInfo.getFirstByValReg() - ARM::R0;
-  else {
+  unsigned firstRegToSaveIndex, lastRegToSaveIndex;
+  unsigned RBegin, REnd;
+  if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
+    CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
+    firstRegToSaveIndex = RBegin - ARM::R0;
+    lastRegToSaveIndex = REnd - ARM::R0;
+  } else {
     firstRegToSaveIndex = CCInfo.getFirstUnallocated
       (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0]));
+    lastRegToSaveIndex = 4;
   }
 
-  unsigned VARegSize, VARegSaveSize;
-  computeRegArea(CCInfo, MF, VARegSize, VARegSaveSize);
-  if (VARegSaveSize) {
-    // If this function is vararg, store any remaining integer argument regs
-    // to their spots on the stack so that they may be loaded by deferencing
-    // the result of va_next.
-    AFI->setVarArgsRegSaveSize(VARegSaveSize);
-    AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(VARegSaveSize,
-                                                     ArgOffset + VARegSaveSize
-                                                     - VARegSize,
-                                                     false));
-    SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(),
-                                    getPointerTy());
+  unsigned ArgRegsSize, ArgRegsSaveSize;
+  computeRegArea(CCInfo, MF, InRegsParamRecordIdx, ArgRegsSize, ArgRegsSaveSize);
+
+  // Store any by-val regs to their spots on the stack so that they may be
+  // loaded by dereferencing the result of formal parameter pointer or va_next.
+  // Note: once stack area for byval/varargs registers
+  // was initialized, it can't be initialized again.
+  if (ArgRegsSaveSize) {
+
+    int FrameIndex = MFI->CreateFixedObject(
+                      ArgRegsSaveSize,
+                      ArgOffset + ArgRegsSaveSize - ArgRegsSize,
+                      false);
+    SDValue FIN = DAG.getFrameIndex(FrameIndex, getPointerTy());
 
     SmallVector<SDValue, 4> MemOps;
-    for (unsigned i = 0; firstRegToSaveIndex < 4; ++firstRegToSaveIndex, ++i) {
+    for (unsigned i = 0; firstRegToSaveIndex < lastRegToSaveIndex;
+         ++firstRegToSaveIndex, ++i) {
       const TargetRegisterClass *RC;
       if (AFI->isThumb1OnlyFunction())
         RC = &ARM::tGPRRegClass;
@@ -2661,13 +2728,37 @@ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
       FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
                         DAG.getConstant(4, getPointerTy()));
     }
+
+    AFI->setArgRegsSaveSize(ArgRegsSaveSize + AFI->getArgRegsSaveSize());
+
     if (!MemOps.empty())
       Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                           &MemOps[0], MemOps.size());
+    return FrameIndex;
   } else
     // This will point to the next argument passed via stack.
-    AFI->setVarArgsFrameIndex(
-        MFI->CreateFixedObject(4, ArgOffset, !ForceMutable));
+    return MFI->CreateFixedObject(4, ArgOffset, !ForceMutable);
+}
+
+// Set up the stack frame that the va_list pointer will start from.
+void
+ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
+                                        DebugLoc dl, SDValue &Chain,
+                                        unsigned ArgOffset,
+                                        bool ForceMutable) const {
+  MachineFunction &MF = DAG.getMachineFunction();
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+  // Try to store any remaining integer argument regs
+  // to their spots on the stack so that they may be loaded by dereferencing
+  // the result of va_next.
+  // If there are no regs to be stored, just point to the address after the
+  // last argument passed via stack.
+  int FrameIndex =
+    StoreByValRegs(CCInfo, DAG, dl, Chain, 0, CCInfo.getInRegsParamsCount(),
+                   0, ArgOffset, ForceMutable);
+
+  AFI->setVarArgsFrameIndex(FrameIndex);
 }
 
 SDValue
@@ -2696,6 +2787,12 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
   SDValue ArgValue;
   Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin();
   unsigned CurArgIdx = 0;
+
+  // Initially ArgRegsSaveSize is zero.
+  // Then we increase this value each time we meet a byval parameter.
+  // We also increase this value in the case of a varargs function.
+  AFI->setArgRegsSaveSize(0);
+
   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
     CCValAssign &VA = ArgLocs[i];
     std::advance(CurOrigArg, Ins[VA.getValNo()].OrigArgIndex - CurArgIdx);
@@ -2793,20 +2890,15 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
           // Since they could be overwritten by lowering of arguments in case of
           // a tail call.
           if (Flags.isByVal()) {
-            ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-            if (!AFI->getVarArgsFrameIndex()) {
-              VarArgStyleRegisters(CCInfo, DAG,
-                                   dl, Chain, CurOrigArg,
-                                   Ins[VA.getValNo()].PartOffset,
-                                   VA.getLocMemOffset(),
-                                   true /*force mutable frames*/);
-              int VAFrameIndex = AFI->getVarArgsFrameIndex();
-              InVals.push_back(DAG.getFrameIndex(VAFrameIndex, getPointerTy()));
-            } else {
-              int FI = MFI->CreateFixedObject(Flags.getByValSize(),
-                                              VA.getLocMemOffset(), false);
-              InVals.push_back(DAG.getFrameIndex(FI, getPointerTy()));
-            }
+            unsigned CurByValIndex = CCInfo.getInRegsParamsProceed();
+            int FrameIndex = StoreByValRegs(
+                CCInfo, DAG, dl, Chain, CurOrigArg,
+                CurByValIndex,
+                Ins[VA.getValNo()].PartOffset,
+                VA.getLocMemOffset(),
+                true /*force mutable frames*/);
+            InVals.push_back(DAG.getFrameIndex(FrameIndex, getPointerTy()));
+            CCInfo.nextInRegsParam();
           } else {
             int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
                                             VA.getLocMemOffset(), true);
@@ -2824,7 +2916,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
 
   // varargs
   if (isVarArg)
-    VarArgStyleRegisters(CCInfo, DAG, dl, Chain, 0, 0,
+    VarArgStyleRegisters(CCInfo, DAG, dl, Chain,
                          CCInfo.getNextStackOffset());
 
   return Chain;
@@ -5165,6 +5257,23 @@ static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
   return false;
 }
 
+static EVT getExtensionTo64Bits(const EVT &OrigVT) {
+  if (OrigVT.getSizeInBits() >= 64)
+    return OrigVT;
+
+  assert(OrigVT.isSimple() && "Expecting a simple value type");
+
+  MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
+  switch (OrigSimpleTy) {
+  default: llvm_unreachable("Unexpected Vector Type");
+  case MVT::v2i8:
+  case MVT::v2i16:
+     return MVT::v2i32;
+  case MVT::v4i8:
+    return  MVT::v4i16;
+  }
+}
+
 /// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total
 /// value size to 64 bits. We need a 64-bit D register as an operand to VMULL.
 /// We insert the required extension here to get the vector to fill a D register.
@@ -5180,18 +5289,8 @@ static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG,
     return N;
 
   // Must extend size to at least 64 bits to be used as an operand for VMULL.
-  MVT::SimpleValueType OrigSimpleTy = OrigTy.getSimpleVT().SimpleTy;
-  EVT NewVT;
-  switch (OrigSimpleTy) {
-  default: llvm_unreachable("Unexpected Orig Vector Type");
-  case MVT::v2i8:
-  case MVT::v2i16:
-    NewVT = MVT::v2i32;
-    break;
-  case MVT::v4i8:
-    NewVT = MVT::v4i16;
-    break;
-  }
+  EVT NewVT = getExtensionTo64Bits(OrigTy);
+
   return DAG.getNode(ExtOpcode, N->getDebugLoc(), NewVT, N);
 }
 
@@ -5201,22 +5300,22 @@ static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG,
 /// reach a total size of 64 bits. We have to add the extension separately
 /// because ARM does not have a sign/zero extending load for vectors.
 static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) {
-  SDValue NonExtendingLoad =
-    DAG.getLoad(LD->getMemoryVT(), LD->getDebugLoc(), LD->getChain(),
+  EVT ExtendedTy = getExtensionTo64Bits(LD->getMemoryVT());
+
+  // The load already has the right type.
+  if (ExtendedTy == LD->getMemoryVT())
+    return DAG.getLoad(LD->getMemoryVT(), LD->getDebugLoc(), LD->getChain(),
                 LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(),
                 LD->isNonTemporal(), LD->isInvariant(),
                 LD->getAlignment());
-  unsigned ExtOp = 0;
-  switch (LD->getExtensionType()) {
-  default: llvm_unreachable("Unexpected LoadExtType");
-  case ISD::EXTLOAD:
-  case ISD::SEXTLOAD: ExtOp = ISD::SIGN_EXTEND; break;
-  case ISD::ZEXTLOAD: ExtOp = ISD::ZERO_EXTEND; break;
-  }
-  MVT::SimpleValueType MemType = LD->getMemoryVT().getSimpleVT().SimpleTy;
-  MVT::SimpleValueType ExtType = LD->getValueType(0).getSimpleVT().SimpleTy;
-  return AddRequiredExtensionForVMULL(NonExtendingLoad, DAG,
-                                      MemType, ExtType, ExtOp);
+
+  // We need to create a zextload/sextload. We cannot just create a load
+  // followed by a zext/sext node because LowerMUL is also run during normal
+  // operation legalization where we can't create illegal types.
+  return DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(), ExtendedTy,
+                        LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(),
+                        LD->getMemoryVT(), LD->isVolatile(),
+                        LD->isNonTemporal(), LD->getAlignment());
 }
 
 /// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND,
@@ -5614,7 +5713,6 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::BR_CC:         return LowerBR_CC(Op, DAG);
   case ISD::BR_JT:         return LowerBR_JT(Op, DAG);
   case ISD::VASTART:       return LowerVASTART(Op, DAG);
-  case ISD::MEMBARRIER:    return LowerMEMBARRIER(Op, DAG, Subtarget);
   case ISD::ATOMIC_FENCE:  return LowerATOMIC_FENCE(Op, DAG, Subtarget);
   case ISD::PREFETCH:      return LowerPREFETCH(Op, DAG, Subtarget);
   case ISD::SINT_TO_FP:
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 9ee17f0..426010e 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -464,7 +464,8 @@ namespace llvm {
                             CallingConv::ID CallConv, bool isVarArg,
                             const SmallVectorImpl<ISD::InputArg> &Ins,
                             DebugLoc dl, SelectionDAG &DAG,
-                            SmallVectorImpl<SDValue> &InVals) const;
+                            SmallVectorImpl<SDValue> &InVals,
+                            bool isThisReturn, SDValue ThisVal) const;
 
     virtual SDValue
       LowerFormalArguments(SDValue Chain,
@@ -473,16 +474,23 @@ namespace llvm {
                            DebugLoc dl, SelectionDAG &DAG,
                            SmallVectorImpl<SDValue> &InVals) const;
 
+    int StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
+                       DebugLoc dl, SDValue &Chain,
+                       const Value *OrigArg,
+                       unsigned InRegsParamRecordIdx,
+                       unsigned OffsetFromOrigArg,
+                       unsigned ArgOffset,
+                       bool ForceMutable) const;
+
     void VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
                               DebugLoc dl, SDValue &Chain,
-                              const Value *OrigArg,
-                              unsigned OffsetFromOrigArg,
                               unsigned ArgOffset,
-                              bool ForceMutable = false)
-      const;
+                              bool ForceMutable = false) const;
 
     void computeRegArea(CCState &CCInfo, MachineFunction &MF,
-                        unsigned &VARegSize, unsigned &VARegSaveSize) const;
+                        unsigned InRegsParamRecordIdx,
+                        unsigned &ArgRegsSize,
+                        unsigned &ArgRegsSaveSize) const;
 
     virtual SDValue
       LowerCall(TargetLowering::CallLoweringInfo &CLI,
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 11550c5..1bd174e 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -221,6 +221,9 @@ def HasDB            : Predicate<"Subtarget->hasDataBarrier()">,
 def HasMP            : Predicate<"Subtarget->hasMPExtension()">,
                                  AssemblerPredicate<"FeatureMP",
                                                     "mp-extensions">;
+def HasTrustZone     : Predicate<"Subtarget->hasTrustZone()">,
+                                 AssemblerPredicate<"FeatureTrustZone",
+                                                    "TrustZone">;
 def UseNEONForFP     : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">;
 def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">;
 def IsThumb          : Predicate<"Subtarget->isThumb()">,
@@ -578,6 +581,17 @@ def imm0_1 : Operand<i32> { let ParserMatchClass = Imm0_1AsmOperand; }
 def Imm0_3AsmOperand: ImmAsmOperand { let Name = "Imm0_3"; }
 def imm0_3 : Operand<i32> { let ParserMatchClass = Imm0_3AsmOperand; }
 
+/// imm0_4 predicate - Immediate in the range [0,4].
+def Imm0_4AsmOperand : ImmAsmOperand
+{ 
+  let Name = "Imm0_4"; 
+  let DiagnosticType = "ImmRange0_4";  
+}
+def imm0_4 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 5; }]> {
+  let ParserMatchClass = Imm0_4AsmOperand;
+  let DecoderMethod = "DecodeImm0_4";
+}
+
 /// imm0_7 predicate - Immediate in the range [0,7].
 def Imm0_7AsmOperand: ImmAsmOperand { let Name = "Imm0_7"; }
 def imm0_7 : Operand<i32>, ImmLeaf<i32, [{
@@ -741,18 +755,26 @@ def imm1_16 : Operand<i32>, PatLeaf<(imm), [{ return Imm > 0 && Imm <= 16; }],
 // addrmode_imm12 := reg +/- imm12
 //
 def MemImm12OffsetAsmOperand : AsmOperandClass { let Name = "MemImm12Offset"; }
-def addrmode_imm12 : Operand<i32>,
+class AddrMode_Imm12 : Operand<i32>,
                      ComplexPattern<i32, 2, "SelectAddrModeImm12", []> {
   // 12-bit immediate operand. Note that instructions using this encode
   // #0 and #-0 differently. We flag #-0 as the magic value INT32_MIN. All other
   // immediate values are as normal.
 
   let EncoderMethod = "getAddrModeImm12OpValue";
-  let PrintMethod = "printAddrModeImm12Operand";
   let DecoderMethod = "DecodeAddrModeImm12Operand";
   let ParserMatchClass = MemImm12OffsetAsmOperand;
   let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
 }
+
+def addrmode_imm12 : AddrMode_Imm12 {
+  let PrintMethod = "printAddrModeImm12Operand<false>";
+}
+
+def addrmode_imm12_pre : AddrMode_Imm12 {
+  let PrintMethod = "printAddrModeImm12Operand<true>";
+}
+
 // ldst_so_reg := reg +/- reg shop imm
 //
 def MemRegOffsetAsmOperand : AsmOperandClass { let Name = "MemRegOffset"; }
@@ -852,14 +874,23 @@ def am2offset_imm : Operand<i32>,
 //
 // FIXME: split into imm vs. reg versions.
 def AddrMode3AsmOperand : AsmOperandClass { let Name = "AddrMode3"; }
-def addrmode3 : Operand<i32>,
-                ComplexPattern<i32, 3, "SelectAddrMode3", []> {
+class AddrMode3 : Operand<i32>,
+                  ComplexPattern<i32, 3, "SelectAddrMode3", []> {
   let EncoderMethod = "getAddrMode3OpValue";
-  let PrintMethod = "printAddrMode3Operand";
   let ParserMatchClass = AddrMode3AsmOperand;
   let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm);
 }
 
+def addrmode3 : AddrMode3
+{
+  let PrintMethod = "printAddrMode3Operand<false>";
+}
+
+def addrmode3_pre : AddrMode3
+{
+  let PrintMethod = "printAddrMode3Operand<true>";
+}
+
 // FIXME: split into imm vs. reg versions.
 // FIXME: parser method to handle +/- register.
 def AM3OffsetAsmOperand : AsmOperandClass {
@@ -885,15 +916,22 @@ def ldstm_mode : OptionalDefOperand<OtherVT, (ops i32), (ops (i32 1))> {
 // addrmode5 := reg +/- imm8*4
 //
 def AddrMode5AsmOperand : AsmOperandClass { let Name = "AddrMode5"; }
-def addrmode5 : Operand<i32>,
-                ComplexPattern<i32, 2, "SelectAddrMode5", []> {
-  let PrintMethod = "printAddrMode5Operand";
+class AddrMode5 : Operand<i32>,
+                  ComplexPattern<i32, 2, "SelectAddrMode5", []> {
   let EncoderMethod = "getAddrMode5OpValue";
   let DecoderMethod = "DecodeAddrMode5Operand";
   let ParserMatchClass = AddrMode5AsmOperand;
   let MIOperandInfo = (ops GPR:$base, i32imm);
 }
 
+def addrmode5 : AddrMode5 {
+   let PrintMethod = "printAddrMode5Operand<false>";
+}
+
+def addrmode5_pre : AddrMode5 {
+   let PrintMethod = "printAddrMode5Operand<true>";
+}
+
 // addrmode6 := reg with optional alignment
 //
 def AddrMode6AsmOperand : AsmOperandClass { let Name = "AlignedMemory"; }
@@ -1668,11 +1706,11 @@ def ATOMUMAX6432  : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
                               NoItinerary, []>;
 }
 
-def HINT : AI<(outs), (ins imm0_255:$imm), MiscFrm, NoItinerary,
+def HINT : AI<(outs), (ins imm0_4:$imm), MiscFrm, NoItinerary,
               "hint", "\t$imm", []>, Requires<[IsARM, HasV6]> {
-  bits<8> imm;
-  let Inst{27-8} = 0b00110010000011110000;
-  let Inst{7-0} = imm;
+  bits<3> imm;
+  let Inst{27-3} = 0b0011001000001111000000000;
+  let Inst{2-0} = imm;
 }
 
 def : InstAlias<"nop$p", (HINT 0, pred:$p)>, Requires<[IsARM, HasV6T2]>;
@@ -2077,7 +2115,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in {
 
 // Secure Monitor Call is a system instruction.
 def SMC : ABI<0b0001, (outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt",
-              []> {
+              []>, Requires<[IsARM, HasTrustZone]> {
   bits<4> opt;
   let Inst{23-4} = 0b01100000000000000111;
   let Inst{3-0} = opt;
@@ -2238,7 +2276,7 @@ def LDRD : AI3ld<0b1101, 0, (outs GPR:$Rd, GPR:$dst2),
 multiclass AI2_ldridx<bit isByte, string opc,
                       InstrItinClass iii, InstrItinClass iir> {
   def _PRE_IMM  : AI2ldstidx<1, isByte, 1, (outs GPR:$Rt, GPR:$Rn_wb),
-                      (ins addrmode_imm12:$addr), IndexModePre, LdFrm, iii,
+                      (ins addrmode_imm12_pre:$addr), IndexModePre, LdFrm, iii,
                       opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
     bits<17> addr;
     let Inst{25} = 0;
@@ -2275,6 +2313,7 @@ multiclass AI2_ldridx<bit isByte, string opc,
      let Inst{23} = offset{12};
      let Inst{19-16} = addr;
      let Inst{11-0} = offset{11-0};
+     let Inst{4} = 0;
 
     let DecoderMethod = "DecodeAddrMode2IdxInstruction";
    }
@@ -2307,7 +2346,7 @@ defm LDRB : AI2_ldridx<1, "ldrb", IIC_iLoad_bh_iu, IIC_iLoad_bh_ru>;
 
 multiclass AI3_ldridx<bits<4> op, string opc, InstrItinClass itin> {
   def _PRE  : AI3ldstidx<op, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
-                        (ins addrmode3:$addr), IndexModePre,
+                        (ins addrmode3_pre:$addr), IndexModePre,
                         LdMiscFrm, itin,
                         opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
     bits<14> addr;
@@ -2341,7 +2380,7 @@ defm LDRSH : AI3_ldridx<0b1111, "ldrsh", IIC_iLoad_bh_ru>;
 defm LDRSB : AI3_ldridx<0b1101, "ldrsb", IIC_iLoad_bh_ru>;
 let hasExtraDefRegAllocReq = 1 in {
 def LDRD_PRE : AI3ldstidx<0b1101, 0, 1, (outs GPR:$Rt, GPR:$Rt2, GPR:$Rn_wb),
-                          (ins addrmode3:$addr), IndexModePre,
+                          (ins addrmode3_pre:$addr), IndexModePre,
                           LdMiscFrm, IIC_iLoad_d_ru,
                           "ldrd", "\t$Rt, $Rt2, $addr!",
                           "$addr.base = $Rn_wb", []> {
@@ -2497,7 +2536,7 @@ def STRD : AI3str<0b1111, (outs), (ins GPR:$Rt, GPR:$src2, addrmode3:$addr),
 multiclass AI2_stridx<bit isByte, string opc,
                       InstrItinClass iii, InstrItinClass iir> {
   def _PRE_IMM : AI2ldstidx<0, isByte, 1, (outs GPR:$Rn_wb),
-                            (ins GPR:$Rt, addrmode_imm12:$addr), IndexModePre,
+                            (ins GPR:$Rt, addrmode_imm12_pre:$addr), IndexModePre,
                             StFrm, iii,
                             opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
     bits<17> addr;
@@ -2619,7 +2658,7 @@ def STRH_preidx: ARMPseudoInst<(outs GPR:$Rn_wb),
 
 
 def STRH_PRE  : AI3ldstidx<0b1011, 0, 1, (outs GPR:$Rn_wb),
-                           (ins GPR:$Rt, addrmode3:$addr), IndexModePre,
+                           (ins GPR:$Rt, addrmode3_pre:$addr), IndexModePre,
                            StMiscFrm, IIC_iStore_bh_ru,
                            "strh", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
   bits<14> addr;
@@ -2651,7 +2690,7 @@ def STRH_POST : AI3ldstidx<0b1011, 0, 0, (outs GPR:$Rn_wb),
 
 let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
 def STRD_PRE : AI3ldstidx<0b1111, 0, 1, (outs GPR:$Rn_wb),
-                          (ins GPR:$Rt, GPR:$Rt2, addrmode3:$addr),
+                          (ins GPR:$Rt, GPR:$Rt2, addrmode3_pre:$addr),
                           IndexModePre, StMiscFrm, IIC_iStore_d_ru,
                           "strd", "\t$Rt, $Rt2, $addr!",
                           "$addr.base = $Rn_wb", []> {
@@ -4426,7 +4465,7 @@ multiclass LdStCop<bit load, bit Dbit, string asm> {
     let Inst{7-0} = addr{7-0};
     let DecoderMethod = "DecodeCopMemInstruction";
   }
-  def _PRE : ACI<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr),
+  def _PRE : ACI<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5_pre:$addr),
                  asm, "\t$cop, $CRd, $addr!", IndexModePre> {
     bits<13> addr;
     bits<4> cop;
@@ -4497,7 +4536,7 @@ multiclass LdSt2Cop<bit load, bit Dbit, string asm> {
     let Inst{7-0} = addr{7-0};
     let DecoderMethod = "DecodeCopMemInstruction";
   }
-  def _PRE : ACInoP<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr),
+  def _PRE : ACInoP<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5_pre:$addr),
                     asm, "\t$cop, $CRd, $addr!", IndexModePre> {
     bits<13> addr;
     bits<4> cop;
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 0411ac4..896fd0f 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -4316,6 +4316,24 @@ def  VACGTq   : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
 defm VTST     : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
                         IIC_VBINi4Q, "vtst", "", NEONvtst, 1>;
 
+def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
+                   (VACGTd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
+def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
+                   (VACGTq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
+def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
+                   (VACGEd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
+def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
+                   (VACGEq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
+
+def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
+                   (VACGTd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
+def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
+                   (VACGTq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
+def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
+                   (VACGEd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
+def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
+                   (VACGEq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
+
 // Vector Bitwise Operations.
 
 def vnotd : PatFrag<(ops node:$in),
@@ -4889,6 +4907,29 @@ def  VABSfq   : N2VQ<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
                      "vabs", "f32",
                       v4f32, v4f32, fabs>;
 
+def : Pat<(xor (v2i32 (bitconvert (v8i8 (NEONvshrs DPR:$src, (i32 7))))),
+               (v2i32 (bitconvert (v8i8 (add DPR:$src,
+                                             (NEONvshrs DPR:$src, (i32 7))))))),
+          (VABSv8i8 DPR:$src)>;
+def : Pat<(xor (v2i32 (bitconvert (v4i16 (NEONvshrs DPR:$src, (i32 15))))),
+               (v2i32 (bitconvert (v4i16 (add DPR:$src,
+                                            (NEONvshrs DPR:$src, (i32 15))))))),
+          (VABSv4i16 DPR:$src)>;
+def : Pat<(xor (v2i32 (NEONvshrs DPR:$src, (i32 31))),
+               (v2i32 (add DPR:$src, (NEONvshrs DPR:$src, (i32 31))))),
+          (VABSv2i32 DPR:$src)>;
+def : Pat<(xor (v4i32 (bitconvert (v16i8 (NEONvshrs QPR:$src, (i32 7))))),
+               (v4i32 (bitconvert (v16i8 (add QPR:$src,
+                                             (NEONvshrs QPR:$src, (i32 7))))))),
+          (VABSv16i8 QPR:$src)>;
+def : Pat<(xor (v4i32 (bitconvert (v8i16 (NEONvshrs QPR:$src, (i32 15))))),
+               (v4i32 (bitconvert (v8i16 (add QPR:$src,
+                                            (NEONvshrs QPR:$src, (i32 15))))))),
+          (VABSv8i16 QPR:$src)>;
+def : Pat<(xor (v4i32 (NEONvshrs QPR:$src, (i32 31))),
+               (v4i32 (add QPR:$src, (NEONvshrs QPR:$src, (i32 31))))),
+          (VABSv4i32 QPR:$src)>;
+
 def : Pat<(v2f32 (int_arm_neon_vabs (v2f32 DPR:$src))), (VABSfd DPR:$src)>;
 def : Pat<(v4f32 (int_arm_neon_vabs (v4f32 QPR:$src))), (VABSfq QPR:$src)>;
 
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index c9d709e..4dacb86 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -150,7 +150,7 @@ def lo5AllOne : PatLeaf<(i32 imm), [{
 def t2addrmode_imm12_asmoperand : AsmOperandClass {let Name="MemUImm12Offset";}
 def t2addrmode_imm12 : Operand<i32>,
                        ComplexPattern<i32, 2, "SelectT2AddrModeImm12", []> {
-  let PrintMethod = "printAddrModeImm12Operand";
+  let PrintMethod = "printAddrModeImm12Operand<false>";
   let EncoderMethod = "getAddrModeImm12OpValue";
   let DecoderMethod = "DecodeT2AddrModeImm12";
   let ParserMatchClass = t2addrmode_imm12_asmoperand;
@@ -3401,12 +3401,7 @@ class t2CPS<dag iops, string asm_op> : T2XI<(outs), iops, NoItinerary,
   bits<5> mode;
   bit M;
 
-  let Inst{31-27} = 0b11110;
-  let Inst{26}    = 0;
-  let Inst{25-20} = 0b111010;
-  let Inst{19-16} = 0b1111;
-  let Inst{15-14} = 0b10;
-  let Inst{12}    = 0;
+  let Inst{31-11} = 0b111100111010111110000;
   let Inst{10-9}  = imod;
   let Inst{8}     = M;
   let Inst{7-5}   = iflags;
@@ -3425,13 +3420,13 @@ let imod = 0, iflags = 0, M = 1 in
 
 // A6.3.4 Branches and miscellaneous control
 // Table A6-14 Change Processor State, and hint instructions
-def t2HINT : T2I<(outs), (ins imm0_255:$imm), NoItinerary, "hint", "\t$imm",[]>{
-  bits<8> imm;
-  let Inst{31-8} = 0b111100111010111110000000;
-  let Inst{7-0} = imm;
+def t2HINT : T2I<(outs), (ins imm0_4:$imm), NoItinerary, "hint", "\t$imm",[]> {
+  bits<3> imm;
+  let Inst{31-3} = 0b11110011101011111000000000000;
+  let Inst{2-0} = imm;
 }
 
-def : t2InstAlias<"hint$p.w $imm", (t2HINT imm0_255:$imm, pred:$p)>;
+def : t2InstAlias<"hint$p.w $imm", (t2HINT imm0_4:$imm, pred:$p)>;
 def : t2InstAlias<"nop$p.w", (t2HINT 0, pred:$p)>;
 def : t2InstAlias<"yield$p.w", (t2HINT 1, pred:$p)>;
 def : t2InstAlias<"wfe$p.w", (t2HINT 2, pred:$p)>;
@@ -3449,7 +3444,8 @@ def t2DBG : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "dbg", "\t$opt", []> {
 
 // Secure Monitor Call is a system instruction.
 // Option = Inst{19-16}
-def t2SMC : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt", []> {
+def t2SMC : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt", 
+                []>, Requires<[IsThumb2, HasTrustZone]> {
   let Inst{31-27} = 0b11110;
   let Inst{26-20} = 0b1111111;
   let Inst{15-12} = 0b1000;
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index b7ac5d5..c8ed576 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -87,53 +87,6 @@ namespace {
                       MachineBasicBlock::iterator i)
         : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
     };
-    class UnitRegsMap {
-    public:
-      UnitRegsMap(const TargetRegisterInfo* _TRI) : TRI(_TRI) {}
-      const SmallVector<unsigned, 4>& operator[](unsigned Reg) {
-        DenseMap<unsigned, SmallVector<unsigned, 4> >::iterator found =
-            Cache.find(Reg);
-        if (found != Cache.end())
-          return found->second;
-        else
-          return Cache.insert(std::make_pair(Reg, this->getUnitRegs(Reg)))
-                      .first->second;
-      }
-    private:
-      SmallVector<unsigned, 4> getUnitRegs(unsigned Reg) {
-        SmallVector<unsigned, 4> Res;
-
-        const TargetRegisterClass* TRC = TRI->getMinimalPhysRegClass(Reg);
-        if (TRC == &ARM::QPRRegClass) {
-          if (Reg > ARM::Q7) {
-            Res.push_back(TRI->getSubReg(Reg, ARM::dsub_0));
-            Res.push_back(TRI->getSubReg(Reg, ARM::dsub_1));
-            return Res;
-          }
-
-          Res.push_back(TRI->getSubReg(Reg, ARM::ssub_0));
-          Res.push_back(TRI->getSubReg(Reg, ARM::ssub_1));
-          Res.push_back(TRI->getSubReg(Reg, ARM::ssub_2));
-          Res.push_back(TRI->getSubReg(Reg, ARM::ssub_3));
-
-          return Res;
-        }
-
-        if (TRC == &ARM::DPRRegClass && Reg < ARM::D15) {
-          Res.push_back(TRI->getSubReg(Reg, ARM::ssub_0));
-          Res.push_back(TRI->getSubReg(Reg, ARM::ssub_1));
-
-          return Res;
-        }
-
-        Res.push_back(Reg);
-
-        return Res;
-
-      }
-      const TargetRegisterInfo* TRI;
-      DenseMap<unsigned, SmallVector<unsigned, 4> > Cache;
-    };
     typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
     typedef MemOpQueue::iterator MemOpQueueIter;
 
@@ -175,11 +128,6 @@ namespace {
                                    MachineBasicBlock::iterator MBBI,
                                    bool &Advance,
                                    MachineBasicBlock::iterator &I);
-    unsigned AddMemOp(MemOpQueue& MemOps,
-                      const MemOpQueueEntry newEntry,
-                      UnitRegsMap& UnitRegsInfo,
-                      SmallSet<unsigned, 4>& UsedUnitRegs,
-                      unsigned At = -1U);
     bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
     bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
   };
@@ -1265,103 +1213,12 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
   return false;
 }
 
-/// AddMemOp - helper for ARMLoadStoreOpt::LoadStoreMultipleOpti.
-/// It adds store mem ops with simple push_back/insert method,
-/// without any additional logic.
-/// For load operation it does the next:
-/// 1. Adds new load operation into MemOp collection at "At" position.
-/// 2. Removes any "load" operations from MemOps, that changes "Reg" register
-/// contents, prior to "At".
-/// UnitRegsInfo - Map of type Map< Register, UnitRegisters-vector >
-/// UsedUnitRegs - set of unit-registers currently in use.
-/// At - position at which it would added, and prior which the clean-up
-/// should be made (for load operation).
-/// FIXME: The clean-up also should be made for store operations,
-/// but the memory address should be analyzed instead of unit registers.
-unsigned ARMLoadStoreOpt::AddMemOp(MemOpQueue& MemOps,
-                                   const MemOpQueueEntry NewEntry,
-                                   UnitRegsMap& UnitRegsInfo,
-                                   SmallSet<unsigned, 4>& UsedUnitRegs,
-                                   unsigned At) {
-  unsigned Cleaned = 0;
-
-  if (At == -1U) {
-    At = MemOps.size();
-    MemOps.push_back(NewEntry);
-  } else
-    MemOps.insert(&MemOps[At], NewEntry);
-
-  // FIXME:
-  // If operation is not load, leave it as is by now,
-  // So 0 overridden ops would cleaned in this case.
-  if (!NewEntry.MBBI->mayLoad())
-    return 0;
-
-  const SmallVector<unsigned, 4>& NewEntryUnitRegs = UnitRegsInfo[NewEntry.Reg];
-
-  bool FoundOverriddenLoads = false;
-
-  for (unsigned i = 0, e = NewEntryUnitRegs.size(); i != e; ++i)
-    if (UsedUnitRegs.count(NewEntryUnitRegs[i])) {
-      FoundOverriddenLoads = true;
-      break;
-    }
-
-  // If we detect that this register is used by load operations that are
-  // predecessors for the new one, remove them from MemOps then.
-  if (FoundOverriddenLoads) {
-    MemOpQueue UpdatedMemOps;
-
-    // Scan through MemOps entries.
-    for (unsigned i = 0; i != At; ++i) {
-      MemOpQueueEntry& MemOpEntry = MemOps[i];
-
-      // FIXME: Skip non-load operations by now.
-      if (!MemOpEntry.MBBI->mayLoad())
-        continue;
-
-      const SmallVector<unsigned, 4>& MemOpUnitRegs =
-          UnitRegsInfo[MemOpEntry.Reg];
-
-      // Lookup entry that loads contents into register used by new entry.
-      bool ReleaseThisEntry = false;
-      for (unsigned m = 0, em = MemOpUnitRegs.size(); m != em; ++m) {
-        if (std::find(NewEntryUnitRegs.begin(), NewEntryUnitRegs.end(),
-                      MemOpUnitRegs[m]) != NewEntryUnitRegs.end()) {
-          ReleaseThisEntry = true;
-          ++Cleaned;
-          break;
-        }
-      }
-
-      if (ReleaseThisEntry) {
-        const SmallVector<unsigned, 4>& RelesedRegs = UnitRegsInfo[MemOpEntry.Reg];
-        for (unsigned r = 0, er = RelesedRegs.size(); r != er; ++r)
-          UsedUnitRegs.erase(RelesedRegs[r]);
-      } else
-        UpdatedMemOps.push_back(MemOpEntry);
-    }
-
-    // Keep anything without changes after At position.
-    for (unsigned i = At, e = MemOps.size(); i != e; ++i)
-      UpdatedMemOps.push_back(MemOps[i]);
-
-    MemOps.swap(UpdatedMemOps);
-  }
-
-  UsedUnitRegs.insert(NewEntryUnitRegs.begin(), NewEntryUnitRegs.end());
-
-  return Cleaned;
-}
-
 /// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
 /// ops of the same base and incrementing offset into LDM / STM ops.
 bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
   unsigned NumMerges = 0;
   unsigned NumMemOps = 0;
   MemOpQueue MemOps;
-  UnitRegsMap UnitRegsInfo(TRI);
-  SmallSet<unsigned, 4> UsedRegUnits;
   unsigned CurrBase = 0;
   int CurrOpc = -1;
   unsigned CurrSize = 0;
@@ -1401,6 +1258,22 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
       // merge the ldr's so far, including this one. But don't try to
       // combine the following ldr(s).
       Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
+
+      // Watch out for:
+      // r4 := ldr [r0, #8]
+      // r4 := ldr [r0, #4]
+      //
+      // The optimization may reorder the second ldr in front of the first
+      // ldr, which violates the write-after-write (WAW) dependence. The same
+      // applies to str. Try to merge the inst(s) already in MemOps.
+      bool Overlap = false;
+      for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) {
+        if (TRI->regsOverlap(Reg, I->MBBI->getOperand(0).getReg())) {
+          Overlap = true;
+          break;
+        }
+      }
+
       if (CurrBase == 0 && !Clobber) {
         // Start of a new chain.
         CurrBase = Base;
@@ -1408,13 +1281,10 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
         CurrSize = Size;
         CurrPred = Pred;
         CurrPredReg = PredReg;
-
         MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
         ++NumMemOps;
-        const SmallVector<unsigned, 4>& EntryUnitRegs = UnitRegsInfo[Reg];
-        UsedRegUnits.insert(EntryUnitRegs.begin(), EntryUnitRegs.end());
         Advance = true;
-      } else {
+      } else if (!Overlap) {
         if (Clobber) {
           TryMerge = true;
           Advance = true;
@@ -1424,24 +1294,20 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
           // No need to match PredReg.
           // Continue adding to the queue.
           if (Offset > MemOps.back().Offset) {
-            unsigned OverridesCleaned =
-              AddMemOp(MemOps,
-                           MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI),
-                           UnitRegsInfo, UsedRegUnits) != 0;
-            NumMemOps += 1 - OverridesCleaned;
+            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
+                                             Position, MBBI));
+            ++NumMemOps;
             Advance = true;
           } else {
-            for (unsigned I = 0; I != NumMemOps; ++I) {
-              if (Offset < MemOps[I].Offset) {
-                MemOpQueueEntry entry(Offset, Reg, isKill, Position, MBBI);
-                unsigned OverridesCleaned =
-                    AddMemOp(MemOps, entry, UnitRegsInfo,
-                                 UsedRegUnits, I) != 0;
-                NumMemOps += 1 - OverridesCleaned;
-
+            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
+                 I != E; ++I) {
+              if (Offset < I->Offset) {
+                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
+                                                 Position, MBBI));
+                ++NumMemOps;
                 Advance = true;
                 break;
-              } else if (Offset == MemOps[I].Offset) {
+              } else if (Offset == I->Offset) {
                 // Collision! This can't be merged!
                 break;
               }
@@ -1512,7 +1378,6 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
       CurrPredReg = 0;
       if (NumMemOps) {
         MemOps.clear();
-        UsedRegUnits.clear();
         NumMemOps = 0;
       }
 
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h
index 88d96c0..f4248fc 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -38,7 +38,7 @@ class ARMFunctionInfo : public MachineFunctionInfo {
 
   /// VarArgsRegSaveSize - Size of the register save area for vararg functions.
   ///
-  unsigned VarArgsRegSaveSize;
+  unsigned ArgRegsSaveSize;
 
   /// HasStackFrame - True if this function has a stack frame. Set by
   /// processFunctionBeforeCalleeSavedScan().
@@ -117,7 +117,7 @@ public:
   ARMFunctionInfo() :
     isThumb(false),
     hasThumb2(false),
-    VarArgsRegSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false),
+    ArgRegsSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false),
     LRSpilledForFarJump(false),
     FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
     GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
@@ -129,7 +129,7 @@ public:
   explicit ARMFunctionInfo(MachineFunction &MF) :
     isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()),
     hasThumb2(MF.getTarget().getSubtarget<ARMSubtarget>().hasThumb2()),
-    VarArgsRegSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false),
+    ArgRegsSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false),
     LRSpilledForFarJump(false),
     FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
     GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
@@ -141,8 +141,8 @@ public:
   bool isThumb1OnlyFunction() const { return isThumb && !hasThumb2; }
   bool isThumb2Function() const { return isThumb && hasThumb2; }
 
-  unsigned getVarArgsRegSaveSize() const { return VarArgsRegSaveSize; }
-  void setVarArgsRegSaveSize(unsigned s) { VarArgsRegSaveSize = s; }
+  unsigned getArgRegsSaveSize() const { return ArgRegsSaveSize; }
+  void setArgRegsSaveSize(unsigned s) { ArgRegsSaveSize = s; }
 
   bool hasStackFrame() const { return HasStackFrame; }
   void setHasStackFrame(bool s) { HasStackFrame = s; }
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index 739300e..8653c46 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -91,6 +91,7 @@ void ARMSubtarget::initializeEnvironment() {
   HasRAS = false;
   HasMPExtension = false;
   FPOnlySP = false;
+  HasTrustZone = false;
   AllowsUnalignedMem = false;
   Thumb2DSP = false;
   UseNaClTrap = false;
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 5b5ee6a..038eb76 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -148,6 +148,9 @@ protected:
   /// precision.
   bool FPOnlySP;
 
+  /// HasTrustZone - If true, processor supports TrustZone security extensions.
+  bool HasTrustZone;
+
   /// AllowsUnalignedMem - If true, the subtarget allows unaligned memory
   /// accesses for some types.  For details, see
   /// ARMTargetLowering::allowsUnalignedMemoryAccesses().
@@ -251,6 +254,7 @@ public:
   bool hasVMLxForwarding() const { return HasVMLxForwarding; }
   bool isFPBrccSlow() const { return SlowFPBrcc; }
   bool isFPOnlySP() const { return FPOnlySP; }
+  bool hasTrustZone() const { return HasTrustZone; }
   bool prefers32BitThumb() const { return Pref32BitThumb; }
   bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; }
   bool avoidMOVsShifterOperand() const { return AvoidMOVsShifterOperand; }
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 1019b97..53ece66 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -125,6 +125,10 @@ public:
   unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const;
 
   unsigned getAddressComputationCost(Type *Val) const;
+
+  unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+                                  OperandValueKind Op1Info = OK_AnyValue,
+                                  OperandValueKind Op2Info = OK_AnyValue) const;
   /// @}
 };
 
@@ -223,9 +227,9 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
     { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
     { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
 
-    // Operations that we legalize using load/stores to the stack.
-    { ISD::TRUNCATE,    MVT::v16i8, MVT::v16i32, 4*1 + 16*2 + 2*1 },
-    { ISD::TRUNCATE,    MVT::v8i8, MVT::v8i32, 2*1 + 8*2 + 1 },
+    // Operations that we legalize using splitting.
+    { ISD::TRUNCATE,    MVT::v16i8, MVT::v16i32, 6 },
+    { ISD::TRUNCATE,    MVT::v8i8, MVT::v8i32, 3 },
 
     // Vector float <-> i32 conversions.
     { ISD::SINT_TO_FP,  MVT::v4f32, MVT::v4i32, 1 },
@@ -456,3 +460,67 @@ unsigned ARMTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
 
   return LT.first * NEONShuffleTbl[Idx].Cost;
 }
+
+unsigned ARMTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Op1Info,
+                                        OperandValueKind Op2Info) const {
+
+  int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
+  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
+
+  const unsigned FunctionCallDivCost = 20;
+  const unsigned ReciprocalDivCost = 10;
+  static const CostTblEntry<MVT> CostTbl[] = {
+    // Division.
+    // These costs are somewhat random. Choose a cost of 20 to indicate that
+    // vectorizing division (added function call) is going to be very expensive.
+    // Double registers types.
+    { ISD::SDIV, MVT::v1i64, 1 * FunctionCallDivCost},
+    { ISD::UDIV, MVT::v1i64, 1 * FunctionCallDivCost},
+    { ISD::SREM, MVT::v1i64, 1 * FunctionCallDivCost},
+    { ISD::UREM, MVT::v1i64, 1 * FunctionCallDivCost},
+    { ISD::SDIV, MVT::v2i32, 2 * FunctionCallDivCost},
+    { ISD::UDIV, MVT::v2i32, 2 * FunctionCallDivCost},
+    { ISD::SREM, MVT::v2i32, 2 * FunctionCallDivCost},
+    { ISD::UREM, MVT::v2i32, 2 * FunctionCallDivCost},
+    { ISD::SDIV, MVT::v4i16,     ReciprocalDivCost},
+    { ISD::UDIV, MVT::v4i16,     ReciprocalDivCost},
+    { ISD::SREM, MVT::v4i16, 4 * FunctionCallDivCost},
+    { ISD::UREM, MVT::v4i16, 4 * FunctionCallDivCost},
+    { ISD::SDIV, MVT::v8i8,      ReciprocalDivCost},
+    { ISD::UDIV, MVT::v8i8,      ReciprocalDivCost},
+    { ISD::SREM, MVT::v8i8,  8 * FunctionCallDivCost},
+    { ISD::UREM, MVT::v8i8,  8 * FunctionCallDivCost},
+    // Quad register types.
+    { ISD::SDIV, MVT::v2i64, 2 * FunctionCallDivCost},
+    { ISD::UDIV, MVT::v2i64, 2 * FunctionCallDivCost},
+    { ISD::SREM, MVT::v2i64, 2 * FunctionCallDivCost},
+    { ISD::UREM, MVT::v2i64, 2 * FunctionCallDivCost},
+    { ISD::SDIV, MVT::v4i32, 4 * FunctionCallDivCost},
+    { ISD::UDIV, MVT::v4i32, 4 * FunctionCallDivCost},
+    { ISD::SREM, MVT::v4i32, 4 * FunctionCallDivCost},
+    { ISD::UREM, MVT::v4i32, 4 * FunctionCallDivCost},
+    { ISD::SDIV, MVT::v8i16, 8 * FunctionCallDivCost},
+    { ISD::UDIV, MVT::v8i16, 8 * FunctionCallDivCost},
+    { ISD::SREM, MVT::v8i16, 8 * FunctionCallDivCost},
+    { ISD::UREM, MVT::v8i16, 8 * FunctionCallDivCost},
+    { ISD::SDIV, MVT::v16i8, 16 * FunctionCallDivCost},
+    { ISD::UDIV, MVT::v16i8, 16 * FunctionCallDivCost},
+    { ISD::SREM, MVT::v16i8, 16 * FunctionCallDivCost},
+    { ISD::UREM, MVT::v16i8, 16 * FunctionCallDivCost},
+    // Multiplication.
+  };
+
+  int Idx = -1;
+
+  if (ST->hasNEON())
+    Idx = CostTableLookup<MVT>(CostTbl, array_lengthof(CostTbl), ISDOpcode,
+                               LT.second);
+
+  if (Idx != -1)
+    return LT.first * CostTbl[Idx].Cost;
+
+
+  return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Op1Info,
+                                                     Op2Info);
+}
+
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index ed7b7ec..1dd2953 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -86,11 +86,11 @@ class ARMAsmParser : public MCTargetAsmParser {
   MCAsmLexer &getLexer() const { return Parser.getLexer(); }
 
   bool Warning(SMLoc L, const Twine &Msg,
-               ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) {
+               ArrayRef<SMRange> Ranges = None) {
     return Parser.Warning(L, Msg, Ranges);
   }
   bool Error(SMLoc L, const Twine &Msg,
-             ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) {
+             ArrayRef<SMRange> Ranges = None) {
     return Parser.Error(L, Msg, Ranges);
   }
 
@@ -610,6 +610,13 @@ public:
     int64_t Value = CE->getValue();
     return ((Value & 3) == 0) && Value >= -1020 && Value <= 1020;
   }
+  bool isImm0_4() const {
+    if (!isImm()) return false;
+    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+    if (!CE) return false;
+    int64_t Value = CE->getValue();
+    return Value >= 0 && Value < 5;
+  }
   bool isImm0_1020s4() const {
     if (!isImm()) return false;
     const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
@@ -4745,6 +4752,7 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
       Mnemonic == "mls"   || Mnemonic == "smmls"  || Mnemonic == "vcls"  ||
       Mnemonic == "vmls"  || Mnemonic == "vnmls"  || Mnemonic == "vacge" ||
       Mnemonic == "vcge"  || Mnemonic == "vclt"   || Mnemonic == "vacgt" ||
+      Mnemonic == "vaclt" || Mnemonic == "vacle"  ||
       Mnemonic == "vcgt"  || Mnemonic == "vcle"   || Mnemonic == "smlal" ||
       Mnemonic == "umaal" || Mnemonic == "umlal"  || Mnemonic == "vabal" ||
       Mnemonic == "vmlal" || Mnemonic == "vpadal" || Mnemonic == "vqdmlal" ||
@@ -5014,8 +5022,8 @@ static bool isDataTypeToken(StringRef Tok) {
 static bool doesIgnoreDataTypeSuffix(StringRef Mnemonic, StringRef DT) {
   return Mnemonic.startswith("vldm") || Mnemonic.startswith("vstm");
 }
-
-static void applyMnemonicAliases(StringRef &Mnemonic, unsigned Features);
+static void applyMnemonicAliases(StringRef &Mnemonic, unsigned Features,
+                                 unsigned VariantID);
 /// Parse an arm instruction mnemonic followed by its operands.
 bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                                     SMLoc NameLoc,
@@ -5026,7 +5034,8 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
   // MatchInstructionImpl(), but that's too late for aliases that include
   // any sort of suffix.
   unsigned AvailableFeatures = getAvailableFeatures();
-  applyMnemonicAliases(Name, AvailableFeatures);
+  unsigned AssemblerDialect = getParser().getAssemblerDialect();
+  applyMnemonicAliases(Name, AvailableFeatures, AssemblerDialect);
 
   // First check for the ARM-specific .req directive.
   if (Parser.getTok().is(AsmToken::Identifier) &&
@@ -7613,6 +7622,11 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
     return Error(IDLoc, "instruction variant requires ARMv6 or later");
   case Match_RequiresThumb2:
     return Error(IDLoc, "instruction variant requires Thumb2");
+  case Match_ImmRange0_4: {
+    SMLoc ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc();
+    if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
+    return Error(ErrorLoc, "immediate operand must be in the range [0,4]");
+  }
   case Match_ImmRange0_15: {
     SMLoc ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc();
     if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 2e009e5..ac937f3 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -308,6 +308,8 @@ static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn,
                                 uint64_t Address, const void *Decoder);
 static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn,
                                 uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeImm0_4(MCInst &Inst, unsigned Insn, uint64_t Address,
+                                 const void *Decoder);
 
 
 static DecodeStatus DecodeThumbAddSpecialReg(MCInst &Inst, uint16_t Insn,
@@ -1951,10 +1953,12 @@ static DecodeStatus DecodeT2CPSInstruction(MCInst &Inst, unsigned Insn,
     Inst.addOperand(MCOperand::CreateImm(mode));
     if (iflags) S = MCDisassembler::SoftFail;
   } else {
-    // imod == '00' && M == '0' --> UNPREDICTABLE
-    Inst.setOpcode(ARM::t2CPS1p);
-    Inst.addOperand(MCOperand::CreateImm(mode));
-    S = MCDisassembler::SoftFail;
+    // imod == '00' && M == '0' --> this is a HINT instruction
+    int imm = fieldFromInstruction(Insn, 0, 8);
+    // HINTs are defined only for immediates in [0..4]
+    if(imm > 4) return MCDisassembler::Fail;
+    Inst.setOpcode(ARM::t2HINT);
+    Inst.addOperand(MCOperand::CreateImm(imm));
   }
 
   return S;
@@ -1996,9 +2000,10 @@ static DecodeStatus DecodeArmMOVTWInstruction(MCInst &Inst, unsigned Insn,
   imm |= (fieldFromInstruction(Insn, 16, 4) << 12);
 
   if (Inst.getOpcode() == ARM::MOVTi16)
-    if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder)))
+    if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rd, Address, Decoder)))
       return MCDisassembler::Fail;
-  if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder)))
+
+  if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rd, Address, Decoder)))
     return MCDisassembler::Fail;
 
   if (!tryAddingSymbolicOperand(Address, imm, false, 4, Inst, Decoder))
@@ -3570,7 +3575,7 @@ static DecodeStatus DecodeDoubleRegStore(MCInst &Inst, unsigned Insn,
   unsigned Rn = fieldFromInstruction(Insn, 16, 4);
   unsigned pred = fieldFromInstruction(Insn, 28, 4);
 
-  if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder)))
+  if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rd, Address, Decoder)))
     return MCDisassembler::Fail;
 
   if ((Rt & 1) || Rt == 0xE || Rn == 0xF) return MCDisassembler::Fail;
@@ -4496,6 +4501,15 @@ static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn,
   return S;
 }
 
+static DecodeStatus DecodeImm0_4(MCInst &Inst, unsigned Insn, uint64_t Address,
+                                 const void *Decoder)
+{
+  unsigned Imm = fieldFromInstruction(Insn, 0, 3);
+  if (Imm > 4) return MCDisassembler::Fail;
+  Inst.addOperand(MCOperand::CreateImm(Imm));
+  return MCDisassembler::Success;
+}
+
 static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val,
                                 uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index 2afb20d..3bcd083 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -490,7 +490,8 @@ void ARMInstPrinter::printAM3PostIndexOp(const MCInst *MI, unsigned Op,
 }
 
 void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
-                                                raw_ostream &O) {
+                                                raw_ostream &O,
+                                                bool AlwaysPrintImm0) {
   const MCOperand &MO1 = MI->getOperand(Op);
   const MCOperand &MO2 = MI->getOperand(Op+1);
   const MCOperand &MO3 = MI->getOperand(Op+2);
@@ -509,7 +510,7 @@ void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
   unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm());
   ARM_AM::AddrOpc op = ARM_AM::getAM3Op(MO3.getImm());
 
-  if (ImmOffs || (op == ARM_AM::sub)) {
+  if (AlwaysPrintImm0 || ImmOffs || (op == ARM_AM::sub)) {
     O << ", "
       << markup("<imm:")
       << "#"
@@ -520,6 +521,7 @@ void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
   O << ']' << markup(">");
 }
 
+template <bool AlwaysPrintImm0>
 void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned Op,
                                            raw_ostream &O) {
   const MCOperand &MO1 = MI->getOperand(Op);
@@ -535,7 +537,7 @@ void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned Op,
     printAM3PostIndexOp(MI, Op, O);
     return;
   }
-  printAM3PreOrOffsetIndexOp(MI, Op, O);
+  printAM3PreOrOffsetIndexOp(MI, Op, O, AlwaysPrintImm0);
 }
 
 void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI,
@@ -593,6 +595,7 @@ void ARMInstPrinter::printLdStmModeOperand(const MCInst *MI, unsigned OpNum,
   O << ARM_AM::getAMSubModeStr(Mode);
 }
 
+template <bool AlwaysPrintImm0>
 void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum,
                                            raw_ostream &O) {
   const MCOperand &MO1 = MI->getOperand(OpNum);
@@ -608,7 +611,7 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum,
 
   unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm());
   unsigned Op = ARM_AM::getAM5Op(MO2.getImm());
-  if (ImmOffs || Op == ARM_AM::sub) {
+  if (AlwaysPrintImm0 || ImmOffs || Op == ARM_AM::sub) {
     O << ", "
       << markup("<imm:")
       << "#"
@@ -1022,6 +1025,7 @@ void ARMInstPrinter::printT2SOOperand(const MCInst *MI, unsigned OpNum,
                    ARM_AM::getSORegOffset(MO2.getImm()), UseMarkup);
 }
 
+template <bool AlwaysPrintImm0>
 void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum,
                                                raw_ostream &O) {
   const MCOperand &MO1 = MI->getOperand(OpNum);
@@ -1042,13 +1046,13 @@ void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum,
     OffImm = 0;
   if (isSub) {
     O << ", "
-      << markup("<imm:") 
+      << markup("<imm:")
       << "#-" << -OffImm
       << markup(">");
   }
-  else if (OffImm > 0) {
+  else if (AlwaysPrintImm0 || OffImm > 0) {
     O << ", "
-      << markup("<imm:") 
+      << markup("<imm:")
       << "#" << OffImm
       << markup(">");
   }
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
index edff75d..344104e 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
@@ -47,12 +47,13 @@ public:
                                   raw_ostream &O);
   void printAddrMode2OffsetOperand(const MCInst *MI, unsigned OpNum,
                                    raw_ostream &O);
-
+  template <bool AlwaysPrintImm0>
   void printAddrMode3Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
   void printAddrMode3OffsetOperand(const MCInst *MI, unsigned OpNum,
                                    raw_ostream &O);
   void printAM3PostIndexOp(const MCInst *MI, unsigned Op, raw_ostream &O);
-  void printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,raw_ostream &O);
+  void printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, raw_ostream &O,
+                                  bool AlwaysPrintImm0);
   void printPostIdxImm8Operand(const MCInst *MI, unsigned OpNum,
                                raw_ostream &O);
   void printPostIdxRegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
@@ -60,6 +61,7 @@ public:
                                raw_ostream &O);
 
   void printLdStmModeOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  template <bool AlwaysPrintImm0>
   void printAddrMode5Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
   void printAddrMode6Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
   void printAddrMode7Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
@@ -91,6 +93,7 @@ public:
                                    raw_ostream &O);
 
   void printT2SOOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  template<bool AlwaysPrintImm0>
   void printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum,
                                  raw_ostream &O);
   void printT2AddrModeImm8Operand(const MCInst *MI, unsigned OpNum,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index 418971d..6c3d247 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -13,7 +13,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "ARMRegisterInfo.h"
 #include "ARMUnwindOp.h"
+#include "ARMUnwindOpAsm.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/MC/MCAsmBackend.h"
@@ -26,6 +28,7 @@
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCSection.h"
 #include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCStreamer.h"
@@ -33,11 +36,15 @@
 #include "llvm/MC/MCValue.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ELF.h"
-#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
 
+static std::string GetAEABIUnwindPersonalityName(unsigned Index) {
+  assert(Index < NUM_PERSONALITY_INDEX && "Invalid personality index");
+  return (Twine("__aeabi_unwind_cpp_pr") + Twine(Index)).str();
+}
+
 namespace {
 
 /// Extend the generic ELFStreamer class so that it can emit mapping symbols at
@@ -57,8 +64,9 @@ public:
   ARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS,
                  MCCodeEmitter *Emitter, bool IsThumb)
       : MCELFStreamer(SK_ARMELFStreamer, Context, TAB, OS, Emitter),
-        IsThumb(IsThumb), MappingSymbolCounter(0), LastEMS(EMS_None), ExTab(0),
-        FnStart(0), Personality(0), CantUnwind(false) {}
+        IsThumb(IsThumb), MappingSymbolCounter(0), LastEMS(EMS_None) {
+    Reset();
+  }
 
   ~ARMELFStreamer() {}
 
@@ -75,14 +83,15 @@ public:
   virtual void EmitRegSave(const SmallVectorImpl<unsigned> &RegList,
                            bool isVector);
 
-  virtual void ChangeSection(const MCSection *Section) {
+  virtual void ChangeSection(const MCSection *Section,
+                             const MCExpr *Subsection) {
     // We have to keep track of the mapping symbol state of any sections we
     // use. Each one should start off as EMS_None, which is provided as the
     // default constructor by DenseMap::lookup.
-    LastMappingSymbols[getPreviousSection()] = LastEMS;
+    LastMappingSymbols[getPreviousSection().first] = LastEMS;
     LastEMS = LastMappingSymbols.lookup(Section);
 
-    MCELFStreamer::ChangeSection(Section);
+    MCELFStreamer::ChangeSection(Section, Subsection);
   }
 
   /// This function is the one used to emit instruction data into the ELF
@@ -175,7 +184,7 @@ private:
     MCELF::SetType(SD, ELF::STT_NOTYPE);
     MCELF::SetBinding(SD, ELF::STB_LOCAL);
     SD.setExternal(false);
-    Symbol->setSection(*getCurrentSection());
+    Symbol->setSection(*getCurrentSection().first);
 
     const MCExpr *Value = MCSymbolRefExpr::Create(Start, getContext());
     Symbol->setVariableValue(Value);
@@ -194,6 +203,7 @@ private:
   void Reset();
 
   void EmitPersonalityFixup(StringRef Name);
+  void CollectUnwindOpcodes();
 
   void SwitchToEHSection(const char *Prefix, unsigned Type, unsigned Flags,
                          SectionKind Kind, const MCSymbol &Fn);
@@ -210,9 +220,16 @@ private:
   MCSymbol *ExTab;
   MCSymbol *FnStart;
   const MCSymbol *Personality;
+  uint32_t VFPRegSave; // Register mask for {d31-d0}
+  uint32_t RegSave; // Register mask for {r15-r0}
+  int64_t SPOffset;
+  uint16_t FPReg;
+  int64_t FPOffset;
+  bool UsedFP;
   bool CantUnwind;
+  UnwindOpcodeAssembler UnwindOpAsm;
 };
-}
+} // end anonymous namespace
 
 inline void ARMELFStreamer::SwitchToEHSection(const char *Prefix,
                                               unsigned Type,
@@ -238,7 +255,7 @@ inline void ARMELFStreamer::SwitchToEHSection(const char *Prefix,
   } else {
     EHSection = getContext().getELFSection(EHSecName, Type, Flags, Kind);
   }
-  assert(EHSection);
+  assert(EHSection && "Failed to get the required EH section");
 
   // Switch to .ARM.extab or .ARM.exidx section
   SwitchSection(EHSection);
@@ -262,10 +279,20 @@ inline void ARMELFStreamer::SwitchToExIdxSection(const MCSymbol &FnStart) {
 }
 
 void ARMELFStreamer::Reset() {
+  const MCRegisterInfo &MRI = getContext().getRegisterInfo();
+
   ExTab = NULL;
   FnStart = NULL;
   Personality = NULL;
+  VFPRegSave = 0;
+  RegSave = 0;
+  FPReg = MRI.getEncodingValue(ARM::SP);
+  FPOffset = 0;
+  SPOffset = 0;
+  UsedFP = false;
   CantUnwind = false;
+
+  UnwindOpAsm.Reset();
 }
 
 // Add the R_ARM_NONE fixup at the same position
@@ -284,6 +311,18 @@ void ARMELFStreamer::EmitPersonalityFixup(StringRef Name) {
                     MCFixup::getKindForSize(4, false)));
 }
 
+void ARMELFStreamer::CollectUnwindOpcodes() {
+  if (UsedFP) {
+    UnwindOpAsm.EmitSetFP(FPReg);
+    UnwindOpAsm.EmitSPOffset(-FPOffset);
+  } else {
+    UnwindOpAsm.EmitSPOffset(SPOffset);
+  }
+  UnwindOpAsm.EmitVFPRegSave(VFPRegSave);
+  UnwindOpAsm.EmitRegSave(RegSave);
+  UnwindOpAsm.Finalize();
+}
+
 void ARMELFStreamer::EmitFnStart() {
   assert(FnStart == 0);
   FnStart = getContext().CreateTempSymbol();
@@ -294,35 +333,29 @@ void ARMELFStreamer::EmitFnEnd() {
   assert(FnStart && ".fnstart must preceeds .fnend");
 
   // Emit unwind opcodes if there is no .handlerdata directive
-  int PersonalityIndex = -1;
   if (!ExTab && !CantUnwind) {
-    // For __aeabi_unwind_cpp_pr1, we have to emit opcodes in .ARM.extab.
-    SwitchToExTabSection(*FnStart);
-
-    // Create .ARM.extab label for offset in .ARM.exidx
-    ExTab = getContext().CreateTempSymbol();
-    EmitLabel(ExTab);
-
-    PersonalityIndex = 1;
-
-    uint32_t Entry = 0;
-    uint32_t NumExtraEntryWords = 0;
-    Entry |= NumExtraEntryWords << 24;
-    Entry |= (EHT_COMPACT | PersonalityIndex) << 16;
-
-    // TODO: This should be generated according to .save, .vsave, .setfp
-    // directives.  Currently, we are simply generating FINISH opcode.
-    Entry |= UNWIND_OPCODE_FINISH << 8;
-    Entry |= UNWIND_OPCODE_FINISH;
-
-    EmitIntValue(Entry, 4, 0);
+    CollectUnwindOpcodes();
+
+    unsigned PersonalityIndex = UnwindOpAsm.getPersonalityIndex();
+    if (PersonalityIndex == AEABI_UNWIND_CPP_PR1 ||
+        PersonalityIndex == AEABI_UNWIND_CPP_PR2) {
+      // For the __aeabi_unwind_cpp_pr1 and __aeabi_unwind_cpp_pr2, we have to
+      // emit the unwind opcodes in the corresponding ".ARM.extab" section, and
+      // then emit a reference to these unwind opcodes in the second word of
+      // the exception index table entry.
+      SwitchToExTabSection(*FnStart);
+      ExTab = getContext().CreateTempSymbol();
+      EmitLabel(ExTab);
+      EmitBytes(UnwindOpAsm.data(), 0);
+    }
   }
 
   // Emit the exception index table entry
   SwitchToExIdxSection(*FnStart);
 
-  if (PersonalityIndex == 1)
-    EmitPersonalityFixup("__aeabi_unwind_cpp_pr1");
+  unsigned PersonalityIndex = UnwindOpAsm.getPersonalityIndex();
+  if (PersonalityIndex < NUM_PERSONALITY_INDEX)
+    EmitPersonalityFixup(GetAEABIUnwindPersonalityName(PersonalityIndex));
 
   const MCSymbolRefExpr *FnStartRef =
     MCSymbolRefExpr::Create(FnStart,
@@ -333,12 +366,22 @@ void ARMELFStreamer::EmitFnEnd() {
 
   if (CantUnwind) {
     EmitIntValue(EXIDX_CANTUNWIND, 4, 0);
-  } else {
+  } else if (ExTab) {
+    // Emit a reference to the unwind opcodes in the ".ARM.extab" section.
     const MCSymbolRefExpr *ExTabEntryRef =
       MCSymbolRefExpr::Create(ExTab,
                               MCSymbolRefExpr::VK_ARM_PREL31,
                               getContext());
     EmitValue(ExTabEntryRef, 4, 0);
+  } else {
+    // For the __aeabi_unwind_cpp_pr0, we have to emit the unwind opcodes in
+    // the second word of exception index table entry.  The size of the unwind
+    // opcodes should always be 4 bytes.
+    assert(PersonalityIndex == AEABI_UNWIND_CPP_PR0 &&
+           "Compact model must use __aeabi_unwind_cpp_pr0 as personality");
+    assert(UnwindOpAsm.size() == 4u &&
+           "Unwind opcode size for __aeabi_unwind_cpp_pr0 must be equal to 4");
+    EmitBytes(UnwindOpAsm.data(), 0);
   }
 
   // Clean exception handling frame information
@@ -368,36 +411,54 @@ void ARMELFStreamer::EmitHandlerData() {
   EmitValue(PersonalityRef, 4, 0);
 
   // Emit unwind opcodes
-  uint32_t Entry = 0;
-  uint32_t NumExtraEntryWords = 0;
-
-  // TODO: This should be generated according to .save, .vsave, .setfp
-  // directives.  Currently, we are simply generating FINISH opcode.
-  Entry |= NumExtraEntryWords << 24;
-  Entry |= UNWIND_OPCODE_FINISH << 16;
-  Entry |= UNWIND_OPCODE_FINISH << 8;
-  Entry |= UNWIND_OPCODE_FINISH;
-
-  EmitIntValue(Entry, 4, 0);
+  CollectUnwindOpcodes();
+  EmitBytes(UnwindOpAsm.data(), 0);
 }
 
 void ARMELFStreamer::EmitPersonality(const MCSymbol *Per) {
   Personality = Per;
+  UnwindOpAsm.setPersonality(Per);
 }
 
-void ARMELFStreamer::EmitSetFP(unsigned NewFpReg,
-                               unsigned NewSpReg,
+void ARMELFStreamer::EmitSetFP(unsigned NewFPReg,
+                               unsigned NewSPReg,
                                int64_t Offset) {
-  // TODO: Not implemented
+  assert(SPOffset == 0 &&
+         "Current implementation assumes .setfp precedes .pad");
+
+  const MCRegisterInfo &MRI = getContext().getRegisterInfo();
+
+  uint16_t NewFPRegEncVal = MRI.getEncodingValue(NewFPReg);
+#ifndef NDEBUG
+  uint16_t NewSPRegEncVal = MRI.getEncodingValue(NewSPReg);
+#endif
+
+  assert((NewSPReg == ARM::SP || NewSPRegEncVal == FPReg) &&
+         "the operand of .setfp directive should be either $sp or $fp");
+
+  UsedFP = true;
+  FPReg = NewFPRegEncVal;
+  FPOffset = Offset;
 }
 
 void ARMELFStreamer::EmitPad(int64_t Offset) {
-  // TODO: Not implemented
+  SPOffset += Offset;
 }
 
 void ARMELFStreamer::EmitRegSave(const SmallVectorImpl<unsigned> &RegList,
                                  bool IsVector) {
-  // TODO: Not implemented
+  const MCRegisterInfo &MRI = getContext().getRegisterInfo();
+
+#ifndef NDEBUG
+  unsigned Max = IsVector ? 32 : 16;
+#endif
+  uint32_t &RegMask = IsVector ? VFPRegSave : RegSave;
+
+  for (size_t i = 0; i < RegList.size(); ++i) {
+    unsigned Reg = MRI.getEncodingValue(RegList[i]);
+    assert(Reg < Max && "Register encoded value out of range");
+    RegMask |= 1u << Reg;
+  }
 }
 
 namespace llvm {
diff --git a/lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h b/lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h
index dad5576..fa4add6 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h
@@ -107,6 +107,19 @@ namespace llvm {
     UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD_D8 = 0xd0
   };
 
+  /// ARM-defined Personality Routine Index
+  enum ARMPersonalityRoutineIndex {
+    // To make the exception handling table become more compact, ARM defined
+    // several personality routines in EHABI.  There are 3 different
+    // personality routines in ARM EHABI currently.  It is possible to have 16
+    // pre-defined personality routines at most.
+    AEABI_UNWIND_CPP_PR0 = 0,
+    AEABI_UNWIND_CPP_PR1 = 1,
+    AEABI_UNWIND_CPP_PR2 = 2,
+
+    NUM_PERSONALITY_INDEX
+  };
+
 }
 
 #endif // ARM_UNWIND_OP_H
diff --git a/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp b/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
new file mode 100644
index 0000000..191db69
--- /dev/null
+++ b/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
@@ -0,0 +1,198 @@
+//===-- ARMUnwindOpAsm.cpp - ARM Unwind Opcodes Assembler -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the unwind opcode assembler for ARM exception handling
+// table.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMUnwindOpAsm.h"
+
+#include "ARMUnwindOp.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/LEB128.h"
+
+using namespace llvm;
+
+void UnwindOpcodeAssembler::EmitRegSave(uint32_t RegSave) {
+  if (RegSave == 0u)
+    return;
+
+  // One byte opcode to save register r14 and r11-r4
+  if (RegSave & (1u << 4)) {
+    // The one byte opcode will always save r4, thus we can't use the one byte
+    // opcode when r4 is not in .save directive.
+
+    // Compute the consecutive registers from r4 to r11.
+    uint32_t Range = 0;
+    uint32_t Mask = (1u << 4);
+    for (uint32_t Bit = (1u << 5); Bit < (1u << 12); Bit <<= 1) {
+      if ((RegSave & Bit) == 0u)
+        break;
+      ++Range;
+      Mask |= Bit;
+    }
+
+    // Emit this opcode when the mask covers every register.
+    uint32_t UnmaskedReg = RegSave & 0xfff0u & (~Mask);
+    if (UnmaskedReg == 0u) {
+      // Pop r[4 : (4 + n)]
+      Ops.push_back(UNWIND_OPCODE_POP_REG_RANGE_R4 | Range);
+      RegSave &= 0x000fu;
+    } else if (UnmaskedReg == (1u << 14)) {
+      // Pop r[14] + r[4 : (4 + n)]
+      Ops.push_back(UNWIND_OPCODE_POP_REG_RANGE_R4_R14 | Range);
+      RegSave &= 0x000fu;
+    }
+  }
+
+  // Two bytes opcode to save register r15-r4
+  if ((RegSave & 0xfff0u) != 0) {
+    uint32_t Op = UNWIND_OPCODE_POP_REG_MASK_R4 | (RegSave >> 4);
+    Ops.push_back(static_cast<uint8_t>(Op >> 8));
+    Ops.push_back(static_cast<uint8_t>(Op & 0xff));
+  }
+
+  // Opcode to save register r3-r0
+  if ((RegSave & 0x000fu) != 0) {
+    uint32_t Op = UNWIND_OPCODE_POP_REG_MASK | (RegSave & 0x000fu);
+    Ops.push_back(static_cast<uint8_t>(Op >> 8));
+    Ops.push_back(static_cast<uint8_t>(Op & 0xff));
+  }
+}
+
+/// Emit unwind opcodes for .vsave directives
+void UnwindOpcodeAssembler::EmitVFPRegSave(uint32_t VFPRegSave) {
+  size_t i = 32;
+
+  while (i > 16) {
+    uint32_t Bit = 1u << (i - 1);
+    if ((VFPRegSave & Bit) == 0u) {
+      --i;
+      continue;
+    }
+
+    uint32_t Range = 0;
+
+    --i;
+    Bit >>= 1;
+
+    while (i > 16 && (VFPRegSave & Bit)) {
+      --i;
+      ++Range;
+      Bit >>= 1;
+    }
+
+    uint32_t Op =
+        UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD_D16 | ((i - 16) << 4) | Range;
+    Ops.push_back(static_cast<uint8_t>(Op >> 8));
+    Ops.push_back(static_cast<uint8_t>(Op & 0xff));
+  }
+
+  while (i > 0) {
+    uint32_t Bit = 1u << (i - 1);
+    if ((VFPRegSave & Bit) == 0u) {
+      --i;
+      continue;
+    }
+
+    uint32_t Range = 0;
+
+    --i;
+    Bit >>= 1;
+
+    while (i > 0 && (VFPRegSave & Bit)) {
+      --i;
+      ++Range;
+      Bit >>= 1;
+    }
+
+    uint32_t Op = UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD | (i << 4) | Range;
+    Ops.push_back(static_cast<uint8_t>(Op >> 8));
+    Ops.push_back(static_cast<uint8_t>(Op & 0xff));
+  }
+}
+
+/// Emit unwind opcodes for .setfp directives
+void UnwindOpcodeAssembler::EmitSetFP(uint16_t FPReg) {
+  Ops.push_back(UNWIND_OPCODE_SET_VSP | FPReg);
+}
+
+/// Emit unwind opcodes to update stack pointer
+void UnwindOpcodeAssembler::EmitSPOffset(int64_t Offset) {
+  if (Offset > 0x200) {
+    uint8_t Buff[10];
+    size_t Size = encodeULEB128((Offset - 0x204) >> 2, Buff);
+    Ops.push_back(UNWIND_OPCODE_INC_VSP_ULEB128);
+    Ops.append(Buff, Buff + Size);
+  } else if (Offset > 0) {
+    if (Offset > 0x100) {
+      Ops.push_back(UNWIND_OPCODE_INC_VSP | 0x3fu);
+      Offset -= 0x100;
+    }
+    Ops.push_back(UNWIND_OPCODE_INC_VSP |
+                  static_cast<uint8_t>((Offset - 4) >> 2));
+  } else if (Offset < 0) {
+    while (Offset < -0x100) {
+      Ops.push_back(UNWIND_OPCODE_DEC_VSP | 0x3fu);
+      Offset += 0x100;
+    }
+    Ops.push_back(UNWIND_OPCODE_DEC_VSP |
+                  static_cast<uint8_t>(((-Offset) - 4) >> 2));
+  }
+}
+
+void UnwindOpcodeAssembler::AddOpcodeSizePrefix(size_t Pos) {
+  size_t SizeInWords = (size() + 3) / 4;
+  assert(SizeInWords <= 0x100u &&
+         "Only 256 additional words are allowed for unwind opcodes");
+  Ops[Pos] = static_cast<uint8_t>(SizeInWords - 1);
+}
+
+void UnwindOpcodeAssembler::AddPersonalityIndexPrefix(size_t Pos, unsigned PI) {
+  assert(PI < NUM_PERSONALITY_INDEX && "Invalid personality prefix");
+  Ops[Pos] = EHT_COMPACT | PI;
+}
+
+void UnwindOpcodeAssembler::EmitFinishOpcodes() {
+  for (size_t i = (0x4u - (size() & 0x3u)) & 0x3u; i > 0; --i)
+    Ops.push_back(UNWIND_OPCODE_FINISH);
+}
+
+void UnwindOpcodeAssembler::Finalize() {
+  if (HasPersonality) {
+    // Personality specified by .personality directive
+    Offset = 1;
+    AddOpcodeSizePrefix(1);
+  } else {
+    if (getOpcodeSize() <= 3) {
+      // __aeabi_unwind_cpp_pr0: [ 0x80 , OP1 , OP2 , OP3 ]
+      Offset = 1;
+      PersonalityIndex = AEABI_UNWIND_CPP_PR0;
+      AddPersonalityIndexPrefix(Offset, PersonalityIndex);
+    } else {
+      // __aeabi_unwind_cpp_pr1: [ 0x81 , SIZE , OP1 , OP2 , ... ]
+      Offset = 0;
+      PersonalityIndex = AEABI_UNWIND_CPP_PR1;
+      AddPersonalityIndexPrefix(Offset, PersonalityIndex);
+      AddOpcodeSizePrefix(1);
+    }
+  }
+
+  // Emit the padding finish opcodes if the size() is not multiple of 4.
+  EmitFinishOpcodes();
+
+  // Swap the byte order
+  uint8_t *Ptr = Ops.begin() + Offset;
+  assert(size() % 4 == 0 && "Final unwind opcodes should align to 4");
+  for (size_t i = 0, n = size(); i < n; i += 4) {
+    std::swap(Ptr[i], Ptr[i + 3]);
+    std::swap(Ptr[i + 1], Ptr[i + 2]);
+  }
+}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h b/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h
new file mode 100644
index 0000000..f6ecaeb
--- /dev/null
+++ b/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h
@@ -0,0 +1,114 @@
+//===-- ARMUnwindOpAsm.h - ARM Unwind Opcodes Assembler ---------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the unwind opcode assembler for ARM exception handling
+// table.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARM_UNWIND_OP_ASM_H
+#define ARM_UNWIND_OP_ASM_H
+
+#include "ARMUnwindOp.h"
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+
+class MCSymbol;
+
+class UnwindOpcodeAssembler {
+private:
+  llvm::SmallVector<uint8_t, 8> Ops;
+
+  unsigned Offset;
+  unsigned PersonalityIndex;
+  bool HasPersonality;
+
+  enum {
+    // The number of bytes to be preserved for the size and personality index
+    // prefix of unwind opcodes.
+    NUM_PRESERVED_PREFIX_BUF = 2
+  };
+
+public:
+  UnwindOpcodeAssembler()
+      : Ops(NUM_PRESERVED_PREFIX_BUF), Offset(NUM_PRESERVED_PREFIX_BUF),
+        PersonalityIndex(NUM_PERSONALITY_INDEX), HasPersonality(0) {
+  }
+
+  /// Reset the unwind opcode assembler.
+  void Reset() {
+    Ops.resize(NUM_PRESERVED_PREFIX_BUF);
+    Offset = NUM_PRESERVED_PREFIX_BUF;
+    PersonalityIndex = NUM_PERSONALITY_INDEX;
+    HasPersonality = 0;
+  }
+
+  /// Get the size of the payload (including the size byte)
+  size_t size() const {
+    return Ops.size() - Offset;
+  }
+
+  /// Get the beginning of the payload
+  const uint8_t *begin() const {
+    return Ops.begin() + Offset;
+  }
+
+  /// Get the payload
+  StringRef data() const {
+    return StringRef(reinterpret_cast<const char *>(begin()), size());
+  }
+
+  /// Record that an explicit personality routine has been specified
+  void setPersonality(const MCSymbol *Per) {
+    HasPersonality = 1;
+  }
+
+  /// Get the personality index
+  unsigned getPersonalityIndex() const {
+    return PersonalityIndex;
+  }
+
+  /// Emit unwind opcodes for .save directives
+  void EmitRegSave(uint32_t RegSave);
+
+  /// Emit unwind opcodes for .vsave directives
+  void EmitVFPRegSave(uint32_t VFPRegSave);
+
+  /// Emit unwind opcodes for .setfp directives
+  void EmitSetFP(uint16_t FPReg);
+
+  /// Emit unwind opcodes to update stack pointer
+  void EmitSPOffset(int64_t Offset);
+
+  /// Finalize the unwind opcode sequence for EmitBytes()
+  void Finalize();
+
+private:
+  /// Get the size of the opcodes in bytes.
+  size_t getOpcodeSize() const {
+    return Ops.size() - NUM_PRESERVED_PREFIX_BUF;
+  }
+
+  /// Add the length prefix to the payload
+  void AddOpcodeSizePrefix(size_t Pos);
+
+  /// Add personality index prefix in some compact format
+  void AddPersonalityIndexPrefix(size_t Pos, unsigned PersonalityIndex);
+
+  /// Fill the words with finish opcode if it is not aligned
+  void EmitFinishOpcodes();
+};
+
+} // namespace llvm
+
+#endif // ARM_UNWIND_OP_ASM_H
diff --git a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
index e17eb4d..a7ac5ca 100644
--- a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
@@ -8,6 +8,7 @@ add_llvm_library(LLVMARMDesc
   ARMMCTargetDesc.cpp
   ARMMachObjectWriter.cpp
   ARMELFObjectWriter.cpp
+  ARMUnwindOpAsm.cpp
   )
 add_dependencies(LLVMARMDesc ARMCommonTableGen)
 
diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp
index 2c3388c..1e2a8b0 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -88,7 +88,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
   const Thumb1InstrInfo &TII =
     *static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo());
 
-  unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
   unsigned NumBytes = MFI->getStackSize();
   const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
   DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
@@ -104,8 +104,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
   unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
   int FramePtrSpillFI = 0;
 
-  if (VARegSaveSize)
-    emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -VARegSaveSize,
+  if (ArgRegsSaveSize)
+    emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -ArgRegsSaveSize,
                  MachineInstr::FrameSetup);
 
   if (!AFI->hasStackFrame()) {
@@ -249,7 +249,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
   const Thumb1InstrInfo &TII =
     *static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo());
 
-  unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
   int NumBytes = (int)MFI->getStackSize();
   const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs();
   unsigned FramePtr = RegInfo->getFrameRegister(MF);
@@ -300,7 +300,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
     }
   }
 
-  if (VARegSaveSize) {
+  if (ArgRegsSaveSize) {
     // Unlike T2 and ARM mode, the T1 pop instruction cannot restore
     // to LR, and we can't pop the value directly to the PC since
     // we need to update the SP after popping the value. Therefore, we
@@ -313,7 +313,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
     AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)))
       .addReg(ARM::R3, RegState::Define);
 
-    emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, VARegSaveSize);
+    emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize);
 
     MachineInstrBuilder MIB =
       BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg))
@@ -376,7 +376,7 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
 
-  bool isVarArg = AFI->getVarArgsRegSaveSize() > 0;
+  bool isVarArg = AFI->getArgRegsSaveSize() > 0;
   DebugLoc DL = MI->getDebugLoc();
   MachineInstrBuilder MIB = BuildMI(MF, DL, TII.get(ARM::tPOP));
   AddDefaultPred(MIB);
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index 67e8ec7..a1b48c2 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -19,6 +19,7 @@
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/Support/CommandLine.h"
 
@@ -126,25 +127,41 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                     unsigned SrcReg, bool isKill, int FI,
                     const TargetRegisterClass *RC,
                     const TargetRegisterInfo *TRI) const {
+  DebugLoc DL;
+  if (I != MBB.end()) DL = I->getDebugLoc();
+
+  MachineFunction &MF = *MBB.getParent();
+  MachineFrameInfo &MFI = *MF.getFrameInfo();
+  MachineMemOperand *MMO =
+    MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
+                            MachineMemOperand::MOStore,
+                            MFI.getObjectSize(FI),
+                            MFI.getObjectAlignment(FI));
+
   if (RC == &ARM::GPRRegClass   || RC == &ARM::tGPRRegClass ||
       RC == &ARM::tcGPRRegClass || RC == &ARM::rGPRRegClass ||
       RC == &ARM::GPRnopcRegClass) {
-    DebugLoc DL;
-    if (I != MBB.end()) DL = I->getDebugLoc();
-
-    MachineFunction &MF = *MBB.getParent();
-    MachineFrameInfo &MFI = *MF.getFrameInfo();
-    MachineMemOperand *MMO =
-      MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
-                              MachineMemOperand::MOStore,
-                              MFI.getObjectSize(FI),
-                              MFI.getObjectAlignment(FI));
     AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2STRi12))
                    .addReg(SrcReg, getKillRegState(isKill))
                    .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
     return;
   }
 
+  if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
+    // Thumb2 STRD expects its source registers to be in rGPR. Not a problem for
+    // gsub_0, but needs an extra constraint for gsub_1 (which could be sp
+    // otherwise).
+    MachineRegisterInfo *MRI = &MF.getRegInfo();
+    MRI->constrainRegClass(SrcReg, &ARM::GPRPair_with_gsub_1_in_rGPRRegClass);
+
+    MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2STRDi8));
+    AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
+    AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
+    MIB.addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+    AddDefaultPred(MIB);
+    return;
+  }
+
   ARMBaseInstrInfo::storeRegToStackSlot(MBB, I, SrcReg, isKill, FI, RC, TRI);
 }
 
@@ -153,24 +170,42 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                      unsigned DestReg, int FI,
                      const TargetRegisterClass *RC,
                      const TargetRegisterInfo *TRI) const {
+  MachineFunction &MF = *MBB.getParent();
+  MachineFrameInfo &MFI = *MF.getFrameInfo();
+  MachineMemOperand *MMO =
+    MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
+                            MachineMemOperand::MOLoad,
+                            MFI.getObjectSize(FI),
+                            MFI.getObjectAlignment(FI));
+  DebugLoc DL;
+  if (I != MBB.end()) DL = I->getDebugLoc();
+
   if (RC == &ARM::GPRRegClass   || RC == &ARM::tGPRRegClass ||
       RC == &ARM::tcGPRRegClass || RC == &ARM::rGPRRegClass ||
       RC == &ARM::GPRnopcRegClass) {
-    DebugLoc DL;
-    if (I != MBB.end()) DL = I->getDebugLoc();
-
-    MachineFunction &MF = *MBB.getParent();
-    MachineFrameInfo &MFI = *MF.getFrameInfo();
-    MachineMemOperand *MMO =
-      MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
-                              MachineMemOperand::MOLoad,
-                              MFI.getObjectSize(FI),
-                              MFI.getObjectAlignment(FI));
     AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2LDRi12), DestReg)
                    .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
     return;
   }
 
+  if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
+    // Thumb2 LDRD expects its dest-registers to be in rGPR. Not a problem for
+    // gsub_0, but needs an extra constraint for gsub_1 (which could be sp
+    // otherwise).
+    MachineRegisterInfo *MRI = &MF.getRegInfo();
+    MRI->constrainRegClass(DestReg, &ARM::GPRPair_with_gsub_1_in_rGPRRegClass);
+
+    MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2LDRDi8));
+    AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
+    AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
+    MIB.addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+    AddDefaultPred(MIB);
+
+    if (TargetRegisterInfo::isPhysicalRegister(DestReg))
+      MIB.addReg(DestReg, RegState::ImplicitDefine);
+    return;
+  }
+
   ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC, TRI);
 }
 
@@ -514,6 +549,15 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
         Offset = -Offset;
         isSub = true;
       }
+    } else if (AddrMode == ARMII::AddrModeT2_i8s4) {
+      Offset += MI.getOperand(FrameRegIdx + 1).getImm() * 4;
+      NumBits = 8;
+      // MCInst operand has already scaled value.
+      Scale = 1;
+      if (Offset < 0) {
+        isSub = true;
+        Offset = -Offset;
+      }
     } else {
       llvm_unreachable("Unsupported addressing mode!");
     }
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index d50f5d9..4795aae 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -926,13 +926,11 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
   HighLatencyCPSR = false;
 
   // Check predecessors for the latest CPSRDef.
-  bool HasBackEdges = false;
   for (MachineBasicBlock::pred_iterator
        I = MBB.pred_begin(), E = MBB.pred_end(); I != E; ++I) {
     const MBBInfo &PInfo = BlockInfo[(*I)->getNumber()];
     if (!PInfo.Visited) {
       // Since blocks are visited in RPO, this must be a back-edge.
-      HasBackEdges = true;
       continue;
     }
     if (PInfo.HighLatencyCPSR) {
diff --git a/lib/Target/Hexagon/Hexagon.h b/lib/Target/Hexagon/Hexagon.h
index dfbefc8..a9b00a2 100644
--- a/lib/Target/Hexagon/Hexagon.h
+++ b/lib/Target/Hexagon/Hexagon.h
@@ -29,26 +29,25 @@ namespace llvm {
   class HexagonTargetMachine;
   class raw_ostream;
 
-  FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM,
+  FunctionPass *createHexagonISelDag(const HexagonTargetMachine &TM,
                                      CodeGenOpt::Level OptLevel);
-  FunctionPass *createHexagonDelaySlotFillerPass(TargetMachine &TM);
-  FunctionPass *createHexagonFPMoverPass(TargetMachine &TM);
-  FunctionPass *createHexagonRemoveExtendOps(HexagonTargetMachine &TM);
-  FunctionPass *createHexagonCFGOptimizer(HexagonTargetMachine &TM);
-
-  FunctionPass *createHexagonSplitTFRCondSets(HexagonTargetMachine &TM);
-  FunctionPass *createHexagonExpandPredSpillCode(HexagonTargetMachine &TM);
+  FunctionPass *createHexagonDelaySlotFillerPass(const TargetMachine &TM);
+  FunctionPass *createHexagonFPMoverPass(const TargetMachine &TM);
+  FunctionPass *createHexagonRemoveExtendArgs(const HexagonTargetMachine &TM);
+  FunctionPass *createHexagonCFGOptimizer(const HexagonTargetMachine &TM);
 
+  FunctionPass *createHexagonSplitTFRCondSets(const HexagonTargetMachine &TM);
+  FunctionPass *createHexagonExpandPredSpillCode(
+                      const HexagonTargetMachine &TM);
   FunctionPass *createHexagonHardwareLoops();
   FunctionPass *createHexagonPeephole();
   FunctionPass *createHexagonFixupHwLoops();
   FunctionPass *createHexagonPacketizer();
   FunctionPass *createHexagonNewValueJump();
 
-
 /* TODO: object output.
   MCCodeEmitter *createHexagonMCCodeEmitter(const Target &,
-                                            TargetMachine &TM,
+                                            const TargetMachine &TM,
                                             MCContext &Ctx);
 */
 /* TODO: assembler input.
diff --git a/lib/Target/Hexagon/Hexagon.td b/lib/Target/Hexagon/Hexagon.td
index 8a5ee40..9b3a643 100644
--- a/lib/Target/Hexagon/Hexagon.td
+++ b/lib/Target/Hexagon/Hexagon.td
@@ -84,12 +84,36 @@ def getPredOpcode : InstrMapping {
 }
 
 //===----------------------------------------------------------------------===//
+// Generate mapping table to relate predicate-true instructions with their
+// predicate-false forms
+//
+def getFalsePredOpcode : InstrMapping {
+  let FilterClass = "PredRel";
+  let RowFields = ["BaseOpcode", "PNewValue", "isNVStore", "isBrTaken"];
+  let ColFields = ["PredSense"];
+  let KeyCol = ["true"];
+  let ValueCols = [["false"]];
+}
+
+//===----------------------------------------------------------------------===//
+// Generate mapping table to relate predicate-false instructions with their
+// predicate-true forms
+//
+def getTruePredOpcode : InstrMapping {
+  let FilterClass = "PredRel";
+  let RowFields = ["BaseOpcode", "PNewValue", "isNVStore", "isBrTaken"];
+  let ColFields = ["PredSense"];
+  let KeyCol = ["false"];
+  let ValueCols = [["true"]];
+}
+
+//===----------------------------------------------------------------------===//
 // Generate mapping table to relate predicated instructions with their .new
 // format.
 //
 def getPredNewOpcode : InstrMapping {
   let FilterClass = "PredNewRel";
-  let RowFields = ["BaseOpcode", "PredSense", "isNVStore"];
+  let RowFields = ["BaseOpcode", "PredSense", "isNVStore", "isBrTaken"];
   let ColFields = ["PNewValue"];
   let KeyCol = [""];
   let ValueCols = [["new"]];
diff --git a/lib/Target/Hexagon/HexagonCFGOptimizer.cpp b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
index d4078ad..8597f11 100644
--- a/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
+++ b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
@@ -26,21 +26,27 @@
 
 using namespace llvm;
 
+namespace llvm {
+  void initializeHexagonCFGOptimizerPass(PassRegistry&);
+}
+
+
 namespace {
 
 class HexagonCFGOptimizer : public MachineFunctionPass {
 
 private:
-  HexagonTargetMachine& QTM;
+  const HexagonTargetMachine& QTM;
   const HexagonSubtarget &QST;
 
   void InvertAndChangeJumpTarget(MachineInstr*, MachineBasicBlock*);
 
  public:
   static char ID;
-  HexagonCFGOptimizer(HexagonTargetMachine& TM) : MachineFunctionPass(ID),
-                                                  QTM(TM),
-                                                  QST(*TM.getSubtargetImpl()) {}
+  HexagonCFGOptimizer(const HexagonTargetMachine& TM)
+    : MachineFunctionPass(ID), QTM(TM), QST(*TM.getSubtargetImpl()) {
+    initializeHexagonCFGOptimizerPass(*PassRegistry::getPassRegistry());
+  }
 
   const char *getPassName() const {
     return "Hexagon CFG Optimizer";
@@ -52,8 +58,8 @@ private:
 char HexagonCFGOptimizer::ID = 0;
 
 static bool IsConditionalBranch(int Opc) {
-  return (Opc == Hexagon::JMP_c) || (Opc == Hexagon::JMP_cNot)
-    || (Opc == Hexagon::JMP_cdnPt) || (Opc == Hexagon::JMP_cdnNotPt);
+  return (Opc == Hexagon::JMP_t) || (Opc == Hexagon::JMP_f)
+    || (Opc == Hexagon::JMP_tnew_t) || (Opc == Hexagon::JMP_fnew_t);
 }
 
 
@@ -68,20 +74,20 @@ HexagonCFGOptimizer::InvertAndChangeJumpTarget(MachineInstr* MI,
   const HexagonInstrInfo *QII = QTM.getInstrInfo();
   int NewOpcode = 0;
   switch(MI->getOpcode()) {
-  case Hexagon::JMP_c:
-    NewOpcode = Hexagon::JMP_cNot;
+  case Hexagon::JMP_t:
+    NewOpcode = Hexagon::JMP_f;
     break;
 
-  case Hexagon::JMP_cNot:
-    NewOpcode = Hexagon::JMP_c;
+  case Hexagon::JMP_f:
+    NewOpcode = Hexagon::JMP_t;
     break;
 
-  case Hexagon::JMP_cdnPt:
-    NewOpcode = Hexagon::JMP_cdnNotPt;
+  case Hexagon::JMP_tnew_t:
+    NewOpcode = Hexagon::JMP_fnew_t;
     break;
 
-  case Hexagon::JMP_cdnNotPt:
-    NewOpcode = Hexagon::JMP_cdnPt;
+  case Hexagon::JMP_fnew_t:
+    NewOpcode = Hexagon::JMP_tnew_t;
     break;
 
   default:
@@ -156,8 +162,8 @@ bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) {
         // The target of the unconditional branch must be JumpAroundTarget.
         // TODO: If not, we should not invert the unconditional branch.
         MachineBasicBlock* CondBranchTarget = NULL;
-        if ((MI->getOpcode() == Hexagon::JMP_c) ||
-            (MI->getOpcode() == Hexagon::JMP_cNot)) {
+        if ((MI->getOpcode() == Hexagon::JMP_t) ||
+            (MI->getOpcode() == Hexagon::JMP_f)) {
           CondBranchTarget = MI->getOperand(1).getMBB();
         }
 
@@ -231,6 +237,16 @@ bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) {
 //                         Public Constructor Functions
 //===----------------------------------------------------------------------===//
 
-FunctionPass *llvm::createHexagonCFGOptimizer(HexagonTargetMachine &TM) {
+static void initializePassOnce(PassRegistry &Registry) {
+  PassInfo *PI = new PassInfo("Hexagon CFG Optimizer", "hexagon-cfg",
+                              &HexagonCFGOptimizer::ID, 0, false, false);
+  Registry.registerPass(*PI, true);
+}
+
+void llvm::initializeHexagonCFGOptimizerPass(PassRegistry &Registry) {
+  CALL_ONCE_INITIALIZATION(initializePassOnce)
+}
+
+FunctionPass *llvm::createHexagonCFGOptimizer(const HexagonTargetMachine &TM) {
   return new HexagonCFGOptimizer(TM);
 }
diff --git a/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp b/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
index 0814421..8a5991f 100644
--- a/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
+++ b/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
@@ -41,16 +41,24 @@
 using namespace llvm;
 
 
+namespace llvm {
+  void initializeHexagonExpandPredSpillCodePass(PassRegistry&);
+}
+
+
 namespace {
 
 class HexagonExpandPredSpillCode : public MachineFunctionPass {
-    HexagonTargetMachine& QTM;
+    const HexagonTargetMachine& QTM;
     const HexagonSubtarget &QST;
 
  public:
     static char ID;
-    HexagonExpandPredSpillCode(HexagonTargetMachine& TM) :
-      MachineFunctionPass(ID), QTM(TM), QST(*TM.getSubtargetImpl()) {}
+    HexagonExpandPredSpillCode(const HexagonTargetMachine& TM) :
+      MachineFunctionPass(ID), QTM(TM), QST(*TM.getSubtargetImpl()) {
+      PassRegistry &Registry = *PassRegistry::getPassRegistry();
+      initializeHexagonExpandPredSpillCodePass(Registry);
+    }
 
     const char *getPassName() const {
       return "Hexagon Expand Predicate Spill Code";
@@ -175,6 +183,19 @@ bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) {
 //                         Public Constructor Functions
 //===----------------------------------------------------------------------===//
 
-FunctionPass *llvm::createHexagonExpandPredSpillCode(HexagonTargetMachine &TM) {
+static void initializePassOnce(PassRegistry &Registry) {
+  const char *Name = "Hexagon Expand Predicate Spill Code";
+  PassInfo *PI = new PassInfo(Name, "hexagon-spill-pred",
+                              &HexagonExpandPredSpillCode::ID,
+                              0, false, false);
+  Registry.registerPass(*PI, true);
+}
+
+void llvm::initializeHexagonExpandPredSpillCodePass(PassRegistry &Registry) {
+  CALL_ONCE_INITIALIZATION(initializePassOnce)
+}
+
+FunctionPass*
+llvm::createHexagonExpandPredSpillCode(const HexagonTargetMachine &TM) {
   return new HexagonExpandPredSpillCode(TM);
 }
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp
index d6a9329..de993ee 100644
--- a/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -189,7 +189,7 @@ void HexagonFrameLowering::emitEpilogue(MachineFunction &MF,
 
     // Replace 'jumpr r31' instruction with dealloc_return for V4 and higher
     // versions.
-    if (STI.hasV4TOps() && MBBI->getOpcode() == Hexagon::JMPR
+    if (STI.hasV4TOps() && MBBI->getOpcode() == Hexagon::JMPret
                         && !DisableDeallocRet) {
       // Remove jumpr node.
       MBB.erase(MBBI);
diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
index 1786624..d002788 100644
--- a/lib/Target/Hexagon/HexagonHardwareLoops.cpp
+++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -541,12 +541,6 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
     case Hexagon::CMPEQrr:
       Cmp = !Negated ? Comparison::EQ : Comparison::NE;
       break;
-    case Hexagon::CMPLTrr:
-      Cmp = !Negated ? Comparison::LTs : Comparison::GEs;
-      break;
-    case Hexagon::CMPLTUrr:
-      Cmp = !Negated ? Comparison::LTu : Comparison::GEu;
-      break;
     case Hexagon::CMPGTUri:
     case Hexagon::CMPGTUrr:
       Cmp = !Negated ? Comparison::GTu : Comparison::LEu;
@@ -1125,8 +1119,8 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
   // The loop ends with either:
   //  - a conditional branch followed by an unconditional branch, or
   //  - a conditional branch to the loop start.
-  if (LastI->getOpcode() == Hexagon::JMP_c ||
-      LastI->getOpcode() == Hexagon::JMP_cNot) {
+  if (LastI->getOpcode() == Hexagon::JMP_t ||
+      LastI->getOpcode() == Hexagon::JMP_f) {
     // Delete one and change/add an uncond. branch to out of the loop.
     MachineBasicBlock *BranchTarget = LastI->getOperand(1).getMBB();
     LastI = LastMBB->erase(LastI);
diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index 8fc9ba1..54ca2c9 100644
--- a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -49,11 +49,11 @@ class HexagonDAGToDAGISel : public SelectionDAGISel {
   const HexagonSubtarget &Subtarget;
 
   // Keep a reference to HexagonTargetMachine.
-  HexagonTargetMachine& TM;
+  const HexagonTargetMachine& TM;
   const HexagonInstrInfo *TII;
   DenseMap<const GlobalValue *, unsigned> GlobalAddressUseCountMap;
 public:
-  explicit HexagonDAGToDAGISel(HexagonTargetMachine &targetmachine,
+  explicit HexagonDAGToDAGISel(const HexagonTargetMachine &targetmachine,
                                CodeGenOpt::Level OptLevel)
     : SelectionDAGISel(targetmachine, OptLevel),
       Subtarget(targetmachine.getSubtarget<HexagonSubtarget>()),
@@ -160,6 +160,17 @@ inline SDValue XformU7ToU7M1Imm(signed Imm) {
   return CurDAG->getTargetConstant(Imm - 1, MVT::i8);
 }
 
+// XformSToSM1Imm - Return a target constant decremented by 1.
+inline SDValue XformSToSM1Imm(signed Imm) {
+  return CurDAG->getTargetConstant(Imm - 1, MVT::i32);
+}
+
+// XformUToUM1Imm - Return a target constant decremented by 1.
+inline SDValue XformUToUM1Imm(unsigned Imm) {
+  assert((Imm >= 1) && "Cannot decrement unsigned int less than 1");
+  return CurDAG->getTargetConstant(Imm - 1, MVT::i32);
+}
+
 // Include the pieces autogenerated from the target description.
 #include "HexagonGenDAGISel.inc"
 };
@@ -169,7 +180,7 @@ inline SDValue XformU7ToU7M1Imm(signed Imm) {
 /// createHexagonISelDag - This pass converts a legalized DAG into a
 /// Hexagon-specific DAG, ready for instruction scheduling.
 ///
-FunctionPass *llvm::createHexagonISelDag(HexagonTargetMachine &TM,
+FunctionPass *llvm::createHexagonISelDag(const HexagonTargetMachine &TM,
                                          CodeGenOpt::Level OptLevel) {
   return new HexagonDAGToDAGISel(TM, OptLevel);
 }
@@ -697,7 +708,7 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, DebugLoc dl) {
 
     // Build post increment store.
     SDNode* Result = CurDAG->getMachineNode(Opcode, dl, MVT::i32,
-                                            MVT::Other, Ops, 4);
+                                            MVT::Other, Ops);
     MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
     MemOp[0] = ST->getMemOperand();
     cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
@@ -723,8 +734,7 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, DebugLoc dl) {
 
   // Build regular store.
   SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
-  SDNode* Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops,
-                                            4);
+  SDNode* Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
   // Build splitted incriment instruction.
   SDNode* Result_2 = CurDAG->getMachineNode(Hexagon::ADD_ri, dl, MVT::i32,
                                             Base,
@@ -780,7 +790,7 @@ SDNode *HexagonDAGToDAGISel::SelectBaseOffsetStore(StoreSDNode *ST,
                          Value, Chain};
         // build indexed store
         SDNode* Result = CurDAG->getMachineNode(Opcode, dl,
-                                                MVT::Other, Ops, 4);
+                                                MVT::Other, Ops);
         MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
         MemOp[0] = ST->getMemOperand();
         cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
@@ -1230,8 +1240,7 @@ SDNode *HexagonDAGToDAGISel::SelectIntrinsicWOChain(SDNode *N) {
     }
     EVT ReturnValueVT = N->getValueType(0);
     SDNode *Result = CurDAG->getMachineNode(IntrinsicWithPred, dl,
-                                            ReturnValueVT,
-                                            Ops.data(), Ops.size());
+                                            ReturnValueVT, Ops);
     ReplaceUses(N, Result);
     return Result;
   }
diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
index 15858a9..0e5b8dc 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -1002,14 +1002,6 @@ HexagonTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
   return FrameAddr;
 }
 
-
-SDValue HexagonTargetLowering::LowerMEMBARRIER(SDValue Op,
-                                               SelectionDAG& DAG) const {
-  DebugLoc dl = Op.getDebugLoc();
-  return DAG.getNode(HexagonISD::BARRIER, dl, MVT::Other,  Op.getOperand(0));
-}
-
-
 SDValue HexagonTargetLowering::LowerATOMIC_FENCE(SDValue Op,
                                                  SelectionDAG& DAG) const {
   DebugLoc dl = Op.getDebugLoc();
@@ -1361,7 +1353,6 @@ HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine
 
     }
 
-    setOperationAction(ISD::BRIND, MVT::Other, Expand);
     if (EmitJumpTables) {
       setOperationAction(ISD::BR_JT, MVT::Other, Custom);
     } else {
@@ -1377,7 +1368,6 @@ HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine
     setOperationAction(ISD::BR_CC, MVT::i32, Expand);
     setOperationAction(ISD::BR_CC, MVT::i64, Expand);
 
-    setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
     setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
 
     setOperationAction(ISD::FSIN , MVT::f64, Expand);
@@ -1444,7 +1434,7 @@ HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine
     setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
     setOperationAction(ISD::EHSELECTION,   MVT::i32, Expand);
 
-    setOperationAction(ISD::EH_RETURN,     MVT::Other, Expand);
+    setOperationAction(ISD::EH_RETURN,     MVT::Other, Custom);
 
     if (TM.getSubtargetImpl()->isSubtargetV2()) {
       setExceptionPointerRegister(Hexagon::R20);
@@ -1499,6 +1489,7 @@ HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
     case HexagonISD::RET_FLAG:    return "HexagonISD::RET_FLAG";
     case HexagonISD::BR_JT:       return "HexagonISD::BR_JT";
     case HexagonISD::TC_RETURN:   return "HexagonISD::TC_RETURN";
+  case HexagonISD::EH_RETURN: return "HexagonISD::EH_RETURN";
   }
 }
 
@@ -1520,16 +1511,43 @@ bool HexagonTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
 }
 
 SDValue
+HexagonTargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
+  SDValue Chain     = Op.getOperand(0);
+  SDValue Offset    = Op.getOperand(1);
+  SDValue Handler   = Op.getOperand(2);
+  DebugLoc dl       = Op.getDebugLoc();
+
+  // Mark function as containing a call to EH_RETURN.
+  HexagonMachineFunctionInfo *FuncInfo =
+    DAG.getMachineFunction().getInfo<HexagonMachineFunctionInfo>();
+  FuncInfo->setHasEHReturn();
+
+  unsigned OffsetReg = Hexagon::R28;
+
+  SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+                                  DAG.getRegister(Hexagon::R30, getPointerTy()),
+                                  DAG.getIntPtrConstant(4));
+  Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo(),
+                       false, false, 0);
+  Chain = DAG.getCopyToReg(Chain, dl, OffsetReg, Offset);
+
+  // Not needed; we already use it as an explicit input to EH_RETURN.
+  // MF.getRegInfo().addLiveOut(OffsetReg);
+
+  return DAG.getNode(HexagonISD::EH_RETURN, dl, MVT::Other, Chain);
+}
+
+SDValue
 HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   switch (Op.getOpcode()) {
     default: llvm_unreachable("Should not custom lower this!");
     case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
+    case ISD::EH_RETURN:          return LowerEH_RETURN(Op, DAG);
       // Frame & Return address.  Currently unimplemented.
     case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
     case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);
     case ISD::GlobalTLSAddress:
                           llvm_unreachable("TLS not implemented for Hexagon.");
-    case ISD::MEMBARRIER:         return LowerMEMBARRIER(Op, DAG);
     case ISD::ATOMIC_FENCE:       return LowerATOMIC_FENCE(Op, DAG);
     case ISD::GlobalAddress:      return LowerGLOBALADDRESS(Op, DAG);
     case ISD::BlockAddress:       return LowerBlockAddress(Op, DAG);
diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h
index 3279cc6..bb1acc1 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/lib/Target/Hexagon/HexagonISelLowering.h
@@ -62,7 +62,8 @@ namespace llvm {
       WrapperShuffEH,
       WrapperShuffOB,
       WrapperShuffOH,
-      TC_RETURN
+      TC_RETURN,
+      EH_RETURN
     };
   }
 
@@ -101,6 +102,7 @@ namespace llvm {
     SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerEH_LABEL(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerFormalArguments(SDValue Chain,
                                  CallingConv::ID CallConv, bool isVarArg,
                                  const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -122,7 +124,6 @@ namespace llvm {
 
     SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG& DAG) const;
     SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const;
     SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
 
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 60b12ac..f114170 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -23,7 +23,9 @@
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
 #define GET_INSTRINFO_CTOR
 #define GET_INSTRMAP_INFO
 #include "HexagonGenInstrInfo.inc"
@@ -118,16 +120,16 @@ HexagonInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
                              DebugLoc DL) const{
 
     int BOpc   = Hexagon::JMP;
-    int BccOpc = Hexagon::JMP_c;
+    int BccOpc = Hexagon::JMP_t;
 
     assert(TBB && "InsertBranch must not be told to insert a fallthrough");
 
     int regPos = 0;
     // Check if ReverseBranchCondition has asked to reverse this branch
     // If we want to reverse the branch an odd number of times, we want
-    // JMP_cNot.
+    // JMP_f.
     if (!Cond.empty() && Cond[0].isImm() && Cond[0].getImm() == 0) {
-      BccOpc = Hexagon::JMP_cNot;
+      BccOpc = Hexagon::JMP_f;
       regPos = 1;
     }
 
@@ -174,8 +176,8 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
   FBB = NULL;
 
   // If the block has no terminators, it just falls into the block after it.
-  MachineBasicBlock::iterator I = MBB.end();
-  if (I == MBB.begin())
+  MachineBasicBlock::instr_iterator I = MBB.instr_end();
+  if (I == MBB.instr_begin())
     return false;
 
   // A basic block may looks like this:
@@ -194,13 +196,24 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
     --I;
     if (I->isEHLabel())
       return true;
-  } while (I != MBB.begin());
+  } while (I != MBB.instr_begin());
 
-  I = MBB.end();
+  I = MBB.instr_end();
   --I;
 
   while (I->isDebugValue()) {
-    if (I == MBB.begin())
+    if (I == MBB.instr_begin())
+      return false;
+    --I;
+  }
+
+  // Delete the JMP if it's equivalent to a fall-through.
+  if (AllowModify && I->getOpcode() == Hexagon::JMP &&
+      MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
+    DEBUG(dbgs()<< "\nErasing the jump to successor block\n";);
+    I->eraseFromParent();
+    I = MBB.instr_end();
+    if (I == MBB.instr_begin())
       return false;
     --I;
   }
@@ -209,23 +222,42 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
 
   // Get the last instruction in the block.
   MachineInstr *LastInst = I;
+  MachineInstr *SecondLastInst = NULL;
+  // Find one more terminator if present.
+  do {
+    if (&*I != LastInst && !I->isBundle() && isUnpredicatedTerminator(I)) {
+      if (!SecondLastInst)
+        SecondLastInst = I;
+      else
+        // This is a third branch.
+        return true;
+    }
+    if (I == MBB.instr_begin())
+      break;
+    --I;
+  } while(I);
+
+  int LastOpcode = LastInst->getOpcode();
+
+  bool LastOpcodeHasJMP_c = PredOpcodeHasJMP_c(LastOpcode);
+  bool LastOpcodeHasNot = PredOpcodeHasNot(LastOpcode);
 
   // If there is only one terminator instruction, process it.
-  if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
-    if (LastInst->getOpcode() == Hexagon::JMP) {
+  if (LastInst && !SecondLastInst) {
+    if (LastOpcode == Hexagon::JMP) {
       TBB = LastInst->getOperand(0).getMBB();
       return false;
     }
-    if (LastInst->getOpcode() == Hexagon::JMP_c) {
-      // Block ends with fall-through true condbranch.
-      TBB = LastInst->getOperand(1).getMBB();
+    if (LastOpcode == Hexagon::ENDLOOP0) {
+      TBB = LastInst->getOperand(0).getMBB();
       Cond.push_back(LastInst->getOperand(0));
       return false;
     }
-    if (LastInst->getOpcode() == Hexagon::JMP_cNot) {
-      // Block ends with fall-through false condbranch.
+    if (LastOpcodeHasJMP_c) {
       TBB = LastInst->getOperand(1).getMBB();
-      Cond.push_back(MachineOperand::CreateImm(0));
+      if (LastOpcodeHasNot) {
+        Cond.push_back(MachineOperand::CreateImm(0));
+      }
       Cond.push_back(LastInst->getOperand(0));
       return false;
     }
@@ -233,29 +265,14 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
     return true;
   }
 
-  // Get the instruction before it if it's a terminator.
-  MachineInstr *SecondLastInst = I;
-
-  // If there are three terminators, we don't know what sort of block this is.
-  if (SecondLastInst && I != MBB.begin() &&
-      isUnpredicatedTerminator(--I))
-    return true;
+  int SecLastOpcode = SecondLastInst->getOpcode();
 
-  // If the block ends with Hexagon::BRCOND and Hexagon:JMP, handle it.
-  if (((SecondLastInst->getOpcode() == Hexagon::BRCOND) ||
-      (SecondLastInst->getOpcode() == Hexagon::JMP_c)) &&
-      LastInst->getOpcode() == Hexagon::JMP) {
+  bool SecLastOpcodeHasJMP_c = PredOpcodeHasJMP_c(SecLastOpcode);
+  bool SecLastOpcodeHasNot = PredOpcodeHasNot(SecLastOpcode);
+  if (SecLastOpcodeHasJMP_c && (LastOpcode == Hexagon::JMP)) {
     TBB =  SecondLastInst->getOperand(1).getMBB();
-    Cond.push_back(SecondLastInst->getOperand(0));
-    FBB = LastInst->getOperand(0).getMBB();
-    return false;
-  }
-
-  // If the block ends with Hexagon::JMP_cNot and Hexagon:JMP, handle it.
-  if ((SecondLastInst->getOpcode() == Hexagon::JMP_cNot) &&
-      LastInst->getOpcode() == Hexagon::JMP) {
-    TBB =  SecondLastInst->getOperand(1).getMBB();
-    Cond.push_back(MachineOperand::CreateImm(0));
+    if (SecLastOpcodeHasNot)
+      Cond.push_back(MachineOperand::CreateImm(0));
     Cond.push_back(SecondLastInst->getOperand(0));
     FBB = LastInst->getOperand(0).getMBB();
     return false;
@@ -263,8 +280,7 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
 
   // If the block ends with two Hexagon:JMPs, handle it.  The second one is not
   // executed, so remove it.
-  if (SecondLastInst->getOpcode() == Hexagon::JMP &&
-      LastInst->getOpcode() == Hexagon::JMP) {
+  if (SecLastOpcode == Hexagon::JMP && LastOpcode == Hexagon::JMP) {
     TBB = SecondLastInst->getOperand(0).getMBB();
     I = LastInst;
     if (AllowModify)
@@ -272,6 +288,15 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
     return false;
   }
 
+  // If the block ends with an ENDLOOP and a JMP, handle it.
+  if (SecLastOpcode == Hexagon::ENDLOOP0 &&
+      LastOpcode == Hexagon::JMP) {
+    TBB = SecondLastInst->getOperand(0).getMBB();
+    Cond.push_back(SecondLastInst->getOperand(0));
+    FBB = LastInst->getOperand(0).getMBB();
+    return false;
+  }
+
   // Otherwise, can't handle this.
   return true;
 }
@@ -279,8 +304,8 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
 
 unsigned HexagonInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
   int BOpc   = Hexagon::JMP;
-  int BccOpc = Hexagon::JMP_c;
-  int BccOpcNot = Hexagon::JMP_cNot;
+  int BccOpc = Hexagon::JMP_t;
+  int BccOpcNot = Hexagon::JMP_f;
 
   MachineBasicBlock::iterator I = MBB.end();
   if (I == MBB.begin()) return 0;
@@ -325,8 +350,6 @@ bool HexagonInstrInfo::analyzeCompare(const MachineInstr *MI,
     case Hexagon::CMPGTUrr:
     case Hexagon::CMPGTri:
     case Hexagon::CMPGTrr:
-    case Hexagon::CMPLTUrr:
-    case Hexagon::CMPLTrr:
       SrcReg = MI->getOperand(1).getReg();
       Mask = ~0;
       break;
@@ -366,8 +389,6 @@ bool HexagonInstrInfo::analyzeCompare(const MachineInstr *MI,
     case Hexagon::CMPhEQrr_xor_V4:
     case Hexagon::CMPhGTUrr_V4:
     case Hexagon::CMPhGTrr_shl_V4:
-    case Hexagon::CMPLTUrr:
-    case Hexagon::CMPLTrr:
       SrcReg2 = MI->getOperand(2).getReg();
       return true;
 
@@ -605,110 +626,8 @@ bool HexagonInstrInfo::isExtended(const MachineInstr *MI) const {
   return  false;
 }
 
-bool HexagonInstrInfo::isNewValueJump(const MachineInstr *MI) const {
-  switch (MI->getOpcode()) {
-    default: return false;
-    // JMP_EQri
-    case Hexagon::JMP_EQriPt_nv_V4:
-    case Hexagon::JMP_EQriPnt_nv_V4:
-    case Hexagon::JMP_EQriNotPt_nv_V4:
-    case Hexagon::JMP_EQriNotPnt_nv_V4:
-    case Hexagon::JMP_EQriPt_ie_nv_V4:
-    case Hexagon::JMP_EQriPnt_ie_nv_V4:
-    case Hexagon::JMP_EQriNotPt_ie_nv_V4:
-    case Hexagon::JMP_EQriNotPnt_ie_nv_V4:
-
-    // JMP_EQri - with -1
-    case Hexagon::JMP_EQriPtneg_nv_V4:
-    case Hexagon::JMP_EQriPntneg_nv_V4:
-    case Hexagon::JMP_EQriNotPtneg_nv_V4:
-    case Hexagon::JMP_EQriNotPntneg_nv_V4:
-    case Hexagon::JMP_EQriPtneg_ie_nv_V4:
-    case Hexagon::JMP_EQriPntneg_ie_nv_V4:
-    case Hexagon::JMP_EQriNotPtneg_ie_nv_V4:
-    case Hexagon::JMP_EQriNotPntneg_ie_nv_V4:
-
-    // JMP_EQrr
-    case Hexagon::JMP_EQrrPt_nv_V4:
-    case Hexagon::JMP_EQrrPnt_nv_V4:
-    case Hexagon::JMP_EQrrNotPt_nv_V4:
-    case Hexagon::JMP_EQrrNotPnt_nv_V4:
-    case Hexagon::JMP_EQrrPt_ie_nv_V4:
-    case Hexagon::JMP_EQrrPnt_ie_nv_V4:
-    case Hexagon::JMP_EQrrNotPt_ie_nv_V4:
-    case Hexagon::JMP_EQrrNotPnt_ie_nv_V4:
-
-    // JMP_GTri
-    case Hexagon::JMP_GTriPt_nv_V4:
-    case Hexagon::JMP_GTriPnt_nv_V4:
-    case Hexagon::JMP_GTriNotPt_nv_V4:
-    case Hexagon::JMP_GTriNotPnt_nv_V4:
-    case Hexagon::JMP_GTriPt_ie_nv_V4:
-    case Hexagon::JMP_GTriPnt_ie_nv_V4:
-    case Hexagon::JMP_GTriNotPt_ie_nv_V4:
-    case Hexagon::JMP_GTriNotPnt_ie_nv_V4:
-
-    // JMP_GTri - with -1
-    case Hexagon::JMP_GTriPtneg_nv_V4:
-    case Hexagon::JMP_GTriPntneg_nv_V4:
-    case Hexagon::JMP_GTriNotPtneg_nv_V4:
-    case Hexagon::JMP_GTriNotPntneg_nv_V4:
-    case Hexagon::JMP_GTriPtneg_ie_nv_V4:
-    case Hexagon::JMP_GTriPntneg_ie_nv_V4:
-    case Hexagon::JMP_GTriNotPtneg_ie_nv_V4:
-    case Hexagon::JMP_GTriNotPntneg_ie_nv_V4:
-
-    // JMP_GTrr
-    case Hexagon::JMP_GTrrPt_nv_V4:
-    case Hexagon::JMP_GTrrPnt_nv_V4:
-    case Hexagon::JMP_GTrrNotPt_nv_V4:
-    case Hexagon::JMP_GTrrNotPnt_nv_V4:
-    case Hexagon::JMP_GTrrPt_ie_nv_V4:
-    case Hexagon::JMP_GTrrPnt_ie_nv_V4:
-    case Hexagon::JMP_GTrrNotPt_ie_nv_V4:
-    case Hexagon::JMP_GTrrNotPnt_ie_nv_V4:
-
-    // JMP_GTrrdn
-    case Hexagon::JMP_GTrrdnPt_nv_V4:
-    case Hexagon::JMP_GTrrdnPnt_nv_V4:
-    case Hexagon::JMP_GTrrdnNotPt_nv_V4:
-    case Hexagon::JMP_GTrrdnNotPnt_nv_V4:
-    case Hexagon::JMP_GTrrdnPt_ie_nv_V4:
-    case Hexagon::JMP_GTrrdnPnt_ie_nv_V4:
-    case Hexagon::JMP_GTrrdnNotPt_ie_nv_V4:
-    case Hexagon::JMP_GTrrdnNotPnt_ie_nv_V4:
-
-    // JMP_GTUri
-    case Hexagon::JMP_GTUriPt_nv_V4:
-    case Hexagon::JMP_GTUriPnt_nv_V4:
-    case Hexagon::JMP_GTUriNotPt_nv_V4:
-    case Hexagon::JMP_GTUriNotPnt_nv_V4:
-    case Hexagon::JMP_GTUriPt_ie_nv_V4:
-    case Hexagon::JMP_GTUriPnt_ie_nv_V4:
-    case Hexagon::JMP_GTUriNotPt_ie_nv_V4:
-    case Hexagon::JMP_GTUriNotPnt_ie_nv_V4:
-
-    // JMP_GTUrr
-    case Hexagon::JMP_GTUrrPt_nv_V4:
-    case Hexagon::JMP_GTUrrPnt_nv_V4:
-    case Hexagon::JMP_GTUrrNotPt_nv_V4:
-    case Hexagon::JMP_GTUrrNotPnt_nv_V4:
-    case Hexagon::JMP_GTUrrPt_ie_nv_V4:
-    case Hexagon::JMP_GTUrrPnt_ie_nv_V4:
-    case Hexagon::JMP_GTUrrNotPt_ie_nv_V4:
-    case Hexagon::JMP_GTUrrNotPnt_ie_nv_V4:
-
-    // JMP_GTUrrdn
-    case Hexagon::JMP_GTUrrdnPt_nv_V4:
-    case Hexagon::JMP_GTUrrdnPnt_nv_V4:
-    case Hexagon::JMP_GTUrrdnNotPt_nv_V4:
-    case Hexagon::JMP_GTUrrdnNotPnt_nv_V4:
-    case Hexagon::JMP_GTUrrdnPt_ie_nv_V4:
-    case Hexagon::JMP_GTUrrdnPnt_ie_nv_V4:
-    case Hexagon::JMP_GTUrrdnNotPt_ie_nv_V4:
-    case Hexagon::JMP_GTUrrdnNotPnt_ie_nv_V4:
-      return true;
-  }
+bool HexagonInstrInfo::isBranch (const MachineInstr *MI) const {
+  return MI->getDesc().isBranch();
 }
 
 bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const {
@@ -746,11 +665,6 @@ bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const {
     case Hexagon::STrib_abs_cdnPt_nv_V4:
     case Hexagon::STrib_abs_cNotPt_nv_V4:
     case Hexagon::STrib_abs_cdnNotPt_nv_V4:
-    case Hexagon::STrib_imm_abs_nv_V4:
-    case Hexagon::STrib_imm_abs_cPt_nv_V4:
-    case Hexagon::STrib_imm_abs_cdnPt_nv_V4:
-    case Hexagon::STrib_imm_abs_cNotPt_nv_V4:
-    case Hexagon::STrib_imm_abs_cdnNotPt_nv_V4:
 
     // Store Halfword
     case Hexagon::STrih_nv_V4:
@@ -784,11 +698,6 @@ bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const {
     case Hexagon::STrih_abs_cdnPt_nv_V4:
     case Hexagon::STrih_abs_cNotPt_nv_V4:
     case Hexagon::STrih_abs_cdnNotPt_nv_V4:
-    case Hexagon::STrih_imm_abs_nv_V4:
-    case Hexagon::STrih_imm_abs_cPt_nv_V4:
-    case Hexagon::STrih_imm_abs_cdnPt_nv_V4:
-    case Hexagon::STrih_imm_abs_cNotPt_nv_V4:
-    case Hexagon::STrih_imm_abs_cdnNotPt_nv_V4:
 
     // Store Word
     case Hexagon::STriw_nv_V4:
@@ -822,11 +731,6 @@ bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const {
     case Hexagon::STriw_abs_cdnPt_nv_V4:
     case Hexagon::STriw_abs_cNotPt_nv_V4:
     case Hexagon::STriw_abs_cdnNotPt_nv_V4:
-    case Hexagon::STriw_imm_abs_nv_V4:
-    case Hexagon::STriw_imm_abs_cPt_nv_V4:
-    case Hexagon::STriw_imm_abs_cdnPt_nv_V4:
-    case Hexagon::STriw_imm_abs_cNotPt_nv_V4:
-    case Hexagon::STriw_imm_abs_cdnNotPt_nv_V4:
       return true;
   }
 }
@@ -1003,9 +907,6 @@ bool HexagonInstrInfo::isPredicable(MachineInstr *MI) const {
   case Hexagon::ZXTB:
   case Hexagon::ZXTH:
     return Subtarget.hasV4TOps();
-
-  case Hexagon::JMPR:
-    return false;
   }
 
   return true;
@@ -1030,6 +931,12 @@ bool HexagonInstrInfo::isPredicable(MachineInstr *MI) const {
 //  cNotPt  ---> cNotPt_nv
 //  cPt     ---> cPt_nv
 unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const {
+  int InvPredOpcode;
+  InvPredOpcode = isPredicatedTrue(Opc) ? Hexagon::getFalsePredOpcode(Opc)
+                                        : Hexagon::getTruePredOpcode(Opc);
+  if (InvPredOpcode >= 0) // Valid instruction with the inverted predicate.
+    return InvPredOpcode;
+
   switch(Opc) {
     default: llvm_unreachable("Unexpected predicated instruction");
     case Hexagon::TFR_cPt:
@@ -1042,10 +949,10 @@ unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const {
     case Hexagon::TFRI_cNotPt:
       return Hexagon::TFRI_cPt;
 
-    case Hexagon::JMP_c:
-      return Hexagon::JMP_cNot;
-    case Hexagon::JMP_cNot:
-      return Hexagon::JMP_c;
+    case Hexagon::JMP_t:
+      return Hexagon::JMP_f;
+    case Hexagon::JMP_f:
+      return Hexagon::JMP_t;
 
     case Hexagon::ADD_ri_cPt:
       return Hexagon::ADD_ri_cNotPt;
@@ -1113,10 +1020,10 @@ unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const {
       return Hexagon::ZXTH_cPt_V4;
 
 
-    case Hexagon::JMPR_cPt:
-      return Hexagon::JMPR_cNotPt;
-    case Hexagon::JMPR_cNotPt:
-      return Hexagon::JMPR_cPt;
+    case Hexagon::JMPR_t:
+      return Hexagon::JMPR_f;
+    case Hexagon::JMPR_f:
+      return Hexagon::JMPR_t;
 
   // V4 indexed+scaled load.
     case Hexagon::LDrid_indexed_shl_cPt_V4:
@@ -1362,117 +1269,6 @@ unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const {
       return Hexagon::DEALLOC_RET_cNotPt_V4;
     case Hexagon::DEALLOC_RET_cNotPt_V4:
       return Hexagon::DEALLOC_RET_cPt_V4;
-
-   // New Value Jump.
-   // JMPEQ_ri - with -1.
-    case Hexagon::JMP_EQriPtneg_nv_V4:
-      return Hexagon::JMP_EQriNotPtneg_nv_V4;
-    case Hexagon::JMP_EQriNotPtneg_nv_V4:
-      return Hexagon::JMP_EQriPtneg_nv_V4;
-
-    case Hexagon::JMP_EQriPntneg_nv_V4:
-      return Hexagon::JMP_EQriNotPntneg_nv_V4;
-    case Hexagon::JMP_EQriNotPntneg_nv_V4:
-      return Hexagon::JMP_EQriPntneg_nv_V4;
-
-   // JMPEQ_ri.
-     case Hexagon::JMP_EQriPt_nv_V4:
-      return Hexagon::JMP_EQriNotPt_nv_V4;
-    case Hexagon::JMP_EQriNotPt_nv_V4:
-      return Hexagon::JMP_EQriPt_nv_V4;
-
-     case Hexagon::JMP_EQriPnt_nv_V4:
-      return Hexagon::JMP_EQriNotPnt_nv_V4;
-    case Hexagon::JMP_EQriNotPnt_nv_V4:
-      return Hexagon::JMP_EQriPnt_nv_V4;
-
-   // JMPEQ_rr.
-     case Hexagon::JMP_EQrrPt_nv_V4:
-      return Hexagon::JMP_EQrrNotPt_nv_V4;
-    case Hexagon::JMP_EQrrNotPt_nv_V4:
-      return Hexagon::JMP_EQrrPt_nv_V4;
-
-     case Hexagon::JMP_EQrrPnt_nv_V4:
-      return Hexagon::JMP_EQrrNotPnt_nv_V4;
-    case Hexagon::JMP_EQrrNotPnt_nv_V4:
-      return Hexagon::JMP_EQrrPnt_nv_V4;
-
-   // JMPGT_ri - with -1.
-    case Hexagon::JMP_GTriPtneg_nv_V4:
-      return Hexagon::JMP_GTriNotPtneg_nv_V4;
-    case Hexagon::JMP_GTriNotPtneg_nv_V4:
-      return Hexagon::JMP_GTriPtneg_nv_V4;
-
-    case Hexagon::JMP_GTriPntneg_nv_V4:
-      return Hexagon::JMP_GTriNotPntneg_nv_V4;
-    case Hexagon::JMP_GTriNotPntneg_nv_V4:
-      return Hexagon::JMP_GTriPntneg_nv_V4;
-
-   // JMPGT_ri.
-     case Hexagon::JMP_GTriPt_nv_V4:
-      return Hexagon::JMP_GTriNotPt_nv_V4;
-    case Hexagon::JMP_GTriNotPt_nv_V4:
-      return Hexagon::JMP_GTriPt_nv_V4;
-
-     case Hexagon::JMP_GTriPnt_nv_V4:
-      return Hexagon::JMP_GTriNotPnt_nv_V4;
-    case Hexagon::JMP_GTriNotPnt_nv_V4:
-      return Hexagon::JMP_GTriPnt_nv_V4;
-
-   // JMPGT_rr.
-     case Hexagon::JMP_GTrrPt_nv_V4:
-      return Hexagon::JMP_GTrrNotPt_nv_V4;
-    case Hexagon::JMP_GTrrNotPt_nv_V4:
-      return Hexagon::JMP_GTrrPt_nv_V4;
-
-     case Hexagon::JMP_GTrrPnt_nv_V4:
-      return Hexagon::JMP_GTrrNotPnt_nv_V4;
-    case Hexagon::JMP_GTrrNotPnt_nv_V4:
-      return Hexagon::JMP_GTrrPnt_nv_V4;
-
-   // JMPGT_rrdn.
-     case Hexagon::JMP_GTrrdnPt_nv_V4:
-      return Hexagon::JMP_GTrrdnNotPt_nv_V4;
-    case Hexagon::JMP_GTrrdnNotPt_nv_V4:
-      return Hexagon::JMP_GTrrdnPt_nv_V4;
-
-     case Hexagon::JMP_GTrrdnPnt_nv_V4:
-      return Hexagon::JMP_GTrrdnNotPnt_nv_V4;
-    case Hexagon::JMP_GTrrdnNotPnt_nv_V4:
-      return Hexagon::JMP_GTrrdnPnt_nv_V4;
-
-   // JMPGTU_ri.
-     case Hexagon::JMP_GTUriPt_nv_V4:
-      return Hexagon::JMP_GTUriNotPt_nv_V4;
-    case Hexagon::JMP_GTUriNotPt_nv_V4:
-      return Hexagon::JMP_GTUriPt_nv_V4;
-
-     case Hexagon::JMP_GTUriPnt_nv_V4:
-      return Hexagon::JMP_GTUriNotPnt_nv_V4;
-    case Hexagon::JMP_GTUriNotPnt_nv_V4:
-      return Hexagon::JMP_GTUriPnt_nv_V4;
-
-   // JMPGTU_rr.
-     case Hexagon::JMP_GTUrrPt_nv_V4:
-      return Hexagon::JMP_GTUrrNotPt_nv_V4;
-    case Hexagon::JMP_GTUrrNotPt_nv_V4:
-      return Hexagon::JMP_GTUrrPt_nv_V4;
-
-     case Hexagon::JMP_GTUrrPnt_nv_V4:
-      return Hexagon::JMP_GTUrrNotPnt_nv_V4;
-    case Hexagon::JMP_GTUrrNotPnt_nv_V4:
-      return Hexagon::JMP_GTUrrPnt_nv_V4;
-
-   // JMPGTU_rrdn.
-     case Hexagon::JMP_GTUrrdnPt_nv_V4:
-      return Hexagon::JMP_GTUrrdnNotPt_nv_V4;
-    case Hexagon::JMP_GTUrrdnNotPt_nv_V4:
-      return Hexagon::JMP_GTUrrdnPt_nv_V4;
-
-     case Hexagon::JMP_GTUrrdnPnt_nv_V4:
-      return Hexagon::JMP_GTUrrdnNotPnt_nv_V4;
-    case Hexagon::JMP_GTUrrdnNotPnt_nv_V4:
-      return Hexagon::JMP_GTUrrdnPnt_nv_V4;
   }
 }
 
@@ -1499,14 +1295,9 @@ getMatchingCondBranchOpcode(int Opc, bool invertPredicate) const {
     return !invertPredicate ? Hexagon::TFRI_cPt :
                               Hexagon::TFRI_cNotPt;
   case Hexagon::JMP:
-    return !invertPredicate ? Hexagon::JMP_c :
-                              Hexagon::JMP_cNot;
-  case Hexagon::JMP_EQrrPt_nv_V4:
-    return !invertPredicate ? Hexagon::JMP_EQrrPt_nv_V4 :
-                              Hexagon::JMP_EQrrNotPt_nv_V4;
-  case Hexagon::JMP_EQriPt_nv_V4:
-    return !invertPredicate ? Hexagon::JMP_EQriPt_nv_V4 :
-                              Hexagon::JMP_EQriNotPt_nv_V4;
+    return !invertPredicate ? Hexagon::JMP_t :
+                              Hexagon::JMP_f;
+
   case Hexagon::COMBINE_rr:
     return !invertPredicate ? Hexagon::COMBINE_rr_cPt :
                               Hexagon::COMBINE_rr_cNotPt;
@@ -1530,8 +1321,8 @@ getMatchingCondBranchOpcode(int Opc, bool invertPredicate) const {
                               Hexagon::ZXTH_cNotPt_V4;
 
   case Hexagon::JMPR:
-    return !invertPredicate ? Hexagon::JMPR_cPt :
-                              Hexagon::JMPR_cNotPt;
+    return !invertPredicate ? Hexagon::JMPR_t :
+                              Hexagon::JMPR_f;
 
   // V4 indexed+scaled load.
   case Hexagon::LDrid_indexed_shl_V4:
@@ -1830,11 +1621,15 @@ PredicateInstruction(MachineInstr *MI,
   // It is better to have an assert here to check this. But I don't know how
   // to write this assert because findFirstPredOperandIdx() would return -1
   if (oper < -1) oper = -1;
+
   MI->getOperand(oper+1).ChangeToRegister(PredMO.getReg(), PredMO.isDef(),
-                                          PredMO.isImplicit(), PredMO.isKill(),
+                                          PredMO.isImplicit(), false,
                                           PredMO.isDead(), PredMO.isUndef(),
                                           PredMO.isDebug());
 
+  MachineRegisterInfo &RegInfo = MI->getParent()->getParent()->getRegInfo();
+  RegInfo.clearKillFlags(PredMO.getReg());
+
   if (hasGAOpnd)
   {
     unsigned int i;
@@ -1883,13 +1678,41 @@ isProfitableToIfCvt(MachineBasicBlock &TMBB,
   return true;
 }
 
-
+// Returns true if an instruction is predicated irrespective of the predicate
+// sense. For example, all of the following will return true.
+// if (p0) R1 = add(R2, R3)
+// if (!p0) R1 = add(R2, R3)
+// if (p0.new) R1 = add(R2, R3)
+// if (!p0.new) R1 = add(R2, R3)
 bool HexagonInstrInfo::isPredicated(const MachineInstr *MI) const {
   const uint64_t F = MI->getDesc().TSFlags;
 
   return ((F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask);
 }
 
+// Opcode-based variant: tests the Predicated field of the opcode's TSFlags.
+bool HexagonInstrInfo::isPredicated(unsigned Opcode) const {
+  const uint64_t Flags = get(Opcode).TSFlags;
+  return (Flags >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask;
+}
+
+// Requires a predicated MI; true when its PredicatedFalse TSFlags field is 0.
+bool HexagonInstrInfo::isPredicatedTrue(const MachineInstr *MI) const {
+  assert(isPredicated(MI));
+  const uint64_t Flags = MI->getDesc().TSFlags;
+  return !((Flags >> HexagonII::PredicatedFalsePos) &
+           HexagonII::PredicatedFalseMask);
+}
+
+// Opcode-based variant: true when the PredicatedFalse TSFlags field is clear.
+bool HexagonInstrInfo::isPredicatedTrue(unsigned Opcode) const {
+  const uint64_t Flags = get(Opcode).TSFlags;
+  // Make sure that the instruction is predicated.
+  assert((Flags >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask);
+  return !((Flags >> HexagonII::PredicatedFalsePos) &
+           HexagonII::PredicatedFalseMask);
+}
+
 bool HexagonInstrInfo::isPredicatedNew(const MachineInstr *MI) const {
   const uint64_t F = MI->getDesc().TSFlags;
 
@@ -1897,6 +1720,13 @@ bool HexagonInstrInfo::isPredicatedNew(const MachineInstr *MI) const {
   return ((F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask);
 }
 
+// Opcode-based variant; tests PredicatedNew in a predicated opcode's TSFlags.
+bool HexagonInstrInfo::isPredicatedNew(unsigned Opcode) const {
+  assert(isPredicated(Opcode));
+  const uint64_t Flags = get(Opcode).TSFlags;
+  return (Flags >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask;
+}
+
 bool
 HexagonInstrInfo::DefinesPredicate(MachineInstr *MI,
                                    std::vector<MachineOperand> &Pred) const {
@@ -2129,14 +1959,10 @@ bool HexagonInstrInfo::isNewValueJumpCandidate(const MachineInstr *MI) const {
     default: return false;
     case Hexagon::CMPEQrr:
     case Hexagon::CMPEQri:
-    case Hexagon::CMPLTrr:
     case Hexagon::CMPGTrr:
     case Hexagon::CMPGTri:
-    case Hexagon::CMPLTUrr:
     case Hexagon::CMPGTUrr:
     case Hexagon::CMPGTUri:
-    case Hexagon::CMPGEri:
-    case Hexagon::CMPGEUri:
       return true;
   }
 }
@@ -2369,6 +2195,18 @@ isConditionalStore (const MachineInstr* MI) const {
   }
 }
 
+
+// Returns true if both isNewValue() and isBranch() hold for MI.
+bool HexagonInstrInfo::isNewValueJump(const MachineInstr *MI) const {
+  return isNewValue(MI) && isBranch(MI);
+}
+
+// Returns true if the NewValue field of MI's TSFlags is set.
+bool HexagonInstrInfo::isNewValue(const MachineInstr* MI) const {
+  return (MI->getDesc().TSFlags >> HexagonII::NewValuePos) &
+         HexagonII::NewValueMask;
+}
+
 // Returns true, if any one of the operands is a dot new
 // insn, whether it is predicated dot new or register dot new.
 bool HexagonInstrInfo::isDotNewInst (const MachineInstr* MI) const {
@@ -2470,6 +2308,34 @@ bool HexagonInstrInfo::isConstExtended(MachineInstr *MI) const {
   return (ImmValue < MinValue || ImmValue > MaxValue);
 }
 
+// Returns the opcode that converts MI, a conditional jump (JMP_t or JMP_f),
+// into the matching jump that reads the .new value of the predicate. The
+// taken / not-taken hint of the result is picked from branch probabilities.
+int HexagonInstrInfo::getDotNewPredJumpOp(
+    MachineInstr *MI, const MachineBranchProbabilityInfo *MBPI) const {
+
+  // We assume that the block can have at most two successors.
+  MachineBasicBlock *Src = MI->getParent();
+
+  // Operand 1 of the jump is read as the destination basic block.
+  MachineBasicBlock *Dst = MI->getOperand(1).getMBB();
+
+  // Hint the jump as taken when the Src -> Dst edge probability is at least
+  // one half.
+  const bool Taken =
+      MBPI->getEdgeProbability(Src, Dst) >= BranchProbability(1, 2);
+
+  switch (MI->getOpcode()) {
+  case Hexagon::JMP_t:
+    return Taken ? Hexagon::JMP_tnew_t : Hexagon::JMP_tnew_nt;
+  case Hexagon::JMP_f:
+    return Taken ? Hexagon::JMP_fnew_t : Hexagon::JMP_fnew_nt;
+
+  default:
+    // Any other opcode is not a jump this function knows how to convert.
+    llvm_unreachable("Unexpected jump instruction.");
+  }
+}
 // Returns true if a particular operand is extendable for an instruction.
 bool HexagonInstrInfo::isOperandExtended(const MachineInstr *MI,
                                          unsigned short OperandNum) const {
@@ -2574,3 +2440,18 @@ short HexagonInstrInfo::getNonExtOpcode (const MachineInstr *MI) const {
   }
   return -1;
 }
+
+// Returns true if Opcode is one of the predicated JMP opcodes: the old-value
+// forms JMP_t / JMP_f, or any of the .new forms with a taken (_t) or
+// not-taken (_nt) hint.
+bool HexagonInstrInfo::PredOpcodeHasJMP_c(Opcode_t Opcode) const {
+  return Opcode == Hexagon::JMP_t || Opcode == Hexagon::JMP_f ||
+         Opcode == Hexagon::JMP_tnew_t || Opcode == Hexagon::JMP_fnew_t ||
+         Opcode == Hexagon::JMP_tnew_nt || Opcode == Hexagon::JMP_fnew_nt;
+}
+
+// True for the false-sense predicated JMPs: JMP_f and its .new forms.
+bool HexagonInstrInfo::PredOpcodeHasNot(Opcode_t Opcode) const {
+  return Opcode == Hexagon::JMP_f || Opcode == Hexagon::JMP_fnew_t ||
+         Opcode == Hexagon::JMP_fnew_nt;
+}
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h
index 5df13a8..b721da4 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -16,9 +16,9 @@
 
 #include "HexagonRegisterInfo.h"
 #include "MCTargetDesc/HexagonBaseInfo.h"
-#include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetInstrInfo.h"
-
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
 
 #define GET_INSTRINFO_HEADER
 #include "HexagonGenInstrInfo.inc"
@@ -28,6 +28,8 @@ namespace llvm {
 class HexagonInstrInfo : public HexagonGenInstrInfo {
   const HexagonRegisterInfo RI;
   const HexagonSubtarget& Subtarget;
+  typedef unsigned Opcode_t;
+
 public:
   explicit HexagonInstrInfo(HexagonSubtarget &ST);
 
@@ -111,6 +113,7 @@ public:
 
   unsigned createVR(MachineFunction* MF, MVT VT) const;
 
+  virtual bool isBranch(const MachineInstr *MI) const;
   virtual bool isPredicable(MachineInstr *MI) const;
   virtual bool
   PredicateInstruction(MachineInstr *MI,
@@ -127,7 +130,11 @@ public:
                                    const BranchProbability &Probability) const;
 
   virtual bool isPredicated(const MachineInstr *MI) const;
+  virtual bool isPredicated(unsigned Opcode) const;
+  virtual bool isPredicatedTrue(const MachineInstr *MI) const;
+  virtual bool isPredicatedTrue(unsigned Opcode) const;
   virtual bool isPredicatedNew(const MachineInstr *MI) const;
+  virtual bool isPredicatedNew(unsigned Opcode) const;
   virtual bool DefinesPredicate(MachineInstr *MI,
                                 std::vector<MachineOperand> &Pred) const;
   virtual bool
@@ -176,6 +183,7 @@ public:
   bool isConditionalLoad (const MachineInstr* MI) const;
   bool isConditionalStore(const MachineInstr* MI) const;
   bool isNewValueInst(const MachineInstr* MI) const;
+  bool isNewValue(const MachineInstr* MI) const;
   bool isDotNewInst(const MachineInstr* MI) const;
   bool isDeallocRet(const MachineInstr *MI) const;
   unsigned getInvertedPredicatedOpcode(const int Opc) const;
@@ -189,6 +197,8 @@ public:
 
   void immediateExtend(MachineInstr *MI) const;
   bool isConstExtended(MachineInstr *MI) const;
+  int getDotNewPredJumpOp(MachineInstr *MI,
+                      const MachineBranchProbabilityInfo *MBPI) const;
   unsigned getAddrMode(const MachineInstr* MI) const;
   bool isOperandExtended(const MachineInstr *MI,
                          unsigned short OperandNum) const;
@@ -197,6 +207,9 @@ public:
   int getMaxValue(const MachineInstr *MI) const;
   bool NonExtEquivalentExists (const MachineInstr *MI) const;
   short getNonExtOpcode(const MachineInstr *MI) const;
+  bool PredOpcodeHasJMP_c(Opcode_t Opcode) const;
+  bool PredOpcodeHasNot(Opcode_t Opcode) const;
+
 private:
   int getMatchingCondBranchOpcode(int Opc, bool sense) const;
 
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.td b/lib/Target/Hexagon/HexagonInstrInfo.td
index 74dc0ca..2a4b17b 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.td
+++ b/lib/Target/Hexagon/HexagonInstrInfo.td
@@ -14,6 +14,8 @@
 include "HexagonInstrFormats.td"
 include "HexagonOperands.td"
 
+//===----------------------------------------------------------------------===//
+
 // Multi-class for logical operators.
 multiclass ALU32_rr_ri<string OpcStr, SDNode OpNode> {
   def rr : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
@@ -34,12 +36,6 @@ multiclass CMP64_rr<string OpcStr, PatFrag OpNode> {
                  [(set (i1 PredRegs:$dst),
                        (OpNode (i64 DoubleRegs:$b), (i64 DoubleRegs:$c)))]>;
 }
-multiclass CMP32_rr<string OpcStr, PatFrag OpNode> {
-  def rr : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
-                 !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
-                 [(set (i1 PredRegs:$dst),
-                       (OpNode (i32 IntRegs:$b), (i32 IntRegs:$c)))]>;
-}
 
 multiclass CMP32_rr_ri_s10<string OpcStr, string CextOp, PatFrag OpNode> {
   let CextOpcode = CextOp in {
@@ -75,14 +71,6 @@ multiclass CMP32_rr_ri_u9<string OpcStr, string CextOp, PatFrag OpNode> {
   }
 }
 
-multiclass CMP32_ri_u8<string OpcStr, PatFrag OpNode> {
-let isExtendable = 1, opExtendable = 2, isExtentSigned = 0, opExtentBits = 8 in
-  def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, u8Ext:$c),
-                 !strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")),
-                 [(set (i1 PredRegs:$dst), (OpNode (i32 IntRegs:$b),
-                                                   u8ExtPred:$c))]>;
-}
-
 multiclass CMP32_ri_s8<string OpcStr, PatFrag OpNode> {
 let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 8 in
   def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, s8Ext:$c),
@@ -95,22 +83,30 @@ let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 8 in
 //===----------------------------------------------------------------------===//
 // ALU32/ALU (Instructions with register-register form)
 //===----------------------------------------------------------------------===//
-multiclass ALU32_Pbase<string mnemonic, bit isNot,
-                       bit isPredNew> {
+def SDTHexagonI64I32I32 : SDTypeProfile<1, 2,
+  [SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>;
+
+def HexagonWrapperCombineII :
+  SDNode<"HexagonISD::WrapperCombineII", SDTHexagonI64I32I32>;
 
-  let PNewValue = !if(isPredNew, "new", "") in
-  def NAME : ALU32_rr<(outs IntRegs:$dst),
+def HexagonWrapperCombineRR :
+  SDNode<"HexagonISD::WrapperCombineRR", SDTHexagonI64I32I32>;
+
+multiclass ALU32_Pbase<string mnemonic, RegisterClass RC, bit isNot,
+                       bit isPredNew> {
+  let isPredicatedNew = isPredNew in
+  def NAME : ALU32_rr<(outs RC:$dst),
             (ins PredRegs:$src1, IntRegs:$src2, IntRegs: $src3),
             !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew,".new) $dst = ",
             ") $dst = ")#mnemonic#"($src2, $src3)",
             []>;
 }
 
-multiclass ALU32_Pred<string mnemonic, bit PredNot> {
-  let PredSense = !if(PredNot, "false", "true") in {
-    defm _c#NAME : ALU32_Pbase<mnemonic, PredNot, 0>;
+multiclass ALU32_Pred<string mnemonic, RegisterClass RC, bit PredNot> {
+  let isPredicatedFalse = PredNot in {
+    defm _c#NAME : ALU32_Pbase<mnemonic, RC, PredNot, 0>;
     // Predicate new
-    defm _cdn#NAME : ALU32_Pbase<mnemonic, PredNot, 1>;
+    defm _cdn#NAME : ALU32_Pbase<mnemonic, RC, PredNot, 1>;
   }
 }
 
@@ -125,8 +121,8 @@ multiclass ALU32_base<string mnemonic, string CextOp, SDNode OpNode> {
                                               (i32 IntRegs:$src2)))]>;
 
     let neverHasSideEffects = 1, isPredicated = 1 in {
-      defm Pt : ALU32_Pred<mnemonic, 0>;
-      defm NotPt : ALU32_Pred<mnemonic, 1>;
+      defm Pt : ALU32_Pred<mnemonic, IntRegs, 0>;
+      defm NotPt : ALU32_Pred<mnemonic, IntRegs, 1>;
     }
   }
 }
@@ -140,11 +136,42 @@ let isCommutable = 1 in {
 
 defm SUB_rr : ALU32_base<"sub", "SUB", sub>, ImmRegRel, PredNewRel;
 
+// Combines the two integer registers SRC1 and SRC2 into a double register.
+let isPredicable = 1 in
+class T_Combine : ALU32_rr<(outs DoubleRegs:$dst),
+                           (ins IntRegs:$src1, IntRegs:$src2),
+            "$dst = combine($src1, $src2)",
+            [(set (i64 DoubleRegs:$dst),
+              (i64 (HexagonWrapperCombineRR (i32 IntRegs:$src1),
+                                            (i32 IntRegs:$src2))))]>;
+
+multiclass Combine_base {
+  let BaseOpcode = "combine" in {
+    def NAME : T_Combine;
+    let neverHasSideEffects = 1, isPredicated = 1 in {
+      defm Pt : ALU32_Pred<"combine", DoubleRegs, 0>;
+      defm NotPt : ALU32_Pred<"combine", DoubleRegs, 1>;
+    }
+  }
+}
+
+defm COMBINE_rr : Combine_base, PredNewRel;
+
+// Combines the two immediates SRC1 and SRC2 into a double register.
+class COMBINE_imm<Operand imm1, Operand imm2, PatLeaf pat1, PatLeaf pat2> :
+  ALU32_ii<(outs DoubleRegs:$dst), (ins imm1:$src1, imm2:$src2),
+  "$dst = combine(#$src1, #$src2)",
+  [(set (i64 DoubleRegs:$dst),
+        (i64 (HexagonWrapperCombineII (i32 pat1:$src1), (i32 pat2:$src2))))]>;
+
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 8 in
+def COMBINE_Ii : COMBINE_imm<s8Ext, s8Imm, s8ExtPred, s8ImmPred>;
+
 //===----------------------------------------------------------------------===//
 // ALU32/ALU (ADD with register-immediate form)
 //===----------------------------------------------------------------------===//
 multiclass ALU32ri_Pbase<string mnemonic, bit isNot, bit isPredNew> {
-  let PNewValue = !if(isPredNew, "new", "") in
+  let isPredicatedNew = isPredNew in
   def NAME : ALU32_ri<(outs IntRegs:$dst),
             (ins PredRegs:$src1, IntRegs:$src2, s8Ext: $src3),
             !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew,".new) $dst = ",
@@ -153,7 +180,7 @@ multiclass ALU32ri_Pbase<string mnemonic, bit isNot, bit isPredNew> {
 }
 
 multiclass ALU32ri_Pred<string mnemonic, bit PredNot> {
-  let PredSense = !if(PredNot, "false", "true") in {
+  let isPredicatedFalse = PredNot in {
     defm _c#NAME : ALU32ri_Pbase<mnemonic, PredNot, 0>;
     // Predicate new
     defm _cdn#NAME : ALU32ri_Pbase<mnemonic, PredNot, 1>;
@@ -189,11 +216,6 @@ def OR_ri : ALU32_ri<(outs IntRegs:$dst),
             [(set (i32 IntRegs:$dst), (or (i32 IntRegs:$src1),
                                           s10ExtPred:$src2))]>, ImmRegRel;
 
-def NOT_rr : ALU32_rr<(outs IntRegs:$dst),
-            (ins IntRegs:$src1),
-            "$dst = not($src1)",
-            [(set (i32 IntRegs:$dst), (not (i32 IntRegs:$src1)))]>;
-
 let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 10,
 InputType = "imm", CextOpcode = "AND" in
 def AND_ri : ALU32_ri<(outs IntRegs:$dst),
@@ -201,10 +223,7 @@ def AND_ri : ALU32_ri<(outs IntRegs:$dst),
             "$dst = and($src1, #$src2)",
             [(set (i32 IntRegs:$dst), (and (i32 IntRegs:$src1),
                                            s10ExtPred:$src2))]>, ImmRegRel;
-// Negate.
-def NEG : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
-          "$dst = neg($src1)",
-          [(set (i32 IntRegs:$dst), (ineg (i32 IntRegs:$src1)))]>;
+
 // Nop.
 let neverHasSideEffects = 1 in
 def NOP : ALU32_rr<(outs), (ins),
@@ -220,15 +239,21 @@ def SUB_ri : ALU32_ri<(outs IntRegs:$dst),
             [(set IntRegs:$dst, (sub s10ExtPred:$src1, IntRegs:$src2))]>,
             ImmRegRel;
 
+// Rd = not(Rs) gets mapped to Rd=sub(#-1, Rs).
+def : Pat<(not (i32 IntRegs:$src1)),
+          (SUB_ri -1, (i32 IntRegs:$src1))>;
+
+// Rd = neg(Rs) gets mapped to Rd=sub(#0, Rs).
+// Pattern definition for 'neg' was not necessary.
 
 multiclass TFR_Pred<bit PredNot> {
-  let PredSense = !if(PredNot, "false", "true") in {
+  let isPredicatedFalse = PredNot in {
     def _c#NAME : ALU32_rr<(outs IntRegs:$dst),
                            (ins PredRegs:$src1, IntRegs:$src2),
             !if(PredNot, "if (!$src1", "if ($src1")#") $dst = $src2",
             []>;
     // Predicate new
-    let PNewValue = "new" in
+    let isPredicatedNew = 1 in
     def _cdn#NAME : ALU32_rr<(outs IntRegs:$dst),
                              (ins PredRegs:$src1, IntRegs:$src2),
             !if(PredNot, "if (!$src1", "if ($src1")#".new) $dst = $src2",
@@ -274,10 +299,10 @@ class T_TFR64_Pred<bit PredNot, bit isPredNew>
 }
 
 multiclass TFR64_Pred<bit PredNot> {
-  let PredSense = !if(PredNot, "false", "true") in {
+  let isPredicatedFalse = PredNot in {
     def _c#NAME : T_TFR64_Pred<PredNot, 0>;
 
-    let PNewValue = "new" in
+    let isPredicatedNew = 1 in
     def _cdn#NAME : T_TFR64_Pred<PredNot, 1>; // Predicate new
   }
 }
@@ -309,14 +334,14 @@ multiclass TFR64_base<string BaseName> {
 }
 
 multiclass TFRI_Pred<bit PredNot> {
-  let isMoveImm = 1, PredSense = !if(PredNot, "false", "true") in {
+  let isMoveImm = 1, isPredicatedFalse = PredNot in {
     def _c#NAME : ALU32_ri<(outs IntRegs:$dst),
                            (ins PredRegs:$src1, s12Ext:$src2),
             !if(PredNot, "if (!$src1", "if ($src1")#") $dst = #$src2",
             []>;
 
     // Predicate new
-    let PNewValue = "new" in
+    let isPredicatedNew = 1 in
     def _cdn#NAME : ALU32_rr<(outs IntRegs:$dst),
                              (ins PredRegs:$src1, s12Ext:$src2),
             !if(PredNot, "if (!$src1", "if ($src1")#".new) $dst = #$src2",
@@ -359,52 +384,6 @@ def TFCR : CRInst<(outs CRRegs:$dst), (ins IntRegs:$src1),
 // ALU32/PERM +
 //===----------------------------------------------------------------------===//
 
-// Combine.
-
-def SDTHexagonI64I32I32 : SDTypeProfile<1, 2,
-  [SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>;
-
-def HexagonWrapperCombineII :
-  SDNode<"HexagonISD::WrapperCombineII", SDTHexagonI64I32I32>;
-def HexagonWrapperCombineRR :
-  SDNode<"HexagonISD::WrapperCombineRR", SDTHexagonI64I32I32>;
-
-// Combines the two integer registers SRC1 and SRC2 into a double register.
-let isPredicable = 1 in
-def COMBINE_rr : ALU32_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1,
-                                                       IntRegs:$src2),
-  "$dst = combine($src1, $src2)",
-  [(set (i64 DoubleRegs:$dst),
-        (i64 (HexagonWrapperCombineRR (i32 IntRegs:$src1),
-                                      (i32 IntRegs:$src2))))]>;
-
-// Rd=combine(Rt.[HL], Rs.[HL])
-class COMBINE_halves<string A, string B>: ALU32_rr<(outs IntRegs:$dst),
-                                                   (ins IntRegs:$src1,
-                                                        IntRegs:$src2),
-  "$dst = combine($src1."# A #", $src2."# B #")", []>;
-
-let isPredicable = 1 in {
-  def COMBINE_hh : COMBINE_halves<"H", "H">;
-  def COMBINE_hl : COMBINE_halves<"H", "L">;
-  def COMBINE_lh : COMBINE_halves<"L", "H">;
-  def COMBINE_ll : COMBINE_halves<"L", "L">;
-}
-
-def : Pat<(i32 (trunc (i64 (srl (i64 DoubleRegs:$a), (i32 16))))),
-  (COMBINE_lh (EXTRACT_SUBREG (i64 DoubleRegs:$a), subreg_hireg),
-              (EXTRACT_SUBREG (i64 DoubleRegs:$a), subreg_loreg))>;
-
-// Combines the two immediates SRC1 and SRC2 into a double register.
-class COMBINE_imm<Operand imm1, Operand imm2, PatLeaf pat1, PatLeaf pat2> :
-  ALU32_ii<(outs DoubleRegs:$dst), (ins imm1:$src1, imm2:$src2),
-  "$dst = combine(#$src1, #$src2)",
-  [(set (i64 DoubleRegs:$dst),
-        (i64 (HexagonWrapperCombineII (i32 pat1:$src1), (i32 pat2:$src2))))]>;
-
-let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 8 in
-def COMBINE_Ii : COMBINE_imm<s8Ext, s8Imm, s8ExtPred, s8ImmPred>;
-
 // Mux.
 def VMUX_prr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1,
                                                    DoubleRegs:$src2,
@@ -507,40 +486,24 @@ def : Pat <(sext_inreg (i32 IntRegs:$src1), i16),
 // ALU32/PRED +
 //===----------------------------------------------------------------------===//
 
-// Conditional combine.
-let neverHasSideEffects = 1, isPredicated = 1 in {
-def COMBINE_rr_cPt : ALU32_rr<(outs DoubleRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
-            "if ($src1) $dst = combine($src2, $src3)",
-            []>;
-
-let isPredicatedFalse = 1 in
-def COMBINE_rr_cNotPt : ALU32_rr<(outs DoubleRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
-            "if (!$src1) $dst = combine($src2, $src3)",
-            []>;
-
-let isPredicatedNew = 1 in
-def COMBINE_rr_cdnPt : ALU32_rr<(outs DoubleRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
-            "if ($src1.new) $dst = combine($src2, $src3)",
-            []>;
-
-let isPredicatedNew = 1, isPredicatedFalse = 1 in
-def COMBINE_rr_cdnNotPt : ALU32_rr<(outs DoubleRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
-            "if (!$src1.new) $dst = combine($src2, $src3)",
-            []>;
-}
-
 // Compare.
 defm CMPGTU : CMP32_rr_ri_u9<"cmp.gtu", "CMPGTU", setugt>, ImmRegRel;
 defm CMPGT : CMP32_rr_ri_s10<"cmp.gt", "CMPGT", setgt>, ImmRegRel;
-defm CMPLT : CMP32_rr<"cmp.lt", setlt>;
-defm CMPLTU : CMP32_rr<"cmp.ltu", setult>;
 defm CMPEQ : CMP32_rr_ri_s10<"cmp.eq", "CMPEQ", seteq>, ImmRegRel;
-defm CMPGE : CMP32_ri_s8<"cmp.ge", setge>;
-defm CMPGEU : CMP32_ri_u8<"cmp.geu", setuge>;
+
+// SDNodeXForm for converting a signed immediate C to C-1.
+def DEC_CONST_SIGNED : SDNodeXForm<imm, [{
+   // Return the signed 32-bit immediate const-1 as an SDNode.
+   int32_t imm = N->getSExtValue();
+   return XformSToSM1Imm(imm);
+}]>;
+
+// SDNodeXForm for converting an unsigned immediate C to C-1.
+def DEC_CONST_UNSIGNED : SDNodeXForm<imm, [{
+   // Return the unsigned 32-bit immediate const-1 as an SDNode.
+   uint32_t imm = N->getZExtValue();
+   return XformUToUM1Imm(imm);
+}]>;
 
 def CTLZ_rr : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1),
     "$dst = cl0($src1)",
@@ -774,112 +737,153 @@ def XOR_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1, PredRegs:$src2),
 // CR -
 //===----------------------------------------------------------------------===//
 
+def retflag : SDNode<"HexagonISD::RET_FLAG", SDTNone,
+                               [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+def eh_return: SDNode<"HexagonISD::EH_RETURN", SDTNone,
+                      [SDNPHasChain]>;
 
-//===----------------------------------------------------------------------===//
-// J +
-//===----------------------------------------------------------------------===//
-// Jump to address.
-let isBranch = 1, isTerminator=1, isBarrier = 1, isPredicable = 1 in {
-  def JMP : JInst< (outs),
-            (ins brtarget:$offset),
-            "jump $offset",
-            [(br bb:$offset)]>;
-}
+def SDHexagonBR_JT: SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
+def HexagonBR_JT: SDNode<"HexagonISD::BR_JT", SDHexagonBR_JT, [SDNPHasChain]>;
 
-// if (p0) jump
-let isBranch = 1, isTerminator=1, Defs = [PC],
-    isPredicated = 1 in {
-  def JMP_c : JInst< (outs),
-                 (ins PredRegs:$src, brtarget:$offset),
-                 "if ($src) jump $offset",
-                 [(brcond (i1 PredRegs:$src), bb:$offset)]>;
-}
+let InputType = "imm", isBarrier = 1, isPredicable = 1,
+Defs = [PC], isExtendable = 1, opExtendable = 0, isExtentSigned = 1,
+opExtentBits = 24 in
+class T_JMP <dag InsDag, list<dag> JumpList = []>
+            : JInst<(outs), InsDag,
+            "jump $dst" , JumpList> {
+    bits<24> dst;
+
+    let IClass = 0b0101;
+
+    let Inst{27-25} = 0b100;
+    let Inst{24-16} = dst{23-15};
+    let Inst{13-1} = dst{14-2};
+}
+
+let InputType = "imm", isExtendable = 1, opExtendable = 1, isExtentSigned = 1,
+Defs = [PC], isPredicated = 1, opExtentBits = 17 in
+class T_JMP_c <bit PredNot, bit isPredNew, bit isTaken>:
+            JInst<(outs ), (ins PredRegs:$src, brtarget:$dst),
+            !if(PredNot, "if (!$src", "if ($src")#
+            !if(isPredNew, ".new) ", ") ")#"jump"#
+            !if(isPredNew, !if(isTaken, ":t ", ":nt "), " ")#"$dst"> {
+
+    let isBrTaken = !if(isPredNew, !if(isTaken, "true", "false"), "");
+    let isPredicatedFalse = PredNot;
+    let isPredicatedNew = isPredNew;
+    bits<2> src;
+    bits<17> dst;
+
+    let IClass = 0b0101;
+
+    let Inst{27-24} = 0b1100;
+    let Inst{21} = PredNot;
+    let Inst{12} = !if(isPredNew, isTaken, zero);
+    let Inst{11} = isPredNew;
+    let Inst{9-8} = src;
+    let Inst{23-22} = dst{16-15};
+    let Inst{20-16} = dst{14-10};
+    let Inst{13} = dst{9};
+    let Inst{7-1} = dst{8-2};
+  }
 
-// if (!p0) jump
-let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC],
-    isPredicated = 1 in {
-  def JMP_cNot : JInst< (outs),
-                    (ins PredRegs:$src, brtarget:$offset),
-                    "if (!$src) jump $offset",
-                    []>;
+let isBarrier = 1, Defs = [PC], isPredicable = 1, InputType = "reg" in
+class T_JMPr<dag InsDag = (ins IntRegs:$dst)>
+            : JRInst<(outs ), InsDag,
+            "jumpr $dst" ,
+            []> {
+    bits<5> dst;
+
+    let IClass = 0b0101;
+    let Inst{27-21} = 0b0010100;
+    let Inst{20-16} = dst;
 }
 
-let isTerminator = 1, isBranch = 1, neverHasSideEffects = 1, Defs = [PC],
-    isPredicated = 1 in {
-  def BRCOND : JInst < (outs), (ins PredRegs:$pred, brtarget:$dst),
-               "if ($pred) jump $dst",
-               []>;
+let Defs = [PC], isPredicated = 1, InputType = "reg" in
+class T_JMPr_c <bit PredNot, bit isPredNew, bit isTaken>:
+            JRInst <(outs ), (ins PredRegs:$src, IntRegs:$dst),
+            !if(PredNot, "if (!$src", "if ($src")#
+            !if(isPredNew, ".new) ", ") ")#"jumpr"#
+            !if(isPredNew, !if(isTaken, ":t ", ":nt "), " ")#"$dst"> {
+
+    let isBrTaken = !if(isPredNew, !if(isTaken, "true", "false"), "");
+    let isPredicatedFalse = PredNot;
+    let isPredicatedNew = isPredNew;
+    bits<2> src;
+    bits<5> dst;
+
+    let IClass = 0b0101;
+
+    let Inst{27-22} = 0b001101;
+    let Inst{21} = PredNot;
+    let Inst{20-16} = dst;
+    let Inst{12} = !if(isPredNew, isTaken, zero);
+    let Inst{11} = isPredNew;
+    let Inst{9-8} = src;
+    let Predicates = !if(isPredNew, [HasV3T], [HasV2T]);
+    let validSubTargets = !if(isPredNew, HasV3SubT, HasV2SubT);
 }
 
-// Jump to address conditioned on new predicate.
-// if (p0) jump:t
-let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC],
-    isPredicated = 1 in {
-  def JMP_cdnPt : JInst< (outs),
-                   (ins PredRegs:$src, brtarget:$offset),
-                   "if ($src.new) jump:t $offset",
-                   []>;
+multiclass JMP_Pred<bit PredNot> {
+  def _#NAME : T_JMP_c<PredNot, 0, 0>;
+  // Predicate new
+  def _#NAME#new_t  : T_JMP_c<PredNot, 1, 1>; // taken
+  def _#NAME#new_nt : T_JMP_c<PredNot, 1, 0>; // not taken
 }
 
-// if (!p0) jump:t
-let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC],
-    isPredicated = 1 in {
-  def JMP_cdnNotPt : JInst< (outs),
-                      (ins PredRegs:$src, brtarget:$offset),
-                      "if (!$src.new) jump:t $offset",
-                      []>;
+multiclass JMP_base<string BaseOp> {
+  let BaseOpcode = BaseOp in {
+    def NAME : T_JMP<(ins brtarget:$dst), [(br bb:$dst)]>;
+    defm t : JMP_Pred<0>;
+    defm f : JMP_Pred<1>;
+  }
 }
 
-// Not taken.
-let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC],
-    isPredicated = 1 in {
-  def JMP_cdnPnt : JInst< (outs),
-                    (ins PredRegs:$src, brtarget:$offset),
-                    "if ($src.new) jump:nt $offset",
-                    []>;
+multiclass JMPR_Pred<bit PredNot> {
+  def NAME: T_JMPr_c<PredNot, 0, 0>;
+  // Predicate new
+  def NAME#new_tV3  : T_JMPr_c<PredNot, 1, 1>; // taken
+  def NAME#new_ntV3 : T_JMPr_c<PredNot, 1, 0>; // not taken
 }
 
-// Not taken.
-let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC],
-    isPredicated = 1 in {
-  def JMP_cdnNotPnt : JInst< (outs),
-                       (ins PredRegs:$src, brtarget:$offset),
-                       "if (!$src.new) jump:nt $offset",
-                       []>;
+multiclass JMPR_base<string BaseOp> {
+  let BaseOpcode = BaseOp in {
+    def NAME : T_JMPr;
+    defm _t : JMPR_Pred<0>;
+    defm _f : JMPR_Pred<1>;
+  }
 }
-//===----------------------------------------------------------------------===//
-// J -
-//===----------------------------------------------------------------------===//
 
-//===----------------------------------------------------------------------===//
-// JR +
-//===----------------------------------------------------------------------===//
-def retflag : SDNode<"HexagonISD::RET_FLAG", SDTNone,
-                               [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+let isTerminator = 1, neverHasSideEffects = 1 in {
+let isBranch = 1 in
+defm JMP : JMP_base<"JMP">, PredNewRel;
 
-// Jump to address from register.
-let isPredicable =1, isReturn = 1, isTerminator = 1, isBarrier = 1,
-  Defs = [PC], Uses = [R31] in {
-  def JMPR: JRInst<(outs), (ins),
-                   "jumpr r31",
-                   [(retflag)]>;
-}
+let isBranch = 1, isIndirectBranch = 1 in
+defm JMPR : JMPR_base<"JMPr">, PredNewRel;
 
-// Jump to address from register.
-let isReturn = 1, isTerminator = 1, isBarrier = 1, isPredicated = 1,
-  Defs = [PC], Uses = [R31] in {
-  def JMPR_cPt: JRInst<(outs), (ins PredRegs:$src1),
-                       "if ($src1) jumpr r31",
-                       []>;
+let isReturn = 1, isCodeGenOnly = 1 in
+defm JMPret : JMPR_base<"JMPret">, PredNewRel;
 }
 
-// Jump to address from register.
-let isReturn = 1, isTerminator = 1, isBarrier = 1, isPredicated = 1,
-  Defs = [PC], Uses = [R31] in {
-  def JMPR_cNotPt: JRInst<(outs), (ins PredRegs:$src1),
-                          "if (!$src1) jumpr r31",
-                          []>;
-}
+def : Pat<(retflag),
+          (JMPret (i32 R31))>;
+
+def : Pat <(brcond (i1 PredRegs:$src1), bb:$offset),
+      (JMP_t (i1 PredRegs:$src1), bb:$offset)>;
+
+// A return through builtin_eh_return.
+let isReturn = 1, isTerminator = 1, isBarrier = 1, neverHasSideEffects = 1,
+isCodeGenOnly = 1, Defs = [PC], Uses = [R28], isPredicable = 0 in
+def EH_RETURN_JMPR : T_JMPr;
+
+def : Pat<(eh_return),
+          (EH_RETURN_JMPR (i32 R31))>;
+
+def : Pat<(HexagonBR_JT (i32 IntRegs:$dst)),
+          (JMPR (i32 IntRegs:$dst))>;
+
+def : Pat<(brind (i32 IntRegs:$dst)),
+          (JMPR (i32 IntRegs:$dst))>;
 
 //===----------------------------------------------------------------------===//
 // JR -
@@ -892,7 +896,7 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1, isPredicated = 1,
 // Load -- MEMri operand
 multiclass LD_MEMri_Pbase<string mnemonic, RegisterClass RC,
                           bit isNot, bit isPredNew> {
-  let PNewValue = !if(isPredNew, "new", "") in
+  let isPredicatedNew = isPredNew in
   def NAME : LDInst2<(outs RC:$dst),
                        (ins PredRegs:$src1, MEMri:$addr),
             !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
@@ -901,7 +905,7 @@ multiclass LD_MEMri_Pbase<string mnemonic, RegisterClass RC,
 }
 
 multiclass LD_MEMri_Pred<string mnemonic, RegisterClass RC, bit PredNot> {
-  let PredSense = !if(PredNot, "false", "true") in {
+  let isPredicatedFalse = PredNot in {
     defm _c#NAME : LD_MEMri_Pbase<mnemonic, RC, PredNot, 0>;
     // Predicate new
     defm _cdn#NAME : LD_MEMri_Pbase<mnemonic, RC, PredNot, 1>;
@@ -958,7 +962,7 @@ def : Pat < (i64 (load ADDRriS11_3:$addr)),
 // Load - Base with Immediate offset addressing mode
 multiclass LD_Idxd_Pbase<string mnemonic, RegisterClass RC, Operand predImmOp,
                         bit isNot, bit isPredNew> {
-  let PNewValue = !if(isPredNew, "new", "") in
+  let isPredicatedNew = isPredNew in
   def NAME : LDInst2<(outs RC:$dst),
                      (ins PredRegs:$src1, IntRegs:$src2, predImmOp:$src3),
             !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
@@ -968,7 +972,7 @@ multiclass LD_Idxd_Pbase<string mnemonic, RegisterClass RC, Operand predImmOp,
 
 multiclass LD_Idxd_Pred<string mnemonic, RegisterClass RC, Operand predImmOp,
                         bit PredNot> {
-  let PredSense = !if(PredNot, "false", "true") in {
+  let isPredicatedFalse = PredNot in {
     defm _c#NAME : LD_Idxd_Pbase<mnemonic, RC, predImmOp, PredNot, 0>;
     // Predicate new
     defm _cdn#NAME : LD_Idxd_Pbase<mnemonic, RC, predImmOp, PredNot, 1>;
@@ -1038,7 +1042,7 @@ def : Pat < (i64 (load (add IntRegs:$src1, s11_3ExtPred:$offset))),
 
 multiclass LD_PostInc_Pbase<string mnemonic, RegisterClass RC, Operand ImmOp,
                             bit isNot, bit isPredNew> {
-  let PNewValue = !if(isPredNew, "new", "") in
+  let isPredicatedNew = isPredNew in
   def NAME : LDInst2PI<(outs RC:$dst, IntRegs:$dst2),
                        (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset),
             !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
@@ -1049,7 +1053,7 @@ multiclass LD_PostInc_Pbase<string mnemonic, RegisterClass RC, Operand ImmOp,
 
 multiclass LD_PostInc_Pred<string mnemonic, RegisterClass RC,
                            Operand ImmOp, bit PredNot> {
-  let PredSense = !if(PredNot, "false", "true") in {
+  let isPredicatedFalse = PredNot in {
     defm _c#NAME : LD_PostInc_Pbase<mnemonic, RC, ImmOp, PredNot, 0>;
     // Predicate new
     let Predicates = [HasV4T], validSubTargets = HasV4SubT in
@@ -1366,7 +1370,7 @@ def SUBri_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1,
 
 multiclass ST_PostInc_Pbase<string mnemonic, RegisterClass RC, Operand ImmOp,
                             bit isNot, bit isPredNew> {
-  let PNewValue = !if(isPredNew, "new", "") in
+  let isPredicatedNew = isPredNew in
   def NAME : STInst2PI<(outs IntRegs:$dst),
             (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset, RC:$src3),
             !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
@@ -1377,7 +1381,7 @@ multiclass ST_PostInc_Pbase<string mnemonic, RegisterClass RC, Operand ImmOp,
 
 multiclass ST_PostInc_Pred<string mnemonic, RegisterClass RC,
                            Operand ImmOp, bit PredNot> {
-  let PredSense = !if(PredNot, "false", "true") in {
+  let isPredicatedFalse = PredNot in {
     defm _c#NAME# : ST_PostInc_Pbase<mnemonic, RC, ImmOp, PredNot, 0>;
     // Predicate new
     let Predicates = [HasV4T], validSubTargets = HasV4SubT in
@@ -1431,7 +1435,7 @@ def : Pat<(post_store (i64 DoubleRegs:$src1), IntRegs:$src2,
 //===----------------------------------------------------------------------===//
 multiclass ST_MEMri_Pbase<string mnemonic, RegisterClass RC, bit isNot,
                           bit isPredNew> {
-  let PNewValue = !if(isPredNew, "new", "") in
+  let isPredicatedNew = isPredNew in
   def NAME : STInst2<(outs),
             (ins PredRegs:$src1, MEMri:$addr, RC: $src2),
             !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
@@ -1440,7 +1444,7 @@ multiclass ST_MEMri_Pbase<string mnemonic, RegisterClass RC, bit isNot,
 }
 
 multiclass ST_MEMri_Pred<string mnemonic, RegisterClass RC, bit PredNot> {
-  let PredSense = !if(PredNot, "false", "true") in {
+  let isPredicatedFalse = PredNot in {
     defm _c#NAME : ST_MEMri_Pbase<mnemonic, RC, PredNot, 0>;
 
     // Predicate new
@@ -1497,7 +1501,7 @@ def : Pat<(store (i64 DoubleRegs:$src1), ADDRriS11_3:$addr),
 //===----------------------------------------------------------------------===//
 multiclass ST_Idxd_Pbase<string mnemonic, RegisterClass RC, Operand predImmOp,
                         bit isNot, bit isPredNew> {
-  let PNewValue = !if(isPredNew, "new", "") in
+  let isPredicatedNew = isPredNew in
   def NAME : STInst2<(outs),
             (ins PredRegs:$src1, IntRegs:$src2, predImmOp:$src3, RC: $src4),
             !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
@@ -1507,7 +1511,7 @@ multiclass ST_Idxd_Pbase<string mnemonic, RegisterClass RC, Operand predImmOp,
 
 multiclass ST_Idxd_Pred<string mnemonic, RegisterClass RC, Operand predImmOp,
                         bit PredNot> {
-  let PredSense = !if(PredNot, "false", "true"), isPredicated = 1 in {
+  let isPredicatedFalse = PredNot, isPredicated = 1 in {
     defm _c#NAME : ST_Idxd_Pbase<mnemonic, RC, predImmOp, PredNot, 0>;
 
     // Predicate new
@@ -2023,20 +2027,18 @@ let isCall = 1, neverHasSideEffects = 1,
               []>;
  }
 
-// Tail Calls.
-let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1 in {
-  def TCRETURNtg : JInst<(outs), (ins calltarget:$dst),
-             "jump $dst // TAILCALL", []>;
-}
-let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1 in {
-  def TCRETURNtext : JInst<(outs), (ins calltarget:$dst),
-             "jump $dst // TAILCALL", []>;
-}
 
-let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1 in {
-  def TCRETURNR : JInst<(outs), (ins IntRegs:$dst),
-             "jumpr $dst // TAILCALL", []>;
+// Indirect tail-call.
+let isCodeGenOnly = 1, isCall = 1, isReturn = 1  in
+def TCRETURNR : T_JMPr;
+
+// Direct tail-calls.
+let isCall = 1, isReturn = 1, isBarrier = 1, isPredicable = 0,
+isTerminator = 1, isCodeGenOnly = 1 in {
+  def TCRETURNtg   : T_JMP<(ins calltarget:$dst)>;
+  def TCRETURNtext : T_JMP<(ins calltarget:$dst)>;
 }
+
 // Map call instruction.
 def : Pat<(call (i32 IntRegs:$dst)),
       (CALLR (i32 IntRegs:$dst))>, Requires<[HasV2TOnly]>;
@@ -2133,10 +2135,11 @@ def : Pat <(add (i1 PredRegs:$src1), -1),
 
 // Map from p0 = setlt(r0, r1) r2 = mux(p0, r3, r4) =>
 //   p0 = cmp.lt(r0, r1), r0 = mux(p0, r2, r1).
+// cmp.lt(r0, r1) -> cmp.gt(r1, r0)
 def : Pat <(select (i1 (setlt (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
                    (i32 IntRegs:$src3),
                    (i32 IntRegs:$src4)),
-      (i32 (TFR_condset_rr (CMPLTrr (i32 IntRegs:$src1), (i32 IntRegs:$src2)),
+      (i32 (TFR_condset_rr (CMPGTrr (i32 IntRegs:$src2), (i32 IntRegs:$src1)),
                            (i32 IntRegs:$src4), (i32 IntRegs:$src3)))>,
       Requires<[HasV2TOnly]>;
 
@@ -2154,18 +2157,25 @@ def : Pat <(select (not (i1 PredRegs:$src1)), s12ImmPred:$src2,
 
 // Map from p0 = pnot(p0); r0 = mux(p0, r1, #i)
 // => r0 = TFR_condset_ir(p0, #i, r1)
-def : Pat <(select (not PredRegs:$src1), IntRegs:$src2, s12ImmPred:$src3),
+def : Pat <(select (not (i1 PredRegs:$src1)), IntRegs:$src2, s12ImmPred:$src3),
       (i32 (TFR_condset_ir (i1 PredRegs:$src1), s12ImmPred:$src3,
                            (i32 IntRegs:$src2)))>;
 
 // Map from p0 = pnot(p0); if (p0) jump => if (!p0) jump.
-def : Pat <(brcond (not PredRegs:$src1), bb:$offset),
-      (JMP_cNot (i1 PredRegs:$src1), bb:$offset)>;
+def : Pat <(brcond (not (i1 PredRegs:$src1)), bb:$offset),
+      (JMP_f (i1 PredRegs:$src1), bb:$offset)>;
 
 // Map from p2 = pnot(p2); p1 = and(p0, p2) => p1 = and(p0, !p2).
-def : Pat <(and PredRegs:$src1, (not PredRegs:$src2)),
+def : Pat <(and (i1 PredRegs:$src1), (not (i1 PredRegs:$src2))),
       (i1 (AND_pnotp (i1 PredRegs:$src1), (i1 PredRegs:$src2)))>;
 
+
+let AddedComplexity = 100 in
+def : Pat <(i64 (zextloadi1 (HexagonCONST32 tglobaladdr:$global))),
+      (i64 (COMBINE_rr (TFRI 0),
+                       (LDriub_indexed (CONST32_set tglobaladdr:$global), 0)))>,
+      Requires<[NoV4T]>;
+
 // Map from i1 loads to 32 bits. This assumes that the i1* is byte aligned.
 let AddedComplexity = 10 in
 def : Pat <(i32 (zextloadi1 ADDRriS11_0:$addr)),
@@ -2186,43 +2196,46 @@ def : Pat <(i64 (sext_inreg (i64 DoubleRegs:$src1), i8)),
                                                  subreg_loreg))))))>;
 
 // We want to prevent emitting pnot's as much as possible.
-// Map brcond with an unsupported setcc to a JMP_cNot.
+// Map brcond with an unsupported setcc to a JMP_f.
 def : Pat <(brcond (i1 (setne (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
                         bb:$offset),
-      (JMP_cNot (CMPEQrr (i32 IntRegs:$src1), (i32 IntRegs:$src2)),
+      (JMP_f (CMPEQrr (i32 IntRegs:$src1), (i32 IntRegs:$src2)),
                 bb:$offset)>;
 
 def : Pat <(brcond (i1 (setne (i32 IntRegs:$src1), s10ImmPred:$src2)),
                         bb:$offset),
-      (JMP_cNot (CMPEQri (i32 IntRegs:$src1), s10ImmPred:$src2), bb:$offset)>;
+      (JMP_f (CMPEQri (i32 IntRegs:$src1), s10ImmPred:$src2), bb:$offset)>;
 
 def : Pat <(brcond (i1 (setne (i1 PredRegs:$src1), (i1 -1))), bb:$offset),
-      (JMP_cNot (i1 PredRegs:$src1), bb:$offset)>;
+      (JMP_f (i1 PredRegs:$src1), bb:$offset)>;
 
 def : Pat <(brcond (i1 (setne (i1 PredRegs:$src1), (i1 0))), bb:$offset),
-      (JMP_c (i1 PredRegs:$src1), bb:$offset)>;
+      (JMP_t (i1 PredRegs:$src1), bb:$offset)>;
 
+// cmp.lt(Rs, Imm) -> !cmp.ge(Rs, Imm) -> !cmp.gt(Rs, Imm-1)
 def : Pat <(brcond (i1 (setlt (i32 IntRegs:$src1), s8ImmPred:$src2)),
                         bb:$offset),
-      (JMP_cNot (CMPGEri (i32 IntRegs:$src1), s8ImmPred:$src2), bb:$offset)>;
+      (JMP_f (CMPGTri (i32 IntRegs:$src1),
+                (DEC_CONST_SIGNED s8ImmPred:$src2)), bb:$offset)>;
 
+// cmp.lt(r0, r1) -> cmp.gt(r1, r0)
 def : Pat <(brcond (i1 (setlt (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
                         bb:$offset),
-      (JMP_c (CMPLTrr (i32 IntRegs:$src1), (i32 IntRegs:$src2)), bb:$offset)>;
+      (JMP_t (CMPGTrr (i32 IntRegs:$src2), (i32 IntRegs:$src1)), bb:$offset)>;
 
 def : Pat <(brcond (i1 (setuge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
                    bb:$offset),
-      (JMP_cNot (CMPGTU64rr (i64 DoubleRegs:$src2), (i64 DoubleRegs:$src1)),
+      (JMP_f (CMPGTU64rr (i64 DoubleRegs:$src2), (i64 DoubleRegs:$src1)),
                    bb:$offset)>;
 
 def : Pat <(brcond (i1 (setule (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
                         bb:$offset),
-      (JMP_cNot (CMPGTUrr (i32 IntRegs:$src1), (i32 IntRegs:$src2)),
+      (JMP_f (CMPGTUrr (i32 IntRegs:$src1), (i32 IntRegs:$src2)),
                 bb:$offset)>;
 
 def : Pat <(brcond (i1 (setule (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
                    bb:$offset),
-      (JMP_cNot (CMPGTU64rr (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)),
+      (JMP_f (CMPGTU64rr (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)),
                 bb:$offset)>;
 
 // Map from a 64-bit select to an emulated 64-bit mux.
@@ -2300,8 +2313,8 @@ def : Pat<(i64 (anyext (i32 IntRegs:$src1))),
 
 // Map cmple -> cmpgt.
 // rs <= rt -> !(rs > rt).
-def : Pat<(i1 (setle (i32 IntRegs:$src1), s10ImmPred:$src2)),
-      (i1 (NOT_p (CMPGTri (i32 IntRegs:$src1), s10ImmPred:$src2)))>;
+def : Pat<(i1 (setle (i32 IntRegs:$src1), s10ExtPred:$src2)),
+      (i1 (NOT_p (CMPGTri (i32 IntRegs:$src1), s10ExtPred:$src2)))>;
 
 // rs <= rt -> !(rs > rt).
 def : Pat<(i1 (setle (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
@@ -2314,8 +2327,8 @@ def : Pat<(i1 (setle (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
 // Map cmpne -> cmpeq.
 // Hexagon_TODO: We should improve on this.
 // rs != rt -> !(rs == rt).
-def : Pat <(i1 (setne (i32 IntRegs:$src1), s10ImmPred:$src2)),
-      (i1 (NOT_p(i1 (CMPEQri (i32 IntRegs:$src1), s10ImmPred:$src2))))>;
+def : Pat <(i1 (setne (i32 IntRegs:$src1), s10ExtPred:$src2)),
+      (i1 (NOT_p(i1 (CMPEQri (i32 IntRegs:$src1), s10ExtPred:$src2))))>;
 
 // Map cmpne(Rs) -> !cmpeqe(Rs).
 // rs != rt -> !(rs == rt).
@@ -2337,8 +2350,9 @@ def : Pat <(i1 (setne (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
 def : Pat <(i1 (setge (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
       (i1 (NOT_p (i1 (CMPGTrr (i32 IntRegs:$src2), (i32 IntRegs:$src1)))))>;
 
-def : Pat <(i1 (setge (i32 IntRegs:$src1), s8ImmPred:$src2)),
-      (i1 (CMPGEri (i32 IntRegs:$src1), s8ImmPred:$src2))>;
+// cmpge(Rs, Imm) -> cmpgt(Rs, Imm-1)
+def : Pat <(i1 (setge (i32 IntRegs:$src1), s8ExtPred:$src2)),
+      (i1 (CMPGTri (i32 IntRegs:$src1), (DEC_CONST_SIGNED s8ExtPred:$src2)))>;
 
 // Map cmpge(Rss, Rtt) -> !cmpgt(Rtt, Rss).
 // rss >= rtt -> !(rtt > rss).
@@ -2347,9 +2361,10 @@ def : Pat <(i1 (setge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
                                 (i64 DoubleRegs:$src1)))))>;
 
 // Map cmplt(Rs, Imm) -> !cmpge(Rs, Imm).
+// !cmpge(Rs, Imm) -> !cmpgt(Rs, Imm-1).
 // rs < rt -> !(rs >= rt).
-def : Pat <(i1 (setlt (i32 IntRegs:$src1), s8ImmPred:$src2)),
-      (i1 (NOT_p (CMPGEri (i32 IntRegs:$src1), s8ImmPred:$src2)))>;
+def : Pat <(i1 (setlt (i32 IntRegs:$src1), s8ExtPred:$src2)),
+      (i1 (NOT_p (CMPGTri (i32 IntRegs:$src1), (DEC_CONST_SIGNED s8ExtPred:$src2))))>;
 
 // Map cmplt(Rs, Rt) -> cmpgt(Rt, Rs).
 // rs < rt -> rt > rs.
@@ -2373,13 +2388,17 @@ def : Pat <(i1 (setult (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
 def : Pat <(i1 (setult (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
       (i1 (CMPGTU64rr (i64 DoubleRegs:$src2), (i64 DoubleRegs:$src1)))>;
 
-// Generate cmpgeu(Rs, #u8)
-def : Pat <(i1 (setuge (i32 IntRegs:$src1), u8ImmPred:$src2)),
-      (i1 (CMPGEUri (i32 IntRegs:$src1), u8ImmPred:$src2))>;
+// Generate cmpgeu(Rs, #0) -> cmpeq(Rs, Rs); Rs >=u 0 is always true.
+def : Pat <(i1 (setuge (i32 IntRegs:$src1), 0)),
+      (i1 (CMPEQrr (i32 IntRegs:$src1), (i32 IntRegs:$src1)))>;
+
+// Generate cmpgeu(Rs, #u8) -> cmpgtu(Rs, #u8-1)
+def : Pat <(i1 (setuge (i32 IntRegs:$src1), u8ExtPred:$src2)),
+      (i1 (CMPGTUri (i32 IntRegs:$src1), (DEC_CONST_UNSIGNED u8ExtPred:$src2)))>;
 
 // Generate cmpgtu(Rs, #u9)
-def : Pat <(i1 (setugt (i32 IntRegs:$src1), u9ImmPred:$src2)),
-      (i1 (CMPGTUri (i32 IntRegs:$src1), u9ImmPred:$src2))>;
+def : Pat <(i1 (setugt (i32 IntRegs:$src1), u9ExtPred:$src2)),
+      (i1 (CMPGTUri (i32 IntRegs:$src1), u9ExtPred:$src2))>;
 
 // Map from Rs >= Rt -> !(Rt > Rs).
 // rs >= rt -> !(rt > rs).
@@ -2391,7 +2410,7 @@ def : Pat <(i1 (setuge (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
 def : Pat <(i1 (setuge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
       (i1 (NOT_p (CMPGTU64rr (i64 DoubleRegs:$src2), (i64 DoubleRegs:$src1))))>;
 
-// Map from cmpleu(Rs, Rs) -> !cmpgtu(Rs, Rs).
+// Map from cmpleu(Rs, Rt) -> !cmpgtu(Rs, Rt).
 // Map from (Rs <= Rt) -> !(Rs > Rt).
 def : Pat <(i1 (setule (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
       (i1 (NOT_p (CMPGTUrr (i32 IntRegs:$src1), (i32 IntRegs:$src2))))>;
@@ -2487,6 +2506,13 @@ def:  Pat <(i64 (zextloadi32 ADDRriS11_2:$src1)),
       (i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>,
       Requires<[NoV4T]>;
 
+let AddedComplexity = 100 in
+def:  Pat <(i64 (zextloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))),
+      (i64 (COMBINE_rr (TFRI 0), (LDriw_indexed IntRegs:$src1,
+                                  s11_2ExtPred:$offset)))>,
+      Requires<[NoV4T]>;
+
+let AddedComplexity = 10 in
 def:  Pat <(i32 (zextloadi1 ADDRriS11_0:$src1)),
       (i32 (LDriw ADDRriS11_0:$src1))>;
 
@@ -2503,6 +2529,28 @@ def : Pat <(i64 (anyext (i1 PredRegs:$src1))),
       (i64 (SXTW (i32 (MUX_ii (i1 PredRegs:$src1), 1, 0))))>;
 
 
+// Map or(shl(Rss, #32), zext(Rt)) -> combine(Rss.lo, Rt).
+let AddedComplexity = 100 in
+def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh),
+                           (i32 32))),
+               (i64 (zextloadi32 (i32 (add IntRegs:$src2,
+                                         s11_2ExtPred:$offset2)))))),
+        (i64 (COMBINE_rr (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg),
+                        (LDriw_indexed IntRegs:$src2,
+                                       s11_2ExtPred:$offset2)))>;
+
+def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh),
+                           (i32 32))),
+               (i64 (zextloadi32 ADDRriS11_2:$srcLow)))),
+        (i64 (COMBINE_rr (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg),
+                        (LDriw ADDRriS11_2:$srcLow)))>;
+
+def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh),
+                           (i32 32))),
+               (i64 (zext (i32 IntRegs:$srcLow))))),
+        (i64 (COMBINE_rr (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg),
+                        IntRegs:$srcLow))>;
+
 // Any extended 64-bit load.
 // anyext i32 -> i64
 def:  Pat <(i64 (extloadi32 ADDRriS11_2:$src1)),
@@ -2637,19 +2685,6 @@ let AddedComplexity = 100 in
 def : Pat<(i32 (sext_inreg (Hexagon_ARGEXTEND (i32 IntRegs:$src1)), i16)),
       (COPY (i32 IntRegs:$src1))>;
 
-def SDHexagonBR_JT: SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
-def HexagonBR_JT: SDNode<"HexagonISD::BR_JT", SDHexagonBR_JT, [SDNPHasChain]>;
-
-let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in
-def BR_JT : JRInst<(outs), (ins IntRegs:$src),
-                   "jumpr $src",
-                   [(HexagonBR_JT (i32 IntRegs:$src))]>;
-
-let isBranch=1, isIndirectBranch=1, isTerminator=1 in
-def BRIND : JRInst<(outs), (ins IntRegs:$src),
-                   "jumpr $src",
-                   [(brind (i32 IntRegs:$src))]>;
-
 def HexagonWrapperJT: SDNode<"HexagonISD::WrapperJT", SDTIntUnaryOp>;
 
 def : Pat<(HexagonWrapperJT tjumptable:$dst),
diff --git a/lib/Target/Hexagon/HexagonInstrInfoV3.td b/lib/Target/Hexagon/HexagonInstrInfoV3.td
index 157ab3d..7e75554 100644
--- a/lib/Target/Hexagon/HexagonInstrInfoV3.td
+++ b/lib/Target/Hexagon/HexagonInstrInfoV3.td
@@ -11,6 +11,11 @@
 //
 //===----------------------------------------------------------------------===//
 
+def callv3 : SDNode<"HexagonISD::CALLv3", SDT_SPCall,
+           [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>;
+
+def callv3nr : SDNode<"HexagonISD::CALLv3nr", SDT_SPCall,
+           [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>;
 
 //===----------------------------------------------------------------------===//
 // J +
@@ -40,41 +45,6 @@ let isCall = 1, neverHasSideEffects = 1,
               []>, Requires<[HasV3TOnly]>;
  }
 
-
-// Jump to address from register
-// if(p?.new) jumpr:t r?
-let isReturn = 1, isTerminator = 1, isBarrier = 1,
-  Defs = [PC], Uses = [R31] in {
-  def JMPR_cdnPt_V3: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2),
-                       "if ($src1.new) jumpr:t $src2",
-                       []>, Requires<[HasV3T]>;
-}
-
-// if (!p?.new) jumpr:t r?
-let isReturn = 1, isTerminator = 1, isBarrier = 1,
-  Defs = [PC], Uses = [R31] in {
-  def JMPR_cdnNotPt_V3: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2),
-                       "if (!$src1.new) jumpr:t $src2",
-                       []>, Requires<[HasV3T]>;
-}
-
-// Not taken.
-// if(p?.new) jumpr:nt r?
-let isReturn = 1, isTerminator = 1, isBarrier = 1,
-  Defs = [PC], Uses = [R31] in {
-  def JMPR_cdnPnt: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2),
-                       "if ($src1.new) jumpr:nt $src2",
-                       []>, Requires<[HasV3T]>;
-}
-
-// if (!p?.new) jumpr:nt r?
-let isReturn = 1, isTerminator = 1, isBarrier = 1,
-  Defs = [PC], Uses = [R31] in {
-  def JMPR_cdnNotPnt: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2),
-                       "if (!$src1.new) jumpr:nt $src2",
-                       []>, Requires<[HasV3T]>;
-}
-
 //===----------------------------------------------------------------------===//
 // JR -
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td
index cd0e475..933239d 100644
--- a/lib/Target/Hexagon/HexagonInstrInfoV4.td
+++ b/lib/Target/Hexagon/HexagonInstrInfoV4.td
@@ -209,105 +209,31 @@ def COMBINE_iI_V4 : ALU32_ii<(outs DoubleRegs:$dst),
 //===----------------------------------------------------------------------===//
 // LD +
 //===----------------------------------------------------------------------===//
-//
-// These absolute set addressing mode instructions accept immediate as
-// an operand. We have duplicated these patterns to take global address.
-
+//===----------------------------------------------------------------------===//
+// Template class for load instructions with Absolute set addressing mode.
+//===----------------------------------------------------------------------===//
 let isExtended = 1, opExtendable = 2, neverHasSideEffects = 1,
-validSubTargets = HasV4SubT in {
-def LDrid_abs_setimm_V4 : LDInst2<(outs DoubleRegs:$dst1, IntRegs:$dst2),
-            (ins u0AlwaysExt:$addr),
-            "$dst1 = memd($dst2=##$addr)",
-            []>,
-            Requires<[HasV4T]>;
-
-// Rd=memb(Re=#U6)
-def LDrib_abs_setimm_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
-            (ins u0AlwaysExt:$addr),
-            "$dst1 = memb($dst2=##$addr)",
-            []>,
-            Requires<[HasV4T]>;
-
-// Rd=memh(Re=#U6)
-def LDrih_abs_setimm_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
-            (ins u0AlwaysExt:$addr),
-            "$dst1 = memh($dst2=##$addr)",
-            []>,
-            Requires<[HasV4T]>;
-
-// Rd=memub(Re=#U6)
-def LDriub_abs_setimm_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
+validSubTargets = HasV4SubT in
+class T_LD_abs_set<string mnemonic, RegisterClass RC>:
+            LDInst2<(outs RC:$dst1, IntRegs:$dst2),
             (ins u0AlwaysExt:$addr),
-            "$dst1 = memub($dst2=##$addr)",
+            "$dst1 = "#mnemonic#"($dst2=##$addr)",
             []>,
             Requires<[HasV4T]>;
 
-// Rd=memuh(Re=#U6)
-def LDriuh_abs_setimm_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
-            (ins u0AlwaysExt:$addr),
-            "$dst1 = memuh($dst2=##$addr)",
-            []>,
-            Requires<[HasV4T]>;
+def LDrid_abs_set_V4  : T_LD_abs_set <"memd", DoubleRegs>;
+def LDrib_abs_set_V4  : T_LD_abs_set <"memb", IntRegs>;
+def LDriub_abs_set_V4 : T_LD_abs_set <"memub", IntRegs>;
+def LDrih_abs_set_V4  : T_LD_abs_set <"memh", IntRegs>;
+def LDriw_abs_set_V4  : T_LD_abs_set <"memw", IntRegs>;
+def LDriuh_abs_set_V4 : T_LD_abs_set <"memuh", IntRegs>;
 
-// Rd=memw(Re=#U6)
-def LDriw_abs_setimm_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
-            (ins u0AlwaysExt:$addr),
-            "$dst1 = memw($dst2=##$addr)",
-            []>,
-            Requires<[HasV4T]>;
-}
-
-// Following patterns are defined for absolute set addressing mode
-// instruction which take global address as operand.
-let isExtended = 1, opExtendable = 2, neverHasSideEffects = 1,
-validSubTargets = HasV4SubT in {
-def LDrid_abs_set_V4 : LDInst2<(outs DoubleRegs:$dst1, IntRegs:$dst2),
-            (ins globaladdressExt:$addr),
-            "$dst1 = memd($dst2=##$addr)",
-            []>,
-            Requires<[HasV4T]>;
-
-// Rd=memb(Re=#U6)
-def LDrib_abs_set_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
-            (ins globaladdressExt:$addr),
-            "$dst1 = memb($dst2=##$addr)",
-            []>,
-            Requires<[HasV4T]>;
-
-// Rd=memh(Re=#U6)
-def LDrih_abs_set_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
-            (ins globaladdressExt:$addr),
-            "$dst1 = memh($dst2=##$addr)",
-            []>,
-            Requires<[HasV4T]>;
-
-// Rd=memub(Re=#U6)
-def LDriub_abs_set_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
-            (ins globaladdressExt:$addr),
-            "$dst1 = memub($dst2=##$addr)",
-            []>,
-            Requires<[HasV4T]>;
-
-// Rd=memuh(Re=#U6)
-def LDriuh_abs_set_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
-            (ins globaladdressExt:$addr),
-            "$dst1 = memuh($dst2=##$addr)",
-            []>,
-            Requires<[HasV4T]>;
-
-// Rd=memw(Re=#U6)
-def LDriw_abs_set_V4 : LDInst2<(outs IntRegs:$dst1, IntRegs:$dst2),
-            (ins globaladdressExt:$addr),
-            "$dst1 = memw($dst2=##$addr)",
-            []>,
-            Requires<[HasV4T]>;
-}
 
 // multiclass for load instructions with base + register offset
 // addressing mode
 multiclass ld_idxd_shl_pbase<string mnemonic, RegisterClass RC, bit isNot,
                              bit isPredNew> {
-  let PNewValue = !if(isPredNew, "new", "") in
+  let isPredicatedNew = isPredNew in
   def NAME : LDInst2<(outs RC:$dst),
             (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$offset),
             !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
@@ -316,7 +242,7 @@ multiclass ld_idxd_shl_pbase<string mnemonic, RegisterClass RC, bit isNot,
 }
 
 multiclass ld_idxd_shl_pred<string mnemonic, RegisterClass RC, bit PredNot> {
-  let PredSense = !if(PredNot, "false", "true") in {
+  let isPredicatedFalse = PredNot in {
     defm _c#NAME : ld_idxd_shl_pbase<mnemonic, RC, PredNot, 0>;
     // Predicate new
     defm _cdn#NAME : ld_idxd_shl_pbase<mnemonic, RC, PredNot, 1>;
@@ -527,78 +453,29 @@ def:  Pat <(i64 (extloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))),
 // ST +
 //===----------------------------------------------------------------------===//
 ///
-/// Assumptions::: ****** DO NOT IGNORE ********
-/// 1. Make sure that in post increment store, the zero'th operand is always the
-///    post increment operand.
-/// 2. Make sure that the store value operand(Rt/Rtt) in a store is always the
-///    last operand.
-///
-
-// memd(Re=#U)=Rtt
-let isExtended = 1, opExtendable = 2, validSubTargets = HasV4SubT in {
-def STrid_abs_setimm_V4 : STInst2<(outs IntRegs:$dst1),
-            (ins DoubleRegs:$src1, u0AlwaysExt:$src2),
-            "memd($dst1=##$src2) = $src1",
-            []>,
-            Requires<[HasV4T]>;
-
-// memb(Re=#U)=Rs
-def STrib_abs_setimm_V4 : STInst2<(outs IntRegs:$dst1),
-            (ins IntRegs:$src1, u0AlwaysExt:$src2),
-            "memb($dst1=##$src2) = $src1",
-            []>,
-            Requires<[HasV4T]>;
-
-// memh(Re=#U)=Rs
-def STrih_abs_setimm_V4 : STInst2<(outs IntRegs:$dst1),
-            (ins IntRegs:$src1, u0AlwaysExt:$src2),
-            "memh($dst1=##$src2) = $src1",
-            []>,
-            Requires<[HasV4T]>;
-
-// memw(Re=#U)=Rs
-def STriw_abs_setimm_V4 : STInst2<(outs IntRegs:$dst1),
-            (ins IntRegs:$src1, u0AlwaysExt:$src2),
-            "memw($dst1=##$src2) = $src1",
-            []>,
-            Requires<[HasV4T]>;
-}
-
-// memd(Re=#U)=Rtt
-let isExtended = 1, opExtendable = 2, validSubTargets = HasV4SubT in {
-def STrid_abs_set_V4 : STInst2<(outs IntRegs:$dst1),
-            (ins DoubleRegs:$src1, globaladdressExt:$src2),
-            "memd($dst1=##$src2) = $src1",
-            []>,
-            Requires<[HasV4T]>;
-
-// memb(Re=#U)=Rs
-def STrib_abs_set_V4 : STInst2<(outs IntRegs:$dst1),
-            (ins IntRegs:$src1, globaladdressExt:$src2),
-            "memb($dst1=##$src2) = $src1",
+//===----------------------------------------------------------------------===//
+// Template class for store instructions with Absolute set addressing mode.
+//===----------------------------------------------------------------------===//
+let isExtended = 1, opExtendable = 2, validSubTargets = HasV4SubT in
+class T_ST_abs_set<string mnemonic, RegisterClass RC>:
+            STInst2<(outs IntRegs:$dst1),
+            (ins RC:$src1, u0AlwaysExt:$src2),
+            mnemonic#"($dst1=##$src2) = $src1",
             []>,
             Requires<[HasV4T]>;
 
-// memh(Re=#U)=Rs
-def STrih_abs_set_V4 : STInst2<(outs IntRegs:$dst1),
-            (ins IntRegs:$src1, globaladdressExt:$src2),
-            "memh($dst1=##$src2) = $src1",
-            []>,
-            Requires<[HasV4T]>;
-
-// memw(Re=#U)=Rs
-def STriw_abs_set_V4 : STInst2<(outs IntRegs:$dst1),
-            (ins IntRegs:$src1, globaladdressExt:$src2),
-            "memw($dst1=##$src2) = $src1",
-            []>,
-            Requires<[HasV4T]>;
-}
+def STrid_abs_set_V4 : T_ST_abs_set <"memd", DoubleRegs>;
+def STrib_abs_set_V4 : T_ST_abs_set <"memb", IntRegs>;
+def STrih_abs_set_V4 : T_ST_abs_set <"memh", IntRegs>;
+def STriw_abs_set_V4 : T_ST_abs_set <"memw", IntRegs>;
 
+//===----------------------------------------------------------------------===//
 // multiclass for store instructions with base + register offset addressing
 // mode
+//===----------------------------------------------------------------------===//
 multiclass ST_Idxd_shl_Pbase<string mnemonic, RegisterClass RC, bit isNot,
                              bit isPredNew> {
-  let PNewValue = !if(isPredNew, "new", "") in
+  let isPredicatedNew = isPredNew in
   def NAME : STInst2<(outs),
             (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
                  RC:$src5),
@@ -609,7 +486,7 @@ multiclass ST_Idxd_shl_Pbase<string mnemonic, RegisterClass RC, bit isNot,
 }
 
 multiclass ST_Idxd_shl_Pred<string mnemonic, RegisterClass RC, bit PredNot> {
-  let PredSense = !if(PredNot, "false", "true") in {
+  let isPredicatedFalse = PredNot in {
     defm _c#NAME : ST_Idxd_shl_Pbase<mnemonic, RC, PredNot, 0>;
     // Predicate new
     defm _cdn#NAME : ST_Idxd_shl_Pbase<mnemonic, RC, PredNot, 1>;
@@ -637,7 +514,7 @@ multiclass ST_Idxd_shl<string mnemonic, string CextOp, RegisterClass RC> {
 // addressing mode.
 multiclass ST_Idxd_shl_Pbase_nv<string mnemonic, RegisterClass RC, bit isNot,
                              bit isPredNew> {
-  let PNewValue = !if(isPredNew, "new", "") in
+  let isPredicatedNew = isPredNew in
   def NAME#_nv_V4 : NVInst_V4<(outs),
             (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
                  RC:$src5),
@@ -648,7 +525,7 @@ multiclass ST_Idxd_shl_Pbase_nv<string mnemonic, RegisterClass RC, bit isNot,
 }
 
 multiclass ST_Idxd_shl_Pred_nv<string mnemonic, RegisterClass RC, bit PredNot> {
-  let PredSense = !if(PredNot, "false", "true") in {
+  let isPredicatedFalse = PredNot in {
     defm _c#NAME : ST_Idxd_shl_Pbase_nv<mnemonic, RC, PredNot, 0>;
     // Predicate new
     defm _cdn#NAME : ST_Idxd_shl_Pbase_nv<mnemonic, RC, PredNot, 1>;
@@ -711,17 +588,59 @@ def : Pat<(store (i64 DoubleRegs:$src4),
                                 u2ImmPred:$src3, DoubleRegs:$src4)>;
 }
 
-// memd(Ru<<#u2+#U6)=Rtt
-let isExtended = 1, opExtendable = 2, AddedComplexity = 10,
-validSubTargets = HasV4SubT in
-def STrid_shl_V4 : STInst<(outs),
-            (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, DoubleRegs:$src4),
-            "memd($src1<<#$src2+#$src3) = $src4",
-            [(store (i64 DoubleRegs:$src4),
+let isExtended = 1, opExtendable = 2 in
+class T_ST_LongOff <string mnemonic, PatFrag stOp, RegisterClass RC, ValueType VT> :
+            STInst<(outs),
+            (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, RC:$src4),
+            mnemonic#"($src1<<#$src2+##$src3) = $src4",
+            [(stOp (VT RC:$src4),
                     (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2),
                          u0AlwaysExtPred:$src3))]>,
             Requires<[HasV4T]>;
 
+let isExtended = 1, opExtendable = 2, mayStore = 1, isNVStore = 1 in
+class T_ST_LongOff_nv <string mnemonic> :
+            NVInst_V4<(outs),
+            (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4),
+            mnemonic#"($src1<<#$src2+##$src3) = $src4.new",
+            []>,
+            Requires<[HasV4T]>;
+
+multiclass ST_LongOff <string mnemonic, string BaseOp, PatFrag stOp> {
+  let  BaseOpcode = BaseOp#"_shl" in {
+    let isNVStorable = 1 in
+    def NAME#_V4 : T_ST_LongOff<mnemonic, stOp, IntRegs, i32>;
+
+    def NAME#_nv_V4 : T_ST_LongOff_nv<mnemonic>;
+  }
+}
+
+let AddedComplexity = 10, validSubTargets = HasV4SubT in {
+  def STrid_shl_V4 : T_ST_LongOff<"memd", store, DoubleRegs, i64>;
+  defm STrib_shl   : ST_LongOff <"memb", "STrib", truncstorei8>, NewValueRel;
+  defm STrih_shl   : ST_LongOff <"memh", "STrih", truncstorei16>, NewValueRel;
+  defm STriw_shl   : ST_LongOff <"memw", "STriw", store>, NewValueRel;
+}
+
+let AddedComplexity = 40 in
+multiclass T_ST_LOff_Pats <InstHexagon I, RegisterClass RC, ValueType VT,
+                           PatFrag stOp> {
+ def : Pat<(stOp (VT RC:$src4),
+           (add (shl IntRegs:$src1, u2ImmPred:$src2),
+               (NumUsesBelowThresCONST32 tglobaladdr:$src3))),
+           (I IntRegs:$src1, u2ImmPred:$src2, tglobaladdr:$src3, RC:$src4)>;
+
+ def : Pat<(stOp (VT RC:$src4),
+           (add IntRegs:$src1,
+               (NumUsesBelowThresCONST32 tglobaladdr:$src3))),
+           (I IntRegs:$src1, 0, tglobaladdr:$src3, RC:$src4)>;
+}
+
+defm : T_ST_LOff_Pats<STrid_shl_V4, DoubleRegs, i64, store>;
+defm : T_ST_LOff_Pats<STriw_shl_V4, IntRegs, i32, store>;
+defm : T_ST_LOff_Pats<STrib_shl_V4, IntRegs, i32, truncstorei8>;
+defm : T_ST_LOff_Pats<STrih_shl_V4, IntRegs, i32, truncstorei16>;
+
 // memd(Rx++#s4:3)=Rtt
 // memd(Rx++#s4:3:circ(Mu))=Rtt
 // memd(Rx++I:circ(Mu))=Rtt
@@ -741,7 +660,7 @@ def STrid_shl_V4 : STInst<(outs),
 //===----------------------------------------------------------------------===//
 multiclass ST_Imm_Pbase<string mnemonic, Operand OffsetOp, bit isNot,
                         bit isPredNew> {
-  let PNewValue = !if(isPredNew, "new", "") in
+  let isPredicatedNew = isPredNew in
   def NAME : STInst2<(outs),
             (ins PredRegs:$src1, IntRegs:$src2, OffsetOp:$src3, s6Ext:$src4),
             !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
@@ -751,7 +670,7 @@ multiclass ST_Imm_Pbase<string mnemonic, Operand OffsetOp, bit isNot,
 }
 
 multiclass ST_Imm_Pred<string mnemonic, Operand OffsetOp, bit PredNot> {
-  let PredSense = !if(PredNot, "false", "true") in {
+  let isPredicatedFalse = PredNot in {
     defm _c#NAME : ST_Imm_Pbase<mnemonic, OffsetOp, PredNot, 0>;
     // Predicate new
     defm _cdn#NAME : ST_Imm_Pbase<mnemonic, OffsetOp, PredNot, 1>;
@@ -799,17 +718,6 @@ def : Pat <(truncstorei8 s8ExtPred:$src2, (i32 IntRegs:$src1)),
            (STrib_imm_V4 IntRegs:$src1, 0, s8ExtPred:$src2)>,
            Requires<[HasV4T]>;
 
-// memb(Ru<<#u2+#U6)=Rt
-let isExtended = 1, opExtendable = 2, AddedComplexity = 10, isNVStorable = 1,
-validSubTargets = HasV4SubT in
-def STrib_shl_V4 : STInst<(outs),
-            (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4),
-            "memb($src1<<#$src2+#$src3) = $src4",
-            [(truncstorei8 (i32 IntRegs:$src4),
-                           (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2),
-                                u0AlwaysExtPred:$src3))]>,
-            Requires<[HasV4T]>;
-
 // memb(Rx++#s4:0:circ(Mu))=Rt
 // memb(Rx++I:circ(Mu))=Rt
 // memb(Rx++Mu)=Rt
@@ -830,17 +738,6 @@ def : Pat <(truncstorei16 s8ExtPred:$src2, (i32 IntRegs:$src1)),
 // TODO: needs to be implemented.
 
 // memh(Ru<<#u2+#U6)=Rt.H
-// memh(Ru<<#u2+#U6)=Rt
-let isExtended = 1, opExtendable = 2, AddedComplexity = 10, isNVStorable = 1,
-validSubTargets = HasV4SubT in
-def STrih_shl_V4 : STInst<(outs),
-            (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4),
-            "memh($src1<<#$src2+#$src3) = $src4",
-            [(truncstorei16 (i32 IntRegs:$src4),
-                            (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2),
-                                 u0AlwaysExtPred:$src3))]>,
-            Requires<[HasV4T]>;
-
 // memh(Rx++#s4:1:circ(Mu))=Rt.H
 // memh(Rx++#s4:1:circ(Mu))=Rt
 // memh(Rx++I:circ(Mu))=Rt.H
@@ -877,17 +774,6 @@ def : Pat <(store s8ExtPred:$src2, (i32 IntRegs:$src1)),
            (STriw_imm_V4 IntRegs:$src1, 0, s8ExtPred:$src2)>,
            Requires<[HasV4T]>;
 
-// memw(Ru<<#u2+#U6)=Rt
-let isExtended = 1, opExtendable = 2, AddedComplexity = 10, isNVStorable = 1,
-validSubTargets = HasV4SubT in
-def STriw_shl_V4 : STInst<(outs),
-            (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4),
-            "memw($src1<<#$src2+#$src3) = $src4",
-            [(store (i32 IntRegs:$src4),
-                    (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2),
-                              u0AlwaysExtPred:$src3))]>,
-            Requires<[HasV4T]>;
-
 // memw(Rx++#s4:2)=Rt
 // memw(Rx++#s4:2:circ(Mu))=Rt
 // memw(Rx++I:circ(Mu))=Rt
@@ -907,7 +793,7 @@ def STriw_shl_V4 : STInst<(outs),
 //
 multiclass ST_Idxd_Pbase_nv<string mnemonic, RegisterClass RC,
                             Operand predImmOp, bit isNot, bit isPredNew> {
-  let PNewValue = !if(isPredNew, "new", "") in
+  let isPredicatedNew = isPredNew in
   def NAME#_nv_V4 : NVInst_V4<(outs),
             (ins PredRegs:$src1, IntRegs:$src2, predImmOp:$src3, RC: $src4),
             !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
@@ -918,7 +804,7 @@ multiclass ST_Idxd_Pbase_nv<string mnemonic, RegisterClass RC,
 
 multiclass ST_Idxd_Pred_nv<string mnemonic, RegisterClass RC, Operand predImmOp,
                            bit PredNot> {
-  let PredSense = !if(PredNot, "false", "true") in {
+  let isPredicatedFalse = PredNot in {
     defm _c#NAME : ST_Idxd_Pbase_nv<mnemonic, RC, predImmOp, PredNot, 0>;
     // Predicate new
     defm _cdn#NAME : ST_Idxd_Pbase_nv<mnemonic, RC, predImmOp, PredNot, 1>;
@@ -960,7 +846,7 @@ let addrMode = BaseImmOffset, validSubTargets = HasV4SubT in {
 // and MEMri operand.
 multiclass ST_MEMri_Pbase_nv<string mnemonic, RegisterClass RC, bit isNot,
                           bit isPredNew> {
-  let PNewValue = !if(isPredNew, "new", "") in
+  let isPredicatedNew = isPredNew in
   def NAME#_nv_V4 : NVInst_V4<(outs),
             (ins PredRegs:$src1, MEMri:$addr, RC: $src2),
             !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
@@ -970,7 +856,7 @@ multiclass ST_MEMri_Pbase_nv<string mnemonic, RegisterClass RC, bit isNot,
 }
 
 multiclass ST_MEMri_Pred_nv<string mnemonic, RegisterClass RC, bit PredNot> {
-  let PredSense = !if(PredNot, "false", "true") in {
+  let isPredicatedFalse = PredNot in {
     defm _c#NAME : ST_MEMri_Pbase_nv<mnemonic, RC, PredNot, 0>;
 
     // Predicate new
@@ -1006,15 +892,6 @@ mayStore = 1 in {
   defm STriw: ST_MEMri_nv<"memw", "STriw", IntRegs, 13, 8>, AddrModeRel;
 }
 
-// memb(Ru<<#u2+#U6)=Nt.new
-let isExtended = 1, opExtendable = 2, mayStore = 1, AddedComplexity = 10,
-isNVStore = 1, validSubTargets = HasV4SubT in
-def STrib_shl_nv_V4 : NVInst_V4<(outs),
-            (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4),
-            "memb($src1<<#$src2+#$src3) = $src4.new",
-            []>,
-            Requires<[HasV4T]>;
-
 //===----------------------------------------------------------------------===//
 // Post increment store
 // mem[bhwd](Rx++#s4:[0123])=Nt.new
@@ -1022,7 +899,7 @@ def STrib_shl_nv_V4 : NVInst_V4<(outs),
 
 multiclass ST_PostInc_Pbase_nv<string mnemonic, RegisterClass RC, Operand ImmOp,
                             bit isNot, bit isPredNew> {
-  let PNewValue = !if(isPredNew, "new", "") in
+  let isPredicatedNew = isPredNew in
   def NAME#_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
             (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset, RC:$src3),
             !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
@@ -1034,7 +911,7 @@ multiclass ST_PostInc_Pbase_nv<string mnemonic, RegisterClass RC, Operand ImmOp,
 
 multiclass ST_PostInc_Pred_nv<string mnemonic, RegisterClass RC,
                            Operand ImmOp, bit PredNot> {
-  let PredSense = !if(PredNot, "false", "true") in {
+  let isPredicatedFalse = PredNot in {
     defm _c#NAME : ST_PostInc_Pbase_nv<mnemonic, RC, ImmOp, PredNot, 0>;
     // Predicate new
     let Predicates = [HasV4T], validSubTargets = HasV4SubT in
@@ -1072,29 +949,11 @@ defm POST_STwri: ST_PostInc_nv <"memw", "STriw", IntRegs, s4_2Imm>, AddrModeRel;
 // memb(Rx++I:circ(Mu))=Nt.new
 // memb(Rx++Mu)=Nt.new
 // memb(Rx++Mu:brev)=Nt.new
-// memh(Ru<<#u2+#U6)=Nt.new
-let isExtended = 1, opExtendable = 2, mayStore = 1, AddedComplexity = 10,
-isNVStore = 1, validSubTargets = HasV4SubT in
-def STrih_shl_nv_V4 : NVInst_V4<(outs),
-            (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4),
-            "memh($src1<<#$src2+#$src3) = $src4.new",
-            []>,
-            Requires<[HasV4T]>;
-
 // memh(Rx++#s4:1:circ(Mu))=Nt.new
 // memh(Rx++I:circ(Mu))=Nt.new
 // memh(Rx++Mu)=Nt.new
 // memh(Rx++Mu:brev)=Nt.new
 
-// memw(Ru<<#u2+#U6)=Nt.new
-let isExtended = 1, opExtendable = 2, mayStore = 1, AddedComplexity = 10,
-isNVStore = 1, validSubTargets = HasV4SubT in
-def STriw_shl_nv_V4 : NVInst_V4<(outs),
-            (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4),
-            "memw($src1<<#$src2+#$src3) = $src4.new",
-            []>,
-            Requires<[HasV4T]>;
-
 // memw(Rx++#s4:2:circ(Mu))=Nt.new
 // memw(Rx++I:circ(Mu))=Nt.new
 // memw(Rx++Mu)=Nt.new
@@ -1108,179 +967,193 @@ def STriw_shl_nv_V4 : NVInst_V4<(outs),
 // NV/J +
 //===----------------------------------------------------------------------===//
 
-multiclass NVJ_type_basic_reg<string NotStr, string OpcStr, string TakenStr> {
-  def _ie_nv_V4 : NVInst_V4<(outs),
-            (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset),
-            !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr,
-            !strconcat("($src1.new, $src2)) jump:",
-            !strconcat(TakenStr, " $offset"))))),
-            []>,
-            Requires<[HasV4T]>;
+//===----------------------------------------------------------------------===//
+// multiclass/template class for the new-value compare jumps with the register
+// operands.
+//===----------------------------------------------------------------------===//
 
-  def _nv_V4 : NVInst_V4<(outs),
-            (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset),
-            !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr,
-            !strconcat("($src1.new, $src2)) jump:",
-            !strconcat(TakenStr, " $offset"))))),
-            []>,
-            Requires<[HasV4T]>;
-}
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 11 in
+class NVJrr_template<string mnemonic, bits<3> majOp, bit NvOpNum,
+                      bit isNegCond, bit isTaken>
+  : NVInst_V4<(outs),
+    (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset),
+    "if ("#!if(isNegCond, "!","")#mnemonic#
+    "($src1"#!if(!eq(NvOpNum, 0),".new, ",", ")#
+    "$src2"#!if(!eq(NvOpNum, 1),".new))","))")#" jump:"
+    #!if(isTaken, "t","nt")#" $offset",
+    []>, Requires<[HasV4T]> {
 
-multiclass NVJ_type_basic_2ndDotNew<string NotStr, string OpcStr,
-                                                   string TakenStr> {
-  def _ie_nv_V4 : NVInst_V4<(outs),
-            (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset),
-            !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr,
-            !strconcat("($src1, $src2.new)) jump:",
-            !strconcat(TakenStr, " $offset"))))),
-            []>,
-            Requires<[HasV4T]>;
+      bits<5> src1;
+      bits<5> src2;
+      bits<3> Ns;    // New-Value Operand
+      bits<5> RegOp; // Non New-Value Operand
+      bits<11> offset;
 
-  def _nv_V4 : NVInst_V4<(outs),
-            (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset),
-            !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr,
-            !strconcat("($src1, $src2.new)) jump:",
-            !strconcat(TakenStr, " $offset"))))),
-            []>,
-            Requires<[HasV4T]>;
-}
+      let isBrTaken = !if(isTaken, "true", "false");
+      let isPredicatedFalse = isNegCond;
 
-multiclass NVJ_type_basic_imm<string NotStr, string OpcStr, string TakenStr> {
-  def _ie_nv_V4 : NVInst_V4<(outs),
-            (ins IntRegs:$src1, u5Imm:$src2, brtarget:$offset),
-            !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr,
-            !strconcat("($src1.new, #$src2)) jump:",
-            !strconcat(TakenStr, " $offset"))))),
-            []>,
-            Requires<[HasV4T]>;
+      let Ns = !if(!eq(NvOpNum, 0), src1{2-0}, src2{2-0});
+      let RegOp = !if(!eq(NvOpNum, 0), src2, src1);
 
-  def _nv_V4 : NVInst_V4<(outs),
-            (ins IntRegs:$src1, u5Imm:$src2, brtarget:$offset),
-            !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr,
-            !strconcat("($src1.new, #$src2)) jump:",
-            !strconcat(TakenStr, " $offset"))))),
-            []>,
-            Requires<[HasV4T]>;
+      let IClass = 0b0010;
+      let Inst{26} = 0b0;
+      let Inst{25-23} = majOp;
+      let Inst{22} = isNegCond;
+      let Inst{18-16} = Ns;
+      let Inst{13} = isTaken;
+      let Inst{12-8} = RegOp;
+      let Inst{21-20} = offset{10-9};
+      let Inst{7-1} = offset{8-2};
 }
 
-multiclass NVJ_type_basic_neg<string NotStr, string OpcStr, string TakenStr> {
-  def _ie_nv_V4 : NVInst_V4<(outs),
-            (ins IntRegs:$src1, nOneImm:$src2, brtarget:$offset),
-            !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr,
-            !strconcat("($src1.new, #$src2)) jump:",
-            !strconcat(TakenStr, " $offset"))))),
-            []>,
-            Requires<[HasV4T]>;
 
-  def _nv_V4 : NVInst_V4<(outs),
-            (ins IntRegs:$src1, nOneImm:$src2, brtarget:$offset),
-            !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr,
-            !strconcat("($src1.new, #$src2)) jump:",
-            !strconcat(TakenStr, " $offset"))))),
-            []>,
-            Requires<[HasV4T]>;
+multiclass NVJrr_cond<string mnemonic, bits<3> majOp, bit NvOpNum,
+                       bit isNegCond> {
+  // Branch not taken:
+  def _nt_V4: NVJrr_template<mnemonic, majOp, NvOpNum, isNegCond, 0>;
+  // Branch taken:
+  def _t_V4: NVJrr_template<mnemonic, majOp, NvOpNum, isNegCond, 1>;
 }
 
-multiclass NVJ_type_basic_tstbit<string NotStr, string OpcStr,
-                                                string TakenStr> {
-  def _ie_nv_V4 : NVInst_V4<(outs),
-            (ins IntRegs:$src1, u1Imm:$src2, brtarget:$offset),
-            !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr,
-            !strconcat("($src1.new, #$src2)) jump:",
-            !strconcat(TakenStr, " $offset"))))),
-            []>,
-            Requires<[HasV4T]>;
+// NvOpNum = 0 -> First Operand is a new-value Register
+// NvOpNum = 1 -> Second Operand is a new-value Register
 
-  def _nv_V4 : NVInst_V4<(outs),
-            (ins IntRegs:$src1, u1Imm:$src2, brtarget:$offset),
-            !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr,
-            !strconcat("($src1.new, #$src2)) jump:",
-            !strconcat(TakenStr, " $offset"))))),
-            []>,
-            Requires<[HasV4T]>;
+multiclass NVJrr_base<string mnemonic, string BaseOp, bits<3> majOp,
+                       bit NvOpNum> {
+  let BaseOpcode = BaseOp#_NVJ in {
+    defm _t_Jumpnv : NVJrr_cond<mnemonic, majOp, NvOpNum, 0>; // True cond
+    defm _f_Jumpnv : NVJrr_cond<mnemonic, majOp, NvOpNum, 1>; // False cond
+  }
 }
 
-// Multiclass for regular dot new of Ist operand register.
-multiclass NVJ_type_br_pred_reg<string NotStr, string OpcStr> {
-  defm Pt  : NVJ_type_basic_reg<NotStr, OpcStr, "t">;
-  defm Pnt : NVJ_type_basic_reg<NotStr, OpcStr, "nt">;
-}
+// if ([!]cmp.eq(Ns.new,Rt)) jump:[n]t #r9:2
+// if ([!]cmp.gt(Ns.new,Rt)) jump:[n]t #r9:2
+// if ([!]cmp.gtu(Ns.new,Rt)) jump:[n]t #r9:2
+// if ([!]cmp.gt(Rt,Ns.new)) jump:[n]t #r9:2
+// if ([!]cmp.gtu(Rt,Ns.new)) jump:[n]t #r9:2
 
-// Multiclass for dot new of 2nd operand register.
-multiclass NVJ_type_br_pred_2ndDotNew<string NotStr, string OpcStr> {
-  defm Pt  : NVJ_type_basic_2ndDotNew<NotStr, OpcStr, "t">;
-  defm Pnt : NVJ_type_basic_2ndDotNew<NotStr, OpcStr, "nt">;
+let isPredicated = 1, isBranch = 1, isNewValue = 1, isTerminator = 1,
+  Defs = [PC], neverHasSideEffects = 1, validSubTargets = HasV4SubT in {
+  defm CMPEQrr  : NVJrr_base<"cmp.eq",  "CMPEQ",  0b000, 0>, PredRel;
+  defm CMPGTrr  : NVJrr_base<"cmp.gt",  "CMPGT",  0b001, 0>, PredRel;
+  defm CMPGTUrr : NVJrr_base<"cmp.gtu", "CMPGTU", 0b010, 0>, PredRel;
+  defm CMPLTrr  : NVJrr_base<"cmp.gt",  "CMPLT",  0b011, 1>, PredRel;
+  defm CMPLTUrr : NVJrr_base<"cmp.gtu", "CMPLTU", 0b100, 1>, PredRel;
 }
 
-// Multiclass for 2nd operand immediate, including -1.
-multiclass NVJ_type_br_pred_imm<string NotStr, string OpcStr> {
-  defm Pt     : NVJ_type_basic_imm<NotStr, OpcStr, "t">;
-  defm Pnt    : NVJ_type_basic_imm<NotStr, OpcStr, "nt">;
-  defm Ptneg  : NVJ_type_basic_neg<NotStr, OpcStr, "t">;
-  defm Pntneg : NVJ_type_basic_neg<NotStr, OpcStr, "nt">;
-}
+//===----------------------------------------------------------------------===//
+// multiclass/template class for the new-value compare jump instructions
+// with a register and an unsigned immediate (U5) operand.
+//===----------------------------------------------------------------------===//
 
-// Multiclass for 2nd operand immediate, excluding -1.
-multiclass NVJ_type_br_pred_imm_only<string NotStr, string OpcStr> {
-  defm Pt     : NVJ_type_basic_imm<NotStr, OpcStr, "t">;
-  defm Pnt    : NVJ_type_basic_imm<NotStr, OpcStr, "nt">;
-}
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 11 in
+class NVJri_template<string mnemonic, bits<3> majOp, bit isNegCond,
+                         bit isTaken>
+  : NVInst_V4<(outs),
+    (ins IntRegs:$src1, u5Imm:$src2, brtarget:$offset),
+    "if ("#!if(isNegCond, "!","")#mnemonic#"($src1.new, #$src2)) jump:"
+    #!if(isTaken, "t","nt")#" $offset",
+    []>, Requires<[HasV4T]> {
 
-// Multiclass for tstbit, where 2nd operand is always #0.
-multiclass NVJ_type_br_pred_tstbit<string NotStr, string OpcStr> {
-  defm Pt     : NVJ_type_basic_tstbit<NotStr, OpcStr, "t">;
-  defm Pnt    : NVJ_type_basic_tstbit<NotStr, OpcStr, "nt">;
+      let isPredicatedFalse = isNegCond;
+      let isBrTaken = !if(isTaken, "true", "false");
+
+      bits<3> src1;
+      bits<5> src2;
+      bits<11> offset;
+
+      let IClass = 0b0010;
+      let Inst{26} = 0b1;
+      let Inst{25-23} = majOp;
+      let Inst{22} = isNegCond;
+      let Inst{18-16} = src1;
+      let Inst{13} = isTaken;
+      let Inst{12-8} = src2;
+      let Inst{21-20} = offset{10-9};
+      let Inst{7-1} = offset{8-2};
 }
 
-// Multiclass for GT.
-multiclass NVJ_type_rr_ri<string OpcStr> {
-  defm rrNot   : NVJ_type_br_pred_reg<"!", OpcStr>;
-  defm rr      : NVJ_type_br_pred_reg<"",  OpcStr>;
-  defm rrdnNot : NVJ_type_br_pred_2ndDotNew<"!", OpcStr>;
-  defm rrdn    : NVJ_type_br_pred_2ndDotNew<"",  OpcStr>;
-  defm riNot   : NVJ_type_br_pred_imm<"!", OpcStr>;
-  defm ri      : NVJ_type_br_pred_imm<"",  OpcStr>;
+multiclass NVJri_cond<string mnemonic, bits<3> majOp, bit isNegCond> {
+  // Branch not taken:
+  def _nt_V4: NVJri_template<mnemonic, majOp, isNegCond, 0>;
+  // Branch taken:
+  def _t_V4: NVJri_template<mnemonic, majOp, isNegCond, 1>;
 }
 
-// Multiclass for EQ.
-multiclass NVJ_type_rr_ri_no_2ndDotNew<string OpcStr> {
-  defm rrNot   : NVJ_type_br_pred_reg<"!", OpcStr>;
-  defm rr      : NVJ_type_br_pred_reg<"",  OpcStr>;
-  defm riNot   : NVJ_type_br_pred_imm<"!", OpcStr>;
-  defm ri      : NVJ_type_br_pred_imm<"",  OpcStr>;
+multiclass NVJri_base<string mnemonic, string BaseOp, bits<3> majOp> {
+  let BaseOpcode = BaseOp#_NVJri in {
+    defm _t_Jumpnv : NVJri_cond<mnemonic, majOp, 0>; // True Cond
+    defm _f_Jumpnv : NVJri_cond<mnemonic, majOp, 1>; // False cond
+  }
 }
 
-// Multiclass for GTU.
-multiclass NVJ_type_rr_ri_no_nOne<string OpcStr> {
-  defm rrNot   : NVJ_type_br_pred_reg<"!", OpcStr>;
-  defm rr      : NVJ_type_br_pred_reg<"",  OpcStr>;
-  defm rrdnNot : NVJ_type_br_pred_2ndDotNew<"!", OpcStr>;
-  defm rrdn    : NVJ_type_br_pred_2ndDotNew<"",  OpcStr>;
-  defm riNot   : NVJ_type_br_pred_imm_only<"!", OpcStr>;
-  defm ri      : NVJ_type_br_pred_imm_only<"",  OpcStr>;
+// if ([!]cmp.eq(Ns.new,#U5)) jump:[n]t #r9:2
+// if ([!]cmp.gt(Ns.new,#U5)) jump:[n]t #r9:2
+// if ([!]cmp.gtu(Ns.new,#U5)) jump:[n]t #r9:2
+
+let isPredicated = 1, isBranch = 1, isNewValue = 1, isTerminator = 1,
+  Defs = [PC], neverHasSideEffects = 1, validSubTargets = HasV4SubT in {
+  defm CMPEQri  : NVJri_base<"cmp.eq", "CMPEQ", 0b000>, PredRel;
+  defm CMPGTri  : NVJri_base<"cmp.gt", "CMPGT", 0b001>, PredRel;
+  defm CMPGTUri : NVJri_base<"cmp.gtu", "CMPGTU", 0b010>, PredRel;
 }
 
-// Multiclass for tstbit.
-multiclass NVJ_type_r0<string OpcStr> {
-  defm r0Not : NVJ_type_br_pred_tstbit<"!", OpcStr>;
-  defm r0    : NVJ_type_br_pred_tstbit<"",  OpcStr>;
- }
+//===----------------------------------------------------------------------===//
+// multiclass/template class for the new-value compare jump instructions
+// with a register and a hardcoded 0/-1 immediate value.
+//===----------------------------------------------------------------------===//
+
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 11 in
+class NVJ_ConstImm_template<string mnemonic, bits<3> majOp, string ImmVal,
+                            bit isNegCond, bit isTaken>
+  : NVInst_V4<(outs),
+    (ins IntRegs:$src1, brtarget:$offset),
+    "if ("#!if(isNegCond, "!","")#mnemonic
+    #"($src1.new, #"#ImmVal#")) jump:"
+    #!if(isTaken, "t","nt")#" $offset",
+    []>, Requires<[HasV4T]> {
 
-// Base Multiclass for New Value Jump.
-multiclass NVJ_type {
-  defm GT     : NVJ_type_rr_ri<"cmp.gt">;
-  defm EQ     : NVJ_type_rr_ri_no_2ndDotNew<"cmp.eq">;
-  defm GTU    : NVJ_type_rr_ri_no_nOne<"cmp.gtu">;
-  defm TSTBIT : NVJ_type_r0<"tstbit">;
+      let isPredicatedFalse = isNegCond;
+      let isBrTaken = !if(isTaken, "true", "false");
+
+      bits<3> src1;
+      bits<11> offset;
+      let IClass = 0b0010;
+      let Inst{26} = 0b1;
+      let Inst{25-23} = majOp;
+      let Inst{22} = isNegCond;
+      let Inst{18-16} = src1;
+      let Inst{13} = isTaken;
+      let Inst{21-20} = offset{10-9};
+      let Inst{7-1} = offset{8-2};
 }
 
-let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC] in {
-  defm JMP_ : NVJ_type;
+multiclass NVJ_ConstImm_cond<string mnemonic, bits<3> majOp, string ImmVal,
+                             bit isNegCond> {
+  // Branch not taken:
+  def _nt_V4: NVJ_ConstImm_template<mnemonic, majOp, ImmVal, isNegCond, 0>;
+  // Branch taken:
+  def _t_V4: NVJ_ConstImm_template<mnemonic, majOp, ImmVal, isNegCond, 1>;
 }
 
-//===----------------------------------------------------------------------===//
-// NV/J -
-//===----------------------------------------------------------------------===//
+multiclass NVJ_ConstImm_base<string mnemonic, string BaseOp, bits<3> majOp,
+                             string ImmVal> {
+  let BaseOpcode = BaseOp#_NVJ_ConstImm in {
+  defm _t_Jumpnv : NVJ_ConstImm_cond<mnemonic, majOp, ImmVal, 0>; // True cond
+  defm _f_Jumpnv : NVJ_ConstImm_cond<mnemonic, majOp, ImmVal, 1>; // False Cond
+  }
+}
+
+// if ([!]tstbit(Ns.new,#0)) jump:[n]t #r9:2
+// if ([!]cmp.eq(Ns.new,#-1)) jump:[n]t #r9:2
+// if ([!]cmp.gt(Ns.new,#-1)) jump:[n]t #r9:2
+
+let isPredicated = 1, isBranch = 1, isNewValue = 1, isTerminator = 1,
+  Defs = [PC], neverHasSideEffects = 1, validSubTargets = HasV4SubT in {
+  defm TSTBIT0  : NVJ_ConstImm_base<"tstbit", "TSTBIT", 0b011, "0">, PredRel;
+  defm CMPEQn1  : NVJ_ConstImm_base<"cmp.eq", "CMPEQ",  0b100, "-1">, PredRel;
+  defm CMPGTn1  : NVJ_ConstImm_base<"cmp.gt", "CMPGT",  0b101, "-1">, PredRel;
+}
 
 //===----------------------------------------------------------------------===//
 // XTYPE/ALU +
@@ -2286,7 +2159,7 @@ def CMPbEQri_V4 : MInst<(outs PredRegs:$dst),
 
 def : Pat <(brcond (i1 (setne (and (i32 IntRegs:$src1), 255), u8ImmPred:$src2)),
                        bb:$offset),
-      (JMP_cNot (CMPbEQri_V4 (i32 IntRegs:$src1), u8ImmPred:$src2),
+      (JMP_f (CMPbEQri_V4 (i32 IntRegs:$src1), u8ImmPred:$src2),
                 bb:$offset)>,
       Requires<[HasV4T]>;
 
@@ -2769,9 +2642,9 @@ let isReturn = 1, isTerminator = 1,
 
 multiclass ST_Abs_Predbase<string mnemonic, RegisterClass RC, bit isNot,
                            bit isPredNew> {
-  let PNewValue = !if(isPredNew, "new", "") in
+  let isPredicatedNew = isPredNew in
   def NAME#_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, globaladdressExt:$absaddr, RC: $src2),
+            (ins PredRegs:$src1, u0AlwaysExt:$absaddr, RC: $src2),
             !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
             ") ")#mnemonic#"(##$absaddr) = $src2",
             []>,
@@ -2779,7 +2652,7 @@ multiclass ST_Abs_Predbase<string mnemonic, RegisterClass RC, bit isNot,
 }
 
 multiclass ST_Abs_Pred<string mnemonic, RegisterClass RC, bit PredNot> {
-  let PredSense = !if(PredNot, "false", "true") in {
+  let isPredicatedFalse = PredNot in {
     defm _c#NAME : ST_Abs_Predbase<mnemonic, RC, PredNot, 0>;
     // Predicate new
     defm _cdn#NAME : ST_Abs_Predbase<mnemonic, RC, PredNot, 1>;
@@ -2791,7 +2664,7 @@ multiclass ST_Abs<string mnemonic, string CextOp, RegisterClass RC> {
   let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in {
     let opExtendable = 0, isPredicable = 1 in
     def NAME#_V4 : STInst2<(outs),
-            (ins globaladdressExt:$absaddr, RC:$src),
+            (ins u0AlwaysExt:$absaddr, RC:$src),
             mnemonic#"(##$absaddr) = $src",
             []>,
             Requires<[HasV4T]>;
@@ -2805,9 +2678,9 @@ multiclass ST_Abs<string mnemonic, string CextOp, RegisterClass RC> {
 
 multiclass ST_Abs_Predbase_nv<string mnemonic, RegisterClass RC, bit isNot,
                            bit isPredNew> {
-  let PNewValue = !if(isPredNew, "new", "") in
+  let isPredicatedNew = isPredNew in
   def NAME#_nv_V4 : NVInst_V4<(outs),
-            (ins PredRegs:$src1, globaladdressExt:$absaddr, RC: $src2),
+            (ins PredRegs:$src1, u0AlwaysExt:$absaddr, RC: $src2),
             !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
             ") ")#mnemonic#"(##$absaddr) = $src2.new",
             []>,
@@ -2815,7 +2688,7 @@ multiclass ST_Abs_Predbase_nv<string mnemonic, RegisterClass RC, bit isNot,
 }
 
 multiclass ST_Abs_Pred_nv<string mnemonic, RegisterClass RC, bit PredNot> {
-  let PredSense = !if(PredNot, "false", "true") in {
+  let isPredicatedFalse = PredNot in {
     defm _c#NAME : ST_Abs_Predbase_nv<mnemonic, RC, PredNot, 0>;
     // Predicate new
     defm _cdn#NAME : ST_Abs_Predbase_nv<mnemonic, RC, PredNot, 1>;
@@ -2827,7 +2700,7 @@ multiclass ST_Abs_nv<string mnemonic, string CextOp, RegisterClass RC> {
   let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in {
     let opExtendable = 0, isPredicable = 1 in
     def NAME#_nv_V4 : NVInst_V4<(outs),
-            (ins globaladdressExt:$absaddr, RC:$src),
+            (ins u0AlwaysExt:$absaddr, RC:$src),
             mnemonic#"(##$absaddr) = $src.new",
             []>,
             Requires<[HasV4T]>;
@@ -2840,16 +2713,19 @@ multiclass ST_Abs_nv<string mnemonic, string CextOp, RegisterClass RC> {
 }
 
 let addrMode = Absolute in {
+  let accessSize = ByteAccess in
     defm STrib_abs : ST_Abs<"memb", "STrib", IntRegs>,
                      ST_Abs_nv<"memb", "STrib", IntRegs>, AddrModeRel;
 
+  let accessSize = HalfWordAccess in
     defm STrih_abs : ST_Abs<"memh", "STrih", IntRegs>,
                      ST_Abs_nv<"memh", "STrih", IntRegs>, AddrModeRel;
 
+  let accessSize = WordAccess in
     defm STriw_abs : ST_Abs<"memw", "STriw", IntRegs>,
                      ST_Abs_nv<"memw", "STriw", IntRegs>, AddrModeRel;
 
-  let isNVStorable = 0 in
+  let accessSize = DoubleWordAccess, isNVStorable = 0 in
     defm STrid_abs : ST_Abs<"memd", "STrid", DoubleRegs>, AddrModeRel;
 }
 
@@ -2875,6 +2751,7 @@ def : Pat<(store (i64 DoubleRegs:$src1),
 // mem[bhwd](#global)=Rt
 // if ([!]Pv[.new]) mem[bhwd](##global) = Rt
 //===----------------------------------------------------------------------===//
+let mayStore = 1, isNVStorable = 1 in
 multiclass ST_GP<string mnemonic, string BaseOp, RegisterClass RC> {
   let BaseOpcode = BaseOp, isPredicable = 1 in
   def NAME#_V4 : STInst2<(outs),
@@ -2909,15 +2786,16 @@ multiclass ST_GP_nv<string mnemonic, string BaseOp, RegisterClass RC> {
   }
 }
 
-let validSubTargets = HasV4SubT,  validSubTargets = HasV4SubT in {
-defm STd_GP : ST_GP <"memd", "STd_GP", DoubleRegs>,
-              ST_GP_nv<"memd", "STd_GP", DoubleRegs>, NewValueRel ;
-defm STb_GP : ST_GP<"memb",  "STb_GP", IntRegs>,
-              ST_GP_nv<"memb", "STb_GP", IntRegs>, NewValueRel ;
-defm STh_GP : ST_GP<"memh",  "STh_GP", IntRegs>,
-              ST_GP_nv<"memh", "STh_GP", IntRegs>, NewValueRel ;
-defm STw_GP : ST_GP<"memw",  "STw_GP", IntRegs>,
-              ST_GP_nv<"memw", "STw_GP", IntRegs>, NewValueRel ;
+let validSubTargets = HasV4SubT, neverHasSideEffects = 1 in {
+  let isNVStorable = 0 in
+  defm STd_GP : ST_GP <"memd", "STd_GP", DoubleRegs>, PredNewRel;
+
+  defm STb_GP : ST_GP<"memb",  "STb_GP", IntRegs>,
+                ST_GP_nv<"memb", "STb_GP", IntRegs>, NewValueRel;
+  defm STh_GP : ST_GP<"memh",  "STh_GP", IntRegs>,
+                ST_GP_nv<"memh", "STh_GP", IntRegs>, NewValueRel;
+  defm STw_GP : ST_GP<"memw",  "STw_GP", IntRegs>,
+                ST_GP_nv<"memw", "STw_GP", IntRegs>, NewValueRel;
 }
 
 // 64 bit atomic store
@@ -2974,9 +2852,9 @@ def : Pat<(store (i32 IntRegs:$src1), (HexagonCONST32_GP tglobaladdr:$global)),
 //===----------------------------------------------------------------------===//
 multiclass LD_Abs_Predbase<string mnemonic, RegisterClass RC, bit isNot,
                            bit isPredNew> {
-  let PNewValue = !if(isPredNew, "new", "") in
+  let isPredicatedNew = isPredNew in
   def NAME : LDInst2<(outs RC:$dst),
-            (ins PredRegs:$src1, globaladdressExt:$absaddr),
+            (ins PredRegs:$src1, u0AlwaysExt:$absaddr),
             !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
             ") ")#"$dst = "#mnemonic#"(##$absaddr)",
             []>,
@@ -2984,7 +2862,7 @@ multiclass LD_Abs_Predbase<string mnemonic, RegisterClass RC, bit isNot,
 }
 
 multiclass LD_Abs_Pred<string mnemonic, RegisterClass RC, bit PredNot> {
-  let PredSense = !if(PredNot, "false", "true") in {
+  let isPredicatedFalse = PredNot in {
     defm _c#NAME : LD_Abs_Predbase<mnemonic, RC, PredNot, 0>;
     // Predicate new
     defm _cdn#NAME : LD_Abs_Predbase<mnemonic, RC, PredNot, 1>;
@@ -2996,7 +2874,7 @@ multiclass LD_Abs<string mnemonic, string CextOp, RegisterClass RC> {
   let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in {
     let  opExtendable = 1, isPredicable = 1 in
     def NAME#_V4 : LDInst2<(outs RC:$dst),
-            (ins globaladdressExt:$absaddr),
+            (ins u0AlwaysExt:$absaddr),
             "$dst = "#mnemonic#"(##$absaddr)",
             []>,
             Requires<[HasV4T]>;
@@ -3009,33 +2887,37 @@ multiclass LD_Abs<string mnemonic, string CextOp, RegisterClass RC> {
 }
 
 let addrMode = Absolute in {
+  let accessSize = ByteAccess in {
     defm LDrib_abs  : LD_Abs<"memb", "LDrib", IntRegs>, AddrModeRel;
     defm LDriub_abs : LD_Abs<"memub", "LDriub", IntRegs>, AddrModeRel;
+  }
+  let accessSize = HalfWordAccess in {
     defm LDrih_abs  : LD_Abs<"memh", "LDrih", IntRegs>, AddrModeRel;
     defm LDriuh_abs : LD_Abs<"memuh", "LDriuh", IntRegs>, AddrModeRel;
+  }
+  let accessSize = WordAccess in
     defm LDriw_abs  : LD_Abs<"memw", "LDriw", IntRegs>, AddrModeRel;
+
+  let accessSize = DoubleWordAccess in
     defm LDrid_abs : LD_Abs<"memd",  "LDrid", DoubleRegs>, AddrModeRel;
 }
 
-let Predicates = [HasV4T], AddedComplexity  = 30 in
+let Predicates = [HasV4T], AddedComplexity  = 30 in {
 def : Pat<(i32 (load (HexagonCONST32 tglobaladdr:$absaddr))),
           (LDriw_abs_V4 tglobaladdr: $absaddr)>;
 
-let Predicates = [HasV4T], AddedComplexity=30 in
 def : Pat<(i32 (sextloadi8 (HexagonCONST32 tglobaladdr:$absaddr))),
           (LDrib_abs_V4 tglobaladdr:$absaddr)>;
 
-let Predicates = [HasV4T], AddedComplexity=30 in
 def : Pat<(i32 (zextloadi8 (HexagonCONST32 tglobaladdr:$absaddr))),
           (LDriub_abs_V4 tglobaladdr:$absaddr)>;
 
-let Predicates = [HasV4T], AddedComplexity=30 in
 def : Pat<(i32 (sextloadi16 (HexagonCONST32 tglobaladdr:$absaddr))),
           (LDrih_abs_V4 tglobaladdr:$absaddr)>;
 
-let Predicates = [HasV4T], AddedComplexity=30 in
 def : Pat<(i32 (zextloadi16 (HexagonCONST32 tglobaladdr:$absaddr))),
           (LDriuh_abs_V4 tglobaladdr:$absaddr)>;
+}
 
 //===----------------------------------------------------------------------===//
 // multiclass for load instructions with GP-relative addressing mode.
@@ -3058,12 +2940,12 @@ multiclass LD_GP<string mnemonic, string BaseOp, RegisterClass RC> {
   }
 }
 
-defm LDd_GP  : LD_GP<"memd",  "LDd_GP",  DoubleRegs>;
-defm LDb_GP  : LD_GP<"memb",  "LDb_GP",  IntRegs>;
-defm LDub_GP : LD_GP<"memub", "LDub_GP", IntRegs>;
-defm LDh_GP  : LD_GP<"memh",  "LDh_GP",  IntRegs>;
-defm LDuh_GP : LD_GP<"memuh", "LDuh_GP", IntRegs>;
-defm LDw_GP  : LD_GP<"memw",  "LDw_GP",  IntRegs>;
+defm LDd_GP  : LD_GP<"memd",  "LDd_GP",  DoubleRegs>, PredNewRel;
+defm LDb_GP  : LD_GP<"memb",  "LDb_GP",  IntRegs>, PredNewRel;
+defm LDub_GP : LD_GP<"memub", "LDub_GP", IntRegs>, PredNewRel;
+defm LDh_GP  : LD_GP<"memh",  "LDh_GP",  IntRegs>, PredNewRel;
+defm LDuh_GP : LD_GP<"memuh", "LDuh_GP", IntRegs>, PredNewRel;
+defm LDw_GP  : LD_GP<"memw",  "LDw_GP",  IntRegs>, PredNewRel;
 
 def : Pat <(atomic_load_64 (HexagonCONST32_GP tglobaladdr:$global)),
            (i64 (LDd_GP_V4 tglobaladdr:$global))>;
@@ -3139,9 +3021,10 @@ def : Pat <(i32 (load (HexagonCONST32_GP tglobaladdr:$global))),
 
 
 // Transfer global address into a register
-let AddedComplexity=50, isMoveImm = 1, isReMaterializable = 1 in
-def TFRI_V4 : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$src1),
-           "$dst = ##$src1",
+let isExtended = 1, opExtendable = 1, AddedComplexity=50, isMoveImm = 1,
+isAsCheapAsAMove = 1, isReMaterializable = 1, validSubTargets = HasV4SubT in
+def TFRI_V4 : ALU32_ri<(outs IntRegs:$dst), (ins s16Ext:$src1),
+           "$dst = #$src1",
            [(set IntRegs:$dst, (HexagonCONST32 tglobaladdr:$src1))]>,
            Requires<[HasV4T]>;
 
@@ -3185,19 +3068,21 @@ def : Pat<(HexagonCONST32_GP tglobaladdr:$src1),
 
 // Load - Indirect with long offset: These instructions take global address
 // as an operand
-let AddedComplexity = 10 in
+let isExtended = 1, opExtendable = 3, AddedComplexity = 40,
+validSubTargets = HasV4SubT in
 def LDrid_ind_lo_V4 : LDInst<(outs DoubleRegs:$dst),
-            (ins IntRegs:$src1, u2Imm:$src2, globaladdress:$offset),
+            (ins IntRegs:$src1, u2Imm:$src2, globaladdressExt:$offset),
             "$dst=memd($src1<<#$src2+##$offset)",
             [(set (i64 DoubleRegs:$dst),
                   (load (add (shl IntRegs:$src1, u2ImmPred:$src2),
                         (HexagonCONST32 tglobaladdr:$offset))))]>,
             Requires<[HasV4T]>;
 
-let AddedComplexity = 10 in
+let AddedComplexity = 40 in
 multiclass LD_indirect_lo<string OpcStr, PatFrag OpNode> {
+let isExtended = 1, opExtendable = 3, validSubTargets = HasV4SubT in
   def _lo_V4 : LDInst<(outs IntRegs:$dst),
-            (ins IntRegs:$src1, u2Imm:$src2, globaladdress:$offset),
+            (ins IntRegs:$src1, u2Imm:$src2, globaladdressExt:$offset),
             !strconcat("$dst = ",
             !strconcat(OpcStr, "($src1<<#$src2+##$offset)")),
             [(set IntRegs:$dst,
@@ -3208,202 +3093,53 @@ multiclass LD_indirect_lo<string OpcStr, PatFrag OpNode> {
 
 defm LDrib_ind : LD_indirect_lo<"memb", sextloadi8>;
 defm LDriub_ind : LD_indirect_lo<"memub", zextloadi8>;
+defm LDriub_ind_anyext : LD_indirect_lo<"memub", extloadi8>;
 defm LDrih_ind : LD_indirect_lo<"memh", sextloadi16>;
 defm LDriuh_ind : LD_indirect_lo<"memuh", zextloadi16>;
+defm LDriuh_ind_anyext : LD_indirect_lo<"memuh", extloadi16>;
 defm LDriw_ind : LD_indirect_lo<"memw", load>;
 
-// Store - Indirect with long offset: These instructions take global address
-// as an operand
-let AddedComplexity = 10 in
-def STrid_ind_lo_V4 : STInst<(outs),
-            (ins IntRegs:$src1, u2Imm:$src2, globaladdress:$src3,
-                 DoubleRegs:$src4),
-            "memd($src1<<#$src2+#$src3) = $src4",
-            [(store (i64 DoubleRegs:$src4),
-                 (add (shl IntRegs:$src1, u2ImmPred:$src2),
-                      (HexagonCONST32 tglobaladdr:$src3)))]>,
-             Requires<[HasV4T]>;
-
-let AddedComplexity = 10 in
-multiclass ST_indirect_lo<string OpcStr, PatFrag OpNode> {
-  def _lo_V4 : STInst<(outs),
-            (ins IntRegs:$src1, u2Imm:$src2, globaladdress:$src3,
-                 IntRegs:$src4),
-            !strconcat(OpcStr, "($src1<<#$src2+##$src3) = $src4"),
-            [(OpNode (i32 IntRegs:$src4),
-                 (add (shl IntRegs:$src1, u2ImmPred:$src2),
-                      (HexagonCONST32 tglobaladdr:$src3)))]>,
-             Requires<[HasV4T]>;
-}
-
-defm STrib_ind : ST_indirect_lo<"memb", truncstorei8>;
-defm STrih_ind : ST_indirect_lo<"memh", truncstorei16>;
-defm STriw_ind : ST_indirect_lo<"memw", store>;
-
-// Store - absolute addressing mode: These instruction take constant
-// value as the extended operand.
-multiclass ST_absimm<string OpcStr> {
-let isExtended = 1, opExtendable = 0, isPredicable = 1,
-validSubTargets = HasV4SubT in
-  def _abs_V4 : STInst2<(outs),
-            (ins u0AlwaysExt:$src1, IntRegs:$src2),
-            !strconcat(OpcStr, "(##$src1) = $src2"),
-            []>,
-            Requires<[HasV4T]>;
-
-let isExtended = 1, opExtendable = 1, isPredicated = 1,
-validSubTargets = HasV4SubT in {
-  def _abs_cPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3),
-            !strconcat("if ($src1)", !strconcat(OpcStr, "(##$src2) = $src3")),
-            []>,
-            Requires<[HasV4T]>;
-
-  def _abs_cNotPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3),
-            !strconcat("if (!$src1)", !strconcat(OpcStr, "(##$src2) = $src3")),
-            []>,
-            Requires<[HasV4T]>;
-
-  def _abs_cdnPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3),
-            !strconcat("if ($src1.new)",
-            !strconcat(OpcStr, "(##$src2) = $src3")),
-            []>,
-            Requires<[HasV4T]>;
-
-  def _abs_cdnNotPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3),
-            !strconcat("if (!$src1.new)",
-            !strconcat(OpcStr, "(##$src2) = $src3")),
-            []>,
-            Requires<[HasV4T]>;
-}
-
-let isExtended = 1, opExtendable = 0, mayStore = 1, isNVStore = 1,
-validSubTargets = HasV4SubT in
-  def _abs_nv_V4 : NVInst_V4<(outs),
-            (ins u0AlwaysExt:$src1, IntRegs:$src2),
-            !strconcat(OpcStr, "(##$src1) = $src2.new"),
-            []>,
-            Requires<[HasV4T]>;
-
-let isExtended = 1, opExtendable = 1, mayStore = 1, isPredicated = 1,
-isNVStore = 1, validSubTargets = HasV4SubT in {
-  def _abs_cPt_nv_V4 : NVInst_V4<(outs),
-            (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3),
-            !strconcat("if ($src1)",
-            !strconcat(OpcStr, "(##$src2) = $src3.new")),
-            []>,
-            Requires<[HasV4T]>;
-
-  def _abs_cNotPt_nv_V4 : NVInst_V4<(outs),
-            (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3),
-            !strconcat("if (!$src1)",
-            !strconcat(OpcStr, "(##$src2) = $src3.new")),
-            []>,
-            Requires<[HasV4T]>;
-
-  def _abs_cdnPt_nv_V4 : NVInst_V4<(outs),
-            (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3),
-            !strconcat("if ($src1.new)",
-            !strconcat(OpcStr, "(##$src2) = $src3.new")),
-            []>,
-            Requires<[HasV4T]>;
-
-  def _abs_cdnNotPt_nv_V4 : NVInst_V4<(outs),
-            (ins PredRegs:$src1, u0AlwaysExt:$src2, IntRegs:$src3),
-            !strconcat("if (!$src1.new)",
-            !strconcat(OpcStr, "(##$src2) = $src3.new")),
-            []>,
-            Requires<[HasV4T]>;
-}
-}
+let AddedComplexity = 40 in
+def : Pat <(i32 (sextloadi8 (add IntRegs:$src1,
+                                 (NumUsesBelowThresCONST32 tglobaladdr:$offset)))),
+           (i32 (LDrib_ind_lo_V4 IntRegs:$src1, 0, tglobaladdr:$offset))>,
+           Requires<[HasV4T]>;
 
-defm STrib_imm : ST_absimm<"memb">;
-defm STrih_imm : ST_absimm<"memh">;
-defm STriw_imm : ST_absimm<"memw">;
+let AddedComplexity = 40 in
+def : Pat <(i32 (zextloadi8 (add IntRegs:$src1,
+                                 (NumUsesBelowThresCONST32 tglobaladdr:$offset)))),
+           (i32 (LDriub_ind_lo_V4 IntRegs:$src1, 0, tglobaladdr:$offset))>,
+           Requires<[HasV4T]>;
 
 let Predicates = [HasV4T], AddedComplexity  = 30 in {
 def : Pat<(truncstorei8 (i32 IntRegs:$src1), u0AlwaysExtPred:$src2),
-          (STrib_imm_abs_V4 u0AlwaysExtPred:$src2, IntRegs: $src1)>;
+          (STrib_abs_V4 u0AlwaysExtPred:$src2, IntRegs: $src1)>;
 
 def : Pat<(truncstorei16 (i32 IntRegs:$src1), u0AlwaysExtPred:$src2),
-          (STrih_imm_abs_V4 u0AlwaysExtPred:$src2, IntRegs: $src1)>;
+          (STrih_abs_V4 u0AlwaysExtPred:$src2, IntRegs: $src1)>;
 
 def : Pat<(store (i32 IntRegs:$src1), u0AlwaysExtPred:$src2),
-          (STriw_imm_abs_V4 u0AlwaysExtPred:$src2, IntRegs: $src1)>;
-}
-
-// Load - absolute addressing mode: These instruction take constant
-// value as the extended operand
-
-multiclass LD_absimm<string OpcStr> {
-let isExtended = 1, opExtendable = 1, isPredicable = 1,
-validSubTargets = HasV4SubT in
-  def _abs_V4 : LDInst2<(outs IntRegs:$dst),
-            (ins u0AlwaysExt:$src),
-            !strconcat("$dst = ",
-            !strconcat(OpcStr, "(##$src)")),
-            []>,
-            Requires<[HasV4T]>;
-
-let isExtended = 1, opExtendable = 2, isPredicated = 1,
-validSubTargets = HasV4SubT in {
-  def _abs_cPt_V4 : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, u0AlwaysExt:$src2),
-            !strconcat("if ($src1) $dst = ",
-            !strconcat(OpcStr, "(##$src2)")),
-            []>,
-            Requires<[HasV4T]>;
-
-  def _abs_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, u0AlwaysExt:$src2),
-            !strconcat("if (!$src1) $dst = ",
-            !strconcat(OpcStr, "(##$src2)")),
-            []>,
-            Requires<[HasV4T]>;
-
-  def _abs_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, u0AlwaysExt:$src2),
-            !strconcat("if ($src1.new) $dst = ",
-            !strconcat(OpcStr, "(##$src2)")),
-            []>,
-            Requires<[HasV4T]>;
-
-  def _abs_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, u0AlwaysExt:$src2),
-            !strconcat("if (!$src1.new) $dst = ",
-            !strconcat(OpcStr, "(##$src2)")),
-            []>,
-            Requires<[HasV4T]>;
+          (STriw_abs_V4 u0AlwaysExtPred:$src2, IntRegs: $src1)>;
 }
-}
-
-defm LDrib_imm  : LD_absimm<"memb">;
-defm LDriub_imm : LD_absimm<"memub">;
-defm LDrih_imm  : LD_absimm<"memh">;
-defm LDriuh_imm : LD_absimm<"memuh">;
-defm LDriw_imm  : LD_absimm<"memw">;
 
 let Predicates = [HasV4T], AddedComplexity  = 30 in {
 def : Pat<(i32 (load u0AlwaysExtPred:$src)),
-          (LDriw_imm_abs_V4 u0AlwaysExtPred:$src)>;
+          (LDriw_abs_V4 u0AlwaysExtPred:$src)>;
 
 def : Pat<(i32 (sextloadi8 u0AlwaysExtPred:$src)),
-          (LDrib_imm_abs_V4 u0AlwaysExtPred:$src)>;
+          (LDrib_abs_V4 u0AlwaysExtPred:$src)>;
 
 def : Pat<(i32 (zextloadi8 u0AlwaysExtPred:$src)),
-          (LDriub_imm_abs_V4 u0AlwaysExtPred:$src)>;
+          (LDriub_abs_V4 u0AlwaysExtPred:$src)>;
 
 def : Pat<(i32 (sextloadi16 u0AlwaysExtPred:$src)),
-          (LDrih_imm_abs_V4 u0AlwaysExtPred:$src)>;
+          (LDrih_abs_V4 u0AlwaysExtPred:$src)>;
 
 def : Pat<(i32 (zextloadi16 u0AlwaysExtPred:$src)),
-          (LDriuh_imm_abs_V4 u0AlwaysExtPred:$src)>;
+          (LDriuh_abs_V4 u0AlwaysExtPred:$src)>;
 }
 
-// Indexed store double word - global address.
+// Indexed store word - global address.
 // memw(Rs+#u6:2)=#S8
 let AddedComplexity = 10 in
 def STriw_offset_ext_V4 : STInst<(outs),
diff --git a/lib/Target/Hexagon/HexagonMachineFunctionInfo.h b/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
index 0318c519..bd7b26a 100644
--- a/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
+++ b/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
@@ -29,15 +29,18 @@ class HexagonMachineFunctionInfo : public MachineFunctionInfo {
   std::vector<MachineInstr*> AllocaAdjustInsts;
   int VarArgsFrameIndex;
   bool HasClobberLR;
+  bool HasEHReturn;
 
   std::map<const MachineInstr*, unsigned> PacketInfo;
 
 
 public:
-  HexagonMachineFunctionInfo() : SRetReturnReg(0), HasClobberLR(0) {}
+  HexagonMachineFunctionInfo() : SRetReturnReg(0), HasClobberLR(0),
+    HasEHReturn(false) {}
 
   HexagonMachineFunctionInfo(MachineFunction &MF) : SRetReturnReg(0),
-                                                    HasClobberLR(0) {}
+                                                    HasClobberLR(0),
+                                                    HasEHReturn(false) {}
 
   unsigned getSRetReturnReg() const { return SRetReturnReg; }
   void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
@@ -69,6 +72,8 @@ public:
   void setHasClobberLR(bool v) { HasClobberLR = v;  }
   bool hasClobberLR() const { return HasClobberLR; }
 
+  bool hasEHReturn() const { return HasEHReturn; } // Query the flag.
+  void setHasEHReturn(bool H = true) { HasEHReturn = H; } // Default: set.
 };
 } // End llvm namespace
 
diff --git a/lib/Target/Hexagon/HexagonNewValueJump.cpp b/lib/Target/Hexagon/HexagonNewValueJump.cpp
index 5e80e48..05e6968 100644
--- a/lib/Target/Hexagon/HexagonNewValueJump.cpp
+++ b/lib/Target/Hexagon/HexagonNewValueJump.cpp
@@ -22,29 +22,31 @@
 //
 //===----------------------------------------------------------------------===//
 #define DEBUG_TYPE "hexagon-nvj"
-#include "Hexagon.h"
-#include "HexagonInstrInfo.h"
-#include "HexagonMachineFunctionInfo.h"
-#include "HexagonRegisterInfo.h"
-#include "HexagonSubtarget.h"
-#include "HexagonTargetMachine.h"
+#include "llvm/PassSupport.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/LiveVariables.h"
-#include "llvm/CodeGen/MachineFunctionAnalysis.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/ScheduleDAGInstrs.h"
-#include "llvm/PassSupport.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
 #include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetRegisterInfo.h"
+#include "Hexagon.h"
+#include "HexagonTargetMachine.h"
+#include "HexagonRegisterInfo.h"
+#include "HexagonSubtarget.h"
+#include "HexagonInstrInfo.h"
+#include "HexagonMachineFunctionInfo.h"
+
 #include <map>
+
+#include "llvm/Support/CommandLine.h"
 using namespace llvm;
 
 STATISTIC(NumNVJGenerated, "Number of New Value Jump Instructions created");
@@ -57,6 +59,11 @@ static cl::opt<bool> DisableNewValueJumps("disable-nvjump", cl::Hidden,
     cl::ZeroOrMore, cl::init(false),
     cl::desc("Disable New Value Jumps"));
 
+namespace llvm { // Declares the initializer called by the pass constructor.
+  void initializeHexagonNewValueJumpPass(PassRegistry&);
+} // End llvm namespace
+
+
 namespace {
   struct HexagonNewValueJump : public MachineFunctionPass {
     const HexagonInstrInfo    *QII;
@@ -65,9 +72,12 @@ namespace {
   public:
     static char ID;
 
-    HexagonNewValueJump() : MachineFunctionPass(ID) { }
+    HexagonNewValueJump() : MachineFunctionPass(ID) {
+      initializeHexagonNewValueJumpPass(*PassRegistry::getPassRegistry());
+    }
 
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequired<MachineBranchProbabilityInfo>();
       MachineFunctionPass::getAnalysisUsage(AU);
     }
 
@@ -78,6 +88,8 @@ namespace {
     virtual bool runOnMachineFunction(MachineFunction &Fn);
 
   private:
+    /// \brief A handle to the branch probability pass.
+    const MachineBranchProbabilityInfo *MBPI;
 
   };
 
@@ -85,6 +97,13 @@ namespace {
 
 char HexagonNewValueJump::ID = 0;
 
+INITIALIZE_PASS_BEGIN(HexagonNewValueJump, "hexagon-nvj",
+                      "Hexagon NewValueJump", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_END(HexagonNewValueJump, "hexagon-nvj",
+                    "Hexagon NewValueJump", false, false)
+
+
 // We have identified this II could be feeder to NVJ,
 // verify that it can be.
 static bool canBeFeederToNewValueJump(const HexagonInstrInfo *QII,
@@ -208,19 +227,15 @@ static bool canCompareBeNewValueJump(const HexagonInstrInfo *QII,
   // range specified by the arch.
   if (!secondReg) {
     int64_t v = MI->getOperand(2).getImm();
-    if (MI->getOpcode() == Hexagon::CMPGEri ||
-       (MI->getOpcode() == Hexagon::CMPGEUri && v > 0))
-      --v;
 
     if (!(isUInt<5>(v) ||
          ((MI->getOpcode() == Hexagon::CMPEQri ||
-           MI->getOpcode() == Hexagon::CMPGTri ||
-           MI->getOpcode() == Hexagon::CMPGEri) &&
+           MI->getOpcode() == Hexagon::CMPGTri) &&
           (v == -1))))
       return false;
   }
 
-  unsigned cmpReg1, cmpOp2;
+  unsigned cmpReg1, cmpOp2 = 0; // cmpOp2 assignment silences compiler warning.
   cmpReg1 = MI->getOperand(1).getReg();
 
   if (secondReg) {
@@ -271,58 +286,63 @@ static bool canCompareBeNewValueJump(const HexagonInstrInfo *QII,
 // Given a compare operator, return a matching New Value Jump
 // compare operator. Make sure that MI here is included in
 // HexagonInstrInfo.cpp::isNewValueJumpCandidate
-static unsigned getNewValueJumpOpcode(const MachineInstr *MI, int reg,
-                                      bool secondRegNewified) {
+static unsigned getNewValueJumpOpcode(MachineInstr *MI, int reg,
+                                      bool secondRegNewified,
+                                      MachineBasicBlock *jmpTarget,
+                                      const MachineBranchProbabilityInfo
+                                      *MBPI) {
+  bool taken = false;
+  MachineBasicBlock *Src = MI->getParent();
+  const BranchProbability Prediction =
+    MBPI->getEdgeProbability(Src, jmpTarget);
+
+  if (Prediction >= BranchProbability(1,2))
+    taken = true;
+
   switch (MI->getOpcode()) {
     case Hexagon::CMPEQrr:
-      return Hexagon::JMP_EQrrPt_nv_V4;
+      return taken ? Hexagon::CMPEQrr_t_Jumpnv_t_V4
+                   : Hexagon::CMPEQrr_t_Jumpnv_nt_V4;
 
     case Hexagon::CMPEQri: {
       if (reg >= 0)
-        return Hexagon::JMP_EQriPt_nv_V4;
+        return taken ? Hexagon::CMPEQri_t_Jumpnv_t_V4
+                     : Hexagon::CMPEQri_t_Jumpnv_nt_V4;
       else
-        return Hexagon::JMP_EQriPtneg_nv_V4;
+        return taken ? Hexagon::CMPEQn1_t_Jumpnv_t_V4
+                     : Hexagon::CMPEQn1_t_Jumpnv_nt_V4;
     }
 
-    case Hexagon::CMPLTrr:
     case Hexagon::CMPGTrr: {
       if (secondRegNewified)
-        return Hexagon::JMP_GTrrdnPt_nv_V4;
+        return taken ? Hexagon::CMPLTrr_t_Jumpnv_t_V4
+                     : Hexagon::CMPLTrr_t_Jumpnv_nt_V4;
       else
-        return Hexagon::JMP_GTrrPt_nv_V4;
-    }
-
-    case Hexagon::CMPGEri: {
-      if (reg >= 1)
-        return Hexagon::JMP_GTriPt_nv_V4;
-      else
-        return Hexagon::JMP_GTriPtneg_nv_V4;
+        return taken ? Hexagon::CMPGTrr_t_Jumpnv_t_V4
+                     : Hexagon::CMPGTrr_t_Jumpnv_nt_V4;
     }
 
     case Hexagon::CMPGTri: {
       if (reg >= 0)
-        return Hexagon::JMP_GTriPt_nv_V4;
+        return taken ? Hexagon::CMPGTri_t_Jumpnv_t_V4
+                     : Hexagon::CMPGTri_t_Jumpnv_nt_V4;
       else
-        return Hexagon::JMP_GTriPtneg_nv_V4;
+        return taken ? Hexagon::CMPGTn1_t_Jumpnv_t_V4
+                     : Hexagon::CMPGTn1_t_Jumpnv_nt_V4;
     }
 
-    case Hexagon::CMPLTUrr:
     case Hexagon::CMPGTUrr: {
       if (secondRegNewified)
-        return Hexagon::JMP_GTUrrdnPt_nv_V4;
+        return taken ? Hexagon::CMPLTUrr_t_Jumpnv_t_V4
+                     : Hexagon::CMPLTUrr_t_Jumpnv_nt_V4;
       else
-        return Hexagon::JMP_GTUrrPt_nv_V4;
+        return taken ? Hexagon::CMPGTUrr_t_Jumpnv_t_V4
+                     : Hexagon::CMPGTUrr_t_Jumpnv_nt_V4;
     }
 
     case Hexagon::CMPGTUri:
-      return Hexagon::JMP_GTUriPt_nv_V4;
-
-    case Hexagon::CMPGEUri: {
-      if (reg == 0)
-        return Hexagon::JMP_EQrrPt_nv_V4;
-      else
-        return Hexagon::JMP_GTUriPt_nv_V4;
-    }
+      return taken ? Hexagon::CMPGTUri_t_Jumpnv_t_V4
+                   : Hexagon::CMPGTUri_t_Jumpnv_nt_V4;
 
     default:
        llvm_unreachable("Could not find matching New Value Jump instruction.");
@@ -346,6 +366,7 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {
   QII = static_cast<const HexagonInstrInfo *>(MF.getTarget().getInstrInfo());
   QRI =
     static_cast<const HexagonRegisterInfo *>(MF.getTarget().getRegisterInfo());
+  MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
 
   if (!QRI->Subtarget.hasV4TOps() ||
       DisableNewValueJumps) {
@@ -393,12 +414,12 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {
       DEBUG(dbgs() << "Instr: "; MI->dump(); dbgs() << "\n");
 
       if (!foundJump &&
-         (MI->getOpcode() == Hexagon::JMP_c ||
-          MI->getOpcode() == Hexagon::JMP_cNot ||
-          MI->getOpcode() == Hexagon::JMP_cdnPt ||
-          MI->getOpcode() == Hexagon::JMP_cdnPnt ||
-          MI->getOpcode() == Hexagon::JMP_cdnNotPt ||
-          MI->getOpcode() == Hexagon::JMP_cdnNotPnt)) {
+         (MI->getOpcode() == Hexagon::JMP_t ||
+          MI->getOpcode() == Hexagon::JMP_f ||
+          MI->getOpcode() == Hexagon::JMP_tnew_t ||
+          MI->getOpcode() == Hexagon::JMP_tnew_nt ||
+          MI->getOpcode() == Hexagon::JMP_fnew_t ||
+          MI->getOpcode() == Hexagon::JMP_fnew_nt)) {
         // This is where you would insert your compare and
         // instr that feeds compare
         jmpPos = MII;
@@ -434,9 +455,9 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {
 
         jmpTarget = MI->getOperand(1).getMBB();
         foundJump = true;
-        if (MI->getOpcode() == Hexagon::JMP_cNot ||
-            MI->getOpcode() == Hexagon::JMP_cdnNotPt ||
-            MI->getOpcode() == Hexagon::JMP_cdnNotPnt) {
+        if (MI->getOpcode() == Hexagon::JMP_f ||
+            MI->getOpcode() == Hexagon::JMP_fnew_t ||
+            MI->getOpcode() == Hexagon::JMP_fnew_nt) {
           invertPredicate = true;
         }
         continue;
@@ -525,10 +546,8 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {
           if (isSecondOpReg) {
             // In case of CMPLT, or CMPLTU, or EQ with the second register
             // to newify, swap the operands.
-            if (cmpInstr->getOpcode() == Hexagon::CMPLTrr  ||
-                cmpInstr->getOpcode() == Hexagon::CMPLTUrr ||
-                (cmpInstr->getOpcode() == Hexagon::CMPEQrr &&
-                                     feederReg == (unsigned) cmpOp2)) {
+            if (cmpInstr->getOpcode() == Hexagon::CMPEQrr &&
+                                     feederReg == (unsigned) cmpOp2) {
               unsigned tmp = cmpReg1;
               bool tmpIsKill = MO1IsKill;
               cmpReg1 = cmpOp2;
@@ -582,42 +601,34 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {
            assert((QII->isNewValueJumpCandidate(cmpInstr)) &&
                       "This compare is not a New Value Jump candidate.");
           unsigned opc = getNewValueJumpOpcode(cmpInstr, cmpOp2,
-                                               isSecondOpNewified);
+                                               isSecondOpNewified,
+                                               jmpTarget, MBPI);
           if (invertPredicate)
             opc = QII->getInvertedPredicatedOpcode(opc);
 
-          // Manage the conversions from CMPGEUri to either CMPEQrr
-          // or CMPGTUri properly. See Arch spec for CMPGEUri instructions.
-          // This has to be after the getNewValueJumpOpcode function call as
-          // second operand of the compare could be modified in this logic.
-          if (cmpInstr->getOpcode() == Hexagon::CMPGEUri) {
-            if (cmpOp2 == 0) {
-              cmpOp2 = cmpReg1;
-              MO2IsKill = MO1IsKill;
-              isSecondOpReg = true;
-            } else
-              --cmpOp2;
-          }
-
-          // Manage the conversions from CMPGEri to CMPGTUri properly.
-          // See Arch spec for CMPGEri instructions.
-          if (cmpInstr->getOpcode() == Hexagon::CMPGEri)
-            --cmpOp2;
-
-          if (isSecondOpReg) {
+          if (isSecondOpReg)
             NewMI = BuildMI(*MBB, jmpPos, dl,
                                   QII->get(opc))
                                     .addReg(cmpReg1, getKillRegState(MO1IsKill))
                                     .addReg(cmpOp2, getKillRegState(MO2IsKill))
                                     .addMBB(jmpTarget);
-          }
-          else {
+
+          else if ((cmpInstr->getOpcode() == Hexagon::CMPEQri ||
+                    cmpInstr->getOpcode() == Hexagon::CMPGTri) &&
+                    cmpOp2 == -1 )
+            // Corresponding new-value compare jump instructions don't have the
+            // operand for -1 immediate value.
+            NewMI = BuildMI(*MBB, jmpPos, dl,
+                                  QII->get(opc))
+                                    .addReg(cmpReg1, getKillRegState(MO1IsKill))
+                                    .addMBB(jmpTarget);
+
+          else
             NewMI = BuildMI(*MBB, jmpPos, dl,
                                   QII->get(opc))
                                     .addReg(cmpReg1, getKillRegState(MO1IsKill))
                                     .addImm(cmpOp2)
                                     .addMBB(jmpTarget);
-          }
 
           assert(NewMI && "New Value Jump Instruction Not created!");
           if (cmpInstr->getOperand(0).isReg() &&
diff --git a/lib/Target/Hexagon/HexagonPeephole.cpp b/lib/Target/Hexagon/HexagonPeephole.cpp
index 576f1d7..89e3406 100644
--- a/lib/Target/Hexagon/HexagonPeephole.cpp
+++ b/lib/Target/Hexagon/HexagonPeephole.cpp
@@ -61,10 +61,6 @@ static cl::opt<bool> DisableHexagonPeephole("disable-hexagon-peephole",
     cl::Hidden, cl::ZeroOrMore, cl::init(false),
     cl::desc("Disable Peephole Optimization"));
 
-static cl::opt<int>
-DbgPNPCount("pnp-count", cl::init(-1), cl::Hidden,
-  cl::desc("Maximum number of P=NOT(P) to be optimized"));
-
 static cl::opt<bool> DisablePNotP("disable-hexagon-pnotp",
     cl::Hidden, cl::ZeroOrMore, cl::init(false),
     cl::desc("Disable Optimization of PNotP"));
@@ -73,6 +69,14 @@ static cl::opt<bool> DisableOptSZExt("disable-hexagon-optszext",
     cl::Hidden, cl::ZeroOrMore, cl::init(false),
     cl::desc("Disable Optimization of Sign/Zero Extends"));
 
+static cl::opt<bool> DisableOptExtTo64("disable-hexagon-opt-ext-to-64",
+    cl::Hidden, cl::ZeroOrMore, cl::init(false),
+    cl::desc("Disable Optimization of extensions to i64."));
+
+namespace llvm { // Declares the initializer called by the pass constructor.
+  void initializeHexagonPeepholePass(PassRegistry&);
+} // End llvm namespace
+
 namespace {
   struct HexagonPeephole : public MachineFunctionPass {
     const HexagonInstrInfo    *QII;
@@ -81,7 +85,9 @@ namespace {
 
   public:
     static char ID;
-    HexagonPeephole() : MachineFunctionPass(ID) { }
+    HexagonPeephole() : MachineFunctionPass(ID) {
+      initializeHexagonPeepholePass(*PassRegistry::getPassRegistry());
+    }
 
     bool runOnMachineFunction(MachineFunction &MF);
 
@@ -100,8 +106,10 @@ namespace {
 
 char HexagonPeephole::ID = 0;
 
-bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
+INITIALIZE_PASS(HexagonPeephole, "hexagon-peephole", "Hexagon Peephole",
+                false, false)
 
+bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
   QII = static_cast<const HexagonInstrInfo *>(MF.getTarget().
                                         getInstrInfo());
   QRI = static_cast<const HexagonRegisterInfo *>(MF.getTarget().
@@ -142,6 +150,21 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
         }
       }
 
+      // Look for %vreg170<def> = COMBINE_Ir_V4 (0, %vreg169)
+      // %vreg170:DoubleRegs, %vreg169:IntRegs
+      if (!DisableOptExtTo64 &&
+          MI->getOpcode () == Hexagon::COMBINE_Ir_V4) {
+        assert (MI->getNumOperands() == 3);
+        MachineOperand &Dst = MI->getOperand(0);
+        MachineOperand &Src1 = MI->getOperand(1);
+        MachineOperand &Src2 = MI->getOperand(2);
+        if (Src1.getImm() != 0)
+          continue;
+        unsigned DstReg = Dst.getReg();
+        unsigned SrcReg = Src2.getReg();
+        PeepholeMap[DstReg] = SrcReg;
+      }
+
       // Look for this sequence below
       // %vregDoubleReg1 = LSRd_ri %vregDoubleReg0, 32
       // %vregIntReg = COPY %vregDoubleReg1:subreg_loreg.
diff --git a/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp b/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp
index 34bf4ea..44234e8 100644
--- a/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp
+++ b/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp
@@ -21,11 +21,18 @@
 #include "llvm/Transforms/Scalar.h"
 
 using namespace llvm;
+
+namespace llvm { // Declares the initializer called by the pass constructor.
+  void initializeHexagonRemoveExtendArgsPass(PassRegistry&);
+} // End llvm namespace
+
 namespace {
   struct HexagonRemoveExtendArgs : public FunctionPass {
   public:
     static char ID;
-    HexagonRemoveExtendArgs() : FunctionPass(ID) {}
+    HexagonRemoveExtendArgs() : FunctionPass(ID) {
+      initializeHexagonRemoveExtendArgsPass(*PassRegistry::getPassRegistry());
+    }
     virtual bool runOnFunction(Function &F);
 
     const char *getPassName() const {
@@ -41,11 +48,9 @@ namespace {
 }
 
 char HexagonRemoveExtendArgs::ID = 0;
-RegisterPass<HexagonRemoveExtendArgs> X("reargs",
-                                        "Remove Sign and Zero Extends for Args"
-                                        );
-
 
+INITIALIZE_PASS(HexagonRemoveExtendArgs, "reargs",
+                "Remove Sign and Zero Extends for Args", false, false)
 
 bool HexagonRemoveExtendArgs::runOnFunction(Function &F) {
   unsigned Idx = 1;
@@ -78,6 +83,7 @@ bool HexagonRemoveExtendArgs::runOnFunction(Function &F) {
 
 
 
-FunctionPass *llvm::createHexagonRemoveExtendOps(HexagonTargetMachine &TM) {
+FunctionPass*
+llvm::createHexagonRemoveExtendArgs(const HexagonTargetMachine &TM) {
   return new HexagonRemoveExtendArgs();
 }
diff --git a/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp b/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp
index 814249f..8608e08 100644
--- a/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp
+++ b/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp
@@ -49,16 +49,23 @@
 
 using namespace llvm;
 
+namespace llvm {
+  void initializeHexagonSplitTFRCondSetsPass(PassRegistry&);
+}
+
+
 namespace {
 
 class HexagonSplitTFRCondSets : public MachineFunctionPass {
-    HexagonTargetMachine& QTM;
+    const HexagonTargetMachine &QTM;
     const HexagonSubtarget &QST;
 
  public:
     static char ID;
-    HexagonSplitTFRCondSets(HexagonTargetMachine& TM) :
-      MachineFunctionPass(ID), QTM(TM), QST(*TM.getSubtargetImpl()) {}
+    HexagonSplitTFRCondSets(const HexagonTargetMachine& TM) :
+      MachineFunctionPass(ID), QTM(TM), QST(*TM.getSubtargetImpl()) {
+      initializeHexagonSplitTFRCondSetsPass(*PassRegistry::getPassRegistry());
+    }
 
     const char *getPassName() const {
       return "Hexagon Split TFRCondSets";
@@ -211,6 +218,18 @@ bool HexagonSplitTFRCondSets::runOnMachineFunction(MachineFunction &Fn) {
 //                         Public Constructor Functions
 //===----------------------------------------------------------------------===//
 
-FunctionPass *llvm::createHexagonSplitTFRCondSets(HexagonTargetMachine &TM) {
+static void initializePassOnce(PassRegistry &Registry) {
+  const char *Name = "Hexagon Split TFRCondSets";
+  PassInfo *PI = new PassInfo(Name, "hexagon-split-tfr",
+                              &HexagonSplitTFRCondSets::ID, 0, false, false);
+  Registry.registerPass(*PI, true);
+}
+
+void llvm::initializeHexagonSplitTFRCondSetsPass(PassRegistry &Registry) {
+  CALL_ONCE_INITIALIZATION(initializePassOnce)
+}
+
+FunctionPass*
+llvm::createHexagonSplitTFRCondSets(const HexagonTargetMachine &TM) {
   return new HexagonSplitTFRCondSets(TM);
 }
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp
index ce45c62..caa1ba4 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -25,19 +25,17 @@
 
 using namespace llvm;
 
-static cl::
-opt<bool> DisableHardwareLoops(
-                        "disable-hexagon-hwloops", cl::Hidden,
-                        cl::desc("Disable Hardware Loops for Hexagon target"));
+static cl:: opt<bool> DisableHardwareLoops("disable-hexagon-hwloops",
+      cl::Hidden, cl::desc("Disable Hardware Loops for Hexagon target"));
 
-static cl::
-opt<bool> DisableHexagonMISched("disable-hexagon-misched",
-                                cl::Hidden, cl::ZeroOrMore, cl::init(false),
-                                cl::desc("Disable Hexagon MI Scheduling"));
+static cl::opt<bool> DisableHexagonMISched("disable-hexagon-misched",
+      cl::Hidden, cl::ZeroOrMore, cl::init(false),
+      cl::desc("Disable Hexagon MI Scheduling"));
 
 static cl::opt<bool> DisableHexagonCFGOpt("disable-hexagon-cfgopt",
-    cl::Hidden, cl::ZeroOrMore, cl::init(false),
-    cl::desc("Disable Hexagon CFG Optimization"));
+      cl::Hidden, cl::ZeroOrMore, cl::init(false),
+      cl::desc("Disable Hexagon CFG Optimization"));
+
 
 /// HexagonTargetMachineModule - Note that this is used on hosts that
 /// cannot link in a library unless there are references into the
@@ -126,55 +124,62 @@ TargetPassConfig *HexagonTargetMachine::createPassConfig(PassManagerBase &PM) {
 }
 
 bool HexagonPassConfig::addInstSelector() {
+  const HexagonTargetMachine &TM = getHexagonTargetMachine();
+  bool NoOpt = (getOptLevel() == CodeGenOpt::None);
 
-  if (getOptLevel() != CodeGenOpt::None)
-    addPass(createHexagonRemoveExtendOps(getHexagonTargetMachine()));
+  if (!NoOpt)
+    addPass(createHexagonRemoveExtendArgs(TM));
 
-  addPass(createHexagonISelDag(getHexagonTargetMachine(), getOptLevel()));
+  addPass(createHexagonISelDag(TM, getOptLevel()));
 
-  if (getOptLevel() != CodeGenOpt::None)
+  if (!NoOpt) {
     addPass(createHexagonPeephole());
+    printAndVerify("After hexagon peephole pass");
+  }
 
   return false;
 }
 
-
 bool HexagonPassConfig::addPreRegAlloc() {
-  if (!DisableHardwareLoops && getOptLevel() != CodeGenOpt::None)
-    addPass(createHexagonHardwareLoops());
+  if (getOptLevel() != CodeGenOpt::None)
+    if (!DisableHardwareLoops)
+      addPass(createHexagonHardwareLoops());
   return false;
 }
 
 bool HexagonPassConfig::addPostRegAlloc() {
-  if (!DisableHexagonCFGOpt && getOptLevel() != CodeGenOpt::None)
-    addPass(createHexagonCFGOptimizer(getHexagonTargetMachine()));
-  return true;
+  const HexagonTargetMachine &TM = getHexagonTargetMachine();
+  if (getOptLevel() != CodeGenOpt::None)
+    if (!DisableHexagonCFGOpt)
+      addPass(createHexagonCFGOptimizer(TM));
+  return false;
 }
 
-
 bool HexagonPassConfig::addPreSched2() {
   if (getOptLevel() != CodeGenOpt::None)
     addPass(&IfConverterID);
-  return true;
+  return false;
 }
 
 bool HexagonPassConfig::addPreEmitPass() {
+  const HexagonTargetMachine &TM = getHexagonTargetMachine();
+  bool NoOpt = (getOptLevel() == CodeGenOpt::None);
 
-  if (!DisableHardwareLoops && getOptLevel() != CodeGenOpt::None)
-    addPass(createHexagonFixupHwLoops());
-
-  if (getOptLevel() != CodeGenOpt::None)
+  if (!NoOpt)
     addPass(createHexagonNewValueJump());
 
   // Expand Spill code for predicate registers.
-  addPass(createHexagonExpandPredSpillCode(getHexagonTargetMachine()));
+  addPass(createHexagonExpandPredSpillCode(TM));
 
   // Split up TFRcondsets into conditional transfers.
-  addPass(createHexagonSplitTFRCondSets(getHexagonTargetMachine()));
+  addPass(createHexagonSplitTFRCondSets(TM));
 
   // Create Packets.
-  if (getOptLevel() != CodeGenOpt::None)
+  if (!NoOpt) {
+    if (!DisableHardwareLoops)
+      addPass(createHexagonFixupHwLoops());
     addPass(createHexagonPacketizer());
+  }
 
   return false;
 }
diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
index c0d86da..39995e1 100644
--- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
+++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
@@ -48,19 +48,32 @@
 #include "HexagonMachineFunctionInfo.h"
 
 #include <map>
+#include <vector>
 
 using namespace llvm;
 
+static cl::opt<bool> PacketizeVolatiles("hexagon-packetize-volatiles",
+      cl::ZeroOrMore, cl::Hidden, cl::init(true),
+      cl::desc("Allow non-solo packetization of volatile memory references"));
+
+namespace llvm {
+  void initializeHexagonPacketizerPass(PassRegistry&);
+}
+
+
 namespace {
   class HexagonPacketizer : public MachineFunctionPass {
 
   public:
     static char ID;
-    HexagonPacketizer() : MachineFunctionPass(ID) {}
+    HexagonPacketizer() : MachineFunctionPass(ID) {
+      initializeHexagonPacketizerPass(*PassRegistry::getPassRegistry());
+    }
 
     void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.setPreservesCFG();
       AU.addRequired<MachineDominatorTree>();
+      AU.addRequired<MachineBranchProbabilityInfo>();
       AU.addPreserved<MachineDominatorTree>();
       AU.addRequired<MachineLoopInfo>();
       AU.addPreserved<MachineLoopInfo>();
@@ -96,10 +109,17 @@ namespace {
     // schedule this instruction.
     bool FoundSequentialDependence;
 
+    /// \brief A handle to the branch probability pass.
+   const MachineBranchProbabilityInfo *MBPI;
+
+   // Track MIs with ignored dependence.
+   std::vector<MachineInstr*> IgnoreDepMIs;
+
   public:
     // Ctor.
     HexagonPacketizerList(MachineFunction &MF, MachineLoopInfo &MLI,
-                          MachineDominatorTree &MDT);
+                          MachineDominatorTree &MDT,
+                          const MachineBranchProbabilityInfo *MBPI);
 
     // initPacketizerState - initialize some internal flags.
     void initPacketizerState();
@@ -123,20 +143,20 @@ namespace {
   private:
     bool IsCallDependent(MachineInstr* MI, SDep::Kind DepType, unsigned DepReg);
     bool PromoteToDotNew(MachineInstr* MI, SDep::Kind DepType,
-                    MachineBasicBlock::iterator &MII,
-                    const TargetRegisterClass* RC);
+                         MachineBasicBlock::iterator &MII,
+                         const TargetRegisterClass* RC);
     bool CanPromoteToDotNew(MachineInstr* MI, SUnit* PacketSU,
-                    unsigned DepReg,
-                    std::map <MachineInstr*, SUnit*> MIToSUnit,
-                    MachineBasicBlock::iterator &MII,
-                    const TargetRegisterClass* RC);
+                            unsigned DepReg,
+                            std::map <MachineInstr*, SUnit*> MIToSUnit,
+                            MachineBasicBlock::iterator &MII,
+                            const TargetRegisterClass* RC);
     bool CanPromoteToNewValue(MachineInstr* MI, SUnit* PacketSU,
-                    unsigned DepReg,
-                    std::map <MachineInstr*, SUnit*> MIToSUnit,
-                    MachineBasicBlock::iterator &MII);
+                              unsigned DepReg,
+                              std::map <MachineInstr*, SUnit*> MIToSUnit,
+                              MachineBasicBlock::iterator &MII);
     bool CanPromoteToNewValueStore(MachineInstr* MI, MachineInstr* PacketMI,
-                    unsigned DepReg,
-                    std::map <MachineInstr*, SUnit*> MIToSUnit);
+                                   unsigned DepReg,
+                                   std::map <MachineInstr*, SUnit*> MIToSUnit);
     bool DemoteToDotOld(MachineInstr* MI);
     bool ArePredicatesComplements(MachineInstr* MI1, MachineInstr* MI2,
                     std::map <MachineInstr*, SUnit*> MIToSUnit);
@@ -152,19 +172,32 @@ namespace {
   };
 }
 
+INITIALIZE_PASS_BEGIN(HexagonPacketizer, "packets", "Hexagon Packetizer",
+                      false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(HexagonPacketizer, "packets", "Hexagon Packetizer",
+                    false, false)
+
+
 // HexagonPacketizerList Ctor.
 HexagonPacketizerList::HexagonPacketizerList(
-  MachineFunction &MF, MachineLoopInfo &MLI,MachineDominatorTree &MDT)
+  MachineFunction &MF, MachineLoopInfo &MLI,MachineDominatorTree &MDT,
+  const MachineBranchProbabilityInfo *MBPI)
   : VLIWPacketizerList(MF, MLI, MDT, true){
+  this->MBPI = MBPI;
 }
 
 bool HexagonPacketizer::runOnMachineFunction(MachineFunction &Fn) {
   const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo();
   MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
   MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
-
+  const MachineBranchProbabilityInfo *MBPI =
+    &getAnalysis<MachineBranchProbabilityInfo>();
   // Instantiate the packetizer.
-  HexagonPacketizerList Packetizer(Fn, MLI, MDT);
+  HexagonPacketizerList Packetizer(Fn, MLI, MDT, MBPI);
 
   // DFA state table should not be empty.
   assert(Packetizer.getResourceTracker() && "Empty DFA table!");
@@ -710,8 +743,10 @@ static int GetDotNewOp(const int opc) {
 }
 
 // Return .new predicate version for an instruction
-static int GetDotNewPredOp(const int opc) {
-  switch (opc) {
+static int GetDotNewPredOp(MachineInstr *MI,
+                           const MachineBranchProbabilityInfo *MBPI,
+                           const HexagonInstrInfo *QII) {
+  switch (MI->getOpcode()) {
   default: llvm_unreachable("Unknown .new type");
   // Conditional stores
   // Store byte conditionally
@@ -857,17 +892,15 @@ static int GetDotNewPredOp(const int opc) {
     return Hexagon::STw_GP_cdnNotPt_V4;
 
   // Condtional Jumps
-  case Hexagon::JMP_c:
-    return Hexagon::JMP_cdnPt;
+  case Hexagon::JMP_t:
+  case Hexagon::JMP_f:
+    return QII->getDotNewPredJumpOp(MI, MBPI);
 
-  case Hexagon::JMP_cNot:
-    return Hexagon::JMP_cdnNotPt;
+  case Hexagon::JMPR_t:
+    return Hexagon::JMPR_tnew_tV3;
 
-  case Hexagon::JMPR_cPt:
-    return Hexagon::JMPR_cdnPt_V3;
-
-  case Hexagon::JMPR_cNotPt:
-    return Hexagon::JMPR_cdnNotPt_V3;
+  case Hexagon::JMPR_f:
+    return Hexagon::JMPR_fnew_tV3;
 
   // Conditional Transfers
   case Hexagon::TFR_cPt:
@@ -1261,7 +1294,7 @@ bool HexagonPacketizerList::PromoteToDotNew(MachineInstr* MI,
 
   int NewOpcode;
   if (RC == &Hexagon::PredRegsRegClass)
-    NewOpcode = GetDotNewPredOp(MI->getOpcode());
+    NewOpcode = GetDotNewPredOp(MI, MBPI, QII);
   else
     NewOpcode = GetDotNewOp(MI->getOpcode());
   MI->setDesc(QII->get(NewOpcode));
@@ -1306,17 +1339,17 @@ static int GetDotOldOp(const int opc) {
   case Hexagon::TFRI_cdnNotPt:
     return Hexagon::TFRI_cNotPt;
 
-  case Hexagon::JMP_cdnPt:
-    return Hexagon::JMP_c;
+  case Hexagon::JMP_tnew_t:
+    return Hexagon::JMP_t;
 
-  case Hexagon::JMP_cdnNotPt:
-    return Hexagon::JMP_cNot;
+  case Hexagon::JMP_fnew_t:
+    return Hexagon::JMP_f;
 
-  case Hexagon::JMPR_cdnPt_V3:
-    return Hexagon::JMPR_cPt;
+  case Hexagon::JMPR_tnew_tV3:
+    return Hexagon::JMPR_t;
 
-  case Hexagon::JMPR_cdnNotPt_V3:
-    return Hexagon::JMPR_cNotPt;
+  case Hexagon::JMPR_fnew_tV3:
+    return Hexagon::JMPR_f;
 
   // Load double word
 
@@ -1912,7 +1945,7 @@ static bool GetPredicateSense(MachineInstr* MI,
   case Hexagon::STrih_imm_cdnPt_V4 :
   case Hexagon::STriw_imm_cPt_V4 :
   case Hexagon::STriw_imm_cdnPt_V4 :
-  case Hexagon::JMP_cdnPt :
+  case Hexagon::JMP_tnew_t :
   case Hexagon::LDrid_cPt :
   case Hexagon::LDrid_cdnPt :
   case Hexagon::LDrid_indexed_cPt :
@@ -2051,7 +2084,7 @@ static bool GetPredicateSense(MachineInstr* MI,
   case Hexagon::STrih_imm_cdnNotPt_V4 :
   case Hexagon::STriw_imm_cNotPt_V4 :
   case Hexagon::STriw_imm_cdnNotPt_V4 :
-  case Hexagon::JMP_cdnNotPt :
+  case Hexagon::JMP_fnew_t :
   case Hexagon::LDrid_cNotPt :
   case Hexagon::LDrid_cdnNotPt :
   case Hexagon::LDrid_indexed_cNotPt :
@@ -2739,9 +2772,8 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
     // If an instruction feeds new value jump, glue it.
     MachineBasicBlock::iterator NextMII = I;
     ++NextMII;
-    MachineInstr *NextMI = NextMII;
-
-    if (QII->isNewValueJump(NextMI)) {
+    if (NextMII != I->getParent()->end() && QII->isNewValueJump(NextMII)) {
+      MachineInstr *NextMI = NextMII;
 
       bool secondRegMatch = false;
       bool maintainNewValueJump = false;
diff --git a/lib/Target/LLVMBuild.txt b/lib/Target/LLVMBuild.txt
index c06e8bc..1022ae9 100644
--- a/lib/Target/LLVMBuild.txt
+++ b/lib/Target/LLVMBuild.txt
@@ -16,7 +16,7 @@
 ;===------------------------------------------------------------------------===;
 
 [common]
-subdirectories = AArch64 ARM CppBackend Hexagon MBlaze MSP430 NVPTX Mips PowerPC R600 Sparc X86 XCore
+subdirectories = AArch64 ARM CppBackend Hexagon MBlaze MSP430 NVPTX Mips PowerPC R600 Sparc SystemZ X86 XCore
 
 ; This is a special group whose required libraries are extended (by llvm-build)
 ; with the best execution engine (the native JIT, if available, or the
diff --git a/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp b/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp
index 78ad24d..34e33fd 100644
--- a/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp
+++ b/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp
@@ -237,7 +237,7 @@ SDNode* MBlazeDAGToDAGISel::Select(SDNode *Node) {
           // Use load to get GOT target
           SDValue Ops[] = { Callee, GPReg, Chain };
           SDValue Load = SDValue(CurDAG->getMachineNode(MBlaze::LW, dl,
-                                 MVT::i32, MVT::Other, Ops, 3), 0);
+                                 MVT::i32, MVT::Other, Ops), 0);
           Chain = Load.getValue(1);
 
           // Call target must be on T9
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.td b/lib/Target/MBlaze/MBlazeInstrInfo.td
index f86bc0b..d27cd39 100644
--- a/lib/Target/MBlaze/MBlazeInstrInfo.td
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.td
@@ -724,8 +724,7 @@ let usesCustomInserter=1 in {
     [(set GPR:$dst, (atomic_load_nand_32 GPR:$ptr, GPR:$val))]>;
 
   def MEMBARRIER : MBlazePseudo<(outs), (ins),
-    "# memory barrier",
-    [(membarrier (i32 imm), (i32 imm), (i32 imm), (i32 imm), (i32 imm))]>;
+    "# memory barrier", []>;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/Mangler.cpp b/lib/Target/Mangler.cpp
index edfd421..d31efa8 100644
--- a/lib/Target/Mangler.cpp
+++ b/lib/Target/Mangler.cpp
@@ -188,7 +188,12 @@ void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName,
   
   // If this global has a name, handle it simply.
   if (GV->hasName()) {
-    getNameWithPrefix(OutName, GV->getName(), PrefixTy);
+    StringRef Name = GV->getName();
+    getNameWithPrefix(OutName, Name, PrefixTy);
+    // No need to do anything else if the global has the special "do not mangle"
+    // flag in the name.
+    if (Name[0] == 1)
+      return;
   } else {
     // Get the ID for the global, assigning a new one if we haven't got one
     // already.
diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index c403f21..0795cb9 100644
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -63,7 +63,6 @@ class MipsAsmParser : public MCTargetAsmParser {
   MCAsmParser &Parser;
   MipsAssemblerOptions Options;
 
-
 #define GET_ASSEMBLER_HEADER
 #include "MipsGenAsmMatcher.inc"
 
@@ -127,9 +126,12 @@ class MipsAsmParser : public MCTargetAsmParser {
                      bool isLoad,bool isImmOpnd);
   bool reportParseError(StringRef ErrorMsg);
 
-  bool parseMemOffset(const MCExpr *&Res);
+  bool parseMemOffset(const MCExpr *&Res, bool isParenExpr);
   bool parseRelocOperand(const MCExpr *&Res);
 
+  const MCExpr* evaluateRelocExpr(const MCExpr *Expr, StringRef RelocStr);
+
+  bool isEvaluated(const MCExpr *Expr);
   bool parseDirectiveSet();
 
   bool parseSetAtDirective();
@@ -171,7 +173,7 @@ class MipsAsmParser : public MCTargetAsmParser {
 
   bool requestsDoubleOperand(StringRef Mnemonic);
 
-  unsigned getReg(int RC,int RegNo);
+  unsigned getReg(int RC, int RegNo);
 
   int getATReg();
 
@@ -269,7 +271,7 @@ public:
   void addImmOperands(MCInst &Inst, unsigned N) const {
     assert(N == 1 && "Invalid number of operands!");
     const MCExpr *Expr = getImm();
-    addExpr(Inst,Expr);
+    addExpr(Inst, Expr);
   }
 
   void addMemOperands(MCInst &Inst, unsigned N) const {
@@ -278,7 +280,7 @@ public:
     Inst.addOperand(MCOperand::CreateReg(getMemBase()));
 
     const MCExpr *Expr = getMemOff();
-    addExpr(Inst,Expr);
+    addExpr(Inst, Expr);
   }
 
   bool isReg() const { return Kind == k_Register; }
@@ -391,15 +393,19 @@ public:
   }
 
   /// getStartLoc - Get the location of the first token of this operand.
-  SMLoc getStartLoc() const { return StartLoc; }
+  SMLoc getStartLoc() const {
+    return StartLoc;
+  }
   /// getEndLoc - Get the location of the last token of this operand.
-  SMLoc getEndLoc() const { return EndLoc; }
+  SMLoc getEndLoc() const {
+    return EndLoc;
+  }
 
   virtual void print(raw_ostream &OS) const {
     llvm_unreachable("unimplemented!");
   }
-};
-}
+}; // class MipsOperand
+}  // namespace
 
 namespace llvm {
 extern const MCInstrDesc MipsInsts[];
@@ -409,39 +415,55 @@ static const MCInstrDesc &getInstDesc(unsigned Opcode) {
 }
 
 bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
-                        SmallVectorImpl<MCInst> &Instructions) {
+                                       SmallVectorImpl<MCInst> &Instructions) {
   const MCInstrDesc &MCID = getInstDesc(Inst.getOpcode());
   Inst.setLoc(IDLoc);
+  if (MCID.hasDelaySlot() && Options.isReorder()) {
+    // If this instruction has a delay slot and .set reorder is active,
+    // emit a NOP after it.
+    Instructions.push_back(Inst);
+    MCInst NopInst;
+    NopInst.setOpcode(Mips::SLL);
+    NopInst.addOperand(MCOperand::CreateReg(Mips::ZERO));
+    NopInst.addOperand(MCOperand::CreateReg(Mips::ZERO));
+    NopInst.addOperand(MCOperand::CreateImm(0));
+    Instructions.push_back(NopInst);
+    return false;
+  }
+
   if (MCID.mayLoad() || MCID.mayStore()) {
     // Check the offset of memory operand, if it is a symbol
-    // reference or immediate we may have to expand instructions
-    for (unsigned i=0;i<MCID.getNumOperands();i++) {
+    // reference or immediate we may have to expand instructions.
+    for (unsigned i = 0; i < MCID.getNumOperands(); i++) {
       const MCOperandInfo &OpInfo = MCID.OpInfo[i];
-      if ((OpInfo.OperandType == MCOI::OPERAND_MEMORY) ||
-          (OpInfo.OperandType == MCOI::OPERAND_UNKNOWN)) {
+      if ((OpInfo.OperandType == MCOI::OPERAND_MEMORY)
+          || (OpInfo.OperandType == MCOI::OPERAND_UNKNOWN)) {
         MCOperand &Op = Inst.getOperand(i);
         if (Op.isImm()) {
           int MemOffset = Op.getImm();
           if (MemOffset < -32768 || MemOffset > 32767) {
-            // Offset can't exceed 16bit value
-            expandMemInst(Inst,IDLoc,Instructions,MCID.mayLoad(),true);
+            // Offset can't exceed 16bit value.
+            expandMemInst(Inst, IDLoc, Instructions, MCID.mayLoad(), true);
             return false;
           }
         } else if (Op.isExpr()) {
           const MCExpr *Expr = Op.getExpr();
-          if (Expr->getKind() == MCExpr::SymbolRef){
+          if (Expr->getKind() == MCExpr::SymbolRef) {
             const MCSymbolRefExpr *SR =
-                    static_cast<const MCSymbolRefExpr*>(Expr);
+                static_cast<const MCSymbolRefExpr*>(Expr);
             if (SR->getKind() == MCSymbolRefExpr::VK_None) {
-              // Expand symbol
-              expandMemInst(Inst,IDLoc,Instructions,MCID.mayLoad(),false);
+              // Expand symbol.
+              expandMemInst(Inst, IDLoc, Instructions, MCID.mayLoad(), false);
               return false;
             }
+          } else if (!isEvaluated(Expr)) {
+            expandMemInst(Inst, IDLoc, Instructions, MCID.mayLoad(), false);
+            return false;
           }
         }
       }
-    }
-  }
+    } // for
+  } // if load/store
 
   if (needsExpansion(Inst))
     expandInstruction(Inst, IDLoc, Instructions);
@@ -453,30 +475,30 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
 
 bool MipsAsmParser::needsExpansion(MCInst &Inst) {
 
-  switch(Inst.getOpcode()) {
-    case Mips::LoadImm32Reg:
-    case Mips::LoadAddr32Imm:
-    case Mips::LoadAddr32Reg:
-      return true;
-    default:
-      return false;
+  switch (Inst.getOpcode()) {
+  case Mips::LoadImm32Reg:
+  case Mips::LoadAddr32Imm:
+  case Mips::LoadAddr32Reg:
+    return true;
+  default:
+    return false;
   }
 }
 
 void MipsAsmParser::expandInstruction(MCInst &Inst, SMLoc IDLoc,
-                        SmallVectorImpl<MCInst> &Instructions){
-  switch(Inst.getOpcode()) {
-    case Mips::LoadImm32Reg:
-      return expandLoadImm(Inst, IDLoc, Instructions);
-    case Mips::LoadAddr32Imm:
-      return expandLoadAddressImm(Inst,IDLoc,Instructions);
-    case Mips::LoadAddr32Reg:
-      return expandLoadAddressReg(Inst,IDLoc,Instructions);
-    }
+                                       SmallVectorImpl<MCInst> &Instructions) {
+  switch (Inst.getOpcode()) {
+  case Mips::LoadImm32Reg:
+    return expandLoadImm(Inst, IDLoc, Instructions);
+  case Mips::LoadAddr32Imm:
+    return expandLoadAddressImm(Inst, IDLoc, Instructions);
+  case Mips::LoadAddr32Reg:
+    return expandLoadAddressReg(Inst, IDLoc, Instructions);
+  }
 }
 
 void MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc,
-                                  SmallVectorImpl<MCInst> &Instructions){
+                                  SmallVectorImpl<MCInst> &Instructions) {
   MCInst tmpInst;
   const MCOperand &ImmOp = Inst.getOperand(1);
   assert(ImmOp.isImm() && "expected immediate operand kind");
@@ -485,26 +507,24 @@ void MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc,
 
   int ImmValue = ImmOp.getImm();
   tmpInst.setLoc(IDLoc);
-  if ( 0 <= ImmValue && ImmValue <= 65535) {
-    // for 0 <= j <= 65535.
+  if (0 <= ImmValue && ImmValue <= 65535) {
+    // For 0 <= j <= 65535.
     // li d,j => ori d,$zero,j
     tmpInst.setOpcode(Mips::ORi);
     tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
-    tmpInst.addOperand(
-              MCOperand::CreateReg(Mips::ZERO));
+    tmpInst.addOperand(MCOperand::CreateReg(Mips::ZERO));
     tmpInst.addOperand(MCOperand::CreateImm(ImmValue));
     Instructions.push_back(tmpInst);
-  } else if ( ImmValue < 0 && ImmValue >= -32768) {
-    // for -32768 <= j < 0.
+  } else if (ImmValue < 0 && ImmValue >= -32768) {
+    // For -32768 <= j < 0.
     // li d,j => addiu d,$zero,j
     tmpInst.setOpcode(Mips::ADDiu);
     tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
-    tmpInst.addOperand(
-              MCOperand::CreateReg(Mips::ZERO));
+    tmpInst.addOperand(MCOperand::CreateReg(Mips::ZERO));
     tmpInst.addOperand(MCOperand::CreateImm(ImmValue));
     Instructions.push_back(tmpInst);
   } else {
-    // for any other value of j that is representable as a 32-bit integer.
+    // For any other value of j that is representable as a 32-bit integer.
     // li d,j => lui d,hi16(j)
     //           ori d,d,lo16(j)
     tmpInst.setOpcode(Mips::LUi);
@@ -522,7 +542,7 @@ void MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc,
 }
 
 void MipsAsmParser::expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc,
-                                         SmallVectorImpl<MCInst> &Instructions){
+                                       SmallVectorImpl<MCInst> &Instructions) {
   MCInst tmpInst;
   const MCOperand &ImmOp = Inst.getOperand(2);
   assert(ImmOp.isImm() && "expected immediate operand kind");
@@ -531,19 +551,19 @@ void MipsAsmParser::expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc,
   const MCOperand &DstRegOp = Inst.getOperand(0);
   assert(DstRegOp.isReg() && "expected register operand kind");
   int ImmValue = ImmOp.getImm();
-  if ( -32768 <= ImmValue && ImmValue <= 65535) {
-    //for -32768 <= j <= 65535.
-    //la d,j(s) => addiu d,s,j
+  if (-32768 <= ImmValue && ImmValue <= 65535) {
+    // For -32768 <= j <= 65535.
+    // la d,j(s) => addiu d,s,j
     tmpInst.setOpcode(Mips::ADDiu);
     tmpInst.addOperand(MCOperand::CreateReg(DstRegOp.getReg()));
     tmpInst.addOperand(MCOperand::CreateReg(SrcRegOp.getReg()));
     tmpInst.addOperand(MCOperand::CreateImm(ImmValue));
     Instructions.push_back(tmpInst);
   } else {
-    //for any other value of j that is representable as a 32-bit integer.
-    //la d,j(s) => lui d,hi16(j)
-    //             ori d,d,lo16(j)
-    //             addu d,d,s
+    // For any other value of j that is representable as a 32-bit integer.
+    // la d,j(s) => lui d,hi16(j)
+    //              ori d,d,lo16(j)
+    //              addu d,d,s
     tmpInst.setOpcode(Mips::LUi);
     tmpInst.addOperand(MCOperand::CreateReg(DstRegOp.getReg()));
     tmpInst.addOperand(MCOperand::CreateImm((ImmValue & 0xffff0000) >> 16));
@@ -564,26 +584,25 @@ void MipsAsmParser::expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc,
 }
 
 void MipsAsmParser::expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc,
-                                         SmallVectorImpl<MCInst> &Instructions){
+                                       SmallVectorImpl<MCInst> &Instructions) {
   MCInst tmpInst;
   const MCOperand &ImmOp = Inst.getOperand(1);
   assert(ImmOp.isImm() && "expected immediate operand kind");
   const MCOperand &RegOp = Inst.getOperand(0);
   assert(RegOp.isReg() && "expected register operand kind");
   int ImmValue = ImmOp.getImm();
-  if ( -32768 <= ImmValue && ImmValue <= 65535) {
-    //for -32768 <= j <= 65535.
-    //la d,j => addiu d,$zero,j
+  if (-32768 <= ImmValue && ImmValue <= 65535) {
+    // For -32768 <= j <= 65535.
+    // la d,j => addiu d,$zero,j
     tmpInst.setOpcode(Mips::ADDiu);
     tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
-    tmpInst.addOperand(
-              MCOperand::CreateReg(Mips::ZERO));
+    tmpInst.addOperand(MCOperand::CreateReg(Mips::ZERO));
     tmpInst.addOperand(MCOperand::CreateImm(ImmValue));
     Instructions.push_back(tmpInst);
   } else {
-    //for any other value of j that is representable as a 32-bit integer.
-    //la d,j => lui d,hi16(j)
-    //          ori d,d,lo16(j)
+    // For any other value of j that is representable as a 32-bit integer.
+    // la d,j => lui d,hi16(j)
+    //           ori d,d,lo16(j)
     tmpInst.setOpcode(Mips::LUi);
     tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
     tmpInst.addOperand(MCOperand::CreateImm((ImmValue & 0xffff0000) >> 16));
@@ -598,40 +617,37 @@ void MipsAsmParser::expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc,
 }
 
 void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc,
-                     SmallVectorImpl<MCInst> &Instructions,
-                     bool isLoad,bool isImmOpnd) {
+          SmallVectorImpl<MCInst> &Instructions, bool isLoad, bool isImmOpnd) {
   const MCSymbolRefExpr *SR;
   MCInst TempInst;
-  unsigned ImmOffset,HiOffset,LoOffset;
+  unsigned ImmOffset, HiOffset, LoOffset;
   const MCExpr *ExprOffset;
   unsigned TmpRegNum;
-  unsigned AtRegNum = getReg((isMips64()) ? Mips::CPU64RegsRegClassID:
-                                            Mips::CPURegsRegClassID,
-                                            getATReg());
-  // 1st operand is either source or dst register
+  unsigned AtRegNum = getReg((isMips64()) ? Mips::CPU64RegsRegClassID
+                             : Mips::CPURegsRegClassID, getATReg());
+  // 1st operand is either the source or destination register.
   assert(Inst.getOperand(0).isReg() && "expected register operand kind");
   unsigned RegOpNum = Inst.getOperand(0).getReg();
-  // 2nd operand is base register
+  // 2nd operand is the base register.
   assert(Inst.getOperand(1).isReg() && "expected register operand kind");
   unsigned BaseRegNum = Inst.getOperand(1).getReg();
-  // 3rd operand is either immediate or expression
+  // 3rd operand is either an immediate or expression.
   if (isImmOpnd) {
     assert(Inst.getOperand(2).isImm() && "expected immediate operand kind");
     ImmOffset = Inst.getOperand(2).getImm();
     LoOffset = ImmOffset & 0x0000ffff;
     HiOffset = (ImmOffset & 0xffff0000) >> 16;
-    // If msb of LoOffset is 1(negative number) we must increment HiOffset
+    // If msb of LoOffset is 1(negative number) we must increment HiOffset.
     if (LoOffset & 0x8000)
       HiOffset++;
-  }
-  else
+  } else
     ExprOffset = Inst.getOperand(2).getExpr();
-  // All instructions will have the same location
+  // All instructions will have the same location.
   TempInst.setLoc(IDLoc);
   // 1st instruction in expansion is LUi. For load instruction we can use
   // the dst register as a temporary if base and dst are different,
-  // but for stores we must use $at
-  TmpRegNum = (isLoad && (BaseRegNum != RegOpNum))?RegOpNum:AtRegNum;
+  // but for stores we must use $at.
+  TmpRegNum = (isLoad && (BaseRegNum != RegOpNum)) ? RegOpNum : AtRegNum;
   TempInst.setOpcode(Mips::LUi);
   TempInst.addOperand(MCOperand::CreateReg(TmpRegNum));
   if (isImmOpnd)
@@ -639,26 +655,28 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc,
   else {
     if (ExprOffset->getKind() == MCExpr::SymbolRef) {
       SR = static_cast<const MCSymbolRefExpr*>(ExprOffset);
-      const MCSymbolRefExpr *HiExpr = MCSymbolRefExpr::
-                                        Create(SR->getSymbol().getName(),
-                                        MCSymbolRefExpr::VK_Mips_ABS_HI,
-                                        getContext());
+      const MCSymbolRefExpr *HiExpr = MCSymbolRefExpr::Create(
+          SR->getSymbol().getName(), MCSymbolRefExpr::VK_Mips_ABS_HI,
+          getContext());
+      TempInst.addOperand(MCOperand::CreateExpr(HiExpr));
+    } else {
+      const MCExpr *HiExpr = evaluateRelocExpr(ExprOffset, "hi");
       TempInst.addOperand(MCOperand::CreateExpr(HiExpr));
     }
   }
-  // Add the instruction to the list
+  // Add the instruction to the list.
   Instructions.push_back(TempInst);
-  // and prepare TempInst for next instruction
+  // Prepare TempInst for next instruction.
   TempInst.clear();
-  // which is add temp register to base
+  // Add temp register to base.
   TempInst.setOpcode(Mips::ADDu);
   TempInst.addOperand(MCOperand::CreateReg(TmpRegNum));
   TempInst.addOperand(MCOperand::CreateReg(TmpRegNum));
   TempInst.addOperand(MCOperand::CreateReg(BaseRegNum));
   Instructions.push_back(TempInst);
   TempInst.clear();
-  // and finaly, create original instruction with low part
-  // of offset and new base
+  // And finally, create original instruction with low part
+  // of offset and new base.
   TempInst.setOpcode(Inst.getOpcode());
   TempInst.addOperand(MCOperand::CreateReg(RegOpNum));
   TempInst.addOperand(MCOperand::CreateReg(TmpRegNum));
@@ -666,10 +684,12 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc,
     TempInst.addOperand(MCOperand::CreateImm(LoOffset));
   else {
     if (ExprOffset->getKind() == MCExpr::SymbolRef) {
-      const MCSymbolRefExpr *LoExpr = MCSymbolRefExpr::
-                                      Create(SR->getSymbol().getName(),
-                                      MCSymbolRefExpr::VK_Mips_ABS_LO,
-                                      getContext());
+      const MCSymbolRefExpr *LoExpr = MCSymbolRefExpr::Create(
+          SR->getSymbol().getName(), MCSymbolRefExpr::VK_Mips_ABS_LO,
+          getContext());
+      TempInst.addOperand(MCOperand::CreateExpr(LoExpr));
+    } else {
+      const MCExpr *LoExpr = evaluateRelocExpr(ExprOffset, "lo");
       TempInst.addOperand(MCOperand::CreateExpr(LoExpr));
     }
   }
@@ -688,11 +708,12 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                               MatchingInlineAsm);
 
   switch (MatchResult) {
-  default: break;
+  default:
+    break;
   case Match_Success: {
-    if (processInstruction(Inst,IDLoc,Instructions))
+    if (processInstruction(Inst, IDLoc, Instructions))
       return true;
-    for(unsigned i =0; i < Instructions.size(); i++)
+    for (unsigned i = 0; i < Instructions.size(); i++)
       Out.EmitInstruction(Instructions[i]);
     return false;
   }
@@ -705,8 +726,9 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
       if (ErrorInfo >= Operands.size())
         return Error(IDLoc, "too few operands for instruction");
 
-      ErrorLoc = ((MipsOperand*)Operands[ErrorInfo])->getStartLoc();
-      if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
+      ErrorLoc = ((MipsOperand*) Operands[ErrorInfo])->getStartLoc();
+      if (ErrorLoc == SMLoc())
+        ErrorLoc = IDLoc;
     }
 
     return Error(ErrorLoc, "invalid operand for instruction");
@@ -757,10 +779,10 @@ int MipsAsmParser::matchCPURegisterName(StringRef Name) {
     .Case("t9",  25)
     .Default(-1);
 
-  // Although SGI documentation just cut out t0-t3 for n32/n64,
+  // Although SGI documentation just cuts out t0-t3 for n32/n64,
   // GNU pushes the values of t0-t3 to override the o32/o64 values for t4-t7
   // We are supporting both cases, so for t0-t3 we'll just push them to t4-t7.
-  if (isMips64() && 8 <= CC  && CC <= 11)
+  if (isMips64() && 8 <= CC && CC <= 11)
     CC += 4;
 
   if (CC == -1 && isMips64())
@@ -776,19 +798,23 @@ int MipsAsmParser::matchCPURegisterName(StringRef Name) {
 
   return CC;
 }
+
 int MipsAsmParser::matchRegisterName(StringRef Name, bool is64BitReg) {
 
+  if (Name.equals("fcc0"))
+    return Mips::FCC0;
+
   int CC;
   CC = matchCPURegisterName(Name);
   if (CC != -1)
-    return matchRegisterByNumber(CC,is64BitReg?Mips::CPU64RegsRegClassID:
-                               Mips::CPURegsRegClassID);
+    return matchRegisterByNumber(CC, is64BitReg ? Mips::CPU64RegsRegClassID
+                                                : Mips::CPURegsRegClassID);
 
   if (Name[0] == 'f') {
     StringRef NumString = Name.substr(1);
     unsigned IntVal;
-    if( NumString.getAsInteger(10, IntVal))
-      return -1; // not integer
+    if (NumString.getAsInteger(10, IntVal))
+      return -1; // This is not an integer.
     if (IntVal > 31)
       return -1;
 
@@ -797,18 +823,19 @@ int MipsAsmParser::matchRegisterName(StringRef Name, bool is64BitReg) {
     if (Format == FP_FORMAT_S || Format == FP_FORMAT_W)
       return getReg(Mips::FGR32RegClassID, IntVal);
     if (Format == FP_FORMAT_D) {
-      if(isFP64()) {
+      if (isFP64()) {
         return getReg(Mips::FGR64RegClassID, IntVal);
       }
-      // only even numbers available as register pairs
-      if (( IntVal > 31) || (IntVal%2 !=  0))
+      // Only even numbers available as register pairs.
+      if ((IntVal > 31) || (IntVal % 2 != 0))
         return -1;
-      return getReg(Mips::AFGR64RegClassID, IntVal/2);
+      return getReg(Mips::AFGR64RegClassID, IntVal / 2);
     }
   }
 
   return -1;
 }
+
 void MipsAsmParser::setDefaultFpFormat() {
 
   if (isMips64() || isFP64())
@@ -828,6 +855,7 @@ bool MipsAsmParser::requestsDoubleOperand(StringRef Mnemonic){
 
   return IsDouble;
 }
+
 void MipsAsmParser::setFpFormat(StringRef Format) {
 
   FpFormat = StringSwitch<FpFormatTy>(Format.lower())
@@ -850,7 +878,7 @@ int MipsAsmParser::getATReg() {
   return Options.getATRegNum();
 }
 
-unsigned MipsAsmParser::getReg(int RC,int RegNo) {
+unsigned MipsAsmParser::getReg(int RC, int RegNo) {
   return *(getContext().getRegisterInfo().getRegClass(RC).begin() + RegNo);
 }
 
@@ -871,14 +899,12 @@ int MipsAsmParser::tryParseRegister(bool is64BitReg) {
     RegNum = matchRegisterName(lowerCase, is64BitReg);
   } else if (Tok.is(AsmToken::Integer))
     RegNum = matchRegisterByNumber(static_cast<unsigned>(Tok.getIntVal()),
-                                   is64BitReg ? Mips::CPU64RegsRegClassID
-                                              : Mips::CPURegsRegClassID);
+        is64BitReg ? Mips::CPU64RegsRegClassID : Mips::CPURegsRegClassID);
   return RegNum;
 }
 
-bool MipsAsmParser::
-  tryParseRegisterOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
-                          bool is64BitReg){
+bool MipsAsmParser::tryParseRegisterOperand(
+             SmallVectorImpl<MCParsedAsmOperand*> &Operands, bool is64BitReg) {
 
   SMLoc S = Parser.getTok().getLoc();
   int RegNo = -1;
@@ -888,7 +914,7 @@ bool MipsAsmParser::
     return true;
 
   Operands.push_back(MipsOperand::CreateReg(RegNo, S,
-      Parser.getTok().getLoc()));
+                                            Parser.getTok().getLoc()));
   Parser.Lex(); // Eat register token.
   return false;
 }
@@ -911,19 +937,19 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands,
     Error(Parser.getTok().getLoc(), "unexpected token in operand");
     return true;
   case AsmToken::Dollar: {
-    // parse register
+    // Parse the register.
     SMLoc S = Parser.getTok().getLoc();
     Parser.Lex(); // Eat dollar token.
-    // parse register operand
+    // Parse the register operand.
     if (!tryParseRegisterOperand(Operands, isMips64())) {
       if (getLexer().is(AsmToken::LParen)) {
-        // check if it is indexed addressing operand
+        // Check if it is an indexed addressing operand.
         Operands.push_back(MipsOperand::CreateToken("(", S));
-        Parser.Lex(); // eat parenthesis
+        Parser.Lex(); // Eat the parenthesis.
         if (getLexer().isNot(AsmToken::Dollar))
           return true;
 
-        Parser.Lex(); // eat dollar
+        Parser.Lex(); // Eat the dollar.
         if (tryParseRegisterOperand(Operands, isMips64()))
           return true;
 
@@ -936,7 +962,7 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands,
       }
       return false;
     }
-    // maybe it is a symbol reference
+    // Maybe it is a symbol reference.
     StringRef Identifier;
     if (Parser.parseIdentifier(Identifier))
       return true;
@@ -945,7 +971,7 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands,
 
     MCSymbol *Sym = getContext().GetOrCreateSymbol("$" + Identifier);
 
-    // Otherwise create a symbol ref.
+    // Otherwise create a symbol reference.
     const MCExpr *Res = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None,
                                                 getContext());
 
@@ -954,16 +980,16 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands,
   }
   case AsmToken::Identifier:
     // Look for the existing symbol, we should check if
-    // we need to assigne the propper RegisterKind
-   if (searchSymbolAlias(Operands,MipsOperand::Kind_None))
-     return false;
-    //else drop to expression parsing
+    // we need to assign the proper RegisterKind.
+    if (searchSymbolAlias(Operands, MipsOperand::Kind_None))
+      return false;
+    // Else drop to expression parsing.
   case AsmToken::LParen:
   case AsmToken::Minus:
   case AsmToken::Plus:
   case AsmToken::Integer:
   case AsmToken::String: {
-     // quoted label names
+    // Quoted label names.
     const MCExpr *IdVal;
     SMLoc S = Parser.getTok().getLoc();
     if (getParser().parseExpression(IdVal))
@@ -973,9 +999,9 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands,
     return false;
   }
   case AsmToken::Percent: {
-    // it is a symbol reference or constant expression
+    // It is a symbol reference or constant expression.
     const MCExpr *IdVal;
-    SMLoc S = Parser.getTok().getLoc(); // start location of the operand
+    SMLoc S = Parser.getTok().getLoc(); // Start location of the operand.
     if (parseRelocOperand(IdVal))
       return true;
 
@@ -988,131 +1014,200 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands,
   return true;
 }
 
-bool MipsAsmParser::parseRelocOperand(const MCExpr *&Res) {
+const MCExpr* MipsAsmParser::evaluateRelocExpr(const MCExpr *Expr,
+                                               StringRef RelocStr) {
+  const MCExpr *Res;
+  // Check the type of the expression.
+  if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Expr)) {
+    // It's a constant, evaluate lo or hi value.
+    if (RelocStr == "lo") {
+      short Val = MCE->getValue();
+      Res = MCConstantExpr::Create(Val, getContext());
+    } else if (RelocStr == "hi") {
+      int Val = MCE->getValue();
+      int LoSign = Val & 0x8000;
+      Val = (Val & 0xffff0000) >> 16;
+      // Lower part is treated as a signed int, so if it is negative
+      // we must add 1 to the hi part to compensate.
+      if (LoSign)
+        Val++;
+      Res = MCConstantExpr::Create(Val, getContext());
+    } else {
+      llvm_unreachable("Invalid RelocStr value");
+    }
+    return Res;
+  }
+
+  if (const MCSymbolRefExpr *MSRE = dyn_cast<MCSymbolRefExpr>(Expr)) {
+    // It's a symbol, create a symbolic expression from the symbol.
+    StringRef Symbol = MSRE->getSymbol().getName();
+    MCSymbolRefExpr::VariantKind VK = getVariantKind(RelocStr);
+    Res = MCSymbolRefExpr::Create(Symbol, VK, getContext());
+    return Res;
+  }
+
+  if (const MCBinaryExpr *BE = dyn_cast<MCBinaryExpr>(Expr)) {
+    const MCExpr *LExp = evaluateRelocExpr(BE->getLHS(), RelocStr);
+    const MCExpr *RExp = evaluateRelocExpr(BE->getRHS(), RelocStr);
+    Res = MCBinaryExpr::Create(BE->getOpcode(), LExp, RExp, getContext());
+    return Res;
+  }
 
-  Parser.Lex(); // eat % token
-  const AsmToken &Tok = Parser.getTok(); // get next token, operation
+  if (const MCUnaryExpr *UN = dyn_cast<MCUnaryExpr>(Expr)) {
+    const MCExpr *UnExp = evaluateRelocExpr(UN->getSubExpr(), RelocStr);
+    Res = MCUnaryExpr::Create(UN->getOpcode(), UnExp, getContext());
+    return Res;
+  }
+  // Just return the original expression.
+  return Expr;
+}
+
+bool MipsAsmParser::isEvaluated(const MCExpr *Expr) {
+
+  switch (Expr->getKind()) {
+  case MCExpr::Constant:
+    return true;
+  case MCExpr::SymbolRef:
+    return (cast<MCSymbolRefExpr>(Expr)->getKind() != MCSymbolRefExpr::VK_None);
+  case MCExpr::Binary:
+    if (const MCBinaryExpr *BE = dyn_cast<MCBinaryExpr>(Expr)) {
+      if (!isEvaluated(BE->getLHS()))
+        return false;
+      return isEvaluated(BE->getRHS());
+    }
+  case MCExpr::Unary:
+    return isEvaluated(cast<MCUnaryExpr>(Expr)->getSubExpr());
+  default:
+    return false;
+  }
+  return false;
+}
+
+bool MipsAsmParser::parseRelocOperand(const MCExpr *&Res) {
+  Parser.Lex(); // Eat the % token.
+  const AsmToken &Tok = Parser.getTok(); // Get next token, operation.
   if (Tok.isNot(AsmToken::Identifier))
     return true;
 
   std::string Str = Tok.getIdentifier().str();
 
-  Parser.Lex(); // eat identifier
-  // now make expression from the rest of the operand
+  Parser.Lex(); // Eat the identifier.
+  // Now make an expression from the rest of the operand.
   const MCExpr *IdVal;
   SMLoc EndLoc;
 
   if (getLexer().getKind() == AsmToken::LParen) {
     while (1) {
-      Parser.Lex(); // eat '(' token
+      Parser.Lex(); // Eat the '(' token.
       if (getLexer().getKind() == AsmToken::Percent) {
-        Parser.Lex(); // eat % token
+        Parser.Lex(); // Eat the % token.
         const AsmToken &nextTok = Parser.getTok();
         if (nextTok.isNot(AsmToken::Identifier))
           return true;
         Str += "(%";
         Str += nextTok.getIdentifier();
-        Parser.Lex(); // eat identifier
+        Parser.Lex(); // Eat the identifier.
         if (getLexer().getKind() != AsmToken::LParen)
           return true;
       } else
         break;
     }
-    if (getParser().parseParenExpression(IdVal,EndLoc))
+    if (getParser().parseParenExpression(IdVal, EndLoc))
       return true;
 
     while (getLexer().getKind() == AsmToken::RParen)
-      Parser.Lex(); // eat ')' token
+      Parser.Lex(); // Eat the ')' token.
 
   } else
-    return true; // parenthesis must follow reloc operand
-
-  // Check the type of the expression
-  if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(IdVal)) {
-    // It's a constant, evaluate lo or hi value
-    if (Str == "lo") {
-      short Val = MCE->getValue();
-      Res = MCConstantExpr::Create(Val, getContext());
-    } else if (Str == "hi") {
-      int Val = MCE->getValue();
-      int LoSign = Val & 0x8000;
-      Val = (Val & 0xffff0000) >> 16;
-      // Lower part is treated as a signed int, so if it is negative
-      // we must add 1 to the hi part to compensate
-      if (LoSign)
-        Val++;
-      Res = MCConstantExpr::Create(Val, getContext());
-    }
-    return false;
-  }
+    return true; // Parenthesis must follow the relocation operand.
 
-  if (const MCSymbolRefExpr *MSRE = dyn_cast<MCSymbolRefExpr>(IdVal)) {
-    // It's a symbol, create symbolic expression from symbol
-    StringRef Symbol = MSRE->getSymbol().getName();
-    MCSymbolRefExpr::VariantKind VK = getVariantKind(Str);
-    Res = MCSymbolRefExpr::Create(Symbol,VK,getContext());
-    return false;
-  }
-  return true;
+  Res = evaluateRelocExpr(IdVal, Str);
+  return false;
 }
 
 bool MipsAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                   SMLoc &EndLoc) {
-
   StartLoc = Parser.getTok().getLoc();
   RegNo = tryParseRegister(isMips64());
   EndLoc = Parser.getTok().getLoc();
-  return (RegNo == (unsigned)-1);
+  return (RegNo == (unsigned) -1);
 }
 
-bool MipsAsmParser::parseMemOffset(const MCExpr *&Res) {
-
+bool MipsAsmParser::parseMemOffset(const MCExpr *&Res, bool isParenExpr) {
   SMLoc S;
+  bool Result = true;
 
-  switch(getLexer().getKind()) {
+  while (getLexer().getKind() == AsmToken::LParen)
+    Parser.Lex();
+
+  switch (getLexer().getKind()) {
   default:
     return true;
   case AsmToken::Identifier:
+  case AsmToken::LParen:
   case AsmToken::Integer:
   case AsmToken::Minus:
   case AsmToken::Plus:
-    return (getParser().parseExpression(Res));
+    if (isParenExpr)
+      Result = getParser().parseParenExpression(Res, S);
+    else
+      Result = (getParser().parseExpression(Res));
+    while (getLexer().getKind() == AsmToken::RParen)
+      Parser.Lex();
+    break;
   case AsmToken::Percent:
-    return parseRelocOperand(Res);
-  case AsmToken::LParen:
-    return false;  // it's probably assuming 0
+    Result = parseRelocOperand(Res);
   }
-  return true;
+  return Result;
 }
 
 MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand(
-               SmallVectorImpl<MCParsedAsmOperand*>&Operands) {
+                               SmallVectorImpl<MCParsedAsmOperand*>&Operands) {
 
   const MCExpr *IdVal = 0;
   SMLoc S;
-  // first operand is the offset
+  bool isParenExpr = false;
+  // First operand is the offset.
   S = Parser.getTok().getLoc();
 
-  if (parseMemOffset(IdVal))
-    return MatchOperand_ParseFail;
+  if (getLexer().getKind() == AsmToken::LParen) {
+    Parser.Lex();
+    isParenExpr = true;
+  }
 
-  const AsmToken &Tok = Parser.getTok(); // get next token
-  if (Tok.isNot(AsmToken::LParen)) {
-    MipsOperand *Mnemonic = static_cast<MipsOperand*>(Operands[0]);
-    if (Mnemonic->getToken() == "la") {
-      SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() -1);
-      Operands.push_back(MipsOperand::CreateImm(IdVal, S, E));
-      return MatchOperand_Success;
+  if (getLexer().getKind() != AsmToken::Dollar) {
+    if (parseMemOffset(IdVal, isParenExpr))
+      return MatchOperand_ParseFail;
+
+    const AsmToken &Tok = Parser.getTok(); // Get the next token.
+    if (Tok.isNot(AsmToken::LParen)) {
+      MipsOperand *Mnemonic = static_cast<MipsOperand*>(Operands[0]);
+      if (Mnemonic->getToken() == "la") {
+        SMLoc E = SMLoc::getFromPointer(
+            Parser.getTok().getLoc().getPointer() - 1);
+        Operands.push_back(MipsOperand::CreateImm(IdVal, S, E));
+        return MatchOperand_Success;
+      }
+      if (Tok.is(AsmToken::EndOfStatement)) {
+        SMLoc E = SMLoc::getFromPointer(
+            Parser.getTok().getLoc().getPointer() - 1);
+
+        // Zero register assumed, add a memory operand with ZERO as its base.
+        Operands.push_back(MipsOperand::CreateMem(isMips64() ? Mips::ZERO_64
+                                                             : Mips::ZERO,
+                           IdVal, S, E));
+        return MatchOperand_Success;
+      }
+      Error(Parser.getTok().getLoc(), "'(' expected");
+      return MatchOperand_ParseFail;
     }
-    Error(Parser.getTok().getLoc(), "'(' expected");
-    return MatchOperand_ParseFail;
-  }
 
-  Parser.Lex(); // Eat '(' token.
+    Parser.Lex(); // Eat the '(' token.
+  }
 
-  const AsmToken &Tok1 = Parser.getTok(); // get next token
+  const AsmToken &Tok1 = Parser.getTok(); // Get next token.
   if (Tok1.is(AsmToken::Dollar)) {
-    Parser.Lex(); // Eat '$' token.
+    Parser.Lex(); // Eat the '$' token.
     if (tryParseRegisterOperand(Operands, isMips64())) {
       Error(Parser.getTok().getLoc(), "unexpected token in operand");
       return MatchOperand_ParseFail;
@@ -1123,7 +1218,7 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand(
     return MatchOperand_ParseFail;
   }
 
-  const AsmToken &Tok2 = Parser.getTok(); // get next token
+  const AsmToken &Tok2 = Parser.getTok(); // Get next token.
   if (Tok2.isNot(AsmToken::RParen)) {
     Error(Parser.getTok().getLoc(), "')' expected");
     return MatchOperand_ParseFail;
@@ -1131,17 +1226,26 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand(
 
   SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
 
-  Parser.Lex(); // Eat ')' token.
+  Parser.Lex(); // Eat the ')' token.
 
   if (IdVal == 0)
     IdVal = MCConstantExpr::Create(0, getContext());
 
-  // now replace register operand with the mem operand
+  // Replace the register operand with the memory operand.
   MipsOperand* op = static_cast<MipsOperand*>(Operands.back());
   int RegNo = op->getReg();
-  // remove register from operands
+  // Remove the register from the operands.
   Operands.pop_back();
-  // and add memory operand
+  // Add the memory operand.
+  if (const MCBinaryExpr *BE = dyn_cast<MCBinaryExpr>(IdVal)) {
+    int64_t Imm;
+    if (IdVal->EvaluateAsAbsolute(Imm))
+      IdVal = MCConstantExpr::Create(Imm, getContext());
+    else if (BE->getLHS()->getKind() != MCExpr::SymbolRef)
+      IdVal = MCBinaryExpr::Create(BE->getOpcode(), BE->getRHS(), BE->getLHS(),
+                                   getContext());
+  }
+
   Operands.push_back(MipsOperand::CreateMem(RegNo, IdVal, S, E));
   delete op;
   return MatchOperand_Success;
@@ -1153,17 +1257,17 @@ MipsAsmParser::parseCPU64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
   if (!isMips64())
     return MatchOperand_NoMatch;
   if (getLexer().getKind() == AsmToken::Identifier) {
-    if (searchSymbolAlias(Operands,MipsOperand::Kind_CPU64Regs))
+    if (searchSymbolAlias(Operands, MipsOperand::Kind_CPU64Regs))
       return MatchOperand_Success;
     return MatchOperand_NoMatch;
   }
-  // if the first token is not '$' we have an error
+  // If the first token is not '$', we have an error.
   if (Parser.getTok().isNot(AsmToken::Dollar))
     return MatchOperand_NoMatch;
 
   Parser.Lex(); // Eat $
-  if(!tryParseRegisterOperand(Operands, true)) {
-    // set the proper register kind
+  if (!tryParseRegisterOperand(Operands, true)) {
+    // Set the proper register kind.
     MipsOperand* op = static_cast<MipsOperand*>(Operands.back());
     op->setRegKind(MipsOperand::Kind_CPU64Regs);
     return MatchOperand_Success;
@@ -1171,9 +1275,8 @@ MipsAsmParser::parseCPU64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
   return MatchOperand_NoMatch;
 }
 
-bool MipsAsmParser::
-searchSymbolAlias(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
-                  unsigned RegisterKind) {
+bool MipsAsmParser::searchSymbolAlias(
+    SmallVectorImpl<MCParsedAsmOperand*> &Operands, unsigned RegisterKind) {
 
   MCSymbol *Sym = getContext().LookupSymbol(Parser.getTok().getIdentifier());
   if (Sym) {
@@ -1187,13 +1290,13 @@ searchSymbolAlias(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
       const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr*>(Expr);
       const StringRef DefSymbol = Ref->getSymbol().getName();
       if (DefSymbol.startswith("$")) {
-        // Lookup for the register with corresponding name
-        int RegNum = matchRegisterName(DefSymbol.substr(1),isMips64());
+        // Look up the register with the corresponding name.
+        int RegNum = matchRegisterName(DefSymbol.substr(1), isMips64());
         if (RegNum > -1) {
           Parser.Lex();
-          MipsOperand *op = MipsOperand::CreateReg(RegNum,S,
-                                         Parser.getTok().getLoc());
-          op->setRegKind((MipsOperand::RegisterKind)RegisterKind);
+          MipsOperand *op = MipsOperand::CreateReg(RegNum, S,
+                                                   Parser.getTok().getLoc());
+          op->setRegKind((MipsOperand::RegisterKind) RegisterKind);
           Operands.push_back(op);
           return true;
         }
@@ -1201,29 +1304,30 @@ searchSymbolAlias(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
     } else if (Expr->getKind() == MCExpr::Constant) {
       Parser.Lex();
       const MCConstantExpr *Const = static_cast<const MCConstantExpr*>(Expr);
-      MipsOperand *op = MipsOperand::CreateImm(Const,S,
-                                     Parser.getTok().getLoc());
+      MipsOperand *op = MipsOperand::CreateImm(Const, S,
+          Parser.getTok().getLoc());
       Operands.push_back(op);
       return true;
     }
   }
   return false;
 }
+
 MipsAsmParser::OperandMatchResultTy
 MipsAsmParser::parseCPURegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
 
   if (getLexer().getKind() == AsmToken::Identifier) {
-    if (searchSymbolAlias(Operands,MipsOperand::Kind_CPURegs))
+    if (searchSymbolAlias(Operands, MipsOperand::Kind_CPURegs))
       return MatchOperand_Success;
     return MatchOperand_NoMatch;
   }
-  // if the first token is not '$' we have an error
+  // If the first token is not '$' we have an error.
   if (Parser.getTok().isNot(AsmToken::Dollar))
     return MatchOperand_NoMatch;
 
   Parser.Lex(); // Eat $
-  if(!tryParseRegisterOperand(Operands, false)) {
-    // set the propper register kind
+  if (!tryParseRegisterOperand(Operands, false)) {
+    // Set the proper register kind.
     MipsOperand* op = static_cast<MipsOperand*>(Operands.back());
     op->setRegKind(MipsOperand::Kind_CPURegs);
     return MatchOperand_Success;
@@ -1237,87 +1341,88 @@ MipsAsmParser::parseHWRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
   if (isMips64())
     return MatchOperand_NoMatch;
 
-  // if the first token is not '$' we have error
+  // If the first token is not '$' we have an error.
   if (Parser.getTok().isNot(AsmToken::Dollar))
     return MatchOperand_NoMatch;
   SMLoc S = Parser.getTok().getLoc();
-  Parser.Lex(); // Eat $
+  Parser.Lex(); // Eat the '$'.
 
-  const AsmToken &Tok = Parser.getTok(); // get next token
+  const AsmToken &Tok = Parser.getTok(); // Get the next token.
   if (Tok.isNot(AsmToken::Integer))
     return MatchOperand_NoMatch;
 
   unsigned RegNum = Tok.getIntVal();
-  // at the moment only hwreg29 is supported
+  // At the moment only hwreg29 is supported.
   if (RegNum != 29)
     return MatchOperand_ParseFail;
 
   MipsOperand *op = MipsOperand::CreateReg(Mips::HWR29, S,
-        Parser.getTok().getLoc());
+      Parser.getTok().getLoc());
   op->setRegKind(MipsOperand::Kind_HWRegs);
   Operands.push_back(op);
 
-  Parser.Lex(); // Eat reg number
+  Parser.Lex(); // Eat the register number.
   return MatchOperand_Success;
 }
 
 MipsAsmParser::OperandMatchResultTy
-MipsAsmParser::parseHW64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+MipsAsmParser::parseHW64Regs(
+    SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
 
   if (!isMips64())
     return MatchOperand_NoMatch;
-    //if the first token is not '$' we have error
+  // If the first token is not '$' we have an error.
   if (Parser.getTok().isNot(AsmToken::Dollar))
     return MatchOperand_NoMatch;
   SMLoc S = Parser.getTok().getLoc();
   Parser.Lex(); // Eat $
 
-  const AsmToken &Tok = Parser.getTok(); // get next token
+  const AsmToken &Tok = Parser.getTok(); // Get the next token.
   if (Tok.isNot(AsmToken::Integer))
     return MatchOperand_NoMatch;
 
   unsigned RegNum = Tok.getIntVal();
-  // at the moment only hwreg29 is supported
+  // At the moment only hwreg29 is supported.
   if (RegNum != 29)
     return MatchOperand_ParseFail;
 
   MipsOperand *op = MipsOperand::CreateReg(Mips::HWR29_64, S,
-        Parser.getTok().getLoc());
+                                           Parser.getTok().getLoc());
   op->setRegKind(MipsOperand::Kind_HW64Regs);
   Operands.push_back(op);
 
-  Parser.Lex(); // Eat reg number
+  Parser.Lex(); // Eat the register number.
   return MatchOperand_Success;
 }
 
 MipsAsmParser::OperandMatchResultTy
 MipsAsmParser::parseCCRRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
   unsigned RegNum;
-  //if the first token is not '$' we have error
+  // If the first token is not '$' we have an error.
   if (Parser.getTok().isNot(AsmToken::Dollar))
     return MatchOperand_NoMatch;
   SMLoc S = Parser.getTok().getLoc();
-  Parser.Lex(); // Eat $
+  Parser.Lex(); // Eat the '$'.
 
-  const AsmToken &Tok = Parser.getTok(); // get next token
+  const AsmToken &Tok = Parser.getTok(); // Get next token.
   if (Tok.is(AsmToken::Integer)) {
     RegNum = Tok.getIntVal();
-    // at the moment only fcc0 is supported
+    // At the moment only fcc0 is supported.
     if (RegNum != 0)
       return MatchOperand_ParseFail;
   } else if (Tok.is(AsmToken::Identifier)) {
-    // at the moment only fcc0 is supported
+    // At the moment only fcc0 is supported.
     if (Tok.getIdentifier() != "fcc0")
       return MatchOperand_ParseFail;
   } else
     return MatchOperand_NoMatch;
 
   MipsOperand *op = MipsOperand::CreateReg(Mips::FCC0, S,
-        Parser.getTok().getLoc());
+                                           Parser.getTok().getLoc());
   op->setRegKind(MipsOperand::Kind_CCRRegs);
   Operands.push_back(op);
 
-  Parser.Lex(); // Eat reg number
+  Parser.Lex(); // Eat the register number.
   return MatchOperand_Success;
 }
 
@@ -1349,23 +1454,23 @@ MCSymbolRefExpr::VariantKind MipsAsmParser::getVariantKind(StringRef Symbol) {
 
 static int ConvertCcString(StringRef CondString) {
   int CC = StringSwitch<unsigned>(CondString)
-      .Case(".f",    0)
-      .Case(".un",   1)
-      .Case(".eq",   2)
-      .Case(".ueq",  3)
-      .Case(".olt",  4)
-      .Case(".ult",  5)
-      .Case(".ole",  6)
-      .Case(".ule",  7)
-      .Case(".sf",   8)
-      .Case(".ngle", 9)
-      .Case(".seq",  10)
-      .Case(".ngl",  11)
-      .Case(".lt",   12)
-      .Case(".nge",  13)
-      .Case(".le",   14)
-      .Case(".ngt",  15)
-      .Default(-1);
+    .Case(".f",    0)
+    .Case(".un",   1)
+    .Case(".eq",   2)
+    .Case(".ueq",  3)
+    .Case(".olt",  4)
+    .Case(".ult",  5)
+    .Case(".ole",  6)
+    .Case(".ule",  7)
+    .Case(".sf",   8)
+    .Case(".ngle", 9)
+    .Case(".seq",  10)
+    .Case(".ngl",  11)
+    .Case(".lt",   12)
+    .Case(".nge",  13)
+    .Case(".le",   14)
+    .Case(".ngt",  15)
+    .Default(-1);
 
   return CC;
 }
@@ -1373,16 +1478,16 @@ static int ConvertCcString(StringRef CondString) {
 bool MipsAsmParser::
 parseMathOperation(StringRef Name, SMLoc NameLoc,
                    SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
-  // split the format
+  // Split the format.
   size_t Start = Name.find('.'), Next = Name.rfind('.');
   StringRef Format1 = Name.slice(Start, Next);
-  // and add the first format to the operands
+  // Add the first format to the operands.
   Operands.push_back(MipsOperand::CreateToken(Format1, NameLoc));
-  // now for the second format
+  // Now for the second format.
   StringRef Format2 = Name.slice(Next, StringRef::npos);
   Operands.push_back(MipsOperand::CreateToken(Format2, NameLoc));
 
-  // set the format for the first register
+  // Set the format for the first register.
   setFpFormat(Format1);
 
   // Read the remaining operands.
@@ -1398,11 +1503,10 @@ parseMathOperation(StringRef Name, SMLoc NameLoc,
       SMLoc Loc = getLexer().getLoc();
       Parser.eatToEndOfStatement();
       return Error(Loc, "unexpected token in argument list");
-
     }
-    Parser.Lex();  // Eat the comma.
+    Parser.Lex(); // Eat the comma.
 
-    //set the format for the first register
+    // Set the second format for the registers that follow.
     setFpFormat(Format2);
 
     // Parse and remember the operand.
@@ -1419,7 +1523,7 @@ parseMathOperation(StringRef Name, SMLoc NameLoc,
     return Error(Loc, "unexpected token in argument list");
   }
 
-  Parser.Lex(); // Consume the EndOfStatement
+  Parser.Lex(); // Consume the EndOfStatement.
   return false;
 }
 
@@ -1427,13 +1531,12 @@ bool MipsAsmParser::
 ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
                  SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
   StringRef Mnemonic;
-  // floating point instructions: should register be treated as double?
+  // Floating point instructions: Should the register be treated as a double?
   if (requestsDoubleOperand(Name)) {
     setFpFormat(FP_FORMAT_D);
-  Operands.push_back(MipsOperand::CreateToken(Name, NameLoc));
-  Mnemonic = Name;
-  }
-  else {
+    Operands.push_back(MipsOperand::CreateToken(Name, NameLoc));
+    Mnemonic = Name;
+  } else {
     setDefaultFpFormat();
     // Create the leading tokens for the mnemonic, split by '.' characters.
     size_t Start = 0, Next = Name.find('.');
@@ -1442,30 +1545,30 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
     Operands.push_back(MipsOperand::CreateToken(Mnemonic, NameLoc));
 
     if (Next != StringRef::npos) {
-      // there is a format token in mnemonic
-      // StringRef Rest = Name.slice(Next, StringRef::npos);
-      size_t Dot = Name.find('.', Next+1);
+      // There is a format token in mnemonic.
+      size_t Dot = Name.find('.', Next + 1);
       StringRef Format = Name.slice(Next, Dot);
-      if (Dot == StringRef::npos) //only one '.' in a string, it's a format
+      if (Dot == StringRef::npos) // Only one '.' in a string, it's a format.
         Operands.push_back(MipsOperand::CreateToken(Format, NameLoc));
       else {
-        if (Name.startswith("c.")){
-          // floating point compare, add '.' and immediate represent for cc
+        if (Name.startswith("c.")) {
+          // Floating point compare, add '.' and immediate represent for cc.
           Operands.push_back(MipsOperand::CreateToken(".", NameLoc));
           int Cc = ConvertCcString(Format);
           if (Cc == -1) {
             return Error(NameLoc, "Invalid conditional code");
           }
           SMLoc E = SMLoc::getFromPointer(
-              Parser.getTok().getLoc().getPointer() -1 );
-          Operands.push_back(MipsOperand::CreateImm(
-              MCConstantExpr::Create(Cc, getContext()), NameLoc, E));
+              Parser.getTok().getLoc().getPointer() - 1);
+          Operands.push_back(
+              MipsOperand::CreateImm(MCConstantExpr::Create(Cc, getContext()),
+                                     NameLoc, E));
         } else {
           // trunc, ceil, floor ...
           return parseMathOperation(Name, NameLoc, Operands);
         }
 
-        // the rest is a format
+        // The rest is a format.
         Format = Name.slice(Dot, StringRef::npos);
         Operands.push_back(MipsOperand::CreateToken(Format, NameLoc));
       }
@@ -1483,8 +1586,8 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
       return Error(Loc, "unexpected token in argument list");
     }
 
-    while (getLexer().is(AsmToken::Comma) ) {
-      Parser.Lex();  // Eat the comma.
+    while (getLexer().is(AsmToken::Comma)) {
+      Parser.Lex(); // Eat the comma.
 
       // Parse and remember the operand.
       if (ParseOperand(Operands, Name)) {
@@ -1501,48 +1604,47 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
     return Error(Loc, "unexpected token in argument list");
   }
 
-  Parser.Lex(); // Consume the EndOfStatement
+  Parser.Lex(); // Consume the EndOfStatement.
   return false;
 }
 
 bool MipsAsmParser::reportParseError(StringRef ErrorMsg) {
-   SMLoc Loc = getLexer().getLoc();
-   Parser.eatToEndOfStatement();
-   return Error(Loc, ErrorMsg);
+  SMLoc Loc = getLexer().getLoc();
+  Parser.eatToEndOfStatement();
+  return Error(Loc, ErrorMsg);
 }
 
 bool MipsAsmParser::parseSetNoAtDirective() {
-  // Line should look like:
-  //  .set noat
-  // set at reg to 0
+  // Line should look like: ".set noat".
+  // Set the AT register to 0.
   Options.setATReg(0);
   // eat noat
   Parser.Lex();
-  // If this is not the end of the statement, report error
+  // If this is not the end of the statement, report an error.
   if (getLexer().isNot(AsmToken::EndOfStatement)) {
     reportParseError("unexpected token in statement");
     return false;
   }
-  Parser.Lex(); // Consume the EndOfStatement
+  Parser.Lex(); // Consume the EndOfStatement.
   return false;
 }
+
 bool MipsAsmParser::parseSetAtDirective() {
-  // line can be
-  //  .set at - defaults to $1
+  // Line can be .set at - defaults to $1
   // or .set at=$reg
   int AtRegNo;
   getParser().Lex();
   if (getLexer().is(AsmToken::EndOfStatement)) {
     Options.setATReg(1);
-    Parser.Lex(); // Consume the EndOfStatement
+    Parser.Lex(); // Consume the EndOfStatement.
     return false;
   } else if (getLexer().is(AsmToken::Equal)) {
-    getParser().Lex(); // eat '='
+    getParser().Lex(); // Eat the '='.
     if (getLexer().isNot(AsmToken::Dollar)) {
       reportParseError("unexpected token in statement");
       return false;
     }
-    Parser.Lex(); // Eat '$'
+    Parser.Lex(); // Eat the '$'.
     const AsmToken &Reg = Parser.getTok();
     if (Reg.is(AsmToken::Identifier)) {
       AtRegNo = matchCPURegisterName(Reg.getIdentifier());
@@ -1553,7 +1655,7 @@ bool MipsAsmParser::parseSetAtDirective() {
       return false;
     }
 
-    if ( AtRegNo < 1 || AtRegNo > 31) {
+    if (AtRegNo < 1 || AtRegNo > 31) {
       reportParseError("unexpected token in statement");
       return false;
     }
@@ -1562,13 +1664,13 @@ bool MipsAsmParser::parseSetAtDirective() {
       reportParseError("unexpected token in statement");
       return false;
     }
-    getParser().Lex(); // Eat reg
+    getParser().Lex(); // Eat the register.
 
     if (getLexer().isNot(AsmToken::EndOfStatement)) {
       reportParseError("unexpected token in statement");
       return false;
-     }
-    Parser.Lex(); // Consume the EndOfStatement
+    }
+    Parser.Lex(); // Consume the EndOfStatement.
     return false;
   } else {
     reportParseError("unexpected token in statement");
@@ -1578,43 +1680,43 @@ bool MipsAsmParser::parseSetAtDirective() {
 
 bool MipsAsmParser::parseSetReorderDirective() {
   Parser.Lex();
-  // If this is not the end of the statement, report error
+  // If this is not the end of the statement, report an error.
   if (getLexer().isNot(AsmToken::EndOfStatement)) {
     reportParseError("unexpected token in statement");
     return false;
   }
   Options.setReorder();
-  Parser.Lex(); // Consume the EndOfStatement
+  Parser.Lex(); // Consume the EndOfStatement.
   return false;
 }
 
 bool MipsAsmParser::parseSetNoReorderDirective() {
-    Parser.Lex();
-    // if this is not the end of the statement, report error
-    if (getLexer().isNot(AsmToken::EndOfStatement)) {
-      reportParseError("unexpected token in statement");
-      return false;
-    }
-    Options.setNoreorder();
-    Parser.Lex(); // Consume the EndOfStatement
+  Parser.Lex();
+  // If this is not the end of the statement, report an error.
+  if (getLexer().isNot(AsmToken::EndOfStatement)) {
+    reportParseError("unexpected token in statement");
     return false;
+  }
+  Options.setNoreorder();
+  Parser.Lex(); // Consume the EndOfStatement.
+  return false;
 }
 
 bool MipsAsmParser::parseSetMacroDirective() {
   Parser.Lex();
-  // if this is not the end of the statement, report error
+  // If this is not the end of the statement, report an error.
   if (getLexer().isNot(AsmToken::EndOfStatement)) {
     reportParseError("unexpected token in statement");
     return false;
   }
   Options.setMacro();
-  Parser.Lex(); // Consume the EndOfStatement
+  Parser.Lex(); // Consume the EndOfStatement.
   return false;
 }
 
 bool MipsAsmParser::parseSetNoMacroDirective() {
   Parser.Lex();
-  // if this is not the end of the statement, report error
+  // If this is not the end of the statement, report an error.
   if (getLexer().isNot(AsmToken::EndOfStatement)) {
     reportParseError("`noreorder' must be set before `nomacro'");
     return false;
@@ -1624,7 +1726,7 @@ bool MipsAsmParser::parseSetNoMacroDirective() {
     return false;
   }
   Options.setNomacro();
-  Parser.Lex(); // Consume the EndOfStatement
+  Parser.Lex(); // Consume the EndOfStatement.
   return false;
 }
 
@@ -1637,24 +1739,24 @@ bool MipsAsmParser::parseSetAssignment() {
 
   if (getLexer().isNot(AsmToken::Comma))
     return reportParseError("unexpected token in .set directive");
-  Lex(); //eat comma
+  Lex(); // Eat the comma.
 
   if (Parser.parseExpression(Value))
     reportParseError("expected valid expression after comma");
 
-  // check if the Name already exists as a symbol
+  // Check if the Name already exists as a symbol.
   MCSymbol *Sym = getContext().LookupSymbol(Name);
-  if (Sym) {
+  if (Sym)
     return reportParseError("symbol already defined");
-  }
   Sym = getContext().GetOrCreateSymbol(Name);
   Sym->setVariableValue(Value);
 
   return false;
 }
+
 bool MipsAsmParser::parseDirectiveSet() {
 
-  // get next token
+  // Get the next token.
   const AsmToken &Tok = Parser.getTok();
 
   if (Tok.getString() == "noat") {
@@ -1670,15 +1772,15 @@ bool MipsAsmParser::parseDirectiveSet() {
   } else if (Tok.getString() == "nomacro") {
     return parseSetNoMacroDirective();
   } else if (Tok.getString() == "nomips16") {
-    // ignore this directive for now
+    // Ignore this directive for now.
     Parser.eatToEndOfStatement();
     return false;
   } else if (Tok.getString() == "nomicromips") {
-    // ignore this directive for now
+    // Ignore this directive for now.
     Parser.eatToEndOfStatement();
     return false;
   } else {
-    // it is just an identifier, look for assignment
+    // It is just an identifier, look for an assignment.
     parseSetAssignment();
     return false;
   }
@@ -1715,20 +1817,20 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) {
 
   StringRef IDVal = DirectiveID.getString();
 
-  if ( IDVal == ".ent") {
-    // ignore this directive for now
+  if (IDVal == ".ent") {
+    // Ignore this directive for now.
     Parser.Lex();
     return false;
   }
 
   if (IDVal == ".end") {
-    // ignore this directive for now
+    // Ignore this directive for now.
     Parser.Lex();
     return false;
   }
 
   if (IDVal == ".frame") {
-    // ignore this directive for now
+    // Ignore this directive for now.
     Parser.eatToEndOfStatement();
     return false;
   }
@@ -1738,19 +1840,19 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) {
   }
 
   if (IDVal == ".fmask") {
-    // ignore this directive for now
+    // Ignore this directive for now.
     Parser.eatToEndOfStatement();
     return false;
   }
 
   if (IDVal == ".mask") {
-    // ignore this directive for now
+    // Ignore this directive for now.
     Parser.eatToEndOfStatement();
     return false;
   }
 
   if (IDVal == ".gpword") {
-    // ignore this directive for now
+    // Ignore this directive for now.
     Parser.eatToEndOfStatement();
     return false;
   }
diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt
index cf8bb18..78a9f70 100644
--- a/lib/Target/Mips/CMakeLists.txt
+++ b/lib/Target/Mips/CMakeLists.txt
@@ -32,6 +32,8 @@ add_llvm_target(MipsCodeGen
   MipsLongBranch.cpp
   MipsMCInstLower.cpp
   MipsMachineFunction.cpp
+  MipsModuleISelDAGToDAG.cpp
+  MipsOs16.cpp
   MipsRegisterInfo.cpp
   MipsSEFrameLowering.cpp
   MipsSEInstrInfo.cpp
diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
index 59e49d8..0dba33a 100644
--- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
+++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
@@ -143,6 +143,16 @@ static DecodeStatus DecodeACRegsDSPRegisterClass(MCInst &Inst,
                                                  uint64_t Address,
                                                  const void *Decoder);
 
+static DecodeStatus DecodeHIRegsDSPRegisterClass(MCInst &Inst,
+                                                 unsigned RegNo,
+                                                 uint64_t Address,
+                                                 const void *Decoder);
+
+static DecodeStatus DecodeLORegsDSPRegisterClass(MCInst &Inst,
+                                                 unsigned RegNo,
+                                                 uint64_t Address,
+                                                 const void *Decoder);
+
 static DecodeStatus DecodeBranchTarget(MCInst &Inst,
                                        unsigned Offset,
                                        uint64_t Address,
@@ -496,6 +506,30 @@ static DecodeStatus DecodeACRegsDSPRegisterClass(MCInst &Inst,
   return MCDisassembler::Success;
 }
 
+/// Append the HIRegsDSP register that getReg() yields for RegNo to Inst.
+/// Register numbers of 4 and above are rejected with Fail.
+static DecodeStatus DecodeHIRegsDSPRegisterClass(MCInst &Inst, unsigned RegNo,
+                                                 uint64_t Address,
+                                                 const void *Decoder) {
+  if (RegNo > 3)
+    return MCDisassembler::Fail;
+  const unsigned HiReg = getReg(Decoder, Mips::HIRegsDSPRegClassID, RegNo);
+  Inst.addOperand(MCOperand::CreateReg(HiReg));
+  return MCDisassembler::Success;
+}
+
+/// Append the LORegsDSP register that getReg() yields for RegNo to Inst.
+/// Register numbers of 4 and above are rejected with Fail.
+static DecodeStatus DecodeLORegsDSPRegisterClass(MCInst &Inst, unsigned RegNo,
+                                                 uint64_t Address,
+                                                 const void *Decoder) {
+  if (RegNo > 3)
+    return MCDisassembler::Fail;
+  const unsigned LoReg = getReg(Decoder, Mips::LORegsDSPRegClassID, RegNo);
+  Inst.addOperand(MCOperand::CreateReg(LoReg));
+  return MCDisassembler::Success;
+}
+
 static DecodeStatus DecodeBranchTarget(MCInst &Inst,
                                        unsigned Offset,
                                        uint64_t Address,
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index e198a7c..9460731 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -27,6 +27,9 @@
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/Support/raw_ostream.h"
 
+#define GET_INSTRMAP_INFO
+#include "MipsGenInstrInfo.inc"
+
 using namespace llvm;
 
 namespace {
@@ -35,12 +38,13 @@ class MipsMCCodeEmitter : public MCCodeEmitter {
   void operator=(const MipsMCCodeEmitter &) LLVM_DELETED_FUNCTION;
   const MCInstrInfo &MCII;
   MCContext &Ctx;
+  const MCSubtargetInfo &STI;
   bool IsLittleEndian;
 
 public:
   MipsMCCodeEmitter(const MCInstrInfo &mcii, MCContext &Ctx_,
                     const MCSubtargetInfo &sti, bool IsLittle) :
-    MCII(mcii), Ctx(Ctx_), IsLittleEndian(IsLittle) {}
+    MCII(mcii), Ctx(Ctx_), STI (sti), IsLittleEndian(IsLittle) {}
 
   ~MipsMCCodeEmitter() {}
 
@@ -88,6 +92,9 @@ public:
   unsigned getSizeInsEncoding(const MCInst &MI, unsigned OpNo,
                               SmallVectorImpl<MCFixup> &Fixups) const;
 
+  unsigned
+  getExprOpValue(const MCExpr *Expr,SmallVectorImpl<MCFixup> &Fixups) const;
+
 }; // class MipsMCCodeEmitter
 }  // namespace
 
@@ -141,6 +148,15 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
   if ((Opcode != Mips::NOP) && (Opcode != Mips::SLL) && !Binary)
     llvm_unreachable("unimplemented opcode in EncodeInstruction()");
 
+  if (STI.getFeatureBits() & Mips::FeatureMicroMips) {
+    int NewOpcode = Mips::Std2MicroMips (Opcode, Mips::Arch_micromips);
+    if (NewOpcode != -1) {
+      Opcode = NewOpcode;
+      TmpInst.setOpcode (NewOpcode);
+      Binary = getBinaryCodeForInstr(TmpInst, Fixups);
+    }
+  }
+
   const MCInstrDesc &Desc = MCII.get(TmpInst.getOpcode());
 
   // Get byte count of instruction
@@ -192,35 +208,24 @@ getJumpTargetOpValue(const MCInst &MI, unsigned OpNo,
   return 0;
 }
 
-/// getMachineOpValue - Return binary encoding of operand. If the machine
-/// operand requires relocation, record the relocation and return zero.
 unsigned MipsMCCodeEmitter::
-getMachineOpValue(const MCInst &MI, const MCOperand &MO,
-                  SmallVectorImpl<MCFixup> &Fixups) const {
-  if (MO.isReg()) {
-    unsigned Reg = MO.getReg();
-    unsigned RegNo = Ctx.getRegisterInfo().getEncodingValue(Reg);
-    return RegNo;
-  } else if (MO.isImm()) {
-    return static_cast<unsigned>(MO.getImm());
-  } else if (MO.isFPImm()) {
-    return static_cast<unsigned>(APFloat(MO.getFPImm())
-        .bitcastToAPInt().getHiBits(32).getLimitedValue());
-  }
+getExprOpValue(const MCExpr *Expr,SmallVectorImpl<MCFixup> &Fixups) const {
+  int64_t Res;
 
-  // MO must be an Expr.
-  assert(MO.isExpr());
+  if (Expr->EvaluateAsAbsolute(Res))
+    return Res;
 
-  const MCExpr *Expr = MO.getExpr();
   MCExpr::ExprKind Kind = Expr->getKind();
+  if (Kind == MCExpr::Constant) {
+    return cast<MCConstantExpr>(Expr)->getValue();
+  }
 
   if (Kind == MCExpr::Binary) {
-    Expr = static_cast<const MCBinaryExpr*>(Expr)->getLHS();
-    Kind = Expr->getKind();
+    unsigned Res = getExprOpValue(cast<MCBinaryExpr>(Expr)->getLHS(), Fixups);
+    Res += getExprOpValue(cast<MCBinaryExpr>(Expr)->getRHS(), Fixups);
+    return Res;
   }
-
-  assert (Kind == MCExpr::SymbolRef);
-
+  if (Kind == MCExpr::SymbolRef) {
   Mips::Fixups FixupKind = Mips::Fixups(0);
 
   switch(cast<MCSymbolRefExpr>(Expr)->getKind()) {
@@ -300,12 +305,32 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
     break;
   } // switch
 
-  Fixups.push_back(MCFixup::Create(0, MO.getExpr(), MCFixupKind(FixupKind)));
-
-  // All of the information is in the fixup.
+    Fixups.push_back(MCFixup::Create(0, Expr, MCFixupKind(FixupKind)));
+    return 0;
+  }
   return 0;
 }
 
+/// getMachineOpValue - Return the binary encoding of operand MO. Register,
+/// immediate and FP-immediate operands are encoded directly; any other
+/// operand must be an expression and is handed to getExprOpValue, which
+/// may record a fixup.
+unsigned MipsMCCodeEmitter::
+getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+                  SmallVectorImpl<MCFixup> &Fixups) const {
+  if (MO.isReg())
+    return Ctx.getRegisterInfo().getEncodingValue(MO.getReg());
+  if (MO.isImm())
+    return static_cast<unsigned>(MO.getImm());
+  if (MO.isFPImm())
+    return static_cast<unsigned>(APFloat(MO.getFPImm())
+        .bitcastToAPInt().getHiBits(32).getLimitedValue());
+
+  // Only expression operands remain at this point.
+  assert(MO.isExpr());
+  return getExprOpValue(MO.getExpr(), Fixups);
+}
+
 /// getMemEncoding - Return binary encoding of memory related operand.
 /// If the offset operand requires relocation, record the relocation.
 unsigned
diff --git a/lib/Target/Mips/MicroMipsInstrFormats.td b/lib/Target/Mips/MicroMipsInstrFormats.td
new file mode 100644
index 0000000..665b4d2
--- /dev/null
+++ b/lib/Target/Mips/MicroMipsInstrFormats.td
@@ -0,0 +1,112 @@
+class MMArch {
+  string Arch = "micromips";  // Presumably read by MMRel -- confirm.
+  list<dag> Pattern = [];     // Empty pattern list.
+}
+
+class ADD_FM_MM<bits<6> op, bits<10> funct> : MMArch {
+  bits<5> rt;
+  bits<5> rs;
+  bits<5> rd;
+
+  bits<32> Inst;
+  // High to low: op | rt | rs | rd | 0 | funct.
+  let Inst{31-26} = op;
+  let Inst{25-21} = rt;
+  let Inst{20-16} = rs;
+  let Inst{15-11} = rd;
+  let Inst{10}    = 0;
+  let Inst{9-0}   = funct;
+}
+
+class ADDI_FM_MM<bits<6> op> : MMArch {
+  bits<5>  rs;
+  bits<5>  rt;
+  bits<16> imm16;
+
+  bits<32> Inst;
+  // High to low: op | rt | rs | imm16.
+  let Inst{31-26} = op;
+  let Inst{25-21} = rt;
+  let Inst{20-16} = rs;
+  let Inst{15-0}  = imm16;
+}
+
+class SLTI_FM_MM<bits<6> op> : MMArch {
+  bits<5> rt;
+  bits<5> rs;
+  bits<16> imm16;
+
+  bits<32> Inst;
+  // High to low: op | rs | rt | imm16 (rs occupies the higher field here).
+  let Inst{31-26} = op;
+  let Inst{25-21} = rs;
+  let Inst{20-16} = rt;
+  let Inst{15-0}  = imm16;
+}
+
+class LUI_FM_MM : MMArch {
+  bits<5> rt;
+  bits<16> imm16;
+
+  bits<32> Inst;
+  // High to low: 0x10 | 0xd | rt | imm16 (the two top fields are constants).
+  let Inst{31-26} = 0x10;
+  let Inst{25-21} = 0xd;
+  let Inst{20-16} = rt;
+  let Inst{15-0}  = imm16;
+}
+
+class MULT_FM_MM<bits<10> funct> : MMArch {
+  bits<5>  rs;
+  bits<5>  rt;
+
+  bits<32> Inst;
+  // High to low: 0 | rt | rs | funct | 0x3c (funct sits in bits 15-6).
+  let Inst{31-26} = 0x00;
+  let Inst{25-21} = rt;
+  let Inst{20-16} = rs;
+  let Inst{15-6}  = funct;
+  let Inst{5-0}   = 0x3c;
+}
+
+class SRA_FM_MM<bits<10> funct, bit rotate> : MMArch {
+  bits<5> rd;
+  bits<5> rt;
+  bits<5> shamt;
+
+  bits<32> Inst;
+  // High to low: 0 | rd | rt | shamt | rotate | funct.
+  let Inst{31-26} = 0;
+  let Inst{25-21} = rd;
+  let Inst{20-16} = rt;
+  let Inst{15-11} = shamt;
+  let Inst{10}    = rotate;
+  let Inst{9-0}   = funct;
+}
+
+class SRLV_FM_MM<bits<10> funct, bit rotate> : MMArch {
+  bits<5> rd;
+  bits<5> rt;
+  bits<5> rs;
+
+  bits<32> Inst;
+  // High to low: 0 | rt | rs | rd | rotate | funct.
+  let Inst{31-26} = 0;
+  let Inst{25-21} = rt;
+  let Inst{20-16} = rs;
+  let Inst{15-11} = rd;
+  let Inst{10}    = rotate;
+  let Inst{9-0}   = funct;
+}
+
+class LW_FM_MM<bits<6> op> : MMArch {
+  bits<5> rt;
+  bits<21> addr;
+
+  bits<32> Inst;
+  // High to low: op | rt | addr{20-16} | addr{15-0}.
+  let Inst{31-26} = op;
+  let Inst{25-21} = rt;
+  let Inst{20-16} = addr{20-16};
+  let Inst{15-0}  = addr{15-0};
+}
diff --git a/lib/Target/Mips/MicroMipsInstrInfo.td b/lib/Target/Mips/MicroMipsInstrInfo.td
new file mode 100644
index 0000000..74cdccd
--- /dev/null
+++ b/lib/Target/Mips/MicroMipsInstrInfo.td
@@ -0,0 +1,67 @@
+let isCodeGenOnly = 1 in {  // Every _MM def below is marked isCodeGenOnly.
+  /// Arithmetic Instructions (ALU Immediate)
+  def ADDiu_MM : MMRel, ArithLogicI<"addiu", simm16, CPURegsOpnd>,
+                 ADDI_FM_MM<0xc>;
+  def ADDi_MM  : MMRel, ArithLogicI<"addi", simm16, CPURegsOpnd>,
+                 ADDI_FM_MM<0x4>;
+  def SLTi_MM  : MMRel, SetCC_I<"slti", setlt, simm16, immSExt16, CPURegs>,
+                 SLTI_FM_MM<0x24>;
+  def SLTiu_MM : MMRel, SetCC_I<"sltiu", setult, simm16, immSExt16, CPURegs>,
+                 SLTI_FM_MM<0x2c>;
+  def ANDi_MM  : MMRel, ArithLogicI<"andi", uimm16, CPURegsOpnd, immZExt16, and>,
+                 ADDI_FM_MM<0x34>;
+  def ORi_MM   : MMRel, ArithLogicI<"ori", uimm16, CPURegsOpnd, immZExt16, or>,
+                 ADDI_FM_MM<0x14>;
+  def XORi_MM  : MMRel, ArithLogicI<"xori", uimm16, CPURegsOpnd, immZExt16, xor>,
+                 ADDI_FM_MM<0x1c>;
+  def LUi_MM   : MMRel, LoadUpper<"lui", CPURegs, uimm16>, LUI_FM_MM;
+
+  /// Arithmetic Instructions (3-Operand, R-Type)
+  def ADDu_MM  : MMRel, ArithLogicR<"addu", CPURegsOpnd>, ADD_FM_MM<0, 0x150>;
+  def SUBu_MM  : MMRel, ArithLogicR<"subu", CPURegsOpnd>, ADD_FM_MM<0, 0x1d0>;
+  def MUL_MM   : MMRel, ArithLogicR<"mul", CPURegsOpnd>, ADD_FM_MM<0, 0x210>;
+  def ADD_MM   : MMRel, ArithLogicR<"add", CPURegsOpnd>, ADD_FM_MM<0, 0x110>;
+  def SUB_MM   : MMRel, ArithLogicR<"sub", CPURegsOpnd>, ADD_FM_MM<0, 0x190>;
+  def SLT_MM   : MMRel, SetCC_R<"slt", setlt, CPURegs>, ADD_FM_MM<0, 0x350>;
+  def SLTu_MM  : MMRel, SetCC_R<"sltu", setult, CPURegs>,
+                 ADD_FM_MM<0, 0x390>;
+  def AND_MM   : MMRel, ArithLogicR<"and", CPURegsOpnd, 1, IIAlu, and>,
+                 ADD_FM_MM<0, 0x250>;
+  def OR_MM    : MMRel, ArithLogicR<"or", CPURegsOpnd, 1, IIAlu, or>,
+                 ADD_FM_MM<0, 0x290>;
+  def XOR_MM   : MMRel, ArithLogicR<"xor", CPURegsOpnd, 1, IIAlu, xor>,
+                 ADD_FM_MM<0, 0x310>;
+  def NOR_MM   : MMRel, LogicNOR<"nor", CPURegsOpnd>, ADD_FM_MM<0, 0x2d0>;
+  def MULT_MM  : MMRel, Mult<"mult", IIImul, CPURegsOpnd, [HI, LO]>,
+                 MULT_FM_MM<0x22c>;
+  def MULTu_MM : MMRel, Mult<"multu", IIImul, CPURegsOpnd, [HI, LO]>,
+                 MULT_FM_MM<0x26c>;
+
+  /// Shift Instructions
+  def SLL_MM   : MMRel, shift_rotate_imm<"sll", shamt, CPURegsOpnd>,
+                 SRA_FM_MM<0, 0>;
+  def SRL_MM   : MMRel, shift_rotate_imm<"srl", shamt, CPURegsOpnd>,
+                 SRA_FM_MM<0x40, 0>;
+  def SRA_MM   : MMRel, shift_rotate_imm<"sra", shamt, CPURegsOpnd>,
+                 SRA_FM_MM<0x80, 0>;
+  def SLLV_MM  : MMRel, shift_rotate_reg<"sllv", CPURegsOpnd>,
+                 SRLV_FM_MM<0x10, 0>;
+  def SRLV_MM  : MMRel, shift_rotate_reg<"srlv", CPURegsOpnd>,
+                 SRLV_FM_MM<0x50, 0>;
+  def SRAV_MM  : MMRel, shift_rotate_reg<"srav", CPURegsOpnd>,
+                 SRLV_FM_MM<0x90, 0>;
+  def ROTR_MM  : MMRel, shift_rotate_imm<"rotr", shamt, CPURegsOpnd>,
+                 SRA_FM_MM<0xc0, 0>;
+  def ROTRV_MM : MMRel, shift_rotate_reg<"rotrv", CPURegsOpnd>,
+                 SRLV_FM_MM<0xd0, 0>;
+
+  /// Load and Store Instructions - aligned
+  defm LB_MM  : LoadM<"lb", CPURegs, sextloadi8>, MMRel, LW_FM_MM<0x7>;
+  defm LBu_MM : LoadM<"lbu", CPURegs, zextloadi8>, MMRel, LW_FM_MM<0x5>;
+  defm LH_MM  : LoadM<"lh", CPURegs, sextloadi16>, MMRel, LW_FM_MM<0xf>;
+  defm LHu_MM : LoadM<"lhu", CPURegs, zextloadi16>, MMRel, LW_FM_MM<0xd>;
+  defm LW_MM  : LoadM<"lw", CPURegs>, MMRel, LW_FM_MM<0x3f>;
+  defm SB_MM  : StoreM<"sb", CPURegs, truncstorei8>, MMRel, LW_FM_MM<0x6>;
+  defm SH_MM  : StoreM<"sh", CPURegs, truncstorei16>, MMRel, LW_FM_MM<0xe>;
+  defm SW_MM  : StoreM<"sw", CPURegs>, MMRel, LW_FM_MM<0x3e>;
+}
diff --git a/lib/Target/Mips/Mips16ISelDAGToDAG.cpp b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
index 00b3449..c1c635c 100644
--- a/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
+++ b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
@@ -35,6 +35,11 @@
 #include "llvm/Target/TargetMachine.h"
 using namespace llvm;
 
+/// Run the base selector only in MIPS16 mode; otherwise return false.
+bool Mips16DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
+  return Subtarget.inMips16Mode() &&
+         MipsDAGToDAGISel::runOnMachineFunction(MF);
+}
 /// Select multiply instructions.
 std::pair<SDNode*, SDNode*>
 Mips16DAGToDAGISel::selectMULT(SDNode *N, unsigned Opc, DebugLoc DL, EVT Ty,
@@ -267,7 +272,7 @@ std::pair<bool, SDNode*> Mips16DAGToDAGISel::selectNode(SDNode *Node) {
     EVT VT = LHS.getValueType();
 
     unsigned Sltu_op = Mips::SltuRxRyRz16;
-    SDNode *Carry = CurDAG->getMachineNode(Sltu_op, DL, VT, Ops, 2);
+    SDNode *Carry = CurDAG->getMachineNode(Sltu_op, DL, VT, Ops);
     unsigned Addu_op = Mips::AdduRxRyRz16;
     SDNode *AddCarry = CurDAG->getMachineNode(Addu_op, DL, VT,
                                               SDValue(Carry,0), RHS);
diff --git a/lib/Target/Mips/Mips16ISelDAGToDAG.h b/lib/Target/Mips/Mips16ISelDAGToDAG.h
index baa8587..f05f9b7 100644
--- a/lib/Target/Mips/Mips16ISelDAGToDAG.h
+++ b/lib/Target/Mips/Mips16ISelDAGToDAG.h
@@ -28,6 +28,8 @@ private:
 
   SDValue getMips16SPAliasReg();
 
+  virtual bool runOnMachineFunction(MachineFunction &MF);
+
   void getMips16SPRefReg(SDNode *Parent, SDValue &AliasReg);
 
   virtual bool selectAddr16(SDNode *Parent, SDValue N, SDValue &Base,
diff --git a/lib/Target/Mips/Mips16ISelLowering.cpp b/lib/Target/Mips/Mips16ISelLowering.cpp
index 23eb537..f63318f 100644
--- a/lib/Target/Mips/Mips16ISelLowering.cpp
+++ b/lib/Target/Mips/Mips16ISelLowering.cpp
@@ -53,7 +53,6 @@ Mips16TargetLowering::Mips16TargetLowering(MipsTargetMachine &TM)
   if (Mips16HardFloat)
     setMips16HardFloatLibCalls();
 
-  setOperationAction(ISD::MEMBARRIER,         MVT::Other, Expand);
   setOperationAction(ISD::ATOMIC_FENCE,       MVT::Other, Expand);
   setOperationAction(ISD::ATOMIC_CMP_SWAP,    MVT::i32,   Expand);
   setOperationAction(ISD::ATOMIC_SWAP,        MVT::i32,   Expand);
@@ -614,7 +613,8 @@ MachineBasicBlock
   unsigned regX = MI->getOperand(0).getReg();
   unsigned regY = MI->getOperand(1).getReg();
   MachineBasicBlock *target = MI->getOperand(2).getMBB();
-  BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc)).addReg(regX).addReg(regY);
+  BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc)).addReg(regX)
+    .addReg(regY);
   BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(BtOpc)).addMBB(target);
   MI->eraseFromParent();   // The pseudo instruction is gone now.
   return BB;
@@ -636,7 +636,8 @@ MachineBasicBlock *Mips16TargetLowering::emitFEXT_T8I8I16_ins(
     CmpOpc = CmpiXOpc;
   else
     llvm_unreachable("immediate field not usable");
-  BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc)).addReg(regX).addImm(imm);
+  BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc)).addReg(regX)
+    .addImm(imm);
   BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(BtOpc)).addMBB(target);
   MI->eraseFromParent();   // The pseudo instruction is gone now.
   return BB;
diff --git a/lib/Target/Mips/Mips16RegisterInfo.cpp b/lib/Target/Mips/Mips16RegisterInfo.cpp
index 6cca227..7ad18f2 100644
--- a/lib/Target/Mips/Mips16RegisterInfo.cpp
+++ b/lib/Target/Mips/Mips16RegisterInfo.cpp
@@ -1,5 +1,4 @@
-
-//===-- Mips16RegisterInfo.cpp - MIPS16 Register Information -== ----------===//
+//===-- Mips16RegisterInfo.cpp - MIPS16 Register Information --------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td
index 846a822..fc533fb 100644
--- a/lib/Target/Mips/Mips64InstrInfo.td
+++ b/lib/Target/Mips/Mips64InstrInfo.td
@@ -66,14 +66,12 @@ let usesCustomInserter = 1, Predicates = [HasStdEnc],
   defm ATOMIC_CMP_SWAP_I64  : AtomicCmpSwap64<atomic_cmp_swap_64>;
 }
 
-/// Pseudo instructions for loading, storing and copying accumulator registers.
+/// Pseudo instructions for loading and storing accumulator registers.
 let isPseudo = 1 in {
   defm LOAD_AC128  : LoadM<"load_ac128", ACRegs128>;
   defm STORE_AC128 : StoreM<"store_ac128", ACRegs128>;
 }
 
-def COPY_AC128 : PseudoSE<(outs ACRegs128:$dst), (ins ACRegs128:$src), []>;
-
 //===----------------------------------------------------------------------===//
 // Instruction definition
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp
index 1876cb6..6e4feda 100644
--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -46,6 +46,10 @@
 using namespace llvm;
 
 bool MipsAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+  // Initialize TargetLoweringObjectFile.
+  if (Subtarget->allowMixed16_32())
+    const_cast<TargetLoweringObjectFile&>(getObjFileLowering())
+      .Initialize(OutContext, TM);
   MipsFI = MF.getInfo<MipsFunctionInfo>();
   AsmPrinter::runOnMachineFunction(MF);
   return true;
@@ -245,12 +249,18 @@ void MipsAsmPrinter::EmitFunctionEntryLabel() {
 void MipsAsmPrinter::EmitFunctionBodyStart() {
   MCInstLowering.Initialize(Mang, &MF->getContext());
 
-  emitFrameDirective();
+  bool IsNakedFunction =
+    MF->getFunction()->
+      getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+                                   Attribute::Naked);
+  if (!IsNakedFunction)
+    emitFrameDirective();
 
   if (OutStreamer.hasRawTextSupport()) {
     SmallString<128> Str;
     raw_svector_ostream OS(Str);
-    printSavedRegsBitmask(OS);
+    if (!IsNakedFunction)
+      printSavedRegsBitmask(OS);
     OutStreamer.EmitRawText(OS.str());
     if (!Subtarget->inMips16Mode()) {
       OutStreamer.EmitRawText(StringRef("\t.set\tnoreorder"));
@@ -419,12 +429,18 @@ bool MipsAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
                                            unsigned OpNum, unsigned AsmVariant,
                                            const char *ExtraCode,
                                            raw_ostream &O) {
-  if (ExtraCode && ExtraCode[0])
-    return true; // Unknown modifier.
+  int Offset = 0;
+  // Currently we are expecting either no ExtraCode or 'D'
+  if (ExtraCode) {
+    if (ExtraCode[0] == 'D')
+      Offset = 4;
+    else
+      return true; // Unknown modifier.
+  }
 
   const MachineOperand &MO = MI->getOperand(OpNum);
   assert(MO.isReg() && "unexpected inline asm memory operand");
-  O << "0($" << MipsInstPrinter::getRegisterName(MO.getReg()) << ")";
+  O << Offset << "($" << MipsInstPrinter::getRegisterName(MO.getReg()) << ")";
 
   return false;
 }
diff --git a/lib/Target/Mips/MipsCodeEmitter.cpp b/lib/Target/Mips/MipsCodeEmitter.cpp
index 1d86d90..3fc402b 100644
--- a/lib/Target/Mips/MipsCodeEmitter.cpp
+++ b/lib/Target/Mips/MipsCodeEmitter.cpp
@@ -116,7 +116,7 @@ private:
                                   int Offset) const;
 
   /// Expand pseudo instructions with accumulator register operands.
-  void expandACCInstr(MachineBasicBlock::instr_iterator &MI,
+  void expandACCInstr(MachineBasicBlock::instr_iterator MI,
                       MachineBasicBlock &MBB, unsigned Opc) const;
 
   /// \brief Expand pseudo instruction. Return true if MI was expanded.
@@ -302,7 +302,7 @@ void MipsCodeEmitter::emitWord(unsigned Word) {
     MCE.emitWordBE(Word);
 }
 
-void MipsCodeEmitter::expandACCInstr(MachineBasicBlock::instr_iterator &MI,
+void MipsCodeEmitter::expandACCInstr(MachineBasicBlock::instr_iterator MI,
                                      MachineBasicBlock &MBB,
                                      unsigned Opc) const {
   // Expand "pseudomult $ac0, $t0, $t1" to "mult $t0, $t1".
diff --git a/lib/Target/Mips/MipsConstantIslandPass.cpp b/lib/Target/Mips/MipsConstantIslandPass.cpp
index b5de1eb..1951324 100644
--- a/lib/Target/Mips/MipsConstantIslandPass.cpp
+++ b/lib/Target/Mips/MipsConstantIslandPass.cpp
@@ -80,6 +80,10 @@ FunctionPass *llvm::createMipsConstantIslandPass(MipsTargetMachine &tm) {
 }
 
 bool MipsConstantIslands::runOnMachineFunction(MachineFunction &F) {
-  return true;
+  // This is intended to be a MIPS16-only pass for now.
+  // FIXME: The subtarget check below is currently disabled:
+  // if (!TM.getSubtarget<MipsSubtarget>().inMips16Mode())
+  //  return false;
+  return false;
 }
 
diff --git a/lib/Target/Mips/MipsDSPInstrFormats.td b/lib/Target/Mips/MipsDSPInstrFormats.td
index a72a763..cf09113 100644
--- a/lib/Target/Mips/MipsDSPInstrFormats.td
+++ b/lib/Target/Mips/MipsDSPInstrFormats.td
@@ -219,6 +219,33 @@ class MULT_FMT<bits<6> opcode, bits<6> funct> : DSPInst {
   let Inst{5-0} = funct;
 }
 
+// MFHI sub-class format.
+class MFHI_FMT<bits<6> funct> : DSPInst {
+  bits<5> rd;
+  bits<2> ac;
+
+  let Inst{31-26} = 0;
+  let Inst{25-23} = 0;
+  let Inst{22-21} = ac;
+  let Inst{20-16} = 0;
+  let Inst{15-11} = rd;
+  let Inst{10-6} = 0;
+  let Inst{5-0} = funct;
+}
+
+// MTHI sub-class format.
+class MTHI_FMT<bits<6> funct> : DSPInst {
+  bits<5> rs;
+  bits<2> ac;
+
+  let Inst{31-26} = 0;
+  let Inst{25-21} = rs;
+  let Inst{20-13} = 0;
+  let Inst{12-11} = ac;
+  let Inst{10-6} = 0;
+  let Inst{5-0} = funct;
+}
+
 // EXTR.W sub-class format (type 1).
 class EXTR_W_TY1_FMT<bits<5> op> : DSPInst {
   bits<5> rt;
diff --git a/lib/Target/Mips/MipsDSPInstrInfo.td b/lib/Target/Mips/MipsDSPInstrInfo.td
index 3c116e1..c12878a 100644
--- a/lib/Target/Mips/MipsDSPInstrInfo.td
+++ b/lib/Target/Mips/MipsDSPInstrInfo.td
@@ -26,6 +26,8 @@ def SDT_MipsShilo : SDTypeProfile<1, 2, [SDTCisVT<0, untyped>,
                                          SDTCisSameAs<0, 2>, SDTCisVT<1, i32>]>;
 def SDT_MipsDPA : SDTypeProfile<1, 3, [SDTCisVT<0, untyped>, SDTCisSameAs<0, 3>,
                                        SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>;
+def SDT_MipsSHIFT_DSP : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
+                                             SDTCisVT<2, i32>]>;
 
 class MipsDSPBase<string Opc, SDTypeProfile Prof> :
   SDNode<!strconcat("MipsISD::", Opc), Prof>;
@@ -74,18 +76,19 @@ def MipsMADD_DSP : MipsDSPBase<"MADD_DSP", SDT_MipsDPA>;
 def MipsMADDU_DSP : MipsDSPBase<"MADDU_DSP", SDT_MipsDPA>;
 def MipsMSUB_DSP : MipsDSPBase<"MSUB_DSP", SDT_MipsDPA>;
 def MipsMSUBU_DSP : MipsDSPBase<"MSUBU_DSP", SDT_MipsDPA>;
+def MipsSHLL_DSP : MipsDSPBase<"SHLL_DSP", SDT_MipsSHIFT_DSP>;
+def MipsSHRA_DSP : MipsDSPBase<"SHRA_DSP", SDT_MipsSHIFT_DSP>;
+def MipsSHRL_DSP : MipsDSPBase<"SHRL_DSP", SDT_MipsSHIFT_DSP>;
+def MipsSETCC_DSP : MipsDSPBase<"SETCC_DSP", SDTSetCC>;
+def MipsSELECT_CC_DSP : MipsDSPBase<"SELECT_CC_DSP", SDTSelectCC>;
 
 // Flags.
-class UseAC {
-  list<Register> Uses = [AC0];
+class Uses<list<Register> Regs> {
+  list<Register> Uses = Regs;
 }
 
-class UseDSPCtrl {
-  list<Register> Uses = [DSPCtrl];
-}
-
-class ClearDefs {
-  list<Register> Defs = [];
+class Defs<list<Register> Regs> {
+  list<Register> Defs = Regs;
 }
 
 // Instruction encoding.
@@ -145,6 +148,10 @@ class MAQ_S_W_PHL_ENC : DPA_W_PH_FMT<0b10100>;
 class MAQ_S_W_PHR_ENC : DPA_W_PH_FMT<0b10110>;
 class MAQ_SA_W_PHL_ENC : DPA_W_PH_FMT<0b10000>;
 class MAQ_SA_W_PHR_ENC : DPA_W_PH_FMT<0b10010>;
+class MFHI_ENC : MFHI_FMT<0b010000>;
+class MFLO_ENC : MFHI_FMT<0b010010>;
+class MTHI_ENC : MTHI_FMT<0b010001>;
+class MTLO_ENC : MTHI_FMT<0b010011>;
 class DPAU_H_QBL_ENC : DPA_W_PH_FMT<0b00011>;
 class DPAU_H_QBR_ENC : DPA_W_PH_FMT<0b00111>;
 class DPSU_H_QBL_ENC : DPA_W_PH_FMT<0b01011>;
@@ -256,7 +263,6 @@ class ADDU_QB_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
   string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt");
   list<dag> Pattern = [(set RCD:$rd, (OpNode RCS:$rs, RCT:$rt))];
   InstrItinClass Itinerary = itin;
-  list<Register> Defs = [DSPCtrl];
 }
 
 class RADDU_W_QB_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
@@ -267,7 +273,6 @@ class RADDU_W_QB_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
   string AsmString = !strconcat(instr_asm, "\t$rd, $rs");
   list<dag> Pattern = [(set RCD:$rd, (OpNode RCS:$rs))];
   InstrItinClass Itinerary = itin;
-  list<Register> Defs = [DSPCtrl];
 }
 
 class CMP_EQ_QB_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
@@ -278,7 +283,6 @@ class CMP_EQ_QB_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
   string AsmString = !strconcat(instr_asm, "\t$rs, $rt");
   list<dag> Pattern = [(OpNode RCS:$rs, RCT:$rt)];
   InstrItinClass Itinerary = itin;
-  list<Register> Defs = [DSPCtrl];
 }
 
 class CMP_EQ_QB_R3_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
@@ -289,7 +293,6 @@ class CMP_EQ_QB_R3_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
   string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt");
   list<dag> Pattern = [(set RCD:$rd, (OpNode RCS:$rs, RCT:$rt))];
   InstrItinClass Itinerary = itin;
-  list<Register> Defs = [DSPCtrl];
 }
 
 class PRECR_SRA_PH_W_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
@@ -300,7 +303,6 @@ class PRECR_SRA_PH_W_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
   string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $sa");
   list<dag> Pattern = [(set RCT:$rt, (OpNode RCS:$src, RCS:$rs, immZExt5:$sa))];
   InstrItinClass Itinerary = itin;
-  list<Register> Defs = [DSPCtrl];
   string Constraints = "$src = $rt";
 }
 
@@ -312,7 +314,6 @@ class ABSQ_S_PH_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
   string AsmString = !strconcat(instr_asm, "\t$rd, $rt");
   list<dag> Pattern = [(set RCD:$rd, (OpNode RCT:$rt))];
   InstrItinClass Itinerary = itin;
-  list<Register> Defs = [DSPCtrl];
 }
 
 class REPL_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
@@ -322,7 +323,6 @@ class REPL_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
   string AsmString = !strconcat(instr_asm, "\t$rd, $imm");
   list<dag> Pattern = [(set RC:$rd, (OpNode immPat:$imm))];
   InstrItinClass Itinerary = itin;
-  list<Register> Defs = [DSPCtrl];
 }
 
 class SHLL_QB_R3_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
@@ -332,7 +332,6 @@ class SHLL_QB_R3_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
   string AsmString = !strconcat(instr_asm, "\t$rd, $rt, $rs_sa");
   list<dag> Pattern = [(set RC:$rd, (OpNode RC:$rt, CPURegs:$rs_sa))];
   InstrItinClass Itinerary = itin;
-  list<Register> Defs = [DSPCtrl];
 }
 
 class SHLL_QB_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
@@ -343,7 +342,7 @@ class SHLL_QB_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
   string AsmString = !strconcat(instr_asm, "\t$rd, $rt, $rs_sa");
   list<dag> Pattern = [(set RC:$rd, (OpNode RC:$rt, ImmPat:$rs_sa))];
   InstrItinClass Itinerary = itin;
-  list<Register> Defs = [DSPCtrl];
+  bit hasSideEffects = 1;
 }
 
 class LX_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
@@ -354,7 +353,6 @@ class LX_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
   list<dag> Pattern = [(set CPURegs:$rd,
                        (OpNode CPURegs:$base, CPURegs:$index))];
   InstrItinClass Itinerary = itin;
-  list<Register> Defs = [DSPCtrl];
   bit mayLoad = 1;
 }
 
@@ -366,7 +364,6 @@ class ADDUH_QB_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
   string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt");
   list<dag> Pattern = [(set RCD:$rd, (OpNode RCS:$rs, RCT:$rt))];
   InstrItinClass Itinerary = itin;
-  list<Register> Defs = [DSPCtrl];
 }
 
 class APPEND_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
@@ -377,7 +374,6 @@ class APPEND_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
   list<dag> Pattern =  [(set CPURegs:$rt,
                         (OpNode CPURegs:$src, CPURegs:$rs, ImmOp:$sa))];
   InstrItinClass Itinerary = itin;
-  list<Register> Defs = [DSPCtrl];
   string Constraints = "$src = $rt";
 }
 
@@ -387,7 +383,6 @@ class EXTR_W_TY1_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
   dag InOperandList = (ins ACRegsDSP:$ac, CPURegs:$shift_rs);
   string AsmString = !strconcat(instr_asm, "\t$rt, $ac, $shift_rs");
   InstrItinClass Itinerary = itin;
-  list<Register> Defs = [DSPCtrl];
 }
 
 class EXTR_W_TY1_R1_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
@@ -396,7 +391,6 @@ class EXTR_W_TY1_R1_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
   dag InOperandList = (ins ACRegsDSP:$ac, uimm16:$shift_rs);
   string AsmString = !strconcat(instr_asm, "\t$rt, $ac, $shift_rs");
   InstrItinClass Itinerary = itin;
-  list<Register> Defs = [DSPCtrl];
 }
 
 class SHILO_R1_DESC_BASE<string instr_asm, SDPatternOperator OpNode> {
@@ -405,7 +399,6 @@ class SHILO_R1_DESC_BASE<string instr_asm, SDPatternOperator OpNode> {
   string AsmString = !strconcat(instr_asm, "\t$ac, $shift");
   list<dag> Pattern = [(set ACRegsDSP:$ac,
                         (OpNode immSExt6:$shift, ACRegsDSP:$acin))];
-  list<Register> Defs = [DSPCtrl];
   string Constraints = "$acin = $ac";
 }
 
@@ -415,7 +408,6 @@ class SHILO_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode> {
   string AsmString = !strconcat(instr_asm, "\t$ac, $rs");
   list<dag> Pattern = [(set ACRegsDSP:$ac,
                         (OpNode CPURegs:$rs, ACRegsDSP:$acin))];
-  list<Register> Defs = [DSPCtrl];
   string Constraints = "$acin = $ac";
 }
 
@@ -425,7 +417,6 @@ class MTHLIP_DESC_BASE<string instr_asm, SDPatternOperator OpNode> {
   string AsmString = !strconcat(instr_asm, "\t$rs, $ac");
   list<dag> Pattern = [(set ACRegsDSP:$ac,
                         (OpNode CPURegs:$rs, ACRegsDSP:$acin))];
-  list<Register> Uses = [DSPCtrl];
   string Constraints = "$acin = $ac";
 }
 
@@ -436,7 +427,6 @@ class RDDSP_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
   string AsmString = !strconcat(instr_asm, "\t$rd, $mask");
   list<dag> Pattern = [(set CPURegs:$rd, (OpNode immZExt10:$mask))];
   InstrItinClass Itinerary = itin;
-  list<Register> Uses = [DSPCtrl];
 }
 
 class WRDSP_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
@@ -446,7 +436,6 @@ class WRDSP_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
   string AsmString = !strconcat(instr_asm, "\t$rs, $mask");
   list<dag> Pattern = [(OpNode CPURegs:$rs, immZExt10:$mask)];
   InstrItinClass Itinerary = itin;
-  list<Register> Defs = [DSPCtrl];
 }
 
 class DPA_W_PH_DESC_BASE<string instr_asm, SDPatternOperator OpNode> {
@@ -455,7 +444,6 @@ class DPA_W_PH_DESC_BASE<string instr_asm, SDPatternOperator OpNode> {
   string AsmString = !strconcat(instr_asm, "\t$ac, $rs, $rt");
   list<dag> Pattern = [(set ACRegsDSP:$ac,
                         (OpNode CPURegs:$rs, CPURegs:$rt, ACRegsDSP:$acin))];
-  list<Register> Defs = [DSPCtrl];
   string Constraints = "$acin = $ac";
 }
 
@@ -482,9 +470,22 @@ class MADD_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
   string Constraints = "$acin = $ac";
 }
 
+class MFHI_DESC_BASE<string instr_asm, RegisterClass RC, InstrItinClass itin> {
+  dag OutOperandList = (outs CPURegs:$rd);
+  dag InOperandList = (ins RC:$ac);
+  string AsmString = !strconcat(instr_asm, "\t$rd, $ac");
+  InstrItinClass Itinerary = itin;
+}
+
+class MTHI_DESC_BASE<string instr_asm, RegisterClass RC, InstrItinClass itin> {
+  dag OutOperandList = (outs RC:$ac);
+  dag InOperandList = (ins CPURegs:$rs);
+  string AsmString = !strconcat(instr_asm, "\t$rs, $ac");
+  InstrItinClass Itinerary = itin;
+}
+
 class BPOSGE32_PSEUDO_DESC_BASE<SDPatternOperator OpNode, InstrItinClass itin> :
   MipsPseudo<(outs CPURegs:$dst), (ins), [(set CPURegs:$dst, (OpNode))]> {
-  list<Register> Uses = [DSPCtrl];
   bit usesCustomInserter = 1;
 }
 
@@ -493,7 +494,6 @@ class BPOSGE32_DESC_BASE<string instr_asm, InstrItinClass itin> {
   dag InOperandList = (ins brtarget:$offset);
   string AsmString = !strconcat(instr_asm, "\t$offset");
   InstrItinClass Itinerary = itin;
-  list<Register> Uses = [DSPCtrl];
   bit isBranch = 1;
   bit isTerminator = 1;
   bit hasDelaySlot = 1;
@@ -506,7 +506,6 @@ class INSV_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
   string AsmString = !strconcat(instr_asm, "\t$rt, $rs");
   list<dag> Pattern = [(set CPURegs:$rt, (OpNode CPURegs:$src, CPURegs:$rs))];
   InstrItinClass Itinerary = itin;
-  list<Register> Uses = [DSPCtrl];
   string Constraints = "$src = $rt";
 }
 
@@ -515,178 +514,183 @@ class INSV_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
 //===----------------------------------------------------------------------===//
 
 // Addition/subtraction
-class ADDU_QB_DESC : ADDU_QB_DESC_BASE<"addu.qb", int_mips_addu_qb, NoItinerary,
-                                       DSPRegs, DSPRegs>, IsCommutable;
+class ADDU_QB_DESC : ADDU_QB_DESC_BASE<"addu.qb", null_frag, NoItinerary,
+                                       DSPRegs, DSPRegs>, IsCommutable,
+                     Defs<[DSPOutFlag20]>;
 
 class ADDU_S_QB_DESC : ADDU_QB_DESC_BASE<"addu_s.qb", int_mips_addu_s_qb,
                                          NoItinerary, DSPRegs, DSPRegs>,
-                       IsCommutable;
+                       IsCommutable, Defs<[DSPOutFlag20]>;
 
-class SUBU_QB_DESC : ADDU_QB_DESC_BASE<"subu.qb", int_mips_subu_qb, NoItinerary,
-                                       DSPRegs, DSPRegs>;
+class SUBU_QB_DESC : ADDU_QB_DESC_BASE<"subu.qb", null_frag, NoItinerary,
+                                       DSPRegs, DSPRegs>,
+                     Defs<[DSPOutFlag20]>;
 
 class SUBU_S_QB_DESC : ADDU_QB_DESC_BASE<"subu_s.qb", int_mips_subu_s_qb,
-                                         NoItinerary, DSPRegs, DSPRegs>;
+                                         NoItinerary, DSPRegs, DSPRegs>,
+                       Defs<[DSPOutFlag20]>;
 
-class ADDQ_PH_DESC : ADDU_QB_DESC_BASE<"addq.ph", int_mips_addq_ph, NoItinerary,
-                                       DSPRegs, DSPRegs>, IsCommutable;
+class ADDQ_PH_DESC : ADDU_QB_DESC_BASE<"addq.ph", null_frag, NoItinerary,
+                                       DSPRegs, DSPRegs>, IsCommutable,
+                     Defs<[DSPOutFlag20]>;
 
 class ADDQ_S_PH_DESC : ADDU_QB_DESC_BASE<"addq_s.ph", int_mips_addq_s_ph,
                                          NoItinerary, DSPRegs, DSPRegs>,
-                       IsCommutable;
+                       IsCommutable, Defs<[DSPOutFlag20]>;
 
-class SUBQ_PH_DESC : ADDU_QB_DESC_BASE<"subq.ph", int_mips_subq_ph, NoItinerary,
-                                       DSPRegs, DSPRegs>;
+class SUBQ_PH_DESC : ADDU_QB_DESC_BASE<"subq.ph", null_frag, NoItinerary,
+                                       DSPRegs, DSPRegs>,
+                     Defs<[DSPOutFlag20]>;
 
 class SUBQ_S_PH_DESC : ADDU_QB_DESC_BASE<"subq_s.ph", int_mips_subq_s_ph,
-                                         NoItinerary, DSPRegs, DSPRegs>;
+                                         NoItinerary, DSPRegs, DSPRegs>,
+                       Defs<[DSPOutFlag20]>;
 
 class ADDQ_S_W_DESC : ADDU_QB_DESC_BASE<"addq_s.w", int_mips_addq_s_w,
                                         NoItinerary, CPURegs, CPURegs>,
-                      IsCommutable;
+                      IsCommutable, Defs<[DSPOutFlag20]>;
 
 class SUBQ_S_W_DESC : ADDU_QB_DESC_BASE<"subq_s.w", int_mips_subq_s_w,
-                                        NoItinerary, CPURegs, CPURegs>;
+                                        NoItinerary, CPURegs, CPURegs>,
+                      Defs<[DSPOutFlag20]>;
 
-class ADDSC_DESC : ADDU_QB_DESC_BASE<"addsc", int_mips_addsc, NoItinerary,
-                                     CPURegs, CPURegs>, IsCommutable;
+class ADDSC_DESC : ADDU_QB_DESC_BASE<"addsc", null_frag, NoItinerary,
+                                     CPURegs, CPURegs>, IsCommutable,
+                   Defs<[DSPCarry]>;
 
-class ADDWC_DESC : ADDU_QB_DESC_BASE<"addwc", int_mips_addwc, NoItinerary,
+class ADDWC_DESC : ADDU_QB_DESC_BASE<"addwc", null_frag, NoItinerary,
                                      CPURegs, CPURegs>,
-                   IsCommutable, UseDSPCtrl;
+                   IsCommutable, Uses<[DSPCarry]>, Defs<[DSPOutFlag20]>;
 
 class MODSUB_DESC : ADDU_QB_DESC_BASE<"modsub", int_mips_modsub, NoItinerary,
-                                      CPURegs, CPURegs>, ClearDefs;
+                                      CPURegs, CPURegs>;
 
 class RADDU_W_QB_DESC : RADDU_W_QB_DESC_BASE<"raddu.w.qb", int_mips_raddu_w_qb,
-                                             NoItinerary, CPURegs, DSPRegs>,
-                        ClearDefs;
+                                             NoItinerary, CPURegs, DSPRegs>;
 
 // Absolute value
 class ABSQ_S_PH_DESC : ABSQ_S_PH_R2_DESC_BASE<"absq_s.ph", int_mips_absq_s_ph,
-                                              NoItinerary, DSPRegs>;
+                                              NoItinerary, DSPRegs>,
+                       Defs<[DSPOutFlag20]>;
 
 class ABSQ_S_W_DESC : ABSQ_S_PH_R2_DESC_BASE<"absq_s.w", int_mips_absq_s_w,
-                                             NoItinerary, CPURegs>;
+                                             NoItinerary, CPURegs>,
+                      Defs<[DSPOutFlag20]>;
 
 // Precision reduce/expand
 class PRECRQ_QB_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"precrq.qb.ph",
                                                  int_mips_precrq_qb_ph,
-                                                 NoItinerary, DSPRegs, DSPRegs>,
-                          ClearDefs;
+                                                 NoItinerary, DSPRegs, DSPRegs>;
 
 class PRECRQ_PH_W_DESC : CMP_EQ_QB_R3_DESC_BASE<"precrq.ph.w",
                                                 int_mips_precrq_ph_w,
-                                                NoItinerary, DSPRegs, CPURegs>,
-                         ClearDefs;
+                                                NoItinerary, DSPRegs, CPURegs>;
 
 class PRECRQ_RS_PH_W_DESC : CMP_EQ_QB_R3_DESC_BASE<"precrq_rs.ph.w",
                                                    int_mips_precrq_rs_ph_w,
                                                    NoItinerary, DSPRegs,
-                                                   CPURegs>;
+                                                   CPURegs>,
+                            Defs<[DSPOutFlag22]>;
 
 class PRECRQU_S_QB_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"precrqu_s.qb.ph",
                                                     int_mips_precrqu_s_qb_ph,
                                                     NoItinerary, DSPRegs,
-                                                    DSPRegs>;
+                                                    DSPRegs>,
+                             Defs<[DSPOutFlag22]>;
 
 class PRECEQ_W_PHL_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceq.w.phl",
                                                  int_mips_preceq_w_phl,
-                                                 NoItinerary, CPURegs, DSPRegs>,
-                          ClearDefs;
+                                                 NoItinerary, CPURegs, DSPRegs>;
 
 class PRECEQ_W_PHR_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceq.w.phr",
                                                  int_mips_preceq_w_phr,
-                                                 NoItinerary, CPURegs, DSPRegs>,
-                          ClearDefs;
+                                                 NoItinerary, CPURegs, DSPRegs>;
 
 class PRECEQU_PH_QBL_DESC : ABSQ_S_PH_R2_DESC_BASE<"precequ.ph.qbl",
                                                    int_mips_precequ_ph_qbl,
-                                                   NoItinerary, DSPRegs>,
-                            ClearDefs;
+                                                   NoItinerary, DSPRegs>;
 
 class PRECEQU_PH_QBR_DESC : ABSQ_S_PH_R2_DESC_BASE<"precequ.ph.qbr",
                                                    int_mips_precequ_ph_qbr,
-                                                   NoItinerary, DSPRegs>,
-                            ClearDefs;
+                                                   NoItinerary, DSPRegs>;
 
 class PRECEQU_PH_QBLA_DESC : ABSQ_S_PH_R2_DESC_BASE<"precequ.ph.qbla",
                                                     int_mips_precequ_ph_qbla,
-                                                    NoItinerary, DSPRegs>,
-                             ClearDefs;
+                                                    NoItinerary, DSPRegs>;
 
 class PRECEQU_PH_QBRA_DESC : ABSQ_S_PH_R2_DESC_BASE<"precequ.ph.qbra",
                                                     int_mips_precequ_ph_qbra,
-                                                    NoItinerary, DSPRegs>,
-                             ClearDefs;
+                                                    NoItinerary, DSPRegs>;
 
 class PRECEU_PH_QBL_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbl",
                                                   int_mips_preceu_ph_qbl,
-                                                  NoItinerary, DSPRegs>,
-                           ClearDefs;
+                                                  NoItinerary, DSPRegs>;
 
 class PRECEU_PH_QBR_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbr",
                                                   int_mips_preceu_ph_qbr,
-                                                  NoItinerary, DSPRegs>,
-                           ClearDefs;
+                                                  NoItinerary, DSPRegs>;
 
 class PRECEU_PH_QBLA_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbla",
                                                    int_mips_preceu_ph_qbla,
-                                                   NoItinerary, DSPRegs>,
-                            ClearDefs;
+                                                   NoItinerary, DSPRegs>;
 
 class PRECEU_PH_QBRA_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbra",
                                                    int_mips_preceu_ph_qbra,
-                                                   NoItinerary, DSPRegs>,
-                            ClearDefs;
+                                                   NoItinerary, DSPRegs>;
 
 // Shift
-class SHLL_QB_DESC : SHLL_QB_R2_DESC_BASE<"shll.qb", int_mips_shll_qb, immZExt3,
-                                          NoItinerary, DSPRegs>;
+class SHLL_QB_DESC : SHLL_QB_R2_DESC_BASE<"shll.qb", null_frag, immZExt3,
+                                          NoItinerary, DSPRegs>,
+                     Defs<[DSPOutFlag22]>;
 
 class SHLLV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shllv.qb", int_mips_shll_qb,
-                                           NoItinerary, DSPRegs>;
+                                           NoItinerary, DSPRegs>,
+                      Defs<[DSPOutFlag22]>;
 
-class SHRL_QB_DESC : SHLL_QB_R2_DESC_BASE<"shrl.qb", int_mips_shrl_qb, immZExt3,
-                                          NoItinerary, DSPRegs>, ClearDefs;
+class SHRL_QB_DESC : SHLL_QB_R2_DESC_BASE<"shrl.qb", null_frag, immZExt3,
+                                          NoItinerary, DSPRegs>;
 
 class SHRLV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shrlv.qb", int_mips_shrl_qb,
-                                           NoItinerary, DSPRegs>, ClearDefs;
+                                           NoItinerary, DSPRegs>;
 
-class SHLL_PH_DESC : SHLL_QB_R2_DESC_BASE<"shll.ph", int_mips_shll_ph, immZExt4,
-                                          NoItinerary, DSPRegs>;
+class SHLL_PH_DESC : SHLL_QB_R2_DESC_BASE<"shll.ph", null_frag, immZExt4,
+                                          NoItinerary, DSPRegs>,
+                     Defs<[DSPOutFlag22]>;
 
 class SHLLV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shllv.ph", int_mips_shll_ph,
-                                           NoItinerary, DSPRegs>;
+                                           NoItinerary, DSPRegs>,
+                      Defs<[DSPOutFlag22]>;
 
 class SHLL_S_PH_DESC : SHLL_QB_R2_DESC_BASE<"shll_s.ph", int_mips_shll_s_ph,
-                                            immZExt4, NoItinerary, DSPRegs>;
+                                            immZExt4, NoItinerary, DSPRegs>,
+                       Defs<[DSPOutFlag22]>;
 
 class SHLLV_S_PH_DESC : SHLL_QB_R3_DESC_BASE<"shllv_s.ph", int_mips_shll_s_ph,
-                                             NoItinerary, DSPRegs>;
+                                             NoItinerary, DSPRegs>,
+                        Defs<[DSPOutFlag22]>;
 
-class SHRA_PH_DESC : SHLL_QB_R2_DESC_BASE<"shra.ph", int_mips_shra_ph, immZExt4,
-                                          NoItinerary, DSPRegs>, ClearDefs;
+class SHRA_PH_DESC : SHLL_QB_R2_DESC_BASE<"shra.ph", null_frag, immZExt4,
+                                          NoItinerary, DSPRegs>;
 
 class SHRAV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrav.ph", int_mips_shra_ph,
-                                           NoItinerary, DSPRegs>, ClearDefs;
+                                           NoItinerary, DSPRegs>;
 
 class SHRA_R_PH_DESC : SHLL_QB_R2_DESC_BASE<"shra_r.ph", int_mips_shra_r_ph,
-                                            immZExt4, NoItinerary, DSPRegs>,
-                       ClearDefs;
+                                            immZExt4, NoItinerary, DSPRegs>;
 
 class SHRAV_R_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrav_r.ph", int_mips_shra_r_ph,
-                                             NoItinerary, DSPRegs>, ClearDefs;
+                                             NoItinerary, DSPRegs>;
 
 class SHLL_S_W_DESC : SHLL_QB_R2_DESC_BASE<"shll_s.w", int_mips_shll_s_w,
-                                           immZExt5, NoItinerary, CPURegs>;
+                                           immZExt5, NoItinerary, CPURegs>,
+                      Defs<[DSPOutFlag22]>;
 
 class SHLLV_S_W_DESC : SHLL_QB_R3_DESC_BASE<"shllv_s.w", int_mips_shll_s_w,
-                                            NoItinerary, CPURegs>;
+                                            NoItinerary, CPURegs>,
+                       Defs<[DSPOutFlag22]>;
 
 class SHRA_R_W_DESC : SHLL_QB_R2_DESC_BASE<"shra_r.w", int_mips_shra_r_w,
-                                           immZExt5, NoItinerary, CPURegs>,
-                      ClearDefs;
+                                           immZExt5, NoItinerary, CPURegs>;
 
 class SHRAV_R_W_DESC : SHLL_QB_R3_DESC_BASE<"shrav_r.w", int_mips_shra_r_w,
                                             NoItinerary, CPURegs>;
@@ -694,36 +698,49 @@ class SHRAV_R_W_DESC : SHLL_QB_R3_DESC_BASE<"shrav_r.w", int_mips_shra_r_w,
 // Multiplication
 class MULEU_S_PH_QBL_DESC : ADDU_QB_DESC_BASE<"muleu_s.ph.qbl",
                                               int_mips_muleu_s_ph_qbl,
-                                              NoItinerary, DSPRegs, DSPRegs>;
+                                              NoItinerary, DSPRegs, DSPRegs>,
+                            Defs<[DSPOutFlag21]>;
 
 class MULEU_S_PH_QBR_DESC : ADDU_QB_DESC_BASE<"muleu_s.ph.qbr",
                                               int_mips_muleu_s_ph_qbr,
-                                              NoItinerary, DSPRegs, DSPRegs>;
+                                              NoItinerary, DSPRegs, DSPRegs>,
+                            Defs<[DSPOutFlag21]>;
 
 class MULEQ_S_W_PHL_DESC : ADDU_QB_DESC_BASE<"muleq_s.w.phl",
                                              int_mips_muleq_s_w_phl,
                                              NoItinerary, CPURegs, DSPRegs>,
-                           IsCommutable;
+                           IsCommutable, Defs<[DSPOutFlag21]>;
 
 class MULEQ_S_W_PHR_DESC : ADDU_QB_DESC_BASE<"muleq_s.w.phr",
                                              int_mips_muleq_s_w_phr,
                                              NoItinerary, CPURegs, DSPRegs>,
-                           IsCommutable;
+                           IsCommutable, Defs<[DSPOutFlag21]>;
 
 class MULQ_RS_PH_DESC : ADDU_QB_DESC_BASE<"mulq_rs.ph", int_mips_mulq_rs_ph,
                                           NoItinerary, DSPRegs, DSPRegs>,
-                        IsCommutable;
+                        IsCommutable, Defs<[DSPOutFlag21]>;
 
 class MULSAQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"mulsaq_s.w.ph",
-                                              MipsMULSAQ_S_W_PH>;
+                                              MipsMULSAQ_S_W_PH>,
+                           Defs<[DSPOutFlag16_19]>;
 
-class MAQ_S_W_PHL_DESC : DPA_W_PH_DESC_BASE<"maq_s.w.phl", MipsMAQ_S_W_PHL>;
+class MAQ_S_W_PHL_DESC : DPA_W_PH_DESC_BASE<"maq_s.w.phl", MipsMAQ_S_W_PHL>,
+                         Defs<[DSPOutFlag16_19]>;
 
-class MAQ_S_W_PHR_DESC : DPA_W_PH_DESC_BASE<"maq_s.w.phr", MipsMAQ_S_W_PHR>;
+class MAQ_S_W_PHR_DESC : DPA_W_PH_DESC_BASE<"maq_s.w.phr", MipsMAQ_S_W_PHR>,
+                         Defs<[DSPOutFlag16_19]>;
 
-class MAQ_SA_W_PHL_DESC : DPA_W_PH_DESC_BASE<"maq_sa.w.phl", MipsMAQ_SA_W_PHL>;
+class MAQ_SA_W_PHL_DESC : DPA_W_PH_DESC_BASE<"maq_sa.w.phl", MipsMAQ_SA_W_PHL>,
+                          Defs<[DSPOutFlag16_19]>;
 
-class MAQ_SA_W_PHR_DESC : DPA_W_PH_DESC_BASE<"maq_sa.w.phr", MipsMAQ_SA_W_PHR>;
+class MAQ_SA_W_PHR_DESC : DPA_W_PH_DESC_BASE<"maq_sa.w.phr", MipsMAQ_SA_W_PHR>,
+                          Defs<[DSPOutFlag16_19]>;
+
+// Move from/to hi/lo.
+class MFHI_DESC : MFHI_DESC_BASE<"mfhi", HIRegsDSP, NoItinerary>;
+class MFLO_DESC : MFHI_DESC_BASE<"mflo", LORegsDSP, NoItinerary>;
+class MTHI_DESC : MTHI_DESC_BASE<"mthi", HIRegsDSP, NoItinerary>;
+class MTLO_DESC : MTHI_DESC_BASE<"mtlo", LORegsDSP, NoItinerary>;
 
 // Dot product with accumulate/subtract
 class DPAU_H_QBL_DESC : DPA_W_PH_DESC_BASE<"dpau.h.qbl", MipsDPAU_H_QBL>;
@@ -734,13 +751,17 @@ class DPSU_H_QBL_DESC : DPA_W_PH_DESC_BASE<"dpsu.h.qbl", MipsDPSU_H_QBL>;
 
 class DPSU_H_QBR_DESC : DPA_W_PH_DESC_BASE<"dpsu.h.qbr", MipsDPSU_H_QBR>;
 
-class DPAQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaq_s.w.ph", MipsDPAQ_S_W_PH>;
+class DPAQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaq_s.w.ph", MipsDPAQ_S_W_PH>,
+                         Defs<[DSPOutFlag16_19]>;
 
-class DPSQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsq_s.w.ph", MipsDPSQ_S_W_PH>;
+class DPSQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsq_s.w.ph", MipsDPSQ_S_W_PH>,
+                         Defs<[DSPOutFlag16_19]>;
 
-class DPAQ_SA_L_W_DESC : DPA_W_PH_DESC_BASE<"dpaq_sa.l.w", MipsDPAQ_SA_L_W>;
+class DPAQ_SA_L_W_DESC : DPA_W_PH_DESC_BASE<"dpaq_sa.l.w", MipsDPAQ_SA_L_W>,
+                         Defs<[DSPOutFlag16_19]>;
 
-class DPSQ_SA_L_W_DESC : DPA_W_PH_DESC_BASE<"dpsq_sa.l.w", MipsDPSQ_SA_L_W>;
+class DPSQ_SA_L_W_DESC : DPA_W_PH_DESC_BASE<"dpsq_sa.l.w", MipsDPSQ_SA_L_W>,
+                         Defs<[DSPOutFlag16_19]>;
 
 class MULT_DSP_DESC  : MULT_DESC_BASE<"mult", MipsMult, NoItinerary>;
 class MULTU_DSP_DESC : MULT_DESC_BASE<"multu", MipsMultu, NoItinerary>;
@@ -752,15 +773,16 @@ class MSUBU_DSP_DESC : MADD_DESC_BASE<"msubu", MipsMSubu, NoItinerary>;
 // Comparison
 class CMPU_EQ_QB_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmpu.eq.qb",
                                                int_mips_cmpu_eq_qb, NoItinerary,
-                                               DSPRegs>, IsCommutable;
+                                               DSPRegs>,
+                        IsCommutable, Defs<[DSPCCond]>;
 
 class CMPU_LT_QB_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmpu.lt.qb",
                                                int_mips_cmpu_lt_qb, NoItinerary,
-                                               DSPRegs>, IsCommutable;
+                                               DSPRegs>, Defs<[DSPCCond]>;
 
 class CMPU_LE_QB_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmpu.le.qb",
                                                int_mips_cmpu_le_qb, NoItinerary,
-                                               DSPRegs>, IsCommutable;
+                                               DSPRegs>, Defs<[DSPCCond]>;
 
 class CMPGU_EQ_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgu.eq.qb",
                                                 int_mips_cmpgu_eq_qb,
@@ -769,222 +791,235 @@ class CMPGU_EQ_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgu.eq.qb",
 
 class CMPGU_LT_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgu.lt.qb",
                                                 int_mips_cmpgu_lt_qb,
-                                                NoItinerary, CPURegs, DSPRegs>,
-                         IsCommutable;
+                                                NoItinerary, CPURegs, DSPRegs>;
 
 class CMPGU_LE_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgu.le.qb",
                                                 int_mips_cmpgu_le_qb,
-                                                NoItinerary, CPURegs, DSPRegs>,
-                         IsCommutable;
+                                                NoItinerary, CPURegs, DSPRegs>;
 
 class CMP_EQ_PH_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmp.eq.ph", int_mips_cmp_eq_ph,
                                               NoItinerary, DSPRegs>,
-                       IsCommutable;
+                       IsCommutable, Defs<[DSPCCond]>;
 
 class CMP_LT_PH_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmp.lt.ph", int_mips_cmp_lt_ph,
                                               NoItinerary, DSPRegs>,
-                       IsCommutable;
+                       Defs<[DSPCCond]>;
 
 class CMP_LE_PH_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmp.le.ph", int_mips_cmp_le_ph,
                                               NoItinerary, DSPRegs>,
-                       IsCommutable;
+                       Defs<[DSPCCond]>;
 
 // Misc
 class BITREV_DESC : ABSQ_S_PH_R2_DESC_BASE<"bitrev", int_mips_bitrev,
-                                           NoItinerary, CPURegs>, ClearDefs;
+                                           NoItinerary, CPURegs>;
 
 class PACKRL_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"packrl.ph", int_mips_packrl_ph,
-                                              NoItinerary, DSPRegs, DSPRegs>,
-                       ClearDefs;
+                                              NoItinerary, DSPRegs, DSPRegs>;
 
 class REPL_QB_DESC : REPL_DESC_BASE<"repl.qb", int_mips_repl_qb, immZExt8,
-                                    NoItinerary, DSPRegs>, ClearDefs;
+                                    NoItinerary, DSPRegs>;
 
 class REPL_PH_DESC : REPL_DESC_BASE<"repl.ph", int_mips_repl_ph, immZExt10,
-                                    NoItinerary, DSPRegs>, ClearDefs;
+                                    NoItinerary, DSPRegs>;
 
 class REPLV_QB_DESC : ABSQ_S_PH_R2_DESC_BASE<"replv.qb", int_mips_repl_qb,
-                                             NoItinerary, DSPRegs, CPURegs>,
-                      ClearDefs;
+                                             NoItinerary, DSPRegs, CPURegs>;
 
 class REPLV_PH_DESC : ABSQ_S_PH_R2_DESC_BASE<"replv.ph", int_mips_repl_ph,
-                                             NoItinerary, DSPRegs, CPURegs>,
-                      ClearDefs;
+                                             NoItinerary, DSPRegs, CPURegs>;
 
 class PICK_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"pick.qb", int_mips_pick_qb,
                                             NoItinerary, DSPRegs, DSPRegs>,
-                     ClearDefs, UseDSPCtrl;
+                     Uses<[DSPCCond]>;
 
 class PICK_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"pick.ph", int_mips_pick_ph,
                                             NoItinerary, DSPRegs, DSPRegs>,
-                     ClearDefs, UseDSPCtrl;
+                     Uses<[DSPCCond]>;
 
-class LWX_DESC : LX_DESC_BASE<"lwx", int_mips_lwx, NoItinerary>, ClearDefs;
+class LWX_DESC : LX_DESC_BASE<"lwx", int_mips_lwx, NoItinerary>;
 
-class LHX_DESC : LX_DESC_BASE<"lhx", int_mips_lhx, NoItinerary>, ClearDefs;
+class LHX_DESC : LX_DESC_BASE<"lhx", int_mips_lhx, NoItinerary>;
 
-class LBUX_DESC : LX_DESC_BASE<"lbux", int_mips_lbux, NoItinerary>, ClearDefs;
+class LBUX_DESC : LX_DESC_BASE<"lbux", int_mips_lbux, NoItinerary>;
 
 class BPOSGE32_DESC : BPOSGE32_DESC_BASE<"bposge32", NoItinerary>;
 
 // Extr
-class EXTP_DESC : EXTR_W_TY1_R1_DESC_BASE<"extp", MipsEXTP, NoItinerary>;
+class EXTP_DESC : EXTR_W_TY1_R1_DESC_BASE<"extp", MipsEXTP, NoItinerary>,
+                  Uses<[DSPPos]>, Defs<[DSPEFI]>;
 
-class EXTPV_DESC : EXTR_W_TY1_R2_DESC_BASE<"extpv", MipsEXTP, NoItinerary>;
+class EXTPV_DESC : EXTR_W_TY1_R2_DESC_BASE<"extpv", MipsEXTP, NoItinerary>,
+                   Uses<[DSPPos]>, Defs<[DSPEFI]>;
 
-class EXTPDP_DESC : EXTR_W_TY1_R1_DESC_BASE<"extpdp", MipsEXTPDP, NoItinerary>;
+class EXTPDP_DESC : EXTR_W_TY1_R1_DESC_BASE<"extpdp", MipsEXTPDP, NoItinerary>,
+                    Uses<[DSPPos]>, Defs<[DSPPos, DSPEFI]>;
 
 class EXTPDPV_DESC : EXTR_W_TY1_R2_DESC_BASE<"extpdpv", MipsEXTPDP,
-                                             NoItinerary>;
+                                             NoItinerary>,
+                     Uses<[DSPPos]>, Defs<[DSPPos, DSPEFI]>;
 
-class EXTR_W_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr.w", MipsEXTR_W, NoItinerary>;
+class EXTR_W_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr.w", MipsEXTR_W, NoItinerary>,
+                    Defs<[DSPOutFlag23]>;
 
 class EXTRV_W_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv.w", MipsEXTR_W,
-                                             NoItinerary>;
+                                             NoItinerary>, Defs<[DSPOutFlag23]>;
 
 class EXTR_R_W_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr_r.w", MipsEXTR_R_W,
-                                              NoItinerary>;
+                                              NoItinerary>,
+                      Defs<[DSPOutFlag23]>;
 
 class EXTRV_R_W_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv_r.w", MipsEXTR_R_W,
-                                               NoItinerary>;
+                                               NoItinerary>,
+                       Defs<[DSPOutFlag23]>;
 
 class EXTR_RS_W_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr_rs.w", MipsEXTR_RS_W,
-                                               NoItinerary>;
+                                               NoItinerary>,
+                       Defs<[DSPOutFlag23]>;
 
 class EXTRV_RS_W_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv_rs.w", MipsEXTR_RS_W,
-                                                NoItinerary>;
+                                                NoItinerary>,
+                        Defs<[DSPOutFlag23]>;
 
 class EXTR_S_H_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr_s.h", MipsEXTR_S_H,
-                                              NoItinerary>;
+                                              NoItinerary>,
+                      Defs<[DSPOutFlag23]>;
 
 class EXTRV_S_H_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv_s.h", MipsEXTR_S_H,
-                                               NoItinerary>;
+                                               NoItinerary>,
+                       Defs<[DSPOutFlag23]>;
 
 class SHILO_DESC : SHILO_R1_DESC_BASE<"shilo", MipsSHILO>;
 
 class SHILOV_DESC : SHILO_R2_DESC_BASE<"shilov", MipsSHILO>;
 
-class MTHLIP_DESC : MTHLIP_DESC_BASE<"mthlip", MipsMTHLIP>;
+class MTHLIP_DESC : MTHLIP_DESC_BASE<"mthlip", MipsMTHLIP>, Defs<[DSPPos]>;
 
 class RDDSP_DESC : RDDSP_DESC_BASE<"rddsp", int_mips_rddsp, NoItinerary>;
 
 class WRDSP_DESC : WRDSP_DESC_BASE<"wrdsp", int_mips_wrdsp, NoItinerary>;
 
-class INSV_DESC : INSV_DESC_BASE<"insv", int_mips_insv, NoItinerary>;
+class INSV_DESC : INSV_DESC_BASE<"insv", int_mips_insv, NoItinerary>,
+                  Uses<[DSPPos, DSPSCount]>;
 
 //===----------------------------------------------------------------------===//
 // MIPS DSP Rev 2
 // Addition/subtraction
 class ADDU_PH_DESC : ADDU_QB_DESC_BASE<"addu.ph", int_mips_addu_ph, NoItinerary,
-                                       DSPRegs, DSPRegs>, IsCommutable;
+                                       DSPRegs, DSPRegs>, IsCommutable,
+                     Defs<[DSPOutFlag20]>;
 
 class ADDU_S_PH_DESC : ADDU_QB_DESC_BASE<"addu_s.ph", int_mips_addu_s_ph,
                                          NoItinerary, DSPRegs, DSPRegs>,
-                       IsCommutable;
+                       IsCommutable, Defs<[DSPOutFlag20]>;
 
 class SUBU_PH_DESC : ADDU_QB_DESC_BASE<"subu.ph", int_mips_subu_ph, NoItinerary,
-                                       DSPRegs, DSPRegs>;
+                                       DSPRegs, DSPRegs>,
+                     Defs<[DSPOutFlag20]>;
 
 class SUBU_S_PH_DESC : ADDU_QB_DESC_BASE<"subu_s.ph", int_mips_subu_s_ph,
-                                         NoItinerary, DSPRegs, DSPRegs>;
+                                         NoItinerary, DSPRegs, DSPRegs>,
+                       Defs<[DSPOutFlag20]>;
 
 class ADDUH_QB_DESC : ADDUH_QB_DESC_BASE<"adduh.qb", int_mips_adduh_qb,
-                                         NoItinerary, DSPRegs>,
-                      ClearDefs, IsCommutable;
+                                         NoItinerary, DSPRegs>, IsCommutable;
 
 class ADDUH_R_QB_DESC : ADDUH_QB_DESC_BASE<"adduh_r.qb", int_mips_adduh_r_qb,
-                                           NoItinerary, DSPRegs>,
-                        ClearDefs, IsCommutable;
+                                           NoItinerary, DSPRegs>, IsCommutable;
 
 class SUBUH_QB_DESC : ADDUH_QB_DESC_BASE<"subuh.qb", int_mips_subuh_qb,
-                                         NoItinerary, DSPRegs>, ClearDefs;
+                                         NoItinerary, DSPRegs>;
 
 class SUBUH_R_QB_DESC : ADDUH_QB_DESC_BASE<"subuh_r.qb", int_mips_subuh_r_qb,
-                                           NoItinerary, DSPRegs>, ClearDefs;
+                                           NoItinerary, DSPRegs>;
 
 class ADDQH_PH_DESC : ADDUH_QB_DESC_BASE<"addqh.ph", int_mips_addqh_ph,
-                                         NoItinerary, DSPRegs>,
-                      ClearDefs, IsCommutable;
+                                         NoItinerary, DSPRegs>, IsCommutable;
 
 class ADDQH_R_PH_DESC : ADDUH_QB_DESC_BASE<"addqh_r.ph", int_mips_addqh_r_ph,
-                                           NoItinerary, DSPRegs>,
-                        ClearDefs, IsCommutable;
+                                           NoItinerary, DSPRegs>, IsCommutable;
 
 class SUBQH_PH_DESC : ADDUH_QB_DESC_BASE<"subqh.ph", int_mips_subqh_ph,
-                                         NoItinerary, DSPRegs>, ClearDefs;
+                                         NoItinerary, DSPRegs>;
 
 class SUBQH_R_PH_DESC : ADDUH_QB_DESC_BASE<"subqh_r.ph", int_mips_subqh_r_ph,
-                                           NoItinerary, DSPRegs>, ClearDefs;
+                                           NoItinerary, DSPRegs>;
 
 class ADDQH_W_DESC : ADDUH_QB_DESC_BASE<"addqh.w", int_mips_addqh_w,
-                                        NoItinerary, CPURegs>,
-                     ClearDefs, IsCommutable;
+                                        NoItinerary, CPURegs>, IsCommutable;
 
 class ADDQH_R_W_DESC : ADDUH_QB_DESC_BASE<"addqh_r.w", int_mips_addqh_r_w,
-                                          NoItinerary, CPURegs>,
-                       ClearDefs, IsCommutable;
+                                          NoItinerary, CPURegs>, IsCommutable;
 
 class SUBQH_W_DESC : ADDUH_QB_DESC_BASE<"subqh.w", int_mips_subqh_w,
-                                        NoItinerary, CPURegs>, ClearDefs;
+                                        NoItinerary, CPURegs>;
 
 class SUBQH_R_W_DESC : ADDUH_QB_DESC_BASE<"subqh_r.w", int_mips_subqh_r_w,
-                                          NoItinerary, CPURegs>, ClearDefs;
+                                          NoItinerary, CPURegs>;
 
 // Comparison
 class CMPGDU_EQ_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgdu.eq.qb",
                                                  int_mips_cmpgdu_eq_qb,
                                                  NoItinerary, CPURegs, DSPRegs>,
-                          IsCommutable;
+                          IsCommutable, Defs<[DSPCCond]>;
 
 class CMPGDU_LT_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgdu.lt.qb",
                                                  int_mips_cmpgdu_lt_qb,
                                                  NoItinerary, CPURegs, DSPRegs>,
-                          IsCommutable;
+                          Defs<[DSPCCond]>;
 
 class CMPGDU_LE_QB_DESC : CMP_EQ_QB_R3_DESC_BASE<"cmpgdu.le.qb",
                                                  int_mips_cmpgdu_le_qb,
                                                  NoItinerary, CPURegs, DSPRegs>,
-                          IsCommutable;
+                          Defs<[DSPCCond]>;
 
 // Absolute
 class ABSQ_S_QB_DESC : ABSQ_S_PH_R2_DESC_BASE<"absq_s.qb", int_mips_absq_s_qb,
-                                              NoItinerary, DSPRegs>;
+                                              NoItinerary, DSPRegs>,
+                       Defs<[DSPOutFlag20]>;
 
 // Multiplication
-class MUL_PH_DESC : ADDUH_QB_DESC_BASE<"mul.ph", int_mips_mul_ph, NoItinerary,
-                                       DSPRegs>, IsCommutable;
+class MUL_PH_DESC : ADDUH_QB_DESC_BASE<"mul.ph", null_frag, NoItinerary,
+                                       DSPRegs>, IsCommutable,
+                    Defs<[DSPOutFlag21]>;
 
 class MUL_S_PH_DESC : ADDUH_QB_DESC_BASE<"mul_s.ph", int_mips_mul_s_ph,
-                                         NoItinerary, DSPRegs>, IsCommutable;
+                                         NoItinerary, DSPRegs>, IsCommutable,
+                      Defs<[DSPOutFlag21]>;
 
 class MULQ_S_W_DESC : ADDUH_QB_DESC_BASE<"mulq_s.w", int_mips_mulq_s_w,
-                                         NoItinerary, CPURegs>, IsCommutable;
+                                         NoItinerary, CPURegs>, IsCommutable,
+                      Defs<[DSPOutFlag21]>;
 
 class MULQ_RS_W_DESC : ADDUH_QB_DESC_BASE<"mulq_rs.w", int_mips_mulq_rs_w,
-                                          NoItinerary, CPURegs>, IsCommutable;
+                                          NoItinerary, CPURegs>, IsCommutable,
+                       Defs<[DSPOutFlag21]>;
 
 class MULQ_S_PH_DESC : ADDU_QB_DESC_BASE<"mulq_s.ph", int_mips_mulq_s_ph,
                                          NoItinerary, DSPRegs, DSPRegs>,
-                       IsCommutable;
+                       IsCommutable, Defs<[DSPOutFlag21]>;
 
 // Dot product with accumulate/subtract
 class DPA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpa.w.ph", MipsDPA_W_PH>;
 
 class DPS_W_PH_DESC : DPA_W_PH_DESC_BASE<"dps.w.ph", MipsDPS_W_PH>;
 
-class DPAQX_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaqx_s.w.ph", MipsDPAQX_S_W_PH>;
+class DPAQX_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaqx_s.w.ph", MipsDPAQX_S_W_PH>,
+                          Defs<[DSPOutFlag16_19]>;
 
 class DPAQX_SA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaqx_sa.w.ph",
-                                              MipsDPAQX_SA_W_PH>;
+                                              MipsDPAQX_SA_W_PH>,
+                           Defs<[DSPOutFlag16_19]>;
 
 class DPAX_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpax.w.ph", MipsDPAX_W_PH>;
 
 class DPSX_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsx.w.ph", MipsDPSX_W_PH>;
 
-class DPSQX_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsqx_s.w.ph", MipsDPSQX_S_W_PH>;
+class DPSQX_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsqx_s.w.ph", MipsDPSQX_S_W_PH>,
+                          Defs<[DSPOutFlag16_19]>;
 
 class DPSQX_SA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsqx_sa.w.ph",
-                                              MipsDPSQX_SA_W_PH>;
+                                              MipsDPSQX_SA_W_PH>,
+                           Defs<[DSPOutFlag16_19]>;
 
 class MULSA_W_PH_DESC : DPA_W_PH_DESC_BASE<"mulsa.w.ph", MipsMULSA_W_PH>;
 
@@ -996,45 +1031,45 @@ class PRECR_QB_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"precr.qb.ph",
 class PRECR_SRA_PH_W_DESC : PRECR_SRA_PH_W_DESC_BASE<"precr_sra.ph.w",
                                                      int_mips_precr_sra_ph_w,
                                                      NoItinerary, DSPRegs,
-                                                     CPURegs>, ClearDefs;
+                                                     CPURegs>;
 
 class PRECR_SRA_R_PH_W_DESC : PRECR_SRA_PH_W_DESC_BASE<"precr_sra_r.ph.w",
                                                       int_mips_precr_sra_r_ph_w,
                                                        NoItinerary, DSPRegs,
-                                                       CPURegs>, ClearDefs;
+                                                       CPURegs>;
 
 // Shift
-class SHRA_QB_DESC : SHLL_QB_R2_DESC_BASE<"shra.qb", int_mips_shra_qb, immZExt3,
-                                          NoItinerary, DSPRegs>, ClearDefs;
+class SHRA_QB_DESC : SHLL_QB_R2_DESC_BASE<"shra.qb", null_frag, immZExt3,
+                                          NoItinerary, DSPRegs>;
 
 class SHRAV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shrav.qb", int_mips_shra_qb,
-                                           NoItinerary, DSPRegs>, ClearDefs;
+                                           NoItinerary, DSPRegs>;
 
 class SHRA_R_QB_DESC : SHLL_QB_R2_DESC_BASE<"shra_r.qb", int_mips_shra_r_qb,
-                                            immZExt3, NoItinerary, DSPRegs>,
-                       ClearDefs;
+                                            immZExt3, NoItinerary, DSPRegs>;
 
 class SHRAV_R_QB_DESC : SHLL_QB_R3_DESC_BASE<"shrav_r.qb", int_mips_shra_r_qb,
-                                             NoItinerary, DSPRegs>, ClearDefs;
+                                             NoItinerary, DSPRegs>;
 
-class SHRL_PH_DESC : SHLL_QB_R2_DESC_BASE<"shrl.ph", int_mips_shrl_ph, immZExt4,
-                                          NoItinerary, DSPRegs>, ClearDefs;
+class SHRL_PH_DESC : SHLL_QB_R2_DESC_BASE<"shrl.ph", null_frag, immZExt4,
+                                          NoItinerary, DSPRegs>;
 
 class SHRLV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrlv.ph", int_mips_shrl_ph,
-                                           NoItinerary, DSPRegs>, ClearDefs;
+                                           NoItinerary, DSPRegs>;
 
 // Misc
 class APPEND_DESC : APPEND_DESC_BASE<"append", int_mips_append, immZExt5,
-                                     NoItinerary>, ClearDefs;
+                                     NoItinerary>;
 
 class BALIGN_DESC : APPEND_DESC_BASE<"balign", int_mips_balign, immZExt2,
-                                     NoItinerary>, ClearDefs;
+                                     NoItinerary>;
 
 class PREPEND_DESC : APPEND_DESC_BASE<"prepend", int_mips_prepend, immZExt5,
-                                      NoItinerary>, ClearDefs;
+                                      NoItinerary>;
 
 // Pseudos.
-def BPOSGE32_PSEUDO : BPOSGE32_PSEUDO_DESC_BASE<int_mips_bposge32, NoItinerary>;
+def BPOSGE32_PSEUDO : BPOSGE32_PSEUDO_DESC_BASE<int_mips_bposge32,
+                                                NoItinerary>, Uses<[DSPPos]>;
 
 // Instruction defs.
 // MIPS DSP Rev 1
@@ -1094,6 +1129,10 @@ def MAQ_S_W_PHL : MAQ_S_W_PHL_ENC, MAQ_S_W_PHL_DESC;
 def MAQ_S_W_PHR : MAQ_S_W_PHR_ENC, MAQ_S_W_PHR_DESC;
 def MAQ_SA_W_PHL : MAQ_SA_W_PHL_ENC, MAQ_SA_W_PHL_DESC;
 def MAQ_SA_W_PHR : MAQ_SA_W_PHR_ENC, MAQ_SA_W_PHR_DESC;
+def MFHI_DSP : MFHI_ENC, MFHI_DESC;
+def MFLO_DSP : MFLO_ENC, MFLO_DESC;
+def MTHI_DSP : MTHI_ENC, MTHI_DESC;
+def MTLO_DSP : MTLO_ENC, MTLO_DESC;
 def DPAU_H_QBL : DPAU_H_QBL_ENC, DPAU_H_QBL_DESC;
 def DPAU_H_QBR : DPAU_H_QBR_ENC, DPAU_H_QBR_DESC;
 def DPSU_H_QBL : DPSU_H_QBL_ENC, DPSU_H_QBL_DESC;
@@ -1201,13 +1240,35 @@ def PREPEND : PREPEND_ENC, PREPEND_DESC;
 }
 
 // Pseudos.
-/// Pseudo instructions for loading, storing and copying accumulator registers.
 let isPseudo = 1 in {
+  // Pseudo instructions for loading and storing accumulator registers.
   defm LOAD_AC_DSP  : LoadM<"load_ac_dsp", ACRegsDSP>;
   defm STORE_AC_DSP : StoreM<"store_ac_dsp", ACRegsDSP>;
+
+  // Pseudos to load and store the ccond field of the DSP control register.
+  defm LOAD_CCOND_DSP  : LoadM<"load_ccond_dsp", DSPCC>;
+  defm STORE_CCOND_DSP : StoreM<"store_ccond_dsp", DSPCC>;
 }
 
-def COPY_AC_DSP : PseudoSE<(outs ACRegsDSP:$dst), (ins ACRegsDSP:$src), []>;
+// Pseudo CMP and PICK instructions.
+class PseudoCMP<Instruction RealInst> :
+  PseudoDSP<(outs DSPCC:$cmp), (ins DSPRegs:$rs, DSPRegs:$rt), []>,
+  PseudoInstExpansion<(RealInst DSPRegs:$rs, DSPRegs:$rt)>, NeverHasSideEffects;
+
+class PseudoPICK<Instruction RealInst> :
+  PseudoDSP<(outs DSPRegs:$rd), (ins DSPCC:$cmp, DSPRegs:$rs, DSPRegs:$rt), []>,
+  PseudoInstExpansion<(RealInst DSPRegs:$rd, DSPRegs:$rs, DSPRegs:$rt)>,
+  NeverHasSideEffects;
+
+def PseudoCMP_EQ_PH : PseudoCMP<CMP_EQ_PH>;
+def PseudoCMP_LT_PH : PseudoCMP<CMP_LT_PH>;
+def PseudoCMP_LE_PH : PseudoCMP<CMP_LE_PH>;
+def PseudoCMPU_EQ_QB : PseudoCMP<CMPU_EQ_QB>;
+def PseudoCMPU_LT_QB : PseudoCMP<CMPU_LT_QB>;
+def PseudoCMPU_LE_QB : PseudoCMP<CMPU_LE_QB>;
+
+def PseudoPICK_PH : PseudoPICK<PICK_PH>;
+def PseudoPICK_QB : PseudoPICK<PICK_QB>;
 
 // Patterns.
 class DSPPat<dag pattern, dag result, Predicate pred = HasDSP> :
@@ -1232,6 +1293,95 @@ def : DSPPat<(store (v2i16 DSPRegs:$val), addr:$a),
 def : DSPPat<(store (v4i8 DSPRegs:$val), addr:$a),
              (SW (COPY_TO_REGCLASS DSPRegs:$val, CPURegs), addr:$a)>;
 
+// Binary operations.
+class DSPBinPat<Instruction Inst, ValueType ValTy, SDPatternOperator Node,
+                Predicate Pred = HasDSP> :
+  DSPPat<(Node ValTy:$a, ValTy:$b), (Inst ValTy:$a, ValTy:$b), Pred>;
+
+def : DSPBinPat<ADDQ_PH, v2i16, int_mips_addq_ph>;
+def : DSPBinPat<ADDQ_PH, v2i16, add>;
+def : DSPBinPat<SUBQ_PH, v2i16, int_mips_subq_ph>;
+def : DSPBinPat<SUBQ_PH, v2i16, sub>;
+def : DSPBinPat<MUL_PH, v2i16, int_mips_mul_ph, HasDSPR2>;
+def : DSPBinPat<MUL_PH, v2i16, mul, HasDSPR2>;
+def : DSPBinPat<ADDU_QB, v4i8, int_mips_addu_qb>;
+def : DSPBinPat<ADDU_QB, v4i8, add>;
+def : DSPBinPat<SUBU_QB, v4i8, int_mips_subu_qb>;
+def : DSPBinPat<SUBU_QB, v4i8, sub>;
+def : DSPBinPat<ADDSC, i32, int_mips_addsc>;
+def : DSPBinPat<ADDSC, i32, addc>;
+def : DSPBinPat<ADDWC, i32, int_mips_addwc>;
+def : DSPBinPat<ADDWC, i32, adde>;
+
+// Shift immediate patterns.
+class DSPShiftPat<Instruction Inst, ValueType ValTy, SDPatternOperator Node,
+                  SDPatternOperator Imm, Predicate Pred = HasDSP> :
+  DSPPat<(Node ValTy:$a, Imm:$shamt), (Inst ValTy:$a, Imm:$shamt), Pred>;
+
+def : DSPShiftPat<SHLL_PH, v2i16, MipsSHLL_DSP, imm>;
+def : DSPShiftPat<SHRA_PH, v2i16, MipsSHRA_DSP, imm>;
+def : DSPShiftPat<SHRL_PH, v2i16, MipsSHRL_DSP, imm, HasDSPR2>;
+def : DSPShiftPat<SHLL_PH, v2i16, int_mips_shll_ph, immZExt4>;
+def : DSPShiftPat<SHRA_PH, v2i16, int_mips_shra_ph, immZExt4>;
+def : DSPShiftPat<SHRL_PH, v2i16, int_mips_shrl_ph, immZExt4, HasDSPR2>;
+def : DSPShiftPat<SHLL_QB, v4i8, MipsSHLL_DSP, imm>;
+def : DSPShiftPat<SHRA_QB, v4i8, MipsSHRA_DSP, imm, HasDSPR2>;
+def : DSPShiftPat<SHRL_QB, v4i8, MipsSHRL_DSP, imm>;
+def : DSPShiftPat<SHLL_QB, v4i8, int_mips_shll_qb, immZExt3>;
+def : DSPShiftPat<SHRA_QB, v4i8, int_mips_shra_qb, immZExt3, HasDSPR2>;
+def : DSPShiftPat<SHRL_QB, v4i8, int_mips_shrl_qb, immZExt3>;
+
+// SETCC/SELECT_CC patterns; *Inv variants swap PICK's inputs to invert the CC.
+class DSPSetCCPat<Instruction Cmp, Instruction Pick, ValueType ValTy,
+                  CondCode CC> :
+  DSPPat<(ValTy (MipsSETCC_DSP ValTy:$a, ValTy:$b, CC)),
+         (ValTy (Pick (ValTy (Cmp ValTy:$a, ValTy:$b)),
+                      (ValTy (COPY_TO_REGCLASS (ADDiu ZERO, -1), DSPRegs)),
+                      (ValTy ZERO)))>;
+
+class DSPSetCCPatInv<Instruction Cmp, Instruction Pick, ValueType ValTy,
+                     CondCode CC> :
+  DSPPat<(ValTy (MipsSETCC_DSP ValTy:$a, ValTy:$b, CC)),
+         (ValTy (Pick (ValTy (Cmp ValTy:$a, ValTy:$b)),
+                      (ValTy ZERO),
+                      (ValTy (COPY_TO_REGCLASS (ADDiu ZERO, -1), DSPRegs))))>;
+
+class DSPSelectCCPat<Instruction Cmp, Instruction Pick, ValueType ValTy,
+                     CondCode CC> :
+  DSPPat<(ValTy (MipsSELECT_CC_DSP ValTy:$a, ValTy:$b, ValTy:$c, ValTy:$d, CC)),
+         (ValTy (Pick (ValTy (Cmp ValTy:$a, ValTy:$b)), $c, $d))>;
+
+class DSPSelectCCPatInv<Instruction Cmp, Instruction Pick, ValueType ValTy,
+                        CondCode CC> :
+  DSPPat<(ValTy (MipsSELECT_CC_DSP ValTy:$a, ValTy:$b, ValTy:$c, ValTy:$d, CC)),
+         (ValTy (Pick (ValTy (Cmp ValTy:$a, ValTy:$b)), $d, $c))>;
+
+def : DSPSetCCPat<PseudoCMP_EQ_PH, PseudoPICK_PH, v2i16, SETEQ>;
+def : DSPSetCCPat<PseudoCMP_LT_PH, PseudoPICK_PH, v2i16, SETLT>;
+def : DSPSetCCPat<PseudoCMP_LE_PH, PseudoPICK_PH, v2i16, SETLE>;
+def : DSPSetCCPatInv<PseudoCMP_EQ_PH, PseudoPICK_PH, v2i16, SETNE>;
+def : DSPSetCCPatInv<PseudoCMP_LT_PH, PseudoPICK_PH, v2i16, SETGE>;
+def : DSPSetCCPatInv<PseudoCMP_LE_PH, PseudoPICK_PH, v2i16, SETGT>;
+def : DSPSetCCPat<PseudoCMPU_EQ_QB, PseudoPICK_QB, v4i8, SETEQ>;
+def : DSPSetCCPat<PseudoCMPU_LT_QB, PseudoPICK_QB, v4i8, SETULT>;
+def : DSPSetCCPat<PseudoCMPU_LE_QB, PseudoPICK_QB, v4i8, SETULE>;
+def : DSPSetCCPatInv<PseudoCMPU_EQ_QB, PseudoPICK_QB, v4i8, SETNE>;
+def : DSPSetCCPatInv<PseudoCMPU_LT_QB, PseudoPICK_QB, v4i8, SETUGE>;
+def : DSPSetCCPatInv<PseudoCMPU_LE_QB, PseudoPICK_QB, v4i8, SETUGT>;
+
+def : DSPSelectCCPat<PseudoCMP_EQ_PH, PseudoPICK_PH, v2i16, SETEQ>;
+def : DSPSelectCCPat<PseudoCMP_LT_PH, PseudoPICK_PH, v2i16, SETLT>;
+def : DSPSelectCCPat<PseudoCMP_LE_PH, PseudoPICK_PH, v2i16, SETLE>;
+def : DSPSelectCCPatInv<PseudoCMP_EQ_PH, PseudoPICK_PH, v2i16, SETNE>;
+def : DSPSelectCCPatInv<PseudoCMP_LT_PH, PseudoPICK_PH, v2i16, SETGE>;
+def : DSPSelectCCPatInv<PseudoCMP_LE_PH, PseudoPICK_PH, v2i16, SETGT>;
+def : DSPSelectCCPat<PseudoCMPU_EQ_QB, PseudoPICK_QB, v4i8, SETEQ>;
+def : DSPSelectCCPat<PseudoCMPU_LT_QB, PseudoPICK_QB, v4i8, SETULT>;
+def : DSPSelectCCPat<PseudoCMPU_LE_QB, PseudoPICK_QB, v4i8, SETULE>;
+def : DSPSelectCCPatInv<PseudoCMPU_EQ_QB, PseudoPICK_QB, v4i8, SETNE>;
+def : DSPSelectCCPatInv<PseudoCMPU_LT_QB, PseudoPICK_QB, v4i8, SETUGE>;
+def : DSPSelectCCPatInv<PseudoCMPU_LE_QB, PseudoPICK_QB, v4i8, SETUGT>;
+
 // Extr patterns.
 class EXTR_W_TY1_R2_Pat<SDPatternOperator OpNode, Instruction Instr> :
   DSPPat<(i32 (OpNode CPURegs:$rs, ACRegsDSP:$ac)),
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index 77b08cb..968e536 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -17,7 +17,6 @@
 #include "MipsSEISelDAGToDAG.h"
 #include "Mips.h"
 #include "MCTargetDesc/MipsBaseInfo.h"
-#include "MipsAnalyzeImmediate.h"
 #include "MipsMachineFunction.h"
 #include "MipsRegisterInfo.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index e2219f2..4d76181 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -30,7 +30,6 @@
 #include "llvm/IR/CallingConv.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/Intrinsics.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -198,6 +197,11 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case MipsISD::MADDU_DSP:         return "MipsISD::MADDU_DSP";
   case MipsISD::MSUB_DSP:          return "MipsISD::MSUB_DSP";
   case MipsISD::MSUBU_DSP:         return "MipsISD::MSUBU_DSP";
+  case MipsISD::SHLL_DSP:          return "MipsISD::SHLL_DSP";
+  case MipsISD::SHRA_DSP:          return "MipsISD::SHRA_DSP";
+  case MipsISD::SHRL_DSP:          return "MipsISD::SHRL_DSP";
+  case MipsISD::SETCC_DSP:         return "MipsISD::SETCC_DSP";
+  case MipsISD::SELECT_CC_DSP:     return "MipsISD::SELECT_CC_DSP";
   default:                         return NULL;
   }
 }
@@ -211,7 +215,7 @@ MipsTargetLowering(MipsTargetMachine &TM)
   // Mips does not have i1 type, so use i32 for
   // setcc operations results (slt, sgt, ...).
   setBooleanContents(ZeroOrOneBooleanContent);
-  setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
+  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
 
   // Load extented operations for i1 types must be promoted
   setLoadExtAction(ISD::EXTLOAD,  MVT::i1,  Promote);
@@ -346,9 +350,6 @@ MipsTargetLowering(MipsTargetMachine &TM)
   setOperationAction(ISD::VACOPY,            MVT::Other, Expand);
   setOperationAction(ISD::VAEND,             MVT::Other, Expand);
 
-  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
-  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
-
   // Use the default for now
   setOperationAction(ISD::STACKSAVE,         MVT::Other, Expand);
   setOperationAction(ISD::STACKRESTORE,      MVT::Other, Expand);
@@ -449,7 +450,7 @@ static SDValue performDivRemCombine(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
-static Mips::CondCode FPCondCCodeToFCC(ISD::CondCode CC) {
+static Mips::CondCode condCodeToFCC(ISD::CondCode CC) {
   switch (CC) {
   default: llvm_unreachable("Unknown fp condition code!");
   case ISD::SETEQ:
@@ -508,7 +509,7 @@ static SDValue createFPCmp(SelectionDAG &DAG, const SDValue &Op) {
   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
 
   return DAG.getNode(MipsISD::FPCmp, DL, MVT::Glue, LHS, RHS,
-                     DAG.getConstant(FPCondCCodeToFCC(CC), MVT::i32));
+                     DAG.getConstant(condCodeToFCC(CC), MVT::i32));
 }
 
 // Creates and returns a CMovFPT/F node.
@@ -712,10 +713,7 @@ void
 MipsTargetLowering::ReplaceNodeResults(SDNode *N,
                                        SmallVectorImpl<SDValue> &Results,
                                        SelectionDAG &DAG) const {
-  SDValue Res = LowerOperation(SDValue(N, 0), DAG);
-
-  for (unsigned I = 0, E = Res->getNumValues(); I != E; ++I)
-    Results.push_back(Res.getValue(I));
+  return LowerOperationWrapper(N, Results, DAG);
 }
 
 SDValue MipsTargetLowering::
@@ -739,15 +737,12 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const
   case ISD::FRAMEADDR:          return lowerFRAMEADDR(Op, DAG);
   case ISD::RETURNADDR:         return lowerRETURNADDR(Op, DAG);
   case ISD::EH_RETURN:          return lowerEH_RETURN(Op, DAG);
-  case ISD::MEMBARRIER:         return lowerMEMBARRIER(Op, DAG);
   case ISD::ATOMIC_FENCE:       return lowerATOMIC_FENCE(Op, DAG);
   case ISD::SHL_PARTS:          return lowerShiftLeftParts(Op, DAG);
   case ISD::SRA_PARTS:          return lowerShiftRightParts(Op, DAG, true);
   case ISD::SRL_PARTS:          return lowerShiftRightParts(Op, DAG, false);
   case ISD::LOAD:               return lowerLOAD(Op, DAG);
   case ISD::STORE:              return lowerSTORE(Op, DAG);
-  case ISD::INTRINSIC_WO_CHAIN: return lowerINTRINSIC_WO_CHAIN(Op, DAG);
-  case ISD::INTRINSIC_W_CHAIN:  return lowerINTRINSIC_W_CHAIN(Op, DAG);
   case ISD::ADD:                return lowerADD(Op, DAG);
   }
   return SDValue();
@@ -1827,15 +1822,6 @@ SDValue MipsTargetLowering::lowerEH_RETURN(SDValue Op, SelectionDAG &DAG)
                      Chain.getValue(1));
 }
 
-// TODO: set SType according to the desired memory barrier behavior.
-SDValue
-MipsTargetLowering::lowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const {
-  unsigned SType = 0;
-  DebugLoc DL = Op.getDebugLoc();
-  return DAG.getNode(MipsISD::Sync, DL, MVT::Other, Op.getOperand(0),
-                     DAG.getConstant(SType, MVT::i32));
-}
-
 SDValue MipsTargetLowering::lowerATOMIC_FENCE(SDValue Op,
                                               SelectionDAG &DAG) const {
   // FIXME: Need pseudo-fence for 'singlethread' fences
@@ -1918,7 +1904,7 @@ SDValue MipsTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
   return DAG.getMergeValues(Ops, 2, DL);
 }
 
-static SDValue CreateLoadLR(unsigned Opc, SelectionDAG &DAG, LoadSDNode *LD,
+static SDValue createLoadLR(unsigned Opc, SelectionDAG &DAG, LoadSDNode *LD,
                             SDValue Chain, SDValue Src, unsigned Offset) {
   SDValue Ptr = LD->getBasePtr();
   EVT VT = LD->getValueType(0), MemVT = LD->getMemoryVT();
@@ -1958,15 +1944,15 @@ SDValue MipsTargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
   //  (set tmp, (ldl (add baseptr, 7), undef))
   //  (set dst, (ldr baseptr, tmp))
   if ((VT == MVT::i64) && (ExtType == ISD::NON_EXTLOAD)) {
-    SDValue LDL = CreateLoadLR(MipsISD::LDL, DAG, LD, Chain, Undef,
+    SDValue LDL = createLoadLR(MipsISD::LDL, DAG, LD, Chain, Undef,
                                IsLittle ? 7 : 0);
-    return CreateLoadLR(MipsISD::LDR, DAG, LD, LDL.getValue(1), LDL,
+    return createLoadLR(MipsISD::LDR, DAG, LD, LDL.getValue(1), LDL,
                         IsLittle ? 0 : 7);
   }
 
-  SDValue LWL = CreateLoadLR(MipsISD::LWL, DAG, LD, Chain, Undef,
+  SDValue LWL = createLoadLR(MipsISD::LWL, DAG, LD, Chain, Undef,
                              IsLittle ? 3 : 0);
-  SDValue LWR = CreateLoadLR(MipsISD::LWR, DAG, LD, LWL.getValue(1), LWL,
+  SDValue LWR = createLoadLR(MipsISD::LWR, DAG, LD, LWL.getValue(1), LWL,
                              IsLittle ? 0 : 3);
 
   // Expand
@@ -1997,7 +1983,7 @@ SDValue MipsTargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
   return DAG.getMergeValues(Ops, 2, DL);
 }
 
-static SDValue CreateStoreLR(unsigned Opc, SelectionDAG &DAG, StoreSDNode *SD,
+static SDValue createStoreLR(unsigned Opc, SelectionDAG &DAG, StoreSDNode *SD,
                              SDValue Chain, unsigned Offset) {
   SDValue Ptr = SD->getBasePtr(), Value = SD->getValue();
   EVT MemVT = SD->getMemoryVT(), BasePtrVT = Ptr.getValueType();
@@ -2034,9 +2020,9 @@ SDValue MipsTargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
   //  (swl val, (add baseptr, 3))
   //  (swr val, baseptr)
   if ((VT == MVT::i32) || SD->isTruncatingStore()) {
-    SDValue SWL = CreateStoreLR(MipsISD::SWL, DAG, SD, Chain,
+    SDValue SWL = createStoreLR(MipsISD::SWL, DAG, SD, Chain,
                                 IsLittle ? 3 : 0);
-    return CreateStoreLR(MipsISD::SWR, DAG, SD, SWL, IsLittle ? 0 : 3);
+    return createStoreLR(MipsISD::SWR, DAG, SD, SWL, IsLittle ? 0 : 3);
   }
 
   assert(VT == MVT::i64);
@@ -2046,172 +2032,8 @@ SDValue MipsTargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
   // to
   //  (sdl val, (add baseptr, 7))
   //  (sdr val, baseptr)
-  SDValue SDL = CreateStoreLR(MipsISD::SDL, DAG, SD, Chain, IsLittle ? 7 : 0);
-  return CreateStoreLR(MipsISD::SDR, DAG, SD, SDL, IsLittle ? 0 : 7);
-}
-
-static SDValue initAccumulator(SDValue In, DebugLoc DL, SelectionDAG &DAG) {
-  SDValue InLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In,
-                             DAG.getConstant(0, MVT::i32));
-  SDValue InHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In,
-                             DAG.getConstant(1, MVT::i32));
-  return DAG.getNode(MipsISD::InsertLOHI, DL, MVT::Untyped, InLo, InHi);
-}
-
-static SDValue extractLOHI(SDValue Op, DebugLoc DL, SelectionDAG &DAG) {
-  SDValue Lo = DAG.getNode(MipsISD::ExtractLOHI, DL, MVT::i32, Op,
-                           DAG.getConstant(Mips::sub_lo, MVT::i32));
-  SDValue Hi = DAG.getNode(MipsISD::ExtractLOHI, DL, MVT::i32, Op,
-                           DAG.getConstant(Mips::sub_hi, MVT::i32));
-  return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
-}
-
-// This function expands mips intrinsic nodes which have 64-bit input operands
-// or output values.
-//
-// out64 = intrinsic-node in64
-// =>
-// lo = copy (extract-element (in64, 0))
-// hi = copy (extract-element (in64, 1))
-// mips-specific-node
-// v0 = copy lo
-// v1 = copy hi
-// out64 = merge-values (v0, v1)
-//
-static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) {
-  DebugLoc DL = Op.getDebugLoc();
-  bool HasChainIn = Op->getOperand(0).getValueType() == MVT::Other;
-  SmallVector<SDValue, 3> Ops;
-  unsigned OpNo = 0;
-
-  // See if Op has a chain input.
-  if (HasChainIn)
-    Ops.push_back(Op->getOperand(OpNo++));
-
-  // The next operand is the intrinsic opcode.
-  assert(Op->getOperand(OpNo).getOpcode() == ISD::TargetConstant);
-
-  // See if the next operand has type i64.
-  SDValue Opnd = Op->getOperand(++OpNo), In64;
-
-  if (Opnd.getValueType() == MVT::i64)
-    In64 = initAccumulator(Opnd, DL, DAG);
-  else
-    Ops.push_back(Opnd);
-
-  // Push the remaining operands.
-  for (++OpNo ; OpNo < Op->getNumOperands(); ++OpNo)
-    Ops.push_back(Op->getOperand(OpNo));
-
-  // Add In64 to the end of the list.
-  if (In64.getNode())
-    Ops.push_back(In64);
-
-  // Scan output.
-  SmallVector<EVT, 2> ResTys;
-
-  for (SDNode::value_iterator I = Op->value_begin(), E = Op->value_end();
-       I != E; ++I)
-    ResTys.push_back((*I == MVT::i64) ? MVT::Untyped : *I);
-
-  // Create node.
-  SDValue Val = DAG.getNode(Opc, DL, ResTys, &Ops[0], Ops.size());
-  SDValue Out = (ResTys[0] == MVT::Untyped) ? extractLOHI(Val, DL, DAG) : Val;
-
-  if (!HasChainIn)
-    return Out;
-
-  assert(Val->getValueType(1) == MVT::Other);
-  SDValue Vals[] = { Out, SDValue(Val.getNode(), 1) };
-  return DAG.getMergeValues(Vals, 2, DL);
-}
-
-SDValue MipsTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
-                                                    SelectionDAG &DAG) const {
-  switch (cast<ConstantSDNode>(Op->getOperand(0))->getZExtValue()) {
-  default:
-    return SDValue();
-  case Intrinsic::mips_shilo:
-    return lowerDSPIntr(Op, DAG, MipsISD::SHILO);
-  case Intrinsic::mips_dpau_h_qbl:
-    return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBL);
-  case Intrinsic::mips_dpau_h_qbr:
-    return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBR);
-  case Intrinsic::mips_dpsu_h_qbl:
-    return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBL);
-  case Intrinsic::mips_dpsu_h_qbr:
-    return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBR);
-  case Intrinsic::mips_dpa_w_ph:
-    return lowerDSPIntr(Op, DAG, MipsISD::DPA_W_PH);
-  case Intrinsic::mips_dps_w_ph:
-    return lowerDSPIntr(Op, DAG, MipsISD::DPS_W_PH);
-  case Intrinsic::mips_dpax_w_ph:
-    return lowerDSPIntr(Op, DAG, MipsISD::DPAX_W_PH);
-  case Intrinsic::mips_dpsx_w_ph:
-    return lowerDSPIntr(Op, DAG, MipsISD::DPSX_W_PH);
-  case Intrinsic::mips_mulsa_w_ph:
-    return lowerDSPIntr(Op, DAG, MipsISD::MULSA_W_PH);
-  case Intrinsic::mips_mult:
-    return lowerDSPIntr(Op, DAG, MipsISD::Mult);
-  case Intrinsic::mips_multu:
-    return lowerDSPIntr(Op, DAG, MipsISD::Multu);
-  case Intrinsic::mips_madd:
-    return lowerDSPIntr(Op, DAG, MipsISD::MAdd);
-  case Intrinsic::mips_maddu:
-    return lowerDSPIntr(Op, DAG, MipsISD::MAddu);
-  case Intrinsic::mips_msub:
-    return lowerDSPIntr(Op, DAG, MipsISD::MSub);
-  case Intrinsic::mips_msubu:
-    return lowerDSPIntr(Op, DAG, MipsISD::MSubu);
-  }
-}
-
-SDValue MipsTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
-                                                   SelectionDAG &DAG) const {
-  switch (cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue()) {
-  default:
-    return SDValue();
-  case Intrinsic::mips_extp:
-    return lowerDSPIntr(Op, DAG, MipsISD::EXTP);
-  case Intrinsic::mips_extpdp:
-    return lowerDSPIntr(Op, DAG, MipsISD::EXTPDP);
-  case Intrinsic::mips_extr_w:
-    return lowerDSPIntr(Op, DAG, MipsISD::EXTR_W);
-  case Intrinsic::mips_extr_r_w:
-    return lowerDSPIntr(Op, DAG, MipsISD::EXTR_R_W);
-  case Intrinsic::mips_extr_rs_w:
-    return lowerDSPIntr(Op, DAG, MipsISD::EXTR_RS_W);
-  case Intrinsic::mips_extr_s_h:
-    return lowerDSPIntr(Op, DAG, MipsISD::EXTR_S_H);
-  case Intrinsic::mips_mthlip:
-    return lowerDSPIntr(Op, DAG, MipsISD::MTHLIP);
-  case Intrinsic::mips_mulsaq_s_w_ph:
-    return lowerDSPIntr(Op, DAG, MipsISD::MULSAQ_S_W_PH);
-  case Intrinsic::mips_maq_s_w_phl:
-    return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHL);
-  case Intrinsic::mips_maq_s_w_phr:
-    return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHR);
-  case Intrinsic::mips_maq_sa_w_phl:
-    return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHL);
-  case Intrinsic::mips_maq_sa_w_phr:
-    return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHR);
-  case Intrinsic::mips_dpaq_s_w_ph:
-    return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_S_W_PH);
-  case Intrinsic::mips_dpsq_s_w_ph:
-    return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_S_W_PH);
-  case Intrinsic::mips_dpaq_sa_l_w:
-    return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_SA_L_W);
-  case Intrinsic::mips_dpsq_sa_l_w:
-    return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_SA_L_W);
-  case Intrinsic::mips_dpaqx_s_w_ph:
-    return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_S_W_PH);
-  case Intrinsic::mips_dpaqx_sa_w_ph:
-    return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_SA_W_PH);
-  case Intrinsic::mips_dpsqx_s_w_ph:
-    return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_S_W_PH);
-  case Intrinsic::mips_dpsqx_sa_w_ph:
-    return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_SA_W_PH);
-  }
+  SDValue SDL = createStoreLR(MipsISD::SDL, DAG, SD, Chain, IsLittle ? 7 : 0);
+  return createStoreLR(MipsISD::SDR, DAG, SD, SDL, IsLittle ? 0 : 7);
 }
 
 SDValue MipsTargetLowering::lowerADD(SDValue Op, SelectionDAG &DAG) const {
@@ -3009,8 +2831,8 @@ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const
       return std::make_pair((unsigned)Mips::T9_64, &Mips::CPU64RegsRegClass);
     case 'l': // register suitable for indirect jump
       if (VT == MVT::i32)
-        return std::make_pair((unsigned)Mips::LO, &Mips::HILORegClass);
-      return std::make_pair((unsigned)Mips::LO64, &Mips::HILO64RegClass);
+        return std::make_pair((unsigned)Mips::LO, &Mips::LORegsRegClass);
+      return std::make_pair((unsigned)Mips::LO64, &Mips::LORegs64RegClass);
     case 'x': // register suitable for indirect jump
       // Fixme: Not triggering the use of both hi and low
       // This will generate an error message
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index cab71a6..5587e8f 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -143,6 +143,15 @@ namespace llvm {
       MSUB_DSP,
       MSUBU_DSP,
 
+      // DSP shift nodes.
+      SHLL_DSP,
+      SHRA_DSP,
+      SHRL_DSP,
+
+      // DSP setcc and select_cc nodes.
+      SETCC_DSP,
+      SELECT_CC_DSP,
+
       // Load/Store Left/Right nodes.
       LWL = ISD::FIRST_TARGET_MEMORY_OPCODE,
       LWR,
@@ -338,15 +347,12 @@ namespace llvm {
     SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
     SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
     SDValue lowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
-    SDValue lowerMEMBARRIER(SDValue Op, SelectionDAG& DAG) const;
     SDValue lowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const;
     SDValue lowerShiftLeftParts(SDValue Op, SelectionDAG& DAG) const;
     SDValue lowerShiftRightParts(SDValue Op, SelectionDAG& DAG,
                                  bool IsSRA) const;
     SDValue lowerLOAD(SDValue Op, SelectionDAG &DAG) const;
     SDValue lowerSTORE(SDValue Op, SelectionDAG &DAG) const;
-    SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
-    SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
     SDValue lowerADD(SDValue Op, SelectionDAG &DAG) const;
 
     /// isEligibleForTailCallOptimization - Check whether the call is eligible
diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td
index ee432c8..ea07372 100644
--- a/lib/Target/Mips/MipsInstrFormats.td
+++ b/lib/Target/Mips/MipsInstrFormats.td
@@ -36,6 +36,24 @@ def FrmFR     : Format<4>;
 def FrmFI     : Format<5>;
 def FrmOther  : Format<6>; // Instruction w/ a custom format
 
+class MMRel;
+
+def Std2MicroMips : InstrMapping {
+  let FilterClass = "MMRel";
+  // Instructions with the same BaseOpcode value form a row.
+  let RowFields = ["BaseOpcode"];
+  // Instructions with the same Arch value form a column.
+  let ColFields = ["Arch"];
+  // The key column is the instructions whose Arch is "se".
+  let KeyCol = ["se"];
+  // Value columns are Arch="se" and Arch="micromips".
+  let ValueCols = [["se"], ["micromips"]];
+}
+
+class StdArch {
+  string Arch = "se";
+}
+
 // Generic Mips Format
 class MipsInst<dag outs, dag ins, string asmstr, list<dag> pattern,
                InstrItinClass itin, Format f>: Instruction
@@ -74,9 +92,11 @@ class MipsInst<dag outs, dag ins, string asmstr, list<dag> pattern,
 
 // Mips32/64 Instruction Format
 class InstSE<dag outs, dag ins, string asmstr, list<dag> pattern,
-             InstrItinClass itin, Format f>:
+             InstrItinClass itin, Format f, string opstr = ""> :
   MipsInst<outs, ins, asmstr, pattern, itin, f> {
   let Predicates = [HasStdEnc];
+  string BaseOpcode = opstr;
+  string Arch;
 }
 
 // Mips Pseudo Instructions Format
@@ -192,7 +212,7 @@ class MFC3OP_FM<bits<6> op, bits<5> mfmt>
   let Inst{2-0}   = sel;
 }
 
-class ADD_FM<bits<6> op, bits<6> funct> {
+class ADD_FM<bits<6> op, bits<6> funct> : StdArch {
   bits<5> rd;
   bits<5> rs;
   bits<5> rt;
@@ -207,7 +227,7 @@ class ADD_FM<bits<6> op, bits<6> funct> {
   let Inst{5-0}   = funct;
 }
 
-class ADDI_FM<bits<6> op> {
+class ADDI_FM<bits<6> op> : StdArch {
   bits<5>  rs;
   bits<5>  rt;
   bits<16> imm16;
@@ -220,7 +240,7 @@ class ADDI_FM<bits<6> op> {
   let Inst{15-0}  = imm16;
 }
 
-class SRA_FM<bits<6> funct, bit rotate> {
+class SRA_FM<bits<6> funct, bit rotate> : StdArch {
   bits<5> rd;
   bits<5> rt;
   bits<5> shamt;
@@ -236,7 +256,7 @@ class SRA_FM<bits<6> funct, bit rotate> {
   let Inst{5-0}   = funct;
 }
 
-class SRLV_FM<bits<6> funct, bit rotate> {
+class SRLV_FM<bits<6> funct, bit rotate> : StdArch {
   bits<5> rd;
   bits<5> rt;
   bits<5> rs;
@@ -288,7 +308,7 @@ class B_FM {
   let Inst{15-0}  = offset;
 }
 
-class SLTI_FM<bits<6> op> {
+class SLTI_FM<bits<6> op> : StdArch {
   bits<5> rt;
   bits<5> rs;
   bits<16> imm16;
@@ -413,7 +433,7 @@ class SYNC_FM {
   let Inst{5-0}   = 0xf;
 }
 
-class MULT_FM<bits<6> op, bits<6> funct> {
+class MULT_FM<bits<6> op, bits<6> funct> : StdArch {
   bits<5>  rs;
   bits<5>  rt;
 
@@ -529,7 +549,7 @@ class MFC1_FM<bits<5> funct> {
   let Inst{10-0}  = 0;
 }
 
-class LW_FM<bits<6> op> {
+class LW_FM<bits<6> op> : StdArch {
   bits<5> rt;
   bits<21> addr;
 
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index 3a82e81..86ec729 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -179,6 +179,7 @@ def NoNaNsFPMath :    Predicate<"TM.Options.NoNaNsFPMath">,
                       AssemblerPredicate<"FeatureMips32">;
 def HasStdEnc :       Predicate<"Subtarget.hasStandardEncoding()">,
                       AssemblerPredicate<"!FeatureMips16">;
+def NotDSP :          Predicate<"!Subtarget.hasDSP()">;
 
 class MipsPat<dag pattern, dag result> : Pat<pattern, result> {
   let Predicates = [HasStdEnc];
@@ -374,11 +375,9 @@ class ArithLogicR<string opstr, RegisterOperand RO, bit isComm = 0,
                   SDPatternOperator OpNode = null_frag>:
   InstSE<(outs RO:$rd), (ins RO:$rs, RO:$rt),
          !strconcat(opstr, "\t$rd, $rs, $rt"),
-         [(set RO:$rd, (OpNode RO:$rs, RO:$rt))], Itin, FrmR> {
+         [(set RO:$rd, (OpNode RO:$rs, RO:$rt))], Itin, FrmR, opstr> {
   let isCommutable = isComm;
   let isReMaterializable = 1;
-  string BaseOpcode;
-  string Arch;
 }
 
 // Arithmetic and logical instructions with 2 register operands.
@@ -387,7 +386,8 @@ class ArithLogicI<string opstr, Operand Od, RegisterOperand RO,
                   SDPatternOperator OpNode = null_frag> :
   InstSE<(outs RO:$rt), (ins RO:$rs, Od:$imm16),
          !strconcat(opstr, "\t$rt, $rs, $imm16"),
-         [(set RO:$rt, (OpNode RO:$rs, imm_type:$imm16))], IIAlu, FrmI> {
+         [(set RO:$rt, (OpNode RO:$rs, imm_type:$imm16))],
+         IIAlu, FrmI, opstr> {
   let isReMaterializable = 1;
 }
 
@@ -404,7 +404,7 @@ class MArithR<string opstr, bit isComm = 0> :
 class LogicNOR<string opstr, RegisterOperand RC>:
   InstSE<(outs RC:$rd), (ins RC:$rs, RC:$rt),
          !strconcat(opstr, "\t$rd, $rs, $rt"),
-         [(set RC:$rd, (not (or RC:$rs, RC:$rt)))], IIAlu, FrmR> {
+         [(set RC:$rd, (not (or RC:$rs, RC:$rt)))], IIAlu, FrmR, opstr> {
   let isCommutable = 1;
 }
 
@@ -414,13 +414,13 @@ class shift_rotate_imm<string opstr, Operand ImmOpnd,
                        SDPatternOperator PF = null_frag> :
   InstSE<(outs RC:$rd), (ins RC:$rt, ImmOpnd:$shamt),
          !strconcat(opstr, "\t$rd, $rt, $shamt"),
-         [(set RC:$rd, (OpNode RC:$rt, PF:$shamt))], IIAlu, FrmR>;
+         [(set RC:$rd, (OpNode RC:$rt, PF:$shamt))], IIAlu, FrmR, opstr>;
 
 class shift_rotate_reg<string opstr, RegisterOperand RC,
                        SDPatternOperator OpNode = null_frag>:
   InstSE<(outs RC:$rd), (ins CPURegsOpnd:$rs, RC:$rt),
          !strconcat(opstr, "\t$rd, $rt, $rs"),
-         [(set RC:$rd, (OpNode RC:$rt, CPURegsOpnd:$rs))], IIAlu, FrmR>;
+         [(set RC:$rd, (OpNode RC:$rt, CPURegsOpnd:$rs))], IIAlu, FrmR, opstr>;
 
 // Load Upper Imediate
 class LoadUpper<string opstr, RegisterClass RC, Operand Imm>:
@@ -440,18 +440,20 @@ class FMem<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
 
 // Memory Load/Store
 class Load<string opstr, SDPatternOperator OpNode, RegisterClass RC,
-           Operand MemOpnd, ComplexPattern Addr> :
+           Operand MemOpnd, ComplexPattern Addr, string ofsuffix> :
   InstSE<(outs RC:$rt), (ins MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"),
-         [(set RC:$rt, (OpNode Addr:$addr))], NoItinerary, FrmI> {
+         [(set RC:$rt, (OpNode Addr:$addr))], NoItinerary, FrmI,
+         !strconcat(opstr, ofsuffix)> {
   let DecoderMethod = "DecodeMem";
   let canFoldAsLoad = 1;
   let mayLoad = 1;
 }
 
 class Store<string opstr, SDPatternOperator OpNode, RegisterClass RC,
-            Operand MemOpnd, ComplexPattern Addr> :
+            Operand MemOpnd, ComplexPattern Addr, string ofsuffix> :
   InstSE<(outs), (ins RC:$rt, MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"),
-         [(OpNode RC:$rt, Addr:$addr)], NoItinerary, FrmI> {
+         [(OpNode RC:$rt, Addr:$addr)], NoItinerary, FrmI,
+         !strconcat(opstr, ofsuffix)> {
   let DecoderMethod = "DecodeMem";
   let mayStore = 1;
 }
@@ -459,8 +461,9 @@ class Store<string opstr, SDPatternOperator OpNode, RegisterClass RC,
 multiclass LoadM<string opstr, RegisterClass RC,
                  SDPatternOperator OpNode = null_frag,
                  ComplexPattern Addr = addr> {
-  def NAME : Load<opstr, OpNode, RC, mem, Addr>, Requires<[NotN64, HasStdEnc]>;
-  def _P8  : Load<opstr, OpNode, RC, mem64, Addr>,
+  def NAME : Load<opstr, OpNode, RC, mem, Addr, "">,
+             Requires<[NotN64, HasStdEnc]>;
+  def _P8  : Load<opstr, OpNode, RC, mem64, Addr, "_p8">,
              Requires<[IsN64, HasStdEnc]> {
     let DecoderNamespace = "Mips64";
     let isCodeGenOnly = 1;
@@ -470,8 +473,9 @@ multiclass LoadM<string opstr, RegisterClass RC,
 multiclass StoreM<string opstr, RegisterClass RC,
                   SDPatternOperator OpNode = null_frag,
                   ComplexPattern Addr = addr> {
-  def NAME : Store<opstr, OpNode, RC, mem, Addr>, Requires<[NotN64, HasStdEnc]>;
-  def _P8  : Store<opstr, OpNode, RC, mem64, Addr>,
+  def NAME : Store<opstr, OpNode, RC, mem, Addr, "">,
+             Requires<[NotN64, HasStdEnc]>;
+  def _P8  : Store<opstr, OpNode, RC, mem64, Addr, "_p8">,
              Requires<[IsN64, HasStdEnc]> {
     let DecoderNamespace = "Mips64";
     let isCodeGenOnly = 1;
@@ -542,14 +546,15 @@ class CBranchZero<string opstr, PatFrag cond_op, RegisterClass RC> :
 class SetCC_R<string opstr, PatFrag cond_op, RegisterClass RC> :
   InstSE<(outs CPURegsOpnd:$rd), (ins RC:$rs, RC:$rt),
          !strconcat(opstr, "\t$rd, $rs, $rt"),
-         [(set CPURegsOpnd:$rd, (cond_op RC:$rs, RC:$rt))], IIAlu, FrmR>;
+         [(set CPURegsOpnd:$rd, (cond_op RC:$rs, RC:$rt))],
+         IIAlu, FrmR, opstr>;
 
 class SetCC_I<string opstr, PatFrag cond_op, Operand Od, PatLeaf imm_type,
               RegisterClass RC>:
   InstSE<(outs CPURegsOpnd:$rt), (ins RC:$rs, Od:$imm16),
          !strconcat(opstr, "\t$rt, $rs, $imm16"),
          [(set CPURegsOpnd:$rt, (cond_op RC:$rs, imm_type:$imm16))],
-         IIAlu, FrmI>;
+         IIAlu, FrmI, opstr>;
 
 // Jump
 class JumpFJ<DAGOperand opnd, string opstr, SDPatternOperator operator,
@@ -636,7 +641,7 @@ class SYNC_FT :
 class Mult<string opstr, InstrItinClass itin, RegisterOperand RO,
            list<Register> DefRegs> :
   InstSE<(outs), (ins RO:$rs, RO:$rt), !strconcat(opstr, "\t$rs, $rt"), [],
-         itin, FrmR> {
+         itin, FrmR, opstr> {
   let isCommutable = 1;
   let Defs = DefRegs;
   let neverHasSideEffects = 1;
@@ -832,14 +837,12 @@ let usesCustomInserter = 1 in {
   defm ATOMIC_CMP_SWAP_I32  : AtomicCmpSwap32<atomic_cmp_swap_32>;
 }
 
-/// Pseudo instructions for loading, storing and copying accumulator registers.
+/// Pseudo instructions for loading and storing accumulator registers.
 let isPseudo = 1 in {
   defm LOAD_AC64  : LoadM<"load_ac64", ACRegs>;
   defm STORE_AC64 : StoreM<"store_ac64", ACRegs>;
 }
 
-def COPY_AC64 : PseudoSE<(outs ACRegs:$dst), (ins ACRegs:$src), []>;
-
 //===----------------------------------------------------------------------===//
 // Instruction definition
 //===----------------------------------------------------------------------===//
@@ -848,60 +851,70 @@ def COPY_AC64 : PseudoSE<(outs ACRegs:$dst), (ins ACRegs:$src), []>;
 //===----------------------------------------------------------------------===//
 
 /// Arithmetic Instructions (ALU Immediate)
-def ADDiu : ArithLogicI<"addiu", simm16, CPURegsOpnd, immSExt16, add>,
+def ADDiu : MMRel, ArithLogicI<"addiu", simm16, CPURegsOpnd, immSExt16, add>,
             ADDI_FM<0x9>, IsAsCheapAsAMove;
-def ADDi  : ArithLogicI<"addi", simm16, CPURegsOpnd>, ADDI_FM<0x8>;
-def SLTi  : SetCC_I<"slti", setlt, simm16, immSExt16, CPURegs>, SLTI_FM<0xa>;
-def SLTiu : SetCC_I<"sltiu", setult, simm16, immSExt16, CPURegs>, SLTI_FM<0xb>;
-def ANDi  : ArithLogicI<"andi", uimm16, CPURegsOpnd, immZExt16, and>,
+def ADDi  : MMRel, ArithLogicI<"addi", simm16, CPURegsOpnd>, ADDI_FM<0x8>;
+def SLTi  : MMRel, SetCC_I<"slti", setlt, simm16, immSExt16, CPURegs>,
+            SLTI_FM<0xa>;
+def SLTiu : MMRel, SetCC_I<"sltiu", setult, simm16, immSExt16, CPURegs>,
+            SLTI_FM<0xb>;
+def ANDi  : MMRel, ArithLogicI<"andi", uimm16, CPURegsOpnd, immZExt16, and>,
             ADDI_FM<0xc>;
-def ORi   : ArithLogicI<"ori", uimm16, CPURegsOpnd, immZExt16, or>,
+def ORi   : MMRel, ArithLogicI<"ori", uimm16, CPURegsOpnd, immZExt16, or>,
             ADDI_FM<0xd>;
-def XORi  : ArithLogicI<"xori", uimm16, CPURegsOpnd, immZExt16, xor>,
+def XORi  : MMRel, ArithLogicI<"xori", uimm16, CPURegsOpnd, immZExt16, xor>,
             ADDI_FM<0xe>;
-def LUi   : LoadUpper<"lui", CPURegs, uimm16>, LUI_FM;
+def LUi   : MMRel, LoadUpper<"lui", CPURegs, uimm16>, LUI_FM;
 
 /// Arithmetic Instructions (3-Operand, R-Type)
-def ADDu : ArithLogicR<"addu", CPURegsOpnd, 1, IIAlu, add>, ADD_FM<0, 0x21>;
-def SUBu : ArithLogicR<"subu", CPURegsOpnd, 0, IIAlu, sub>, ADD_FM<0, 0x23>;
-def MUL  : ArithLogicR<"mul", CPURegsOpnd, 1, IIImul, mul>, ADD_FM<0x1c, 2>;
-def ADD  : ArithLogicR<"add", CPURegsOpnd>, ADD_FM<0, 0x20>;
-def SUB  : ArithLogicR<"sub", CPURegsOpnd>, ADD_FM<0, 0x22>;
-def SLT  : SetCC_R<"slt", setlt, CPURegs>, ADD_FM<0, 0x2a>;
-def SLTu : SetCC_R<"sltu", setult, CPURegs>, ADD_FM<0, 0x2b>;
-def AND  : ArithLogicR<"and", CPURegsOpnd, 1, IIAlu, and>, ADD_FM<0, 0x24>;
-def OR   : ArithLogicR<"or", CPURegsOpnd, 1, IIAlu, or>, ADD_FM<0, 0x25>;
-def XOR  : ArithLogicR<"xor", CPURegsOpnd, 1, IIAlu, xor>, ADD_FM<0, 0x26>;
-def NOR  : LogicNOR<"nor", CPURegsOpnd>, ADD_FM<0, 0x27>;
+def ADDu  : MMRel, ArithLogicR<"addu", CPURegsOpnd, 1, IIAlu, add>,
+            ADD_FM<0, 0x21>;
+def SUBu  : MMRel, ArithLogicR<"subu", CPURegsOpnd, 0, IIAlu, sub>,
+            ADD_FM<0, 0x23>;
+def MUL   : MMRel, ArithLogicR<"mul", CPURegsOpnd, 1, IIImul, mul>,
+            ADD_FM<0x1c, 2>;
+def ADD   : MMRel, ArithLogicR<"add", CPURegsOpnd>, ADD_FM<0, 0x20>;
+def SUB   : MMRel, ArithLogicR<"sub", CPURegsOpnd>, ADD_FM<0, 0x22>;
+def SLT   : MMRel, SetCC_R<"slt", setlt, CPURegs>, ADD_FM<0, 0x2a>;
+def SLTu  : MMRel, SetCC_R<"sltu", setult, CPURegs>, ADD_FM<0, 0x2b>;
+def AND   : MMRel, ArithLogicR<"and", CPURegsOpnd, 1, IIAlu, and>,
+            ADD_FM<0, 0x24>;
+def OR    : MMRel, ArithLogicR<"or", CPURegsOpnd, 1, IIAlu, or>,
+            ADD_FM<0, 0x25>;
+def XOR   : MMRel, ArithLogicR<"xor", CPURegsOpnd, 1, IIAlu, xor>,
+            ADD_FM<0, 0x26>;
+def NOR   : MMRel, LogicNOR<"nor", CPURegsOpnd>, ADD_FM<0, 0x27>;
 
 /// Shift Instructions
-def SLL  : shift_rotate_imm<"sll", shamt, CPURegsOpnd, shl, immZExt5>,
+def SLL  : MMRel, shift_rotate_imm<"sll", shamt, CPURegsOpnd, shl, immZExt5>,
            SRA_FM<0, 0>;
-def SRL  : shift_rotate_imm<"srl", shamt, CPURegsOpnd, srl, immZExt5>,
+def SRL  : MMRel, shift_rotate_imm<"srl", shamt, CPURegsOpnd, srl, immZExt5>,
            SRA_FM<2, 0>;
-def SRA  : shift_rotate_imm<"sra", shamt, CPURegsOpnd, sra, immZExt5>,
+def SRA  : MMRel, shift_rotate_imm<"sra", shamt, CPURegsOpnd, sra, immZExt5>,
            SRA_FM<3, 0>;
-def SLLV : shift_rotate_reg<"sllv", CPURegsOpnd, shl>, SRLV_FM<4, 0>;
-def SRLV : shift_rotate_reg<"srlv", CPURegsOpnd, srl>, SRLV_FM<6, 0>;
-def SRAV : shift_rotate_reg<"srav", CPURegsOpnd, sra>, SRLV_FM<7, 0>;
+def SLLV : MMRel, shift_rotate_reg<"sllv", CPURegsOpnd, shl>, SRLV_FM<4, 0>;
+def SRLV : MMRel, shift_rotate_reg<"srlv", CPURegsOpnd, srl>, SRLV_FM<6, 0>;
+def SRAV : MMRel, shift_rotate_reg<"srav", CPURegsOpnd, sra>, SRLV_FM<7, 0>;
 
 // Rotate Instructions
 let Predicates = [HasMips32r2, HasStdEnc] in {
-  def ROTR  : shift_rotate_imm<"rotr", shamt, CPURegsOpnd, rotr, immZExt5>,
+  def ROTR  : MMRel, shift_rotate_imm<"rotr", shamt, CPURegsOpnd, rotr,
+                                      immZExt5>,
               SRA_FM<2, 1>;
-  def ROTRV : shift_rotate_reg<"rotrv", CPURegsOpnd, rotr>, SRLV_FM<6, 1>;
+  def ROTRV : MMRel, shift_rotate_reg<"rotrv", CPURegsOpnd, rotr>,
+              SRLV_FM<6, 1>;
 }
 
 /// Load and Store Instructions
 ///  aligned
-defm LB  : LoadM<"lb", CPURegs, sextloadi8>, LW_FM<0x20>;
-defm LBu : LoadM<"lbu", CPURegs, zextloadi8, addrDefault>, LW_FM<0x24>;
-defm LH  : LoadM<"lh", CPURegs, sextloadi16, addrDefault>, LW_FM<0x21>;
-defm LHu : LoadM<"lhu", CPURegs, zextloadi16>, LW_FM<0x25>;
-defm LW  : LoadM<"lw", CPURegs, load, addrDefault>, LW_FM<0x23>;
-defm SB  : StoreM<"sb", CPURegs, truncstorei8>, LW_FM<0x28>;
-defm SH  : StoreM<"sh", CPURegs, truncstorei16>, LW_FM<0x29>;
-defm SW  : StoreM<"sw", CPURegs, store>, LW_FM<0x2b>;
+defm LB  : LoadM<"lb", CPURegs, sextloadi8>, MMRel, LW_FM<0x20>;
+defm LBu : LoadM<"lbu", CPURegs, zextloadi8, addrDefault>, MMRel, LW_FM<0x24>;
+defm LH  : LoadM<"lh", CPURegs, sextloadi16, addrDefault>, MMRel, LW_FM<0x21>;
+defm LHu : LoadM<"lhu", CPURegs, zextloadi16>, MMRel, LW_FM<0x25>;
+defm LW  : LoadM<"lw", CPURegs, load, addrDefault>, MMRel, LW_FM<0x23>;
+defm SB  : StoreM<"sb", CPURegs, truncstorei8>, MMRel, LW_FM<0x28>;
+defm SH  : StoreM<"sh", CPURegs, truncstorei16>, MMRel, LW_FM<0x29>;
+defm SW  : StoreM<"sw", CPURegs, store>, MMRel, LW_FM<0x2b>;
 
 /// load/store left/right
 defm LWL : LoadLeftRightM<"lwl", MipsLWL, CPURegs>, LW_FM<0x22>;
@@ -968,8 +981,10 @@ let Uses = [V0, V1], isTerminator = 1, isReturn = 1, isBarrier = 1 in {
 }
 
 /// Multiply and Divide Instructions.
-def MULT  : Mult<"mult", IIImul, CPURegsOpnd, [HI, LO]>, MULT_FM<0, 0x18>;
-def MULTu : Mult<"multu", IIImul, CPURegsOpnd, [HI, LO]>, MULT_FM<0, 0x19>;
+def MULT  : MMRel, Mult<"mult", IIImul, CPURegsOpnd, [HI, LO]>,
+            MULT_FM<0, 0x18>;
+def MULTu : MMRel, Mult<"multu", IIImul, CPURegsOpnd, [HI, LO]>,
+            MULT_FM<0, 0x19>;
 def PseudoMULT  : MultDivPseudo<MULT, ACRegs, CPURegsOpnd, MipsMult, IIImul>;
 def PseudoMULTu : MultDivPseudo<MULTu, ACRegs, CPURegsOpnd, MipsMultu, IIImul>;
 def SDIV  : Div<"div", IIIdiv, CPURegsOpnd, [HI, LO]>, MULT_FM<0, 0x1a>;
@@ -1066,10 +1081,10 @@ def : InstAlias<"negu $rt, $rs",
 def : InstAlias<"slt $rs, $rt, $imm",
                 (SLTi CPURegsOpnd:$rs, CPURegs:$rt, simm16:$imm), 0>;
 def : InstAlias<"xor $rs, $rt, $imm",
-                (XORi CPURegsOpnd:$rs, CPURegsOpnd:$rt, simm16:$imm), 0>,
+                (XORi CPURegsOpnd:$rs, CPURegsOpnd:$rt, uimm16:$imm), 1>,
       Requires<[NotMips64]>;
 def : InstAlias<"or $rs, $rt, $imm",
-                (ORi CPURegsOpnd:$rs, CPURegsOpnd:$rt, simm16:$imm), 0>,
+                (ORi CPURegsOpnd:$rs, CPURegsOpnd:$rt, uimm16:$imm), 1>,
                  Requires<[NotMips64]>;
 def : InstAlias<"nop", (SLL ZERO, ZERO, 0), 1>;
 def : InstAlias<"mfc0 $rt, $rd",
@@ -1128,10 +1143,12 @@ def : MipsPat<(i32 imm:$imm),
 // Carry MipsPatterns
 def : MipsPat<(subc CPURegs:$lhs, CPURegs:$rhs),
               (SUBu CPURegs:$lhs, CPURegs:$rhs)>;
-def : MipsPat<(addc CPURegs:$lhs, CPURegs:$rhs),
-              (ADDu CPURegs:$lhs, CPURegs:$rhs)>;
-def : MipsPat<(addc  CPURegs:$src, immSExt16:$imm),
-              (ADDiu CPURegs:$src, imm:$imm)>;
+let Predicates = [HasStdEnc, NotDSP] in {
+  def : MipsPat<(addc CPURegs:$lhs, CPURegs:$rhs),
+                (ADDu CPURegs:$lhs, CPURegs:$rhs)>;
+  def : MipsPat<(addc  CPURegs:$src, immSExt16:$imm),
+                (ADDiu CPURegs:$src, imm:$imm)>;
+}
 
 // Call
 def : MipsPat<(MipsJmpLink (i32 tglobaladdr:$dst)),
@@ -1326,3 +1343,6 @@ include "Mips16InstrInfo.td"
 include "MipsDSPInstrFormats.td"
 include "MipsDSPInstrInfo.td"
 
+// Micromips
+include "MicroMipsInstrFormats.td"
+include "MicroMipsInstrInfo.td"
diff --git a/lib/Target/Mips/MipsLongBranch.cpp b/lib/Target/Mips/MipsLongBranch.cpp
index 2efe534..bf5ad37 100644
--- a/lib/Target/Mips/MipsLongBranch.cpp
+++ b/lib/Target/Mips/MipsLongBranch.cpp
@@ -399,6 +399,8 @@ static void emitGPDisp(MachineFunction &F, const MipsInstrInfo *TII) {
 }
 
 bool MipsLongBranch::runOnMachineFunction(MachineFunction &F) {
+  if (TM.getSubtarget<MipsSubtarget>().inMips16Mode())
+    return false;
   if ((TM.getRelocationModel() == Reloc::PIC_) &&
       TM.getSubtarget<MipsSubtarget>().isABI_O32() &&
       F.getInfo<MipsFunctionInfo>()->globalBaseRegSet())
diff --git a/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp b/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp
new file mode 100644
index 0000000..c6abf17
--- /dev/null
+++ b/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp
@@ -0,0 +1,34 @@
+//===----------------------------------------------------------------------===//
+// Instruction Selector Subtarget Control
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// This file defines a pass used to change the subtarget for the
+// Mips Instruction selector.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsISelDAGToDAG.h"
+#include "MipsModuleISelDAGToDAG.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+bool MipsModuleDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
+  DEBUG(errs() << "In MipsModuleDAGToDAGISel::runOnMachineFunction\n");
+  const_cast<MipsSubtarget&>(Subtarget).resetSubtarget(&MF);
+  return false;
+}
+
+char MipsModuleDAGToDAGISel::ID = 0;
+
+}
+
+
+llvm::FunctionPass *llvm::createMipsModuleISelDag(MipsTargetMachine &TM) {
+  return new MipsModuleDAGToDAGISel(TM);
+}
+
+
diff --git a/lib/Target/Mips/MipsModuleISelDAGToDAG.h b/lib/Target/Mips/MipsModuleISelDAGToDAG.h
new file mode 100644
index 0000000..fda35ae
--- /dev/null
+++ b/lib/Target/Mips/MipsModuleISelDAGToDAG.h
@@ -0,0 +1,66 @@
+//===---- MipsModuleISelDAGToDAG.h -  Change Subtarget             --------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a pass used to change the subtarget for the
+// Mips Instruction selector.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSMODULEISELDAGTODAG_H
+#define MIPSMODULEISELDAGTODAG_H
+
+#include "Mips.h"
+#include "MipsSubtarget.h"
+#include "MipsTargetMachine.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+
+
+//===----------------------------------------------------------------------===//
+// Instruction Selector Implementation
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MipsModuleDAGToDAGISel - Machine function pass that resets the
+// MipsSubtarget for each function; it does no instruction selection itself.
+//===----------------------------------------------------------------------===//
+namespace llvm {
+
+class MipsModuleDAGToDAGISel : public MachineFunctionPass {
+public:
+
+  static char ID;
+
+  explicit MipsModuleDAGToDAGISel(MipsTargetMachine &TM_)
+    : MachineFunctionPass(ID),
+      TM(TM_), Subtarget(TM.getSubtarget<MipsSubtarget>()) {}
+
+  // Human-readable name reported for this pass.
+  virtual const char *getPassName() const {
+    return "MIPS DAG->DAG Pattern Instruction Selection";
+  }
+
+  virtual bool runOnMachineFunction(MachineFunction &MF);
+  // Not expected to be called on this pass; marked llvm_unreachable.
+  virtual SDNode *Select(SDNode *N) {
+    llvm_unreachable("unexpected");
+  }
+
+protected:
+  /// The target machine and the MipsSubtarget obtained from it in the
+  /// constructor. Both are held by const reference, not by pointer.
+  const TargetMachine &TM;
+  const MipsSubtarget &Subtarget;
+};
+
+/// createMipsModuleISelDag - Creates the pass that resets the Mips subtarget
+/// for each machine function (see MipsModuleDAGToDAGISel).
+FunctionPass *createMipsModuleISelDag(MipsTargetMachine &TM);
+}
+
+#endif
diff --git a/lib/Target/Mips/MipsOs16.cpp b/lib/Target/Mips/MipsOs16.cpp
new file mode 100644
index 0000000..1919077
--- /dev/null
+++ b/lib/Target/Mips/MipsOs16.cpp
@@ -0,0 +1,113 @@
+//===---- MipsOs16.cpp for Mips Option -Os16                       --------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an optimization phase for the MIPS target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-os16"
+#include "MipsOs16.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace {
+
+  // Returns true when F's signature involves floating point: its return
+  // type, or the type of its first argument, is float or double. Such
+  // values have to be moved in and/or out of floating point registers
+  // because of the ABI. Arguments after the first are not examined.
+  bool needsFPFromSig(Function &F) {
+    Type* RetType = F.getReturnType();
+    switch (RetType->getTypeID()) {
+    case Type::FloatTyID:
+    case Type::DoubleTyID:
+      return true;
+    default:
+      ;
+    }
+    if (F.arg_size() >=1) {
+      Argument &Arg = F.getArgumentList().front();
+      switch (Arg.getType()->getTypeID()) {
+        case Type::FloatTyID:
+        case Type::DoubleTyID:
+          return true;
+        default:
+          ;
+      }
+    }
+    return false;
+  }
+
+  // Figure out if the function will need floating point operations. A call
+  // whose callee is unknown (e.g. indirect) is conservatively assumed to.
+  bool needsFP(Function &F) {
+    if (needsFPFromSig(F))
+      return true;
+    for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+      for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
+         I != E; ++I) {
+        const Instruction &Inst = *I;
+        switch (Inst.getOpcode()) {
+        case Instruction::FAdd:
+        case Instruction::FSub:
+        case Instruction::FMul:
+        case Instruction::FDiv:
+        case Instruction::FRem:
+        case Instruction::FPToUI:
+        case Instruction::FPToSI:
+        case Instruction::UIToFP:
+        case Instruction::SIToFP:
+        case Instruction::FPTrunc:
+        case Instruction::FPExt:
+        case Instruction::FCmp:
+          return true;
+        default:
+          ;
+        }
+        if (const CallInst *CI = dyn_cast<CallInst>(I)) {
+          DEBUG(dbgs() << "Working on call" << "\n");
+          Function *Callee = CI->getCalledFunction(); // null if indirect
+          if (!Callee || needsFPFromSig(*Callee))
+            return true;
+        }
+      }
+    return false;
+  }
+}
+namespace llvm {
+// Tag every defined function "nomips16" when it needs floating point and
+// "mips16" otherwise. Returns true if any function was tagged.
+bool MipsOs16::runOnModule(Module &M) {
+  DEBUG(errs() << "Run on Module MipsOs16\n");
+  bool modified = false;
+  for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+    if (F->isDeclaration()) continue;
+    DEBUG(dbgs() << "Working on " << F->getName() << "\n");
+    if (needsFP(*F)) {
+      DEBUG(dbgs() << " need to compile as nomips16 \n");
+      F->addFnAttr("nomips16");
+    } else {
+      F->addFnAttr("mips16");
+      DEBUG(dbgs() << " no need to compile as nomips16 \n");
+    }
+    modified = true; // an attribute was added on either path above
+  }
+  return modified;
+}
+
+char MipsOs16::ID = 0;
+
+}
+// Factory: returns a newly allocated MipsOs16 pass. TM is not used here.
+ModulePass *llvm::createMipsOs16(MipsTargetMachine &TM) {
+  return new MipsOs16;
+}
+
+
diff --git a/lib/Target/Mips/MipsOs16.h b/lib/Target/Mips/MipsOs16.h
new file mode 100644
index 0000000..21beef8
--- /dev/null
+++ b/lib/Target/Mips/MipsOs16.h
@@ -0,0 +1,49 @@
+//===---- MipsOs16.h for Mips Option -Os16                         --------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an optimization phase for the MIPS target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/MipsMCTargetDesc.h"
+#include "MipsTargetMachine.h"
+#include "llvm/Pass.h"
+#include "llvm/Target/TargetMachine.h"
+
+
+
+#ifndef MIPSOS16_H
+#define MIPSOS16_H
+
+using namespace llvm;
+
+namespace llvm {
+/// Module pass for the Mips -Os16 option; runOnModule is in MipsOs16.cpp.
+class MipsOs16 : public ModulePass {
+
+public:
+  static char ID;
+
+  MipsOs16() : ModulePass(ID) {
+
+  }
+
+  virtual const char *getPassName() const {
+    return "MIPS Os16 Optimization";
+  }
+  // Entry point of the pass; returns a bool as required by ModulePass.
+  virtual bool runOnModule(Module &M);
+
+};
+
+ModulePass *createMipsOs16(MipsTargetMachine &TM);
+
+}
+
+#endif
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
index 3250733..dead07b 100644
--- a/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -145,7 +145,11 @@ getReservedRegs(const MachineFunction &MF) const {
   Reserved.set(Mips::HWR29_64);
 
   // Reserve DSP control register.
-  Reserved.set(Mips::DSPCtrl);
+  Reserved.set(Mips::DSPPos);
+  Reserved.set(Mips::DSPSCount);
+  Reserved.set(Mips::DSPCarry);
+  Reserved.set(Mips::DSPEFI);
+  Reserved.set(Mips::DSPOutFlag);
 
   // Reserve RA if in mips16 mode.
   if (Subtarget.inMips16Mode()) {
diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td
index 64458bc..229f167 100644
--- a/lib/Target/Mips/MipsRegisterInfo.td
+++ b/lib/Target/Mips/MipsRegisterInfo.td
@@ -16,6 +16,11 @@ def sub_fpodd  : SubRegIndex;
 def sub_32     : SubRegIndex;
 def sub_lo     : SubRegIndex;
 def sub_hi     : SubRegIndex;
+def sub_dsp16_19 : SubRegIndex;
+def sub_dsp20    : SubRegIndex;
+def sub_dsp21    : SubRegIndex;
+def sub_dsp22    : SubRegIndex;
+def sub_dsp23    : SubRegIndex;
 }
 
 class Unallocatable {
@@ -229,14 +234,14 @@ let Namespace = "Mips" in {
   def D31_64  : AFPR64<31, "f31", [F31]>, DwarfRegNum<[63]>;
 
   // Hi/Lo registers
-  def HI  : Register<"hi">, DwarfRegNum<[64]>;
-  def HI1 : Register<"hi1">, DwarfRegNum<[176]>;
-  def HI2 : Register<"hi2">, DwarfRegNum<[178]>;
-  def HI3 : Register<"hi3">, DwarfRegNum<[180]>;
-  def LO  : Register<"lo">, DwarfRegNum<[65]>;
-  def LO1 : Register<"lo1">, DwarfRegNum<[177]>;
-  def LO2 : Register<"lo2">, DwarfRegNum<[179]>;
-  def LO3 : Register<"lo3">, DwarfRegNum<[181]>;
+  def HI  : Register<"ac0">, DwarfRegNum<[64]>;
+  def HI1 : Register<"ac1">, DwarfRegNum<[176]>;
+  def HI2 : Register<"ac2">, DwarfRegNum<[178]>;
+  def HI3 : Register<"ac3">, DwarfRegNum<[180]>;
+  def LO  : Register<"ac0">, DwarfRegNum<[65]>;
+  def LO1 : Register<"ac1">, DwarfRegNum<[177]>;
+  def LO2 : Register<"ac2">, DwarfRegNum<[179]>;
+  def LO3 : Register<"ac3">, DwarfRegNum<[181]>;
 
   let SubRegIndices = [sub_32] in {
   def HI64  : RegisterWithSubRegs<"hi", [HI]>;
@@ -264,7 +269,23 @@ let Namespace = "Mips" in {
 
   def AC0_64 : ACC<0, "ac0", [LO64, HI64]>;
 
-  def DSPCtrl : Register<"dspctrl">;
+  // DSP-ASE control register fields.
+  def DSPPos : Register<"">;
+  def DSPSCount : Register<"">;
+  def DSPCarry : Register<"">;
+  def DSPEFI : Register<"">;
+  def DSPOutFlag16_19 : Register<"">;
+  def DSPOutFlag20 : Register<"">;
+  def DSPOutFlag21 : Register<"">;
+  def DSPOutFlag22 : Register<"">;
+  def DSPOutFlag23 : Register<"">;
+  def DSPCCond : Register<"">;
+
+  let SubRegIndices = [sub_dsp16_19, sub_dsp20, sub_dsp21, sub_dsp22,
+                       sub_dsp23] in
+  def DSPOutFlag : RegisterWithSubRegs<"", [DSPOutFlag16_19, DSPOutFlag20,
+                                            DSPOutFlag21, DSPOutFlag22,
+                                            DSPOutFlag23]>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -340,8 +361,12 @@ def FGR64 : RegisterClass<"Mips", [f64], 64, (sequence "D%u_64", 0, 31)>;
 def CCR  : RegisterClass<"Mips", [i32], 32, (add FCR31,FCC0)>, Unallocatable;
 
 // Hi/Lo Registers
-def HILO : RegisterClass<"Mips", [i32], 32, (add HI, LO)>, Unallocatable;
-def HILO64 : RegisterClass<"Mips", [i64], 64, (add HI64, LO64)>, Unallocatable;
+def LORegs : RegisterClass<"Mips", [i32], 32, (add LO)>;
+def HIRegs : RegisterClass<"Mips", [i32], 32, (add HI)>;
+def LORegsDSP : RegisterClass<"Mips", [i32], 32, (add LO, LO1, LO2, LO3)>;
+def HIRegsDSP : RegisterClass<"Mips", [i32], 32, (add HI, HI1, HI2, HI3)>;
+def LORegs64 : RegisterClass<"Mips", [i64], 64, (add LO64)>;
+def HIRegs64 : RegisterClass<"Mips", [i64], 64, (add HI64)>;
 
 // Hardware registers
 def HWRegs : RegisterClass<"Mips", [i32], 32, (add HWR29)>, Unallocatable;
@@ -360,6 +385,9 @@ def ACRegsDSP : RegisterClass<"Mips", [untyped], 64, (sequence "AC%u", 0, 3)> {
   let Size = 64;
 }
 
+def DSPCC : RegisterClass<"Mips", [v4i8, v2i16], 32, (add DSPCCond)>;
+
+// Register Operands.
 def CPURegsAsmOperand : AsmOperandClass {
   let Name = "CPURegsAsm";
   let ParserMethod = "parseCPURegs";
diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp
index 68ec921..b295e91 100644
--- a/lib/Target/Mips/MipsSEFrameLowering.cpp
+++ b/lib/Target/Mips/MipsSEFrameLowering.cpp
@@ -32,17 +32,21 @@ using namespace llvm;
 namespace {
 typedef MachineBasicBlock::iterator Iter;
 
-/// Helper class to expand accumulator pseudos.
-class ExpandACCPseudo {
+/// Helper class to expand pseudos.
+class ExpandPseudo {
 public:
-  ExpandACCPseudo(MachineFunction &MF);
+  ExpandPseudo(MachineFunction &MF);
   bool expand();
 
 private:
   bool expandInstr(MachineBasicBlock &MBB, Iter I);
-  void expandLoad(MachineBasicBlock &MBB, Iter I, unsigned RegSize);
-  void expandStore(MachineBasicBlock &MBB, Iter I, unsigned RegSize);
-  void expandCopy(MachineBasicBlock &MBB, Iter I, unsigned RegSize);
+  void expandLoadCCond(MachineBasicBlock &MBB, Iter I);
+  void expandStoreCCond(MachineBasicBlock &MBB, Iter I);
+  void expandLoadACC(MachineBasicBlock &MBB, Iter I, unsigned RegSize);
+  void expandStoreACC(MachineBasicBlock &MBB, Iter I, unsigned RegSize);
+  bool expandCopy(MachineBasicBlock &MBB, Iter I);
+  bool expandCopyACC(MachineBasicBlock &MBB, Iter I, unsigned Dst,
+                     unsigned Src, unsigned RegSize);
 
   MachineFunction &MF;
   const MipsSEInstrInfo &TII;
@@ -51,12 +55,12 @@ private:
 };
 }
 
-ExpandACCPseudo::ExpandACCPseudo(MachineFunction &MF_)
+ExpandPseudo::ExpandPseudo(MachineFunction &MF_)
   : MF(MF_),
     TII(*static_cast<const MipsSEInstrInfo*>(MF.getTarget().getInstrInfo())),
     RegInfo(TII.getRegisterInfo()), MRI(MF.getRegInfo()) {}
 
-bool ExpandACCPseudo::expand() {
+bool ExpandPseudo::expand() {
   bool Expanded = false;
 
   for (MachineFunction::iterator BB = MF.begin(), BBEnd = MF.end();
@@ -67,34 +71,39 @@ bool ExpandACCPseudo::expand() {
   return Expanded;
 }
 
-bool ExpandACCPseudo::expandInstr(MachineBasicBlock &MBB, Iter I) {
+bool ExpandPseudo::expandInstr(MachineBasicBlock &MBB, Iter I) {
   switch(I->getOpcode()) {
+  case Mips::LOAD_CCOND_DSP:
+  case Mips::LOAD_CCOND_DSP_P8:
+    expandLoadCCond(MBB, I);
+    break;
+  case Mips::STORE_CCOND_DSP:
+  case Mips::STORE_CCOND_DSP_P8:
+    expandStoreCCond(MBB, I);
+    break;
   case Mips::LOAD_AC64:
   case Mips::LOAD_AC64_P8:
   case Mips::LOAD_AC_DSP:
   case Mips::LOAD_AC_DSP_P8:
-    expandLoad(MBB, I, 4);
+    expandLoadACC(MBB, I, 4);
     break;
   case Mips::LOAD_AC128:
   case Mips::LOAD_AC128_P8:
-    expandLoad(MBB, I, 8);
+    expandLoadACC(MBB, I, 8);
     break;
   case Mips::STORE_AC64:
   case Mips::STORE_AC64_P8:
   case Mips::STORE_AC_DSP:
   case Mips::STORE_AC_DSP_P8:
-    expandStore(MBB, I, 4);
+    expandStoreACC(MBB, I, 4);
     break;
   case Mips::STORE_AC128:
   case Mips::STORE_AC128_P8:
-    expandStore(MBB, I, 8);
+    expandStoreACC(MBB, I, 8);
     break;
-  case Mips::COPY_AC64:
-  case Mips::COPY_AC_DSP:
-    expandCopy(MBB, I, 4);
-    break;
-  case Mips::COPY_AC128:
-    expandCopy(MBB, I, 8);
+  case TargetOpcode::COPY:
+    if (!expandCopy(MBB, I))
+      return false;
     break;
   default:
     return false;
@@ -104,7 +113,37 @@ bool ExpandACCPseudo::expandInstr(MachineBasicBlock &MBB, Iter I) {
   return true;
 }
 
-void ExpandACCPseudo::expandLoad(MachineBasicBlock &MBB, Iter I,
+void ExpandPseudo::expandLoadCCond(MachineBasicBlock &MBB, Iter I) {
+  //  load $vr, FI
+  //  copy ccond, $vr
+
+  assert(I->getOperand(0).isReg() && I->getOperand(1).isFI());
+
+  const TargetRegisterClass *RC = RegInfo.intRegClass(4);
+  unsigned VR = MRI.createVirtualRegister(RC);
+  unsigned Dst = I->getOperand(0).getReg(), FI = I->getOperand(1).getIndex();
+
+  TII.loadRegFromStack(MBB, I, VR, FI, RC, &RegInfo, 0);
+  BuildMI(MBB, I, I->getDebugLoc(), TII.get(TargetOpcode::COPY), Dst)
+    .addReg(VR, RegState::Kill);
+}
+
+void ExpandPseudo::expandStoreCCond(MachineBasicBlock &MBB, Iter I) {
+  //  copy $vr, ccond
+  //  store $vr, FI
+
+  assert(I->getOperand(0).isReg() && I->getOperand(1).isFI());
+
+  const TargetRegisterClass *RC = RegInfo.intRegClass(4);
+  unsigned VR = MRI.createVirtualRegister(RC);
+  unsigned Src = I->getOperand(0).getReg(), FI = I->getOperand(1).getIndex();
+
+  BuildMI(MBB, I, I->getDebugLoc(), TII.get(TargetOpcode::COPY), VR)
+    .addReg(Src, getKillRegState(I->getOperand(0).isKill()));
+  TII.storeRegToStack(MBB, I, VR, true, FI, RC, &RegInfo, 0);
+}
+
+void ExpandPseudo::expandLoadACC(MachineBasicBlock &MBB, Iter I,
                                  unsigned RegSize) {
   //  load $vr0, FI
   //  copy lo, $vr0
@@ -128,7 +167,7 @@ void ExpandACCPseudo::expandLoad(MachineBasicBlock &MBB, Iter I,
   BuildMI(MBB, I, DL, Desc, Hi).addReg(VR1, RegState::Kill);
 }
 
-void ExpandACCPseudo::expandStore(MachineBasicBlock &MBB, Iter I,
+void ExpandPseudo::expandStoreACC(MachineBasicBlock &MBB, Iter I,
                                   unsigned RegSize) {
   //  copy $vr0, lo
   //  store $vr0, FI
@@ -152,8 +191,20 @@ void ExpandACCPseudo::expandStore(MachineBasicBlock &MBB, Iter I,
   TII.storeRegToStack(MBB, I, VR1, true, FI, RC, &RegInfo, RegSize);
 }
 
-void ExpandACCPseudo::expandCopy(MachineBasicBlock &MBB, Iter I,
-                                 unsigned RegSize) {
+bool ExpandPseudo::expandCopy(MachineBasicBlock &MBB, Iter I) {
+  unsigned Dst = I->getOperand(0).getReg(), Src = I->getOperand(1).getReg();
+
+  if (Mips::ACRegsDSPRegClass.contains(Dst, Src))
+    return expandCopyACC(MBB, I, Dst, Src, 4);
+
+  if (Mips::ACRegs128RegClass.contains(Dst, Src))
+    return expandCopyACC(MBB, I, Dst, Src, 8);
+
+  return false;
+}
+
+bool ExpandPseudo::expandCopyACC(MachineBasicBlock &MBB, Iter I, unsigned Dst,
+                                 unsigned Src, unsigned RegSize) {
   //  copy $vr0, src_lo
   //  copy dst_lo, $vr0
   //  copy $vr1, src_hi
@@ -162,7 +213,6 @@ void ExpandACCPseudo::expandCopy(MachineBasicBlock &MBB, Iter I,
   const TargetRegisterClass *RC = RegInfo.intRegClass(RegSize);
   unsigned VR0 = MRI.createVirtualRegister(RC);
   unsigned VR1 = MRI.createVirtualRegister(RC);
-  unsigned Dst = I->getOperand(0).getReg(), Src = I->getOperand(1).getReg();
   unsigned SrcKill = getKillRegState(I->getOperand(1).isKill());
   unsigned DstLo = RegInfo.getSubReg(Dst, Mips::sub_lo);
   unsigned DstHi = RegInfo.getSubReg(Dst, Mips::sub_hi);
@@ -176,6 +226,7 @@ void ExpandACCPseudo::expandCopy(MachineBasicBlock &MBB, Iter I,
   BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), VR1).addReg(SrcHi, SrcKill);
   BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), DstHi)
     .addReg(VR1, RegState::Kill);
+  return true;
 }
 
 unsigned MipsSEFrameLowering::ehDataReg(unsigned I) const {
@@ -438,7 +489,7 @@ processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
 
   // Expand pseudo instructions which load, store or copy accumulators.
   // Add an emergency spill slot if a pseudo was expanded.
-  if (ExpandACCPseudo(MF).expand()) {
+  if (ExpandPseudo(MF).expand()) {
     // The spill slot should be half the size of the accumulator. If target is
     // mips64, it should be 64-bit, otherwise it should be 32-bt.
     const TargetRegisterClass *RC = STI.hasMips64() ?
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
index d6d2207..8a6523a 100644
--- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -35,6 +35,36 @@
 #include "llvm/Target/TargetMachine.h"
 using namespace llvm;
 
+bool MipsSEDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
+  if (Subtarget.inMips16Mode())
+    return false;
+  return MipsDAGToDAGISel::runOnMachineFunction(MF);
+}
+/// Add an implicit def (IsDef) or use of each DSP control field in MI's mask.
+void MipsSEDAGToDAGISel::addDSPCtrlRegOperands(bool IsDef, MachineInstr &MI,
+                                               MachineFunction &MF) {
+  MachineInstrBuilder MIB(MF, &MI);
+  unsigned Mask = MI.getOperand(1).getImm();
+  unsigned Flag = IsDef ? RegState::ImplicitDefine : RegState::Implicit;
+  // Each of the low six bits of Mask selects one DSP control register field.
+  if (Mask & 1)
+    MIB.addReg(Mips::DSPPos, Flag);
+
+  if (Mask & 2)
+    MIB.addReg(Mips::DSPSCount, Flag);
+
+  if (Mask & 4)
+    MIB.addReg(Mips::DSPCarry, Flag);
+
+  if (Mask & 8)
+    MIB.addReg(Mips::DSPOutFlag, Flag);
+
+  if (Mask & 16)
+    MIB.addReg(Mips::DSPCCond, Flag);
+
+  if (Mask & 32)
+    MIB.addReg(Mips::DSPEFI, Flag);
+}
 
 bool MipsSEDAGToDAGISel::replaceUsesWithZeroReg(MachineRegisterInfo *MRI,
                                                 const MachineInstr& MI) {
@@ -173,29 +203,14 @@ void MipsSEDAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) {
 
   for (MachineFunction::iterator MFI = MF.begin(), MFE = MF.end(); MFI != MFE;
        ++MFI)
-    for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I)
-      replaceUsesWithZeroReg(MRI, *I);
-}
-
-/// Select multiply instructions.
-std::pair<SDNode*, SDNode*>
-MipsSEDAGToDAGISel::selectMULT(SDNode *N, unsigned Opc, DebugLoc DL, EVT Ty,
-                               bool HasLo, bool HasHi) {
-  SDNode *Lo = 0, *Hi = 0;
-  SDNode *Mul = CurDAG->getMachineNode(Opc, DL, MVT::Glue, N->getOperand(0),
-                                       N->getOperand(1));
-  SDValue InFlag = SDValue(Mul, 0);
-
-  if (HasLo) {
-    unsigned Opcode = (Ty == MVT::i32 ? Mips::MFLO : Mips::MFLO64);
-    Lo = CurDAG->getMachineNode(Opcode, DL, Ty, MVT::Glue, InFlag);
-    InFlag = SDValue(Lo, 1);
-  }
-  if (HasHi) {
-    unsigned Opcode = (Ty == MVT::i32 ? Mips::MFHI : Mips::MFHI64);
-    Hi = CurDAG->getMachineNode(Opcode, DL, Ty, InFlag);
-  }
-  return std::make_pair(Lo, Hi);
+    for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I) {
+      if (I->getOpcode() == Mips::RDDSP)
+        addDSPCtrlRegOperands(false, *I, MF);
+      else if (I->getOpcode() == Mips::WRDSP)
+        addDSPCtrlRegOperands(true, *I, MF);
+      else
+        replaceUsesWithZeroReg(MRI, *I);
+    }
 }
 
 SDNode *MipsSEDAGToDAGISel::selectAddESubE(unsigned MOp, SDValue InFlag,
@@ -211,7 +226,7 @@ SDNode *MipsSEDAGToDAGISel::selectAddESubE(unsigned MOp, SDValue InFlag,
   SDValue LHS = Node->getOperand(0), RHS = Node->getOperand(1);
   EVT VT = LHS.getValueType();
 
-  SDNode *Carry = CurDAG->getMachineNode(Mips::SLTu, DL, VT, Ops, 2);
+  SDNode *Carry = CurDAG->getMachineNode(Mips::SLTu, DL, VT, Ops);
   SDNode *AddCarry = CurDAG->getMachineNode(Mips::ADDu, DL, VT,
                                             SDValue(Carry, 0), RHS);
   return CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue, LHS,
@@ -307,9 +322,7 @@ std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) {
   // Instruction Selection not handled by the auto-generated
   // tablegen selection should be handled here.
   ///
-  EVT NodeTy = Node->getValueType(0);
   SDNode *Result;
-  unsigned MultOpc;
 
   switch(Opcode) {
   default: break;
@@ -321,51 +334,13 @@ std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) {
   }
 
   case ISD::ADDE: {
+    if (Subtarget.hasDSP()) // Select DSP instructions, ADDSC and ADDWC.
+      break;
     SDValue InFlag = Node->getOperand(2);
     Result = selectAddESubE(Mips::ADDu, InFlag, InFlag.getValue(0), DL, Node);
     return std::make_pair(true, Result);
   }
 
-  /// Mul with two results
-  case ISD::SMUL_LOHI:
-  case ISD::UMUL_LOHI: {
-    if (NodeTy == MVT::i32)
-      MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::MULTu : Mips::MULT);
-    else
-      MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::DMULTu : Mips::DMULT);
-
-    std::pair<SDNode*, SDNode*> LoHi = selectMULT(Node, MultOpc, DL, NodeTy,
-                                                  true, true);
-
-    if (!SDValue(Node, 0).use_empty())
-      ReplaceUses(SDValue(Node, 0), SDValue(LoHi.first, 0));
-
-    if (!SDValue(Node, 1).use_empty())
-      ReplaceUses(SDValue(Node, 1), SDValue(LoHi.second, 0));
-
-    return std::make_pair(true, (SDNode*)NULL);
-  }
-
-  /// Special Muls
-  case ISD::MUL: {
-    // Mips32 has a 32-bit three operand mul instruction.
-    if (Subtarget.hasMips32() && NodeTy == MVT::i32)
-      break;
-    MultOpc = NodeTy == MVT::i32 ? Mips::MULT : Mips::DMULT;
-    Result = selectMULT(Node, MultOpc, DL, NodeTy, true, false).first;
-    return std::make_pair(true, Result);
-  }
-  case ISD::MULHS:
-  case ISD::MULHU: {
-    if (NodeTy == MVT::i32)
-      MultOpc = (Opcode == ISD::MULHU ? Mips::MULTu : Mips::MULT);
-    else
-      MultOpc = (Opcode == ISD::MULHU ? Mips::DMULTu : Mips::DMULT);
-
-    Result = selectMULT(Node, MultOpc, DL, NodeTy, false, true).second;
-    return std::make_pair(true, Result);
-  }
-
   case ISD::ConstantFP: {
     ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Node);
     if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) {
@@ -460,7 +435,7 @@ std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) {
     const SDValue Ops[] = { RegClass, Node->getOperand(0), LoIdx,
                             Node->getOperand(1), HiIdx };
     SDNode *Res = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
-                                         MVT::Untyped, Ops, 5);
+                                         MVT::Untyped, Ops);
     return std::make_pair(true, Res);
   }
   }
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.h b/lib/Target/Mips/MipsSEISelDAGToDAG.h
index 6137ab0..a235e96 100644
--- a/lib/Target/Mips/MipsSEISelDAGToDAG.h
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.h
@@ -24,6 +24,12 @@ public:
   explicit MipsSEDAGToDAGISel(MipsTargetMachine &TM) : MipsDAGToDAGISel(TM) {}
 
 private:
+
+  virtual bool runOnMachineFunction(MachineFunction &MF);
+
+  void addDSPCtrlRegOperands(bool IsDef, MachineInstr &MI,
+                             MachineFunction &MF);
+
   bool replaceUsesWithZeroReg(MachineRegisterInfo *MRI, const MachineInstr&);
 
   std::pair<SDNode*, SDNode*> selectMULT(SDNode *N, unsigned Opc, DebugLoc dl,
diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp
index 4f21921..8544bb8 100644
--- a/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -15,6 +15,7 @@
 #include "MipsTargetMachine.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Intrinsics.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Target/TargetInstrInfo.h"
 
@@ -27,6 +28,9 @@ EnableMipsTailCalls("enable-mips-tail-calls", cl::Hidden,
 MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM)
   : MipsTargetLowering(TM) {
   // Set up the register classes
+
+  clearRegisterClasses();
+
   addRegisterClass(MVT::i32, &Mips::CPURegsRegClass);
 
   if (HasMips64)
@@ -42,12 +46,23 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM)
       for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
         setOperationAction(Opc, VecTys[i], Expand);
 
+      setOperationAction(ISD::ADD, VecTys[i], Legal);
+      setOperationAction(ISD::SUB, VecTys[i], Legal);
       setOperationAction(ISD::LOAD, VecTys[i], Legal);
       setOperationAction(ISD::STORE, VecTys[i], Legal);
       setOperationAction(ISD::BITCAST, VecTys[i], Legal);
     }
+
+    setTargetDAGCombine(ISD::SHL);
+    setTargetDAGCombine(ISD::SRA);
+    setTargetDAGCombine(ISD::SRL);
+    setTargetDAGCombine(ISD::SETCC);
+    setTargetDAGCombine(ISD::VSELECT);
   }
 
+  if (Subtarget->hasDSPR2())
+    setOperationAction(ISD::MUL, MVT::v2i16, Legal);
+
   if (!TM.Options.UseSoftFloat) {
     addRegisterClass(MVT::f32, &Mips::FGR32RegClass);
 
@@ -65,14 +80,19 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM)
   setOperationAction(ISD::MULHS,              MVT::i32, Custom);
   setOperationAction(ISD::MULHU,              MVT::i32, Custom);
 
-  if (HasMips64)
+  if (HasMips64) {
+    setOperationAction(ISD::MULHS,            MVT::i64, Custom);
+    setOperationAction(ISD::MULHU,            MVT::i64, Custom);
     setOperationAction(ISD::MUL,              MVT::i64, Custom);
+  }
+
+  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
+  setOperationAction(ISD::INTRINSIC_W_CHAIN,  MVT::i64, Custom);
 
   setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
   setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
   setOperationAction(ISD::SDIVREM, MVT::i64, Custom);
   setOperationAction(ISD::UDIVREM, MVT::i64, Custom);
-  setOperationAction(ISD::MEMBARRIER,         MVT::Other, Custom);
   setOperationAction(ISD::ATOMIC_FENCE,       MVT::Other, Custom);
   setOperationAction(ISD::LOAD,               MVT::i32, Custom);
   setOperationAction(ISD::STORE,              MVT::i32, Custom);
@@ -113,7 +133,10 @@ SDValue MipsSETargetLowering::LowerOperation(SDValue Op,
   case ISD::MULHU:     return lowerMulDiv(Op, MipsISD::Multu, false, true, DAG);
   case ISD::MUL:       return lowerMulDiv(Op, MipsISD::Mult, true, false, DAG);
   case ISD::SDIVREM:   return lowerMulDiv(Op, MipsISD::DivRem, true, true, DAG);
-  case ISD::UDIVREM:   return lowerMulDiv(Op, MipsISD::DivRemU, true, true, DAG);
+  case ISD::UDIVREM:   return lowerMulDiv(Op, MipsISD::DivRemU, true, true,
+                                          DAG);
+  case ISD::INTRINSIC_WO_CHAIN: return lowerINTRINSIC_WO_CHAIN(Op, DAG);
+  case ISD::INTRINSIC_W_CHAIN:  return lowerINTRINSIC_W_CHAIN(Op, DAG);
   }
 
   return MipsTargetLowering::LowerOperation(Op, DAG);
@@ -297,18 +320,136 @@ static SDValue performSUBECombine(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
+static SDValue performDSPShiftCombine(unsigned Opc, SDNode *N, EVT Ty,
+                                      SelectionDAG &DAG,
+                                      const MipsSubtarget *Subtarget) {
+  // See if this is a vector splat immediate node.
+  APInt SplatValue, SplatUndef;
+  unsigned SplatBitSize;
+  bool HasAnyUndefs;
+  unsigned EltSize = Ty.getVectorElementType().getSizeInBits();
+  BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
+
+  if (!BV ||
+      !BV->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
+                           EltSize, !Subtarget->isLittle()) ||
+      (SplatBitSize != EltSize) ||
+      (SplatValue.getZExtValue() >= EltSize))
+    return SDValue();
+
+  return DAG.getNode(Opc, N->getDebugLoc(), Ty, N->getOperand(0),
+                     DAG.getConstant(SplatValue.getZExtValue(), MVT::i32));
+}
+
+static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG,
+                                 TargetLowering::DAGCombinerInfo &DCI,
+                                 const MipsSubtarget *Subtarget) {
+  EVT Ty = N->getValueType(0);
+
+  if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8))
+    return SDValue();
+
+  return performDSPShiftCombine(MipsISD::SHLL_DSP, N, Ty, DAG, Subtarget);
+}
+
+static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
+                                 TargetLowering::DAGCombinerInfo &DCI,
+                                 const MipsSubtarget *Subtarget) {
+  EVT Ty = N->getValueType(0);
+
+  if ((Ty != MVT::v2i16) && ((Ty != MVT::v4i8) || !Subtarget->hasDSPR2()))
+    return SDValue();
+
+  return performDSPShiftCombine(MipsISD::SHRA_DSP, N, Ty, DAG, Subtarget);
+}
+
+
+static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
+                                 TargetLowering::DAGCombinerInfo &DCI,
+                                 const MipsSubtarget *Subtarget) {
+  EVT Ty = N->getValueType(0);
+
+  if (((Ty != MVT::v2i16) || !Subtarget->hasDSPR2()) && (Ty != MVT::v4i8))
+    return SDValue();
+
+  return performDSPShiftCombine(MipsISD::SHRL_DSP, N, Ty, DAG, Subtarget);
+}
+/// Returns true if condition code CC is accepted for DSP vector type Ty.
+static bool isLegalDSPCondCode(EVT Ty, ISD::CondCode CC) {
+  bool IsV216 = (Ty == MVT::v2i16);
+
+  switch (CC) {
+  case ISD::SETEQ:
+  case ISD::SETNE:  return true;     // equality: accepted for any Ty
+  case ISD::SETLT:
+  case ISD::SETLE:
+  case ISD::SETGT:
+  case ISD::SETGE:  return IsV216;   // signed orderings: v2i16 only
+  case ISD::SETULT:
+  case ISD::SETULE:
+  case ISD::SETUGT:
+  case ISD::SETUGE: return !IsV216;  // unsigned orderings: any Ty but v2i16
+  default:          return false;
+  }
+}
+
+static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) {
+  EVT Ty = N->getValueType(0);
+
+  if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8))
+    return SDValue();
+
+  if (!isLegalDSPCondCode(Ty, cast<CondCodeSDNode>(N->getOperand(2))->get()))
+    return SDValue();
+
+  return DAG.getNode(MipsISD::SETCC_DSP, N->getDebugLoc(), Ty, N->getOperand(0),
+                     N->getOperand(1), N->getOperand(2));
+}
+
+static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) {
+  EVT Ty = N->getValueType(0);
+
+  if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8))
+    return SDValue();
+
+  SDValue SetCC = N->getOperand(0);
+
+  if (SetCC.getOpcode() != MipsISD::SETCC_DSP)
+    return SDValue();
+
+  return DAG.getNode(MipsISD::SELECT_CC_DSP, N->getDebugLoc(), Ty,
+                     SetCC.getOperand(0), SetCC.getOperand(1), N->getOperand(1),
+                     N->getOperand(2), SetCC.getOperand(2));
+}
+
 SDValue
 MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
+  SDValue Val;
 
   switch (N->getOpcode()) {
   case ISD::ADDE:
     return performADDECombine(N, DAG, DCI, Subtarget);
   case ISD::SUBE:
     return performSUBECombine(N, DAG, DCI, Subtarget);
-  default:
-    return MipsTargetLowering::PerformDAGCombine(N, DCI);
+  case ISD::SHL:
+    return performSHLCombine(N, DAG, DCI, Subtarget);
+  case ISD::SRA:
+    return performSRACombine(N, DAG, DCI, Subtarget);
+  case ISD::SRL:
+    return performSRLCombine(N, DAG, DCI, Subtarget);
+  case ISD::VSELECT:
+    return performVSELECTCombine(N, DAG);
+  case ISD::SETCC: {
+    Val = performSETCCCombine(N, DAG);
+    break;
   }
+  }
+
+  if (Val.getNode())
+    return Val;
+
+  return MipsTargetLowering::PerformDAGCombine(N, DCI);
 }
 
 MachineBasicBlock *
@@ -378,6 +519,171 @@ SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc,
   return DAG.getMergeValues(Vals, 2, DL);
 }
 
+// Build an Untyped InsertLOHI node from elements 0 and 1 (as i32) of In.
+static SDValue initAccumulator(SDValue In, DebugLoc DL, SelectionDAG &DAG) {
+  SDValue Half[2];
+  for (unsigned I = 0; I != 2; ++I)
+    Half[I] = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In,
+                          DAG.getConstant(I, MVT::i32));
+  return DAG.getNode(MipsISD::InsertLOHI, DL, MVT::Untyped, Half[0], Half[1]);
+}
+
+static SDValue extractLOHI(SDValue Op, DebugLoc DL, SelectionDAG &DAG) {
+  SDValue LoIdx = DAG.getConstant(Mips::sub_lo, MVT::i32);
+  SDValue Lo = DAG.getNode(MipsISD::ExtractLOHI, DL, MVT::i32, Op, LoIdx);
+  SDValue HiIdx = DAG.getConstant(Mips::sub_hi, MVT::i32);
+  SDValue Hi = DAG.getNode(MipsISD::ExtractLOHI, DL, MVT::i32, Op, HiIdx);
+  return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi); // i64 = (Lo, Hi)
+}
+
+// Expand a Mips DSP intrinsic node that has a 64-bit input operand and/or a
+// 64-bit result into the target node Opc, which carries such values as
+// MVT::Untyped.
+//
+//  - An optional leading chain operand is forwarded first.
+//  - The intrinsic-ID operand is dropped.
+//  - If the operand after the ID is i64 it is repacked with initAccumulator
+//    and appended as the LAST operand; otherwise it keeps its position.
+//  - Each i64 result type is replaced by MVT::Untyped; when result 0 is
+//    Untyped it is converted back to i64 with extractLOHI.
+//  - With a chain input, the value and result 1 of the new node are merged.
+//
+static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) {
+  DebugLoc DL = Op.getDebugLoc();
+  bool HasChainIn = Op->getOperand(0).getValueType() == MVT::Other;
+  SmallVector<SDValue, 3> Ops;
+  unsigned OpNo = 0;
+
+  // See if Op has a chain input.
+  if (HasChainIn)
+    Ops.push_back(Op->getOperand(OpNo++));
+
+  // The next operand is the intrinsic opcode.
+  assert(Op->getOperand(OpNo).getOpcode() == ISD::TargetConstant);
+
+  // See if the next operand has type i64.
+  SDValue Opnd = Op->getOperand(++OpNo), In64;
+
+  if (Opnd.getValueType() == MVT::i64)
+    In64 = initAccumulator(Opnd, DL, DAG);
+  else
+    Ops.push_back(Opnd);
+
+  // Push the remaining operands.
+  for (++OpNo ; OpNo < Op->getNumOperands(); ++OpNo)
+    Ops.push_back(Op->getOperand(OpNo));
+
+  // Add In64 to the end of the list.
+  if (In64.getNode())
+    Ops.push_back(In64);
+
+  // Scan output.
+  SmallVector<EVT, 2> ResTys;
+
+  for (SDNode::value_iterator I = Op->value_begin(), E = Op->value_end();
+       I != E; ++I)
+    ResTys.push_back((*I == MVT::i64) ? MVT::Untyped : *I);
+
+  // Create node.
+  SDValue Val = DAG.getNode(Opc, DL, ResTys, &Ops[0], Ops.size());
+  SDValue Out = (ResTys[0] == MVT::Untyped) ? extractLOHI(Val, DL, DAG) : Val;
+
+  if (!HasChainIn)
+    return Out;
+
+  assert(Val->getValueType(1) == MVT::Other);
+  SDValue Vals[] = { Out, SDValue(Val.getNode(), 1) };
+  return DAG.getMergeValues(Vals, 2, DL);
+}
+
+// Lower a DSP intrinsic node that carries its intrinsic ID in operand 0.
+//
+// Every ID handled here is mapped to its MipsISD opcode and then expanded by
+// lowerDSPIntr; any other ID produces an empty SDValue.  The switch only
+// selects the opcode, so the expansion call appears exactly once, after the
+// switch.
+SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
+                                                      SelectionDAG &DAG) const {
+  unsigned Opc;
+
+  switch (cast<ConstantSDNode>(Op->getOperand(0))->getZExtValue()) {
+  case Intrinsic::mips_shilo: Opc = MipsISD::SHILO; break;
+
+  case Intrinsic::mips_dpau_h_qbl: Opc = MipsISD::DPAU_H_QBL; break;
+  case Intrinsic::mips_dpau_h_qbr: Opc = MipsISD::DPAU_H_QBR; break;
+  case Intrinsic::mips_dpsu_h_qbl: Opc = MipsISD::DPSU_H_QBL; break;
+  case Intrinsic::mips_dpsu_h_qbr: Opc = MipsISD::DPSU_H_QBR; break;
+
+  case Intrinsic::mips_dpa_w_ph: Opc = MipsISD::DPA_W_PH; break;
+  case Intrinsic::mips_dps_w_ph: Opc = MipsISD::DPS_W_PH; break;
+  case Intrinsic::mips_dpax_w_ph: Opc = MipsISD::DPAX_W_PH; break;
+  case Intrinsic::mips_dpsx_w_ph: Opc = MipsISD::DPSX_W_PH; break;
+  case Intrinsic::mips_mulsa_w_ph: Opc = MipsISD::MULSA_W_PH; break;
+
+  case Intrinsic::mips_mult: Opc = MipsISD::Mult; break;
+  case Intrinsic::mips_multu: Opc = MipsISD::Multu; break;
+  case Intrinsic::mips_madd: Opc = MipsISD::MAdd; break;
+  case Intrinsic::mips_maddu: Opc = MipsISD::MAddu; break;
+  case Intrinsic::mips_msub: Opc = MipsISD::MSub; break;
+  case Intrinsic::mips_msubu: Opc = MipsISD::MSubu; break;
+
+  default:
+    // Not an intrinsic this hook expands.
+    return SDValue();
+  }
+
+  // Every recognized ID shares the same expansion; only the opcode differs.
+  return lowerDSPIntr(Op, DAG, Opc);
+}
+
+// Lower a DSP intrinsic node that carries its intrinsic ID in operand 1.
+//
+// Every ID handled here is mapped to its MipsISD opcode and then expanded by
+// lowerDSPIntr, which forwards a leading MVT::Other operand as the chain and
+// merges the new node's result 1 with the value; any other ID produces an
+// empty SDValue.  The switch only selects the opcode, so the expansion call
+// appears exactly once, after the switch.
+SDValue MipsSETargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
+                                                     SelectionDAG &DAG) const {
+  unsigned Opc;
+
+  switch (cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue()) {
+  case Intrinsic::mips_extp: Opc = MipsISD::EXTP; break;
+  case Intrinsic::mips_extpdp: Opc = MipsISD::EXTPDP; break;
+
+  case Intrinsic::mips_extr_w: Opc = MipsISD::EXTR_W; break;
+  case Intrinsic::mips_extr_r_w: Opc = MipsISD::EXTR_R_W; break;
+  case Intrinsic::mips_extr_rs_w: Opc = MipsISD::EXTR_RS_W; break;
+  case Intrinsic::mips_extr_s_h: Opc = MipsISD::EXTR_S_H; break;
+
+  case Intrinsic::mips_mthlip: Opc = MipsISD::MTHLIP; break;
+
+  case Intrinsic::mips_mulsaq_s_w_ph: Opc = MipsISD::MULSAQ_S_W_PH; break;
+
+  case Intrinsic::mips_maq_s_w_phl: Opc = MipsISD::MAQ_S_W_PHL; break;
+  case Intrinsic::mips_maq_s_w_phr: Opc = MipsISD::MAQ_S_W_PHR; break;
+  case Intrinsic::mips_maq_sa_w_phl: Opc = MipsISD::MAQ_SA_W_PHL; break;
+  case Intrinsic::mips_maq_sa_w_phr: Opc = MipsISD::MAQ_SA_W_PHR; break;
+
+  case Intrinsic::mips_dpaq_s_w_ph: Opc = MipsISD::DPAQ_S_W_PH; break;
+  case Intrinsic::mips_dpsq_s_w_ph: Opc = MipsISD::DPSQ_S_W_PH; break;
+  case Intrinsic::mips_dpaq_sa_l_w: Opc = MipsISD::DPAQ_SA_L_W; break;
+  case Intrinsic::mips_dpsq_sa_l_w: Opc = MipsISD::DPSQ_SA_L_W; break;
+
+  case Intrinsic::mips_dpaqx_s_w_ph: Opc = MipsISD::DPAQX_S_W_PH; break;
+  case Intrinsic::mips_dpaqx_sa_w_ph: Opc = MipsISD::DPAQX_SA_W_PH; break;
+  case Intrinsic::mips_dpsqx_s_w_ph: Opc = MipsISD::DPSQX_S_W_PH; break;
+  case Intrinsic::mips_dpsqx_sa_w_ph: Opc = MipsISD::DPSQX_SA_W_PH; break;
+
+  default:
+    // Not an intrinsic this hook expands.
+    return SDValue();
+  }
+
+  // Every recognized ID shares the same expansion; only the opcode differs.
+  return lowerDSPIntr(Op, DAG, Opc);
+}
+
 MachineBasicBlock * MipsSETargetLowering::
 emitBPOSGE32(MachineInstr *MI, MachineBasicBlock *BB) const{
   // $bb:
diff --git a/lib/Target/Mips/MipsSEISelLowering.h b/lib/Target/Mips/MipsSEISelLowering.h
index 186f6a3..ec8a5c7 100644
--- a/lib/Target/Mips/MipsSEISelLowering.h
+++ b/lib/Target/Mips/MipsSEISelLowering.h
@@ -31,6 +31,11 @@ namespace llvm {
     virtual MachineBasicBlock *
     EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const;
 
+    virtual bool isShuffleMaskLegal(const SmallVectorImpl<int> &Mask,
+                                    EVT VT) const {
+      return false; // Mask and VT are ignored: no mask is reported legal.
+    }
+
     virtual const TargetRegisterClass *getRepRegClassFor(MVT VT) const {
       if (VT == MVT::Untyped)
         return Subtarget->hasDSP() ? &Mips::ACRegsDSPRegClass :
@@ -54,6 +59,9 @@ namespace llvm {
     SDValue lowerMulDiv(SDValue Op, unsigned NewOpc, bool HasLo, bool HasHi,
                         SelectionDAG &DAG) const;
 
+    SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+    SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+
     MachineBasicBlock *emitBPOSGE32(MachineInstr *MI,
                                     MachineBasicBlock *BB) const;
   };
diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp
index ca0315e..a0768e5 100644
--- a/lib/Target/Mips/MipsSEInstrInfo.cpp
+++ b/lib/Target/Mips/MipsSEInstrInfo.cpp
@@ -95,20 +95,39 @@ void MipsSEInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
       Opc = Mips::CFC1;
     else if (Mips::FGR32RegClass.contains(SrcReg))
       Opc = Mips::MFC1;
-    else if (SrcReg == Mips::HI)
+    else if (Mips::HIRegsRegClass.contains(SrcReg))
       Opc = Mips::MFHI, SrcReg = 0;
-    else if (SrcReg == Mips::LO)
+    else if (Mips::LORegsRegClass.contains(SrcReg))
       Opc = Mips::MFLO, SrcReg = 0;
+    else if (Mips::HIRegsDSPRegClass.contains(SrcReg))
+      Opc = Mips::MFHI_DSP;
+    else if (Mips::LORegsDSPRegClass.contains(SrcReg))
+      Opc = Mips::MFLO_DSP;
+    else if (Mips::DSPCCRegClass.contains(SrcReg)) {
+      BuildMI(MBB, I, DL, get(Mips::RDDSP), DestReg).addImm(1 << 4)
+        .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
+      return;
+    }
   }
   else if (Mips::CPURegsRegClass.contains(SrcReg)) { // Copy from CPU Reg.
     if (Mips::CCRRegClass.contains(DestReg))
       Opc = Mips::CTC1;
     else if (Mips::FGR32RegClass.contains(DestReg))
       Opc = Mips::MTC1;
-    else if (DestReg == Mips::HI)
+    else if (Mips::HIRegsRegClass.contains(DestReg))
       Opc = Mips::MTHI, DestReg = 0;
-    else if (DestReg == Mips::LO)
+    else if (Mips::LORegsRegClass.contains(DestReg))
       Opc = Mips::MTLO, DestReg = 0;
+    else if (Mips::HIRegsDSPRegClass.contains(DestReg))
+      Opc = Mips::MTHI_DSP;
+    else if (Mips::LORegsDSPRegClass.contains(DestReg))
+      Opc = Mips::MTLO_DSP;
+    else if (Mips::DSPCCRegClass.contains(DestReg)) {
+      BuildMI(MBB, I, DL, get(Mips::WRDSP))
+        .addReg(SrcReg, getKillRegState(KillSrc)).addImm(1 << 4)
+        .addReg(DestReg, RegState::ImplicitDefine);
+      return;
+    }
   }
   else if (Mips::FGR32RegClass.contains(DestReg, SrcReg))
     Opc = Mips::FMOV_S;
@@ -121,27 +140,21 @@ void MipsSEInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
   else if (Mips::CPU64RegsRegClass.contains(DestReg)) { // Copy to CPU64 Reg.
     if (Mips::CPU64RegsRegClass.contains(SrcReg))
       Opc = Mips::OR64, ZeroReg = Mips::ZERO_64;
-    else if (SrcReg == Mips::HI64)
+    else if (Mips::HIRegs64RegClass.contains(SrcReg))
       Opc = Mips::MFHI64, SrcReg = 0;
-    else if (SrcReg == Mips::LO64)
+    else if (Mips::LORegs64RegClass.contains(SrcReg))
       Opc = Mips::MFLO64, SrcReg = 0;
     else if (Mips::FGR64RegClass.contains(SrcReg))
       Opc = Mips::DMFC1;
   }
   else if (Mips::CPU64RegsRegClass.contains(SrcReg)) { // Copy from CPU64 Reg.
-    if (DestReg == Mips::HI64)
+    if (Mips::HIRegs64RegClass.contains(DestReg))
       Opc = Mips::MTHI64, DestReg = 0;
-    else if (DestReg == Mips::LO64)
+    else if (Mips::LORegs64RegClass.contains(DestReg))
       Opc = Mips::MTLO64, DestReg = 0;
     else if (Mips::FGR64RegClass.contains(DestReg))
       Opc = Mips::DMTC1;
   }
-  else if (Mips::ACRegsRegClass.contains(DestReg, SrcReg))
-    Opc = Mips::COPY_AC64;
-  else if (Mips::ACRegsDSPRegClass.contains(DestReg, SrcReg))
-    Opc = Mips::COPY_AC_DSP;
-  else if (Mips::ACRegs128RegClass.contains(DestReg, SrcReg))
-    Opc = Mips::COPY_AC128;
 
   assert(Opc && "Cannot copy registers");
 
@@ -178,6 +191,8 @@ storeRegToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
     Opc = IsN64 ? Mips::STORE_AC_DSP_P8 : Mips::STORE_AC_DSP;
   else if (Mips::ACRegs128RegClass.hasSubClassEq(RC))
     Opc = IsN64 ? Mips::STORE_AC128_P8 : Mips::STORE_AC128;
+  else if (Mips::DSPCCRegClass.hasSubClassEq(RC))
+    Opc = IsN64 ? Mips::STORE_CCOND_DSP_P8 : Mips::STORE_CCOND_DSP;
   else if (Mips::FGR32RegClass.hasSubClassEq(RC))
     Opc = IsN64 ? Mips::SWC1_P8 : Mips::SWC1;
   else if (Mips::AFGR64RegClass.hasSubClassEq(RC))
@@ -209,6 +224,8 @@ loadRegFromStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
     Opc = IsN64 ? Mips::LOAD_AC_DSP_P8 : Mips::LOAD_AC_DSP;
   else if (Mips::ACRegs128RegClass.hasSubClassEq(RC))
     Opc = IsN64 ? Mips::LOAD_AC128_P8 : Mips::LOAD_AC128;
+  else if (Mips::DSPCCRegClass.hasSubClassEq(RC))
+    Opc = IsN64 ? Mips::LOAD_CCOND_DSP_P8 : Mips::LOAD_CCOND_DSP;
   else if (Mips::FGR32RegClass.hasSubClassEq(RC))
     Opc = IsN64 ? Mips::LWC1_P8 : Mips::LWC1;
   else if (Mips::AFGR64RegClass.hasSubClassEq(RC))
diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp
index e11e5d1..14a2b27 100644
--- a/lib/Target/Mips/MipsSubtarget.cpp
+++ b/lib/Target/Mips/MipsSubtarget.cpp
@@ -11,29 +11,56 @@
 //
 //===----------------------------------------------------------------------===//
 
+#define DEBUG_TYPE "mips-subtarget"
+
+#include "MipsMachineFunction.h"
 #include "MipsSubtarget.h"
+#include "MipsTargetMachine.h"
 #include "Mips.h"
 #include "MipsRegisterInfo.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
 
 #define GET_SUBTARGETINFO_TARGET_DESC
 #define GET_SUBTARGETINFO_CTOR
 #include "MipsGenSubtargetInfo.inc"
 
+
 using namespace llvm;
 
+// -mips-mixed-16-32: hidden flag, off by default.
+// FIXME: Maybe this should be on by default when Mips16 is specified.
+static cl::opt<bool> Mixed16_32(
+  "mips-mixed-16-32",
+  cl::init(false),
+  cl::desc("Allow for a mixture of Mips16 "
+           "and Mips32 code in a single source file"),
+  cl::Hidden);
+
+static cl::opt<bool> Mips_Os16( // Hidden flag; also enables AllowMixed16_32.
+  "mips-os16",
+  cl::init(false),
+  cl::desc("Compile all functions that don't use "
+           "floating point as Mips 16"),
+  cl::Hidden);
+
 void MipsSubtarget::anchor() { }
 
 MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
                              const std::string &FS, bool little,
-                             Reloc::Model _RM) :
+                             Reloc::Model _RM, MipsTargetMachine *_TM) :
   MipsGenSubtargetInfo(TT, CPU, FS),
   MipsArchVersion(Mips32), MipsABI(UnknownABI), IsLittle(little),
   IsSingleFloat(false), IsFP64bit(false), IsGP64bit(false), HasVFPU(false),
   IsLinux(true), HasSEInReg(false), HasCondMov(false), HasSwap(false),
   HasBitCount(false), HasFPIdx(false),
   InMips16Mode(false), InMicroMipsMode(false), HasDSP(false), HasDSPR2(false),
-  RM(_RM)
+  AllowMixed16_32(Mixed16_32 | Mips_Os16), Os16(Mips_Os16),
+  RM(_RM), OverrideMode(NoOverride), TM(_TM)
 {
   std::string CPUName = CPU;
   if (CPUName.empty())
@@ -42,6 +69,8 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
   // Parse features string.
   ParseSubtargetFeatures(CPUName, FS);
 
+  PreviousInMips16Mode = InMips16Mode;
+
   // Initialize scheduling itinerary for the specified CPU.
   InstrItins = getInstrItineraryForCPU(CPUName);
 
@@ -72,3 +101,48 @@ MipsSubtarget::enablePostRAScheduler(CodeGenOpt::Level OptLevel,
                             &Mips::CPU64RegsRegClass : &Mips::CPURegsRegClass);
   return OptLevel >= CodeGenOpt::Aggressive;
 }
+
+// Switch the subtarget (and the target machine's helper classes) to the mode
+// requested by MF's "mips16"/"nomips16" function attributes, or back to the
+// subtarget's default mode (InMips16Mode) when neither attribute is present.
+// FIXME: This logic for resetting the subtarget along with the helper classes
+// can probably be simplified, but there are a lot of cases so we defer that.
+void MipsSubtarget::resetSubtarget(MachineFunction *MF) {
+  DEBUG(dbgs() << "resetSubtargetFeatures" << "\n");
+  AttributeSet FnAttrs = MF->getFunction()->getAttributes();
+  bool ChangeToMips16 =
+      FnAttrs.hasAttribute(AttributeSet::FunctionIndex, "mips16");
+  bool ChangeToNoMips16 =
+      FnAttrs.hasAttribute(AttributeSet::FunctionIndex, "nomips16");
+  assert(!(ChangeToMips16 && ChangeToNoMips16) &&
+         "mips16 and nomips16 specified on the same function");
+  if (ChangeToMips16) {
+    if (PreviousInMips16Mode)
+      return;
+    OverrideMode = Mips16Override;
+    PreviousInMips16Mode = true;
+    TM->setHelperClassesMips16();
+    return;
+  }
+  if (ChangeToNoMips16) {
+    if (!PreviousInMips16Mode)
+      return;
+    OverrideMode = NoMips16Override;
+    PreviousInMips16Mode = false;
+    TM->setHelperClassesMipsSE();
+    return;
+  }
+  // No attribute: drop any override left in place by an earlier function.
+  if (OverrideMode == NoOverride)
+    return;
+  OverrideMode = NoOverride;
+  DEBUG(dbgs() << "back to default" << "\n");
+  if (inMips16Mode() && !PreviousInMips16Mode) {
+    TM->setHelperClassesMips16();
+    PreviousInMips16Mode = true;
+  } else if (!inMips16Mode() && PreviousInMips16Mode) {
+    TM->setHelperClassesMipsSE();
+    PreviousInMips16Mode = false;
+  }
+}
+
diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h
index 7a2e47c..f2f0e15 100644
--- a/lib/Target/Mips/MipsSubtarget.h
+++ b/lib/Target/Mips/MipsSubtarget.h
@@ -16,7 +16,9 @@
 
 #include "MCTargetDesc/MipsReginfo.h"
 #include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Target/TargetSubtargetInfo.h"
+
 #include <string>
 
 #define GET_SUBTARGETINFO_HEADER
@@ -25,6 +27,8 @@
 namespace llvm {
 class StringRef;
 
+class MipsTargetMachine;
+
 class MipsSubtarget : public MipsGenSubtargetInfo {
   virtual void anchor();
 
@@ -89,12 +93,23 @@ protected:
   // InMips16 -- can process Mips16 instructions
   bool InMips16Mode;
 
+  // PreviousInMips16 -- the function we just processed was in Mips 16 Mode
+  bool PreviousInMips16Mode;
+
   // InMicroMips -- can process MicroMips instructions
   bool InMicroMipsMode;
 
   // HasDSP, HasDSPR2 -- supports DSP ASE.
   bool HasDSP, HasDSPR2;
 
+  // Allow mixed Mips16 and Mips32 in one source file
+  bool AllowMixed16_32;
+
+  // Optimize for space by compiling all functions as Mips 16 unless
+  // it needs floating point. Functions needing floating point are
+  // compiled as Mips32
+  bool Os16;
+
   InstrItineraryData InstrItins;
 
   // The instance to the register info section object
@@ -103,6 +118,12 @@ protected:
   // Relocation Model
   Reloc::Model RM;
 
+  // Per-function override of the mips16 mode that would otherwise be
+  // determined from the command line.
+  enum {NoOverride, Mips16Override, NoMips16Override} OverrideMode;
+
+  MipsTargetMachine *TM;
+
 public:
   virtual bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
                                      AntiDepBreakMode& Mode,
@@ -118,7 +139,8 @@ public:
   /// This constructor initializes the data members to match that
   /// of the specified triple.
   MipsSubtarget(const std::string &TT, const std::string &CPU,
-                const std::string &FS, bool little, Reloc::Model RM);
+                const std::string &FS, bool little, Reloc::Model RM,
+                MipsTargetMachine *TM);
 
   /// ParseSubtargetFeatures - Parses features string setting specified
   /// subtarget options.  Definition of function is auto generated by tblgen.
@@ -137,7 +159,20 @@ public:
   bool isSingleFloat() const { return IsSingleFloat; }
   bool isNotSingleFloat() const { return !IsSingleFloat; }
   bool hasVFPU() const { return HasVFPU; }
-  bool inMips16Mode() const { return InMips16Mode; }
+  bool inMips16Mode() const {
+    // An active override takes precedence over the InMips16Mode default.
+    if (OverrideMode == Mips16Override)
+      return true;
+    if (OverrideMode == NoMips16Override)
+      return false;
+    if (OverrideMode == NoOverride)
+      return InMips16Mode;
+
+    llvm_unreachable("Unexpected mode");
+  }
+  bool inMips16ModeDefault() const { // Ignores OverrideMode.
+    return InMips16Mode;
+  }
   bool inMicroMipsMode() const { return InMicroMipsMode; }
   bool hasDSP() const { return HasDSP; }
   bool hasDSPR2() const { return HasDSPR2; }
@@ -153,11 +188,20 @@ public:
   bool hasBitCount()  const { return HasBitCount; }
   bool hasFPIdx()     const { return HasFPIdx; }
 
+  bool allowMixed16_32() const { return AllowMixed16_32; } // 16/32 mixing on?
+
+  bool os16() const { return Os16; } // Initialized from -mips-os16.
+
   // Grab MipsRegInfo object
   const MipsReginfo &getMReginfo() const { return MRI; }
 
   // Grab relocation model
   Reloc::Model getRelocationModel() const {return RM;}
+
+  /// \brief Reset the subtarget for the Mips target.
+  void resetSubtarget(MachineFunction *MF);
+
+
 };
 } // End llvm namespace
 
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index 3336358..ee28e2a 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -15,11 +15,26 @@
 #include "Mips.h"
 #include "MipsFrameLowering.h"
 #include "MipsInstrInfo.h"
+#include "MipsModuleISelDAGToDAG.h"
+#include "MipsOs16.h"
+#include "MipsSEFrameLowering.h"
+#include "MipsSEInstrInfo.h"
+#include "MipsSEISelLowering.h"
+#include "MipsSEISelDAGToDAG.h"
+#include "Mips16FrameLowering.h"
+#include "Mips16InstrInfo.h"
+#include "Mips16ISelDAGToDAG.h"
+#include "Mips16ISelLowering.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/PassManager.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/TargetRegistry.h"
 using namespace llvm;
 
+
+
 extern "C" void LLVMInitializeMipsTarget() {
   // Register the target.
   RegisterTargetMachine<MipsebTargetMachine> X(TheMipsTarget);
@@ -42,7 +57,7 @@ MipsTargetMachine(const Target &T, StringRef TT,
                   CodeGenOpt::Level OL,
                   bool isLittle)
   : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
-    Subtarget(TT, CPU, FS, isLittle, RM),
+    Subtarget(TT, CPU, FS, isLittle, RM, this),
     DL(isLittle ?
                (Subtarget.isABI_N64() ?
                 "e-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-"
@@ -54,9 +69,46 @@ MipsTargetMachine(const Target &T, StringRef TT,
                 "E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32-S64")),
     InstrInfo(MipsInstrInfo::create(*this)),
     FrameLowering(MipsFrameLowering::create(*this, Subtarget)),
-    TLInfo(MipsTargetLowering::create(*this)), TSInfo(*this), JITInfo() {
+    TLInfo(MipsTargetLowering::create(*this)),
+    TSInfo(*this), JITInfo() {
+}
+
+
+void MipsTargetMachine::setHelperClassesMips16() {
+  InstrInfoSE.swap(InstrInfo); // Exchange the active helpers with the SE slots.
+  FrameLoweringSE.swap(FrameLowering);
+  TLInfoSE.swap(TLInfo);
+  if (!InstrInfo16) { // No Mips16 helpers stored yet: create new ones.
+    InstrInfo.reset(MipsInstrInfo::create(*this));
+    FrameLowering.reset(MipsFrameLowering::create(*this, Subtarget));
+    TLInfo.reset(MipsTargetLowering::create(*this));
+  } else { // Otherwise make the stored Mips16 helpers the active ones.
+    InstrInfo16.swap(InstrInfo);
+    FrameLowering16.swap(FrameLowering);
+    TLInfo16.swap(TLInfo);
+  }
+  assert(TLInfo && "null target lowering 16");
+  assert(InstrInfo && "null instr info 16");
+  assert(FrameLowering && "null frame lowering 16");
 }
 
+void MipsTargetMachine::setHelperClassesMipsSE() {
+  InstrInfo16.swap(InstrInfo); // Exchange the active helpers with the 16 slots.
+  FrameLowering16.swap(FrameLowering);
+  TLInfo16.swap(TLInfo);
+  if (!InstrInfoSE) { // No SE helpers stored yet: create new ones.
+    InstrInfo.reset(MipsInstrInfo::create(*this));
+    FrameLowering.reset(MipsFrameLowering::create(*this, Subtarget));
+    TLInfo.reset(MipsTargetLowering::create(*this));
+  } else { // Otherwise make the stored SE helpers the active ones.
+    InstrInfoSE.swap(InstrInfo);
+    FrameLoweringSE.swap(FrameLowering);
+    TLInfoSE.swap(TLInfo);
+  }
+  assert(TLInfo && "null target lowering in SE");
+  assert(InstrInfo && "null instr info SE");
+  assert(FrameLowering && "null frame lowering SE");
+}
 void MipsebTargetMachine::anchor() { }
 
 MipsebTargetMachine::
@@ -90,6 +142,7 @@ public:
     return *getMipsTargetMachine().getSubtargetImpl();
   }
 
+  virtual void addIRPasses();
   virtual bool addInstSelector();
   virtual bool addPreEmitPass();
 };
@@ -99,24 +152,50 @@ TargetPassConfig *MipsTargetMachine::createPassConfig(PassManagerBase &PM) {
   return new MipsPassConfig(this, PM);
 }
 
+void MipsPassConfig::addIRPasses() {
+  TargetPassConfig::addIRPasses(); // Base-class IR passes are added first.
+  if (getMipsSubtarget().os16()) // MipsOs16 is added only in Os16 mode.
+    addPass(createMipsOs16(getMipsTargetMachine()));
+}
 // Install an instruction selector pass using
 // the ISelDag to gen Mips code.
 bool MipsPassConfig::addInstSelector() {
-  addPass(createMipsISelDag(getMipsTargetMachine()));
+  if (getMipsSubtarget().allowMixed16_32()) { // Mixed 16/32: add all three.
+    addPass(createMipsModuleISelDag(getMipsTargetMachine()));
+    addPass(createMips16ISelDag(getMipsTargetMachine()));
+    addPass(createMipsSEISelDag(getMipsTargetMachine()));
+  } else { // Otherwise keep the single createMipsISelDag pass.
+    addPass(createMipsISelDag(getMipsTargetMachine()));
+  }
   return false;
 }
 
+void MipsTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
+  if (Subtarget.allowMixed16_32()) {
+    DEBUG(errs() << "No ");
+    // FIXME: The Basic Target Transform Info pass needs to become a function
+    // pass instead of an immutable pass; once it is, this override becomes
+    // unnecessary.  Until then mixed 16/32 mode installs the "no target
+    // transform info" pass instead of the default analysis passes.
+    PM.add(createNoTargetTransformInfoPass());
+  } else
+    LLVMTargetMachine::addAnalysisPasses(PM);
+  DEBUG(errs() << "Target Transform Info Pass Added\n");
+}
+
 // Implemented by targets that want to run passes immediately before
 // machine code is emitted. return true if -print-machineinstrs should
 // print out the code after the passes.
 bool MipsPassConfig::addPreEmitPass() {
   MipsTargetMachine &TM = getMipsTargetMachine();
+  const MipsSubtarget &Subtarget = TM.getSubtarget<MipsSubtarget>();
   addPass(createMipsDelaySlotFillerPass(TM));
 
-  // NOTE: long branch has not been implemented for mips16.
-  if (TM.getSubtarget<MipsSubtarget>().hasStandardEncoding())
+  if (Subtarget.hasStandardEncoding() ||
+      Subtarget.allowMixed16_32())
     addPass(createMipsLongBranchPass(TM));
-  if (TM.getSubtarget<MipsSubtarget>().inMips16Mode())
+  if (Subtarget.inMips16Mode() ||
+      Subtarget.allowMixed16_32())
     addPass(createMipsConstantIslandPass(TM));
 
   return true;
diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h
index 7e5f192..ee55708 100644
--- a/lib/Target/Mips/MipsTargetMachine.h
+++ b/lib/Target/Mips/MipsTargetMachine.h
@@ -21,6 +21,8 @@
 #include "MipsSelectionDAGInfo.h"
 #include "MipsSubtarget.h"
 #include "llvm/ADT/OwningPtr.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetMachine.h"
@@ -35,6 +37,12 @@ class MipsTargetMachine : public LLVMTargetMachine {
   OwningPtr<const MipsInstrInfo> InstrInfo;
   OwningPtr<const MipsFrameLowering> FrameLowering;
   OwningPtr<const MipsTargetLowering> TLInfo;
+  OwningPtr<const MipsInstrInfo> InstrInfo16;
+  OwningPtr<const MipsFrameLowering> FrameLowering16;
+  OwningPtr<const MipsTargetLowering> TLInfo16;
+  OwningPtr<const MipsInstrInfo> InstrInfoSE;
+  OwningPtr<const MipsFrameLowering> FrameLoweringSE;
+  OwningPtr<const MipsTargetLowering> TLInfoSE;
   MipsSelectionDAGInfo TSInfo;
   MipsJITInfo JITInfo;
 
@@ -47,6 +55,8 @@ public:
 
   virtual ~MipsTargetMachine() {}
 
+  virtual void addAnalysisPasses(PassManagerBase &PM);
+
   virtual const MipsInstrInfo *getInstrInfo() const
   { return InstrInfo.get(); }
   virtual const TargetFrameLowering *getFrameLowering() const
@@ -73,6 +83,13 @@ public:
   // Pass Pipeline Configuration
   virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
   virtual bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE);
+
+  // Set helper classes
+  void setHelperClassesMips16();
+
+  void setHelperClassesMipsSE();
+
+
 };
 
 /// MipsebTargetMachine - Mips32/64 big endian target machine.
diff --git a/lib/Target/NVPTX/CMakeLists.txt b/lib/Target/NVPTX/CMakeLists.txt
index 7da2fed..735ca9b 100644
--- a/lib/Target/NVPTX/CMakeLists.txt
+++ b/lib/Target/NVPTX/CMakeLists.txt
@@ -23,6 +23,7 @@ set(NVPTXCodeGen_sources
   NVPTXAsmPrinter.cpp
   NVPTXUtilities.cpp
   NVVMReflect.cpp
+  NVPTXGenericToNVVM.cpp
   )
 
 add_llvm_target(NVPTXCodeGen ${NVPTXCodeGen_sources})
diff --git a/lib/Target/NVPTX/NVPTX.h b/lib/Target/NVPTX/NVPTX.h
index 6a53a44..072c65d 100644
--- a/lib/Target/NVPTX/NVPTX.h
+++ b/lib/Target/NVPTX/NVPTX.h
@@ -16,6 +16,7 @@
 #define LLVM_TARGET_NVPTX_H
 
 #include "MCTargetDesc/NVPTXBaseInfo.h"
+#include "llvm/ADT/StringMap.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Value.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -62,6 +63,9 @@ createNVPTXISelDag(NVPTXTargetMachine &TM, llvm::CodeGenOpt::Level OptLevel);
 FunctionPass *createLowerStructArgsPass(NVPTXTargetMachine &);
 FunctionPass *createNVPTXReMatPass(NVPTXTargetMachine &);
 FunctionPass *createNVPTXReMatBlockPass(NVPTXTargetMachine &);
+ModulePass *createGenericToNVVMPass();
+ModulePass *createNVVMReflectPass();
+ModulePass *createNVVMReflectPass(const StringMap<int>& Mapping);
 
 bool isImageOrSamplerVal(const Value *, const Module *);
 
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index ce5d78a..229e4e5 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -68,11 +68,12 @@ InterleaveSrc("nvptx-emit-src", cl::ZeroOrMore,
 namespace {
 /// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V
 /// depends.
-void DiscoverDependentGlobals(Value *V, DenseSet<GlobalVariable *> &Globals) {
-  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
+void DiscoverDependentGlobals(const Value *V,
+                              DenseSet<const GlobalVariable *> &Globals) {
+  if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
     Globals.insert(GV);
   else {
-    if (User *U = dyn_cast<User>(V)) {
+    if (const User *U = dyn_cast<User>(V)) {
       for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) {
         DiscoverDependentGlobals(U->getOperand(i), Globals);
       }
@@ -84,8 +85,9 @@ void DiscoverDependentGlobals(Value *V, DenseSet<GlobalVariable *> &Globals) {
 /// instances to be emitted, but only after any dependents have been added
 /// first.
 void VisitGlobalVariableForEmission(
-    GlobalVariable *GV, SmallVectorImpl<GlobalVariable *> &Order,
-    DenseSet<GlobalVariable *> &Visited, DenseSet<GlobalVariable *> &Visiting) {
+    const GlobalVariable *GV, SmallVectorImpl<const GlobalVariable *> &Order,
+    DenseSet<const GlobalVariable *> &Visited,
+    DenseSet<const GlobalVariable *> &Visiting) {
   // Have we already visited this one?
   if (Visited.count(GV))
     return;
@@ -98,12 +100,12 @@ void VisitGlobalVariableForEmission(
   Visiting.insert(GV);
 
   // Make sure we visit all dependents first
-  DenseSet<GlobalVariable *> Others;
+  DenseSet<const GlobalVariable *> Others;
   for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i)
     DiscoverDependentGlobals(GV->getOperand(i), Others);
 
-  for (DenseSet<GlobalVariable *>::iterator I = Others.begin(),
-                                            E = Others.end();
+  for (DenseSet<const GlobalVariable *>::iterator I = Others.begin(),
+                                                  E = Others.end();
        I != E; ++I)
     VisitGlobalVariableForEmission(*I, Order, Visited, Visiting);
 
@@ -405,6 +407,11 @@ void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
   SmallString<128> Str;
   raw_svector_ostream O(Str);
 
+  if (!GlobalsEmitted) {
+    emitGlobals(*MF->getFunction()->getParent());
+    GlobalsEmitted = true;
+  }
+  
   // Set up
   MRI = &MF->getRegInfo();
   F = MF->getFunction();
@@ -695,7 +702,7 @@ void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
   else
     O << ".func ";
   printReturnValStr(F, O);
-  O << *CurrentFnSym << "\n";
+  O << *Mang->getSymbol(F) << "\n";
   emitFunctionParamList(F, O);
   O << ";\n";
 }
@@ -795,7 +802,7 @@ static bool useFuncSeen(const Constant *C,
   return false;
 }
 
-void NVPTXAsmPrinter::emitDeclarations(Module &M, raw_ostream &O) {
+void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) {
   llvm::DenseMap<const Function *, bool> seenMap;
   for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) {
     const Function *F = FI;
@@ -805,7 +812,6 @@ void NVPTXAsmPrinter::emitDeclarations(Module &M, raw_ostream &O) {
         continue;
       if (F->getIntrinsicID())
         continue;
-      CurrentFnSym = Mang->getSymbol(F);
       emitDeclaration(F, O);
       continue;
     }
@@ -817,14 +823,12 @@ void NVPTXAsmPrinter::emitDeclarations(Module &M, raw_ostream &O) {
           // The use is in the initialization of a global variable
           // that is a function pointer, so print a declaration
           // for the original function
-          CurrentFnSym = Mang->getSymbol(F);
           emitDeclaration(F, O);
           break;
         }
         // Emit a declaration of this function if the function that
         // uses this constant expr has already been seen.
         if (useFuncSeen(C, seenMap)) {
-          CurrentFnSym = Mang->getSymbol(F);
           emitDeclaration(F, O);
           break;
         }
@@ -844,7 +848,6 @@ void NVPTXAsmPrinter::emitDeclarations(Module &M, raw_ostream &O) {
       // appearing in the module before the callee. so print out
       // a declaration for the callee.
       if (seenMap.find(caller) != seenMap.end()) {
-        CurrentFnSym = Mang->getSymbol(F);
         emitDeclaration(F, O);
         break;
       }
@@ -921,6 +924,12 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) {
   if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)
     recordAndEmitFilenames(M);
 
+  GlobalsEmitted = false;
+    
+  return false; // success
+}
+
+void NVPTXAsmPrinter::emitGlobals(const Module &M) {
   SmallString<128> Str2;
   raw_svector_ostream OS2(Str2);
 
@@ -931,13 +940,13 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) {
   // global variable in order, and ensure that we emit it *after* its dependent
   // globals. We use a little extra memory maintaining both a set and a list to
   // have fast searches while maintaining a strict ordering.
-  SmallVector<GlobalVariable *, 8> Globals;
-  DenseSet<GlobalVariable *> GVVisited;
-  DenseSet<GlobalVariable *> GVVisiting;
+  SmallVector<const GlobalVariable *, 8> Globals;
+  DenseSet<const GlobalVariable *> GVVisited;
+  DenseSet<const GlobalVariable *> GVVisiting;
 
   // Visit each global variable, in order
-  for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E;
-       ++I)
+  for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+       I != E; ++I)
     VisitGlobalVariableForEmission(I, Globals, GVVisited, GVVisiting);
 
   assert(GVVisited.size() == M.getGlobalList().size() &&
@@ -951,7 +960,6 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) {
   OS2 << '\n';
 
   OutStreamer.EmitRawText(OS2.str());
-  return false; // success
 }
 
 void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) {
@@ -989,6 +997,14 @@ void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) {
 }
 
 bool NVPTXAsmPrinter::doFinalization(Module &M) {
+
+  // If we did not emit any functions, then the global declarations have not
+  // yet been emitted.
+  if (!GlobalsEmitted) {
+    emitGlobals(M);
+    GlobalsEmitted = true;
+  }
+
   // XXX Temproarily remove global variables so that doFinalization() will not
   // emit them again (global variables are emitted at beginning).
 
@@ -1063,7 +1079,8 @@ void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
   }
 }
 
-void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O,
+void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
+                                         raw_ostream &O,
                                          bool processDemoted) {
 
   // Skip meta data
@@ -1107,10 +1124,10 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O,
   if (llvm::isSampler(*GVar)) {
     O << ".global .samplerref " << llvm::getSamplerName(*GVar);
 
-    Constant *Initializer = NULL;
+    const Constant *Initializer = NULL;
     if (GVar->hasInitializer())
       Initializer = GVar->getInitializer();
-    ConstantInt *CI = NULL;
+    const ConstantInt *CI = NULL;
     if (Initializer)
       CI = dyn_cast<ConstantInt>(Initializer);
     if (CI) {
@@ -1183,7 +1200,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O,
     if (localDecls.find(demotedFunc) != localDecls.end())
       localDecls[demotedFunc].push_back(GVar);
     else {
-      std::vector<GlobalVariable *> temp;
+      std::vector<const GlobalVariable *> temp;
       temp.push_back(GVar);
       localDecls[demotedFunc] = temp;
     }
@@ -1199,7 +1216,11 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O,
 
   if (ETy->isPrimitiveType() || ETy->isIntegerTy() || isa<PointerType>(ETy)) {
     O << " .";
-    O << getPTXFundamentalTypeStr(ETy, false);
+    // Special case: ABI requires that we use .u8 for predicates
+    if (ETy->isIntegerTy(1))
+      O << "u8";
+    else
+      O << getPTXFundamentalTypeStr(ETy, false);
     O << " ";
     O << *Mang->getSymbol(GVar);
 
@@ -1209,7 +1230,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O,
          (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) ||
          (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) &&
         GVar->hasInitializer()) {
-      Constant *Initializer = GVar->getInitializer();
+      const Constant *Initializer = GVar->getInitializer();
       if (!Initializer->isNullValue()) {
         O << " = ";
         printScalarConstant(Initializer, O);
@@ -1233,7 +1254,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O,
            (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) ||
            (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) &&
           GVar->hasInitializer()) {
-        Constant *Initializer = GVar->getInitializer();
+        const Constant *Initializer = GVar->getInitializer();
         if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) {
           AggBuffer aggBuffer(ElementSize, O, *this);
           bufferAggregateConstant(Initializer, &aggBuffer);
@@ -1283,7 +1304,7 @@ void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) {
   if (localDecls.find(f) == localDecls.end())
     return;
 
-  std::vector<GlobalVariable *> &gvars = localDecls[f];
+  std::vector<const GlobalVariable *> &gvars = localDecls[f];
 
   for (unsigned i = 0, e = gvars.size(); i != e; ++i) {
     O << "\t// demoted variable\n\t";
@@ -1448,7 +1469,7 @@ void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I,
                                      int paramIndex, raw_ostream &O) {
   if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) ||
       (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA))
-    O << *CurrentFnSym << "_param_" << paramIndex;
+    O << *Mang->getSymbol(I->getParent()) << "_param_" << paramIndex;
   else {
     std::string argName = I->getName();
     const char *p = argName.c_str();
@@ -1507,11 +1528,13 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
       if (llvm::isImage(*I)) {
         std::string sname = I->getName();
         if (llvm::isImageWriteOnly(*I))
-          O << "\t.param .surfref " << *CurrentFnSym << "_param_" << paramIndex;
+          O << "\t.param .surfref " << *Mang->getSymbol(F) << "_param_"
+            << paramIndex;
         else // Default image is read_only
-          O << "\t.param .texref " << *CurrentFnSym << "_param_" << paramIndex;
+          O << "\t.param .texref " << *Mang->getSymbol(F) << "_param_"
+            << paramIndex;
       } else // Should be llvm::isSampler(*I)
-        O << "\t.param .samplerref " << *CurrentFnSym << "_param_"
+        O << "\t.param .samplerref " << *Mang->getSymbol(F) << "_param_"
           << paramIndex;
       continue;
     }
@@ -1564,7 +1587,13 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
         }
 
         // non-pointer scalar to kernel func
-        O << "\t.param ." << getPTXFundamentalTypeStr(Ty) << " ";
+        O << "\t.param .";
+        // Special case: predicate operands become .u8 types
+        if (Ty->isIntegerTy(1))
+          O << "u8";
+        else
+          O << getPTXFundamentalTypeStr(Ty);
+        O << " ";
         printParamName(I, paramIndex, O);
         continue;
       }
@@ -1751,12 +1780,12 @@ void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) {
   O << utohexstr(API.getZExtValue());
 }
 
-void NVPTXAsmPrinter::printScalarConstant(Constant *CPV, raw_ostream &O) {
-  if (ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
+void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
+  if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
     O << CI->getValue();
     return;
   }
-  if (ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
+  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
     printFPConstant(CFP, O);
     return;
   }
@@ -1764,13 +1793,13 @@ void NVPTXAsmPrinter::printScalarConstant(Constant *CPV, raw_ostream &O) {
     O << "0";
     return;
   }
-  if (GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
+  if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
     O << *Mang->getSymbol(GVar);
     return;
   }
-  if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
-    Value *v = Cexpr->stripPointerCasts();
-    if (GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
+  if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
+    const Value *v = Cexpr->stripPointerCasts();
+    if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
       O << *Mang->getSymbol(GVar);
       return;
     } else {
@@ -1781,7 +1810,7 @@ void NVPTXAsmPrinter::printScalarConstant(Constant *CPV, raw_ostream &O) {
   llvm_unreachable("Not scalar type found in printScalarConstant()");
 }
 
-void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
+void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
                                    AggBuffer *aggBuffer) {
 
   const DataLayout *TD = TM.getDataLayout();
@@ -1809,13 +1838,13 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
       ptr = (unsigned char *)&int16;
       aggBuffer->addBytes(ptr, 2, Bytes);
     } else if (ETy == Type::getInt32Ty(CPV->getContext())) {
-      if (ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
+      if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
         int int32 = (int)(constInt->getZExtValue());
         ptr = (unsigned char *)&int32;
         aggBuffer->addBytes(ptr, 4, Bytes);
         break;
-      } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
-        if (ConstantInt *constInt = dyn_cast<ConstantInt>(
+      } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
+        if (const ConstantInt *constInt = dyn_cast<ConstantInt>(
                 ConstantFoldConstantExpression(Cexpr, TD))) {
           int int32 = (int)(constInt->getZExtValue());
           ptr = (unsigned char *)&int32;
@@ -1831,13 +1860,13 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
       }
       llvm_unreachable("unsupported integer const type");
     } else if (ETy == Type::getInt64Ty(CPV->getContext())) {
-      if (ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
+      if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
         long long int64 = (long long)(constInt->getZExtValue());
         ptr = (unsigned char *)&int64;
         aggBuffer->addBytes(ptr, 8, Bytes);
         break;
-      } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
-        if (ConstantInt *constInt = dyn_cast<ConstantInt>(
+      } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
+        if (const ConstantInt *constInt = dyn_cast<ConstantInt>(
                 ConstantFoldConstantExpression(Cexpr, TD))) {
           long long int64 = (long long)(constInt->getZExtValue());
           ptr = (unsigned char *)&int64;
@@ -1858,7 +1887,7 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
   }
   case Type::FloatTyID:
   case Type::DoubleTyID: {
-    ConstantFP *CFP = dyn_cast<ConstantFP>(CPV);
+    const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV);
     const Type *Ty = CFP->getType();
     if (Ty == Type::getFloatTy(CPV->getContext())) {
       float float32 = (float) CFP->getValueAPF().convertToFloat();
@@ -1874,10 +1903,10 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
     break;
   }
   case Type::PointerTyID: {
-    if (GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
+    if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
       aggBuffer->addSymbol(GVar);
-    } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
-      Value *v = Cexpr->stripPointerCasts();
+    } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
+      const Value *v = Cexpr->stripPointerCasts();
       aggBuffer->addSymbol(v);
     }
     unsigned int s = TD->getTypeAllocSize(CPV->getType());
@@ -1906,7 +1935,7 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
   }
 }
 
-void NVPTXAsmPrinter::bufferAggregateConstant(Constant *CPV,
+void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
                                               AggBuffer *aggBuffer) {
   const DataLayout *TD = TM.getDataLayout();
   int Bytes;
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.h b/lib/Target/NVPTX/NVPTXAsmPrinter.h
index 6dc9fc0..7faa6b2 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.h
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.h
@@ -91,7 +91,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
     unsigned char *buffer; // the buffer
     unsigned numSymbols;   // number of symbol addresses
     SmallVector<unsigned, 4> symbolPosInBuffer;
-    SmallVector<Value *, 4> Symbols;
+    SmallVector<const Value *, 4> Symbols;
 
   private:
     unsigned curpos;
@@ -128,7 +128,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
       }
       return curpos;
     }
-    void addSymbol(Value *GVar) {
+    void addSymbol(const Value *GVar) {
       symbolPosInBuffer.push_back(curpos);
       Symbols.push_back(GVar);
       numSymbols++;
@@ -153,11 +153,11 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
           if (pos)
             O << ", ";
           if (pos == nextSymbolPos) {
-            Value *v = Symbols[nSym];
-            if (GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
+            const Value *v = Symbols[nSym];
+            if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
               MCSymbol *Name = AP.Mang->getSymbol(GVar);
               O << *Name;
-            } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(v)) {
+            } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(v)) {
               O << *nvptx::LowerConstant(Cexpr, AP);
             } else
               llvm_unreachable("symbol type unknown");
@@ -205,10 +205,12 @@ private:
   void printImplicitDef(const MachineInstr *MI, raw_ostream &O) const;
   // definition autogenerated.
   void printInstruction(const MachineInstr *MI, raw_ostream &O);
-  void printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O, bool = false);
+  void printModuleLevelGV(const GlobalVariable *GVar, raw_ostream &O,
+                          bool = false);
   void printParamName(int paramIndex, raw_ostream &O);
   void printParamName(Function::const_arg_iterator I, int paramIndex,
                       raw_ostream &O);
+  void emitGlobals(const Module &M);
   void emitHeader(Module &M, raw_ostream &O);
   void emitKernelFunctionDirectives(const Function &F, raw_ostream &O) const;
   void emitVirtualRegister(unsigned int vr, bool isVec, raw_ostream &O);
@@ -234,6 +236,8 @@ protected:
 private:
   std::string CurrentBankselLabelInBasicBlock;
 
+  bool GlobalsEmitted;
+  
   // This is specific per MachineFunction.
   const MachineRegisterInfo *MRI;
   // The contents are specific for each
@@ -247,7 +251,7 @@ private:
   std::map<const Type *, std::string> TypeNameMap;
 
   // List of variables demoted to a function scope.
-  std::map<const Function *, std::vector<GlobalVariable *> > localDecls;
+  std::map<const Function *, std::vector<const GlobalVariable *> > localDecls;
 
   // To record filename to ID mapping
   std::map<std::string, unsigned> filenameMap;
@@ -256,15 +260,15 @@ private:
   void emitPTXGlobalVariable(const GlobalVariable *GVar, raw_ostream &O);
   void emitPTXAddressSpace(unsigned int AddressSpace, raw_ostream &O) const;
   std::string getPTXFundamentalTypeStr(const Type *Ty, bool = true) const;
-  void printScalarConstant(Constant *CPV, raw_ostream &O);
+  void printScalarConstant(const Constant *CPV, raw_ostream &O);
   void printFPConstant(const ConstantFP *Fp, raw_ostream &O);
-  void bufferLEByte(Constant *CPV, int Bytes, AggBuffer *aggBuffer);
-  void bufferAggregateConstant(Constant *CV, AggBuffer *aggBuffer);
+  void bufferLEByte(const Constant *CPV, int Bytes, AggBuffer *aggBuffer);
+  void bufferAggregateConstant(const Constant *CV, AggBuffer *aggBuffer);
 
   void printOperandProper(const MachineOperand &MO);
 
   void emitLinkageDirective(const GlobalValue *V, raw_ostream &O);
-  void emitDeclarations(Module &, raw_ostream &O);
+  void emitDeclarations(const Module &, raw_ostream &O);
   void emitDeclaration(const Function *, raw_ostream &O);
 
   static const char *getRegisterName(unsigned RegNo);
diff --git a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
new file mode 100644
index 0000000..1077c46
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
@@ -0,0 +1,436 @@
+//===-- GenericToNVVM.cpp - Convert generic module to NVVM module - C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Convert generic global variables into either .global or .const access based
+// on the variable's "constant" qualifier.
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTX.h"
+#include "NVPTXUtilities.h"
+#include "MCTargetDesc/NVPTXBaseInfo.h"
+
+#include "llvm/PassManager.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/ADT/ValueMap.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/IRBuilder.h"
+
+using namespace llvm;
+
+namespace llvm {
+void initializeGenericToNVVMPass(PassRegistry &);
+}
+
+namespace {
+class GenericToNVVM : public ModulePass {
+public:
+  static char ID;
+
+  GenericToNVVM() : ModulePass(ID) {}
+
+  virtual bool runOnModule(Module &M);
+
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const { // nothing declared
+  }
+
+private:
+  Value *getOrInsertCVTA(Module *M, Function *F, GlobalVariable *GV,
+                         IRBuilder<> &Builder);
+  Value *remapConstant(Module *M, Function *F, Constant *C,
+                       IRBuilder<> &Builder);
+  Value *remapConstantVectorOrConstantAggregate(Module *M, Function *F,
+                                                Constant *C,
+                                                IRBuilder<> &Builder);
+  Value *remapConstantExpr(Module *M, Function *F, ConstantExpr *C,
+                           IRBuilder<> &Builder);
+  void remapNamedMDNode(Module *M, NamedMDNode *N);
+  MDNode *remapMDNode(Module *M, MDNode *N);
+
+  typedef ValueMap<GlobalVariable *, GlobalVariable *> GVMapTy;
+  typedef ValueMap<Constant *, Value *> ConstantToValueMapTy;
+  GVMapTy GVMap; // original global -> its clone in the global address space
+  ConstantToValueMapTy ConstantToValueMap; // remap cache, cleared per function
+};
+}
+
+char GenericToNVVM::ID = 0;
+
+ModulePass *llvm::createGenericToNVVMPass() { return new GenericToNVVM(); }
+
+INITIALIZE_PASS(
+    GenericToNVVM, "generic-to-nvvm",
+    "Ensure that the global variables are in the global address space", false,
+    false)
+
+bool GenericToNVVM::runOnModule(Module &M) {
+  // Create a clone of each global variable that has the default address space.
+  // The clone is created with the global address space specifier, and the pair
+  // of original global variable and its clone is placed in the GVMap for later
+  // use.
+
+  for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+       I != E;) {
+    GlobalVariable *GV = I++;
+    if (GV->getType()->getAddressSpace() == llvm::ADDRESS_SPACE_GENERIC &&
+        !llvm::isTexture(*GV) && !llvm::isSurface(*GV) &&
+        !GV->getName().startswith("llvm.")) {
+      GlobalVariable *NewGV = new GlobalVariable(
+          M, GV->getType()->getElementType(), GV->isConstant(),
+          GV->getLinkage(), GV->hasInitializer() ? GV->getInitializer() : NULL,
+          "", GV, GV->getThreadLocalMode(), llvm::ADDRESS_SPACE_GLOBAL);
+      NewGV->copyAttributesFrom(GV);
+      GVMap[GV] = NewGV;
+    }
+  }
+
+  // Return immediately, if every global variable has a specific address space
+  // specifier.
+  if (GVMap.empty()) {
+    return false;
+  }
+
+  // Walk through the instructions in function definitions, and replace any use
+  // of original global variables in GVMap with a use of the corresponding
+  // copies in GVMap.  If necessary, promote constants to instructions.
+  for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
+    if (I->isDeclaration()) {
+      continue;
+    }
+    IRBuilder<> Builder(I->getEntryBlock().getFirstNonPHIOrDbg());
+    for (Function::iterator BBI = I->begin(), BBE = I->end(); BBI != BBE;
+         ++BBI) {
+      for (BasicBlock::iterator II = BBI->begin(), IE = BBI->end(); II != IE;
+           ++II) {
+        for (unsigned i = 0, e = II->getNumOperands(); i < e; ++i) {
+          Value *Operand = II->getOperand(i);
+          if (isa<Constant>(Operand)) {
+            II->setOperand(
+                i, remapConstant(&M, I, cast<Constant>(Operand), Builder));
+          }
+        }
+      }
+    }
+    ConstantToValueMap.clear();
+  }
+
+  // Walk through the metadata section and update the debug information
+  // associated with the global variables in the default address space.
+  for (Module::named_metadata_iterator I = M.named_metadata_begin(),
+                                       E = M.named_metadata_end();
+       I != E; I++) {
+    remapNamedMDNode(&M, I);
+  }
+
+  // Walk through the global variable initializers, and replace any use of
+  // original global variables in GVMap with a use of the corresponding copies
+  // in GVMap.  The copies need to be bitcast to the original global variable
+  // types, as we cannot use cvta in global variable initializers.
+  for (GVMapTy::iterator I = GVMap.begin(), E = GVMap.end(); I != E;) {
+    GlobalVariable *GV = I->first;
+    GlobalVariable *NewGV = I->second;
+    ++I; // advance before GV is erased from the module below
+    Constant *BitCastNewGV = ConstantExpr::getBitCast(NewGV, GV->getType());
+    // At this point, the remaining uses of GV should be found only in global
+    // variable initializers, as other uses have already been removed
+    // while walking through the instructions in function definitions.
+    for (Value::use_iterator UI = GV->use_begin(), UE = GV->use_end();
+         UI != UE;) {
+      Use &U = (UI++).getUse();
+      U.set(BitCastNewGV);
+    }
+    std::string Name = GV->getName(); // saved so the clone can take it over
+    GV->removeDeadConstantUsers();
+    GV->eraseFromParent();
+    NewGV->setName(Name);
+  }
+  GVMap.clear();
+
+  return true;
+}
+
+Value *GenericToNVVM::getOrInsertCVTA(Module *M, Function *F,
+                                      GlobalVariable *GV,
+                                      IRBuilder<> &Builder) {
+  PointerType *GVType = GV->getType();
+  Value *CVTA = NULL;
+
+  // See if the address space conversion requires the operand to be bitcast
+  // to i8 addrspace(n)* first.
+  EVT ExtendedGVType = EVT::getEVT(GVType->getElementType(), true);
+  if (!ExtendedGVType.isInteger() && !ExtendedGVType.isFloatingPoint()) {
+    // A bitcast to i8 addrspace(n)* on the operand is needed.
+    LLVMContext &Context = M->getContext();
+    unsigned int AddrSpace = GVType->getAddressSpace();
+    Type *DestTy = PointerType::get(Type::getInt8Ty(Context), AddrSpace);
+    CVTA = Builder.CreateBitCast(GV, DestTy, "cvta");
+    // Insert the address space conversion (i8 addrspace(n)* -> generic i8 *).
+    Type *ResultType =
+        PointerType::get(Type::getInt8Ty(Context), llvm::ADDRESS_SPACE_GENERIC);
+    SmallVector<Type *, 2> ParamTypes;
+    ParamTypes.push_back(ResultType);
+    ParamTypes.push_back(DestTy);
+    Function *CVTAFunction = Intrinsic::getDeclaration(
+        M, Intrinsic::nvvm_ptr_global_to_gen, ParamTypes);
+    CVTA = Builder.CreateCall(CVTAFunction, CVTA, "cvta");
+    // Another bitcast from i8 * to <the element type of GVType> * is
+    // required.
+    DestTy =
+        PointerType::get(GVType->getElementType(), llvm::ADDRESS_SPACE_GENERIC);
+    CVTA = Builder.CreateBitCast(CVTA, DestTy, "cvta");
+  } else {
+    // A simple CVTA is enough: the intrinsic is called on GV directly.
+    SmallVector<Type *, 2> ParamTypes;
+    ParamTypes.push_back(PointerType::get(GVType->getElementType(),
+                                          llvm::ADDRESS_SPACE_GENERIC));
+    ParamTypes.push_back(GVType);
+    Function *CVTAFunction = Intrinsic::getDeclaration(
+        M, Intrinsic::nvvm_ptr_global_to_gen, ParamTypes);
+    CVTA = Builder.CreateCall(CVTAFunction, GV, "cvta");
+  }
+
+  return CVTA;
+}
+
+Value *GenericToNVVM::remapConstant(Module *M, Function *F, Constant *C,
+                                    IRBuilder<> &Builder) {
+  // If the constant C has been converted already in the given function F, just
+  // return the converted value.
+  ConstantToValueMapTy::iterator CTII = ConstantToValueMap.find(C);
+  if (CTII != ConstantToValueMap.end()) {
+    return CTII->second;
+  }
+
+  Value *NewValue = C;
+  if (isa<GlobalVariable>(C)) {
+    // If the constant C is a global variable and is found in GVMap, generate a
+    // set of instructions that convert the clone of C with the global
+    // address space specifier to a generic pointer.
+    // The constant C cannot be used here, as it will be erased from the
+    // module eventually.  And the clone of C with the global address space
+    // specifier cannot be used here either, as it will affect the types of
+    // other instructions in the function.  Hence, this address space conversion
+    // is required.
+    GVMapTy::iterator I = GVMap.find(cast<GlobalVariable>(C));
+    if (I != GVMap.end()) {
+      NewValue = getOrInsertCVTA(M, F, I->second, Builder);
+    }
+  } else if (isa<ConstantVector>(C) || isa<ConstantArray>(C) ||
+             isa<ConstantStruct>(C)) {
+    // If any element in the constant vector or aggregate C is or uses a global
+    // variable in GVMap, the constant C needs to be reconstructed, using a set
+    // of instructions.
+    NewValue = remapConstantVectorOrConstantAggregate(M, F, C, Builder);
+  } else if (isa<ConstantExpr>(C)) {
+    // If any operand in the constant expression C is or uses a global variable
+    // in GVMap, the constant expression C needs to be reconstructed, using a
+    // set of instructions.
+    NewValue = remapConstantExpr(M, F, cast<ConstantExpr>(C), Builder);
+  }
+
+  ConstantToValueMap[C] = NewValue; // cached even when C is left unchanged
+  return NewValue;
+}
+
+Value *GenericToNVVM::remapConstantVectorOrConstantAggregate(
+    Module *M, Function *F, Constant *C, IRBuilder<> &Builder) {
+  bool OperandChanged = false;
+  SmallVector<Value *, 4> NewOperands;
+  unsigned NumOperands = C->getNumOperands();
+
+  // Check if any element is or uses a global variable in GVMap, and thus
+  // converted to another value.
+  for (unsigned i = 0; i < NumOperands; ++i) {
+    Value *Operand = C->getOperand(i);
+    Value *NewOperand = remapConstant(M, F, cast<Constant>(Operand), Builder);
+    OperandChanged |= Operand != NewOperand;
+    NewOperands.push_back(NewOperand);
+  }
+
+  // If none of the elements has been modified, return C as it is.
+  if (!OperandChanged) {
+    return C;
+  }
+
+  // If any of the elements has been modified, construct the equivalent
+  // vector or aggregate value with a set of instructions and the converted
+  // elements, starting from undef and inserting one element at a time.
+  Value *NewValue = UndefValue::get(C->getType());
+  if (isa<ConstantVector>(C)) {
+    for (unsigned i = 0; i < NumOperands; ++i) {
+      Value *Idx = ConstantInt::get(Type::getInt32Ty(M->getContext()), i);
+      NewValue = Builder.CreateInsertElement(NewValue, NewOperands[i], Idx);
+    }
+  } else {
+    for (unsigned i = 0; i < NumOperands; ++i) {
+      NewValue =
+          Builder.CreateInsertValue(NewValue, NewOperands[i], makeArrayRef(i));
+    }
+  }
+
+  return NewValue;
+}
+
+Value *GenericToNVVM::remapConstantExpr(Module *M, Function *F, ConstantExpr *C,
+                                        IRBuilder<> &Builder) {
+  bool OperandChanged = false;
+  SmallVector<Value *, 4> NewOperands;
+  unsigned NumOperands = C->getNumOperands();
+
+  // Check if any operand is or uses a global variable in GVMap, and thus
+  // converted to another value.
+  for (unsigned i = 0; i < NumOperands; ++i) {
+    Value *Operand = C->getOperand(i);
+    Value *NewOperand = remapConstant(M, F, cast<Constant>(Operand), Builder);
+    OperandChanged |= Operand != NewOperand;
+    NewOperands.push_back(NewOperand);
+  }
+
+  // If none of the operands has been modified, return C as it is.
+  if (!OperandChanged) {
+    return C;
+  }
+
+  // If any of the operands has been modified, construct the instruction with
+  // the converted operands.
+  unsigned Opcode = C->getOpcode();
+  switch (Opcode) {
+  case Instruction::ICmp:
+    // CompareConstantExpr (icmp)
+    return Builder.CreateICmp(CmpInst::Predicate(C->getPredicate()),
+                              NewOperands[0], NewOperands[1]);
+  case Instruction::FCmp:
+    // CompareConstantExpr (fcmp)
+    assert(false && "Address space conversion should have no effect "
+                    "on float point CompareConstantExpr (fcmp)!");
+    return C;
+  case Instruction::ExtractElement:
+    // ExtractElementConstantExpr
+    return Builder.CreateExtractElement(NewOperands[0], NewOperands[1]);
+  case Instruction::InsertElement:
+    // InsertElementConstantExpr
+    return Builder.CreateInsertElement(NewOperands[0], NewOperands[1],
+                                       NewOperands[2]);
+  case Instruction::ShuffleVector:
+    // ShuffleVector
+    return Builder.CreateShuffleVector(NewOperands[0], NewOperands[1],
+                                       NewOperands[2]);
+  case Instruction::ExtractValue:
+    // ExtractValueConstantExpr
+    return Builder.CreateExtractValue(NewOperands[0], C->getIndices());
+  case Instruction::InsertValue:
+    // InsertValueConstantExpr
+    return Builder.CreateInsertValue(NewOperands[0], NewOperands[1],
+                                     C->getIndices());
+  case Instruction::GetElementPtr:
+    // GetElementPtrConstantExpr: keep the inbounds flag exactly as it was on C.
+    return cast<GEPOperator>(C)->isInBounds()
+               ? Builder.CreateInBoundsGEP(
+                     NewOperands[0],
+                     makeArrayRef(&NewOperands[1], NumOperands - 1))
+               : Builder.CreateGEP(
+                     NewOperands[0],
+                     makeArrayRef(&NewOperands[1], NumOperands - 1));
+  case Instruction::Select:
+    // SelectConstantExpr
+    return Builder.CreateSelect(NewOperands[0], NewOperands[1], NewOperands[2]);
+  default:
+    // BinaryConstantExpr
+    if (Instruction::isBinaryOp(Opcode)) {
+      return Builder.CreateBinOp(Instruction::BinaryOps(C->getOpcode()),
+                                 NewOperands[0], NewOperands[1]);
+    }
+    // UnaryConstantExpr
+    if (Instruction::isCast(Opcode)) {
+      return Builder.CreateCast(Instruction::CastOps(C->getOpcode()),
+                                NewOperands[0], C->getType());
+    }
+    assert(false && "GenericToNVVM encountered an unsupported ConstantExpr");
+    return C;
+  }
+}
+
+void GenericToNVVM::remapNamedMDNode(Module *M, NamedMDNode *N) {
+
+  bool OperandChanged = false;
+  SmallVector<MDNode *, 16> NewOperands;
+  unsigned NumOperands = N->getNumOperands();
+
+  // Check if any operand is or contains a global variable in GVMap, and thus
+  // converted to another value.
+  for (unsigned i = 0; i < NumOperands; ++i) {
+    MDNode *Operand = N->getOperand(i);
+    MDNode *NewOperand = remapMDNode(M, Operand);
+    OperandChanged |= Operand != NewOperand;
+    NewOperands.push_back(NewOperand);
+  }
+
+  // If none of the operands has been modified, return immediately.
+  if (!OperandChanged) {
+    return;
+  }
+
+  // Replace the old operands with the new operands, in the original order.
+  N->dropAllReferences();
+  for (SmallVector<MDNode *, 16>::iterator I = NewOperands.begin(),
+                                           E = NewOperands.end();
+       I != E; ++I) {
+    N->addOperand(*I);
+  }
+}
+
+MDNode *GenericToNVVM::remapMDNode(Module *M, MDNode *N) {
+
+  bool OperandChanged = false;
+  SmallVector<Value *, 8> NewOperands;
+  unsigned NumOperands = N->getNumOperands();
+
+  // Check if any operand is or contains a global variable in GVMap, and thus
+  // converted to another value.
+  for (unsigned i = 0; i < NumOperands; ++i) {
+    Value *Operand = N->getOperand(i);
+    Value *NewOperand = Operand;
+    if (Operand) {
+      if (isa<GlobalVariable>(Operand)) {
+        GVMapTy::iterator I = GVMap.find(cast<GlobalVariable>(Operand));
+        if (I != GVMap.end()) {
+          NewOperand = I->second;
+          if (++i < NumOperands) { // also consumes the operand after the global
+            NewOperands.push_back(NewOperand);
+            // Address space of the global variable follows the global variable
+            // in the global variable debug info (see createGlobalVariable in
+            // lib/Analysis/DIBuilder.cpp).
+            NewOperand =
+                ConstantInt::get(Type::getInt32Ty(M->getContext()),
+                                 I->second->getType()->getAddressSpace());
+          }
+        }
+      } else if (isa<MDNode>(Operand)) {
+        NewOperand = remapMDNode(M, cast<MDNode>(Operand));
+      }
+    }
+    OperandChanged |= Operand != NewOperand;
+    NewOperands.push_back(NewOperand);
+  }
+
+  // If none of the operands has been modified, return N as it is.
+  if (!OperandChanged) {
+    return N;
+  }
+
+  // If any of the operands has been modified, create a new MDNode with the new
+  // operands.
+  return MDNode::get(M->getContext(), makeArrayRef(NewOperands));
+}
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index e862988..d4378c2 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -42,6 +42,11 @@ static cl::opt<int> UsePrecDivF32(
              " IEEE Compliant F32 div.rnd if avaiable."),
     cl::init(2));
 
+static cl::opt<bool>
+UsePrecSqrtF32("nvptx-prec-sqrtf32",
+          cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
+          cl::init(true));
+
 /// createNVPTXISelDag - This pass converts a legalized DAG into a
 /// NVPTX-specific DAG, ready for instruction scheduling.
 FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
@@ -74,6 +79,8 @@ NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
 
   // Decide how to translate f32 div
   do_DIVF32_PREC = UsePrecDivF32;
+  // Decide how to translate f32 sqrt
+  do_SQRTF32_PREC = UsePrecSqrtF32;
   // sm less than sm_20 does not support div.rnd. Use div.full.
   if (do_DIVF32_PREC == 2 && !Subtarget.reqPTX20())
     do_DIVF32_PREC = 1;
@@ -241,7 +248,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
     SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
                       getI32Imm(vecType), getI32Imm(fromType),
                       getI32Imm(fromTypeWidth), Addr, Chain };
-    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 7);
+    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
   } else if (Subtarget.is64Bit()
                  ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
                  : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
@@ -270,7 +277,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
     SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
                       getI32Imm(vecType), getI32Imm(fromType),
                       getI32Imm(fromTypeWidth), Base, Offset, Chain };
-    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 8);
+    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
   } else if (Subtarget.is64Bit()
                  ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
                  : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
@@ -324,7 +331,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
     SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
                       getI32Imm(vecType), getI32Imm(fromType),
                       getI32Imm(fromTypeWidth), Base, Offset, Chain };
-    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 8);
+    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
   } else {
     if (Subtarget.is64Bit()) {
       switch (TargetVT) {
@@ -376,7 +383,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
     SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
                       getI32Imm(vecType), getI32Imm(fromType),
                       getI32Imm(fromTypeWidth), N1, Chain };
-    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 7);
+    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
   }
 
   if (NVPTXLD != NULL) {
@@ -501,7 +508,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
     SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
                       getI32Imm(VecType), getI32Imm(FromType),
                       getI32Imm(FromTypeWidth), Addr, Chain };
-    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 7);
+    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
   } else if (Subtarget.is64Bit()
                  ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
                  : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
@@ -555,7 +562,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
     SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
                       getI32Imm(VecType), getI32Imm(FromType),
                       getI32Imm(FromTypeWidth), Base, Offset, Chain };
-    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 8);
+    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
   } else if (Subtarget.is64Bit()
                  ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
                  : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
@@ -659,7 +666,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
                       getI32Imm(VecType), getI32Imm(FromType),
                       getI32Imm(FromTypeWidth), Base, Offset, Chain };
 
-    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 8);
+    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
   } else {
     if (Subtarget.is64Bit()) {
       switch (N->getOpcode()) {
@@ -760,7 +767,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
     SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
                       getI32Imm(VecType), getI32Imm(FromType),
                       getI32Imm(FromTypeWidth), Op1, Chain };
-    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 7);
+    LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
   }
 
   MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
@@ -962,7 +969,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
   }
 
   SDValue Ops[] = { Op1, Chain };
-  LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), &Ops[0], 2);
+  LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
 
   MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
   MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
@@ -1055,7 +1062,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
     SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
                       getI32Imm(vecType), getI32Imm(toType),
                       getI32Imm(toTypeWidth), Addr, Chain };
-    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 8);
+    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
   } else if (Subtarget.is64Bit()
                  ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
                  : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
@@ -1084,7 +1091,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
     SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
                       getI32Imm(vecType), getI32Imm(toType),
                       getI32Imm(toTypeWidth), Base, Offset, Chain };
-    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 9);
+    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
   } else if (Subtarget.is64Bit()
                  ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
                  : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
@@ -1138,7 +1145,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
     SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
                       getI32Imm(vecType), getI32Imm(toType),
                       getI32Imm(toTypeWidth), Base, Offset, Chain };
-    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 9);
+    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
   } else {
     if (Subtarget.is64Bit()) {
       switch (SourceVT) {
@@ -1190,7 +1197,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
     SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
                       getI32Imm(vecType), getI32Imm(toType),
                       getI32Imm(toTypeWidth), N2, Chain };
-    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 8);
+    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
   }
 
   if (NVPTXST != NULL) {
@@ -1569,7 +1576,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
 
   StOps.push_back(Chain);
 
-  ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, &StOps[0], StOps.size());
+  ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, StOps);
 
   MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
   MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index 70e8e46..ed16d44 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -41,6 +41,10 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
   //    Otherwise, use div.full
   int do_DIVF32_PREC;
 
+  // If true, generate sqrt.rn, else generate sqrt.approx. If FTZ
+  // is true, then generate the corresponding FTZ version.
+  bool do_SQRTF32_PREC;
+
   // If true, add .ftz to f32 instructions.
   // This is only meaningful for sm_20 and later, as the default
   // is not ftz.
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.td b/lib/Target/NVPTX/NVPTXInstrInfo.td
index f43abe2..da6dd39 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -75,6 +75,9 @@ def allowFMA_ftz : Predicate<"(allowFMA && UseF32FTZ)">;
 def do_DIVF32_APPROX : Predicate<"do_DIVF32_PREC==0">;
 def do_DIVF32_FULL : Predicate<"do_DIVF32_PREC==1">;
 
+def do_SQRTF32_APPROX : Predicate<"do_SQRTF32_PREC==0">;
+def do_SQRTF32_RN : Predicate<"do_SQRTF32_PREC==1">;
+
 def hasHWROT32 : Predicate<"Subtarget.hasHWROT32()">;
 
 def true : Predicate<"1">;
diff --git a/lib/Target/NVPTX/NVPTXIntrinsics.td b/lib/Target/NVPTX/NVPTXIntrinsics.td
index 49e2568..24037ca 100644
--- a/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -512,6 +512,16 @@ def INT_NVVM_SQRT_RM_D : F_MATH_1<"sqrt.rm.f64 \t$dst, $src0;", Float64Regs,
 def INT_NVVM_SQRT_RP_D : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;", Float64Regs,
   Float64Regs, int_nvvm_sqrt_rp_d>;
 
+// nvvm_sqrt intrinsic
+def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
+          (INT_NVVM_SQRT_RN_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ, do_SQRTF32_RN]>;
+def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
+          (INT_NVVM_SQRT_RN_F Float32Regs:$a)>, Requires<[do_SQRTF32_RN]>;
+def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
+          (INT_NVVM_SQRT_APPROX_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ]>;
+def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
+          (INT_NVVM_SQRT_APPROX_F Float32Regs:$a)>;
+
 //
 // Rsqrt
 //
@@ -1510,38 +1520,12 @@ multiclass G_TO_NG<string Str, Intrinsic Intrin> {
 defm cvta_local  : NG_TO_G<"local", int_nvvm_ptr_local_to_gen>;
 defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen>;
 defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen>;
+defm cvta_const  : NG_TO_G<"const", int_nvvm_ptr_constant_to_gen>;
 
 defm cvta_to_local   : G_TO_NG<"local", int_nvvm_ptr_gen_to_local>;
 defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared>;
 defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global>;
-
-def cvta_const : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
-               "mov.u32 \t$result, $src;",
-     [(set Int32Regs:$result, (int_nvvm_ptr_constant_to_gen Int32Regs:$src))]>;
-def cvta_const_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
-               "mov.u64 \t$result, $src;",
-     [(set Int64Regs:$result, (int_nvvm_ptr_constant_to_gen Int64Regs:$src))]>;
-
-
-
-// @TODO: Revisit this.  There is a type
-// contradiction between iPTRAny and iPTR for the def.
-/*def cvta_const_addr : NVPTXInst<(outs Int32Regs:$result), (ins imemAny:$src),
-               "mov.u32 \t$result, $src;",
-     [(set Int32Regs:$result, (int_nvvm_ptr_constant_to_gen
-     (Wrapper tglobaladdr:$src)))]>;
-def cvta_const_addr_64 : NVPTXInst<(outs Int64Regs:$result), (ins imemAny:$src),
-               "mov.u64 \t$result, $src;",
-     [(set Int64Regs:$result, (int_nvvm_ptr_constant_to_gen
-     (Wrapper tglobaladdr:$src)))]>;*/
-
-
-def cvta_to_const : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
-            "mov.u32 \t$result, $src;",
-     [(set Int32Regs:$result, (int_nvvm_ptr_gen_to_constant Int32Regs:$src))]>;
-def cvta_to_const_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
-            "mov.u64 \t$result, $src;",
-     [(set Int64Regs:$result, (int_nvvm_ptr_gen_to_constant Int64Regs:$src))]>;
+defm cvta_to_const  : G_TO_NG<"const", int_nvvm_ptr_gen_to_constant>;
 
 
 // nvvm.ptr.gen.to.param
diff --git a/lib/Target/NVPTX/NVPTXSection.h b/lib/Target/NVPTX/NVPTXSection.h
index e166be5..e57ace9 100644
--- a/lib/Target/NVPTX/NVPTXSection.h
+++ b/lib/Target/NVPTX/NVPTXSection.h
@@ -32,7 +32,8 @@ public:
   /// Override this as NVPTX has its own way of printing switching
   /// to a section.
   virtual void PrintSwitchToSection(const MCAsmInfo &MAI,
-                                    raw_ostream &OS) const {}
+                                    raw_ostream &OS,
+                                    const MCExpr *Subsection) const {}
 
   /// Base address of PTX sections is zero.
   virtual bool isBaseAddressKnownZero() const { return true; }
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 67ca6b5..1ae2a7c 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -49,6 +49,7 @@ using namespace llvm;
 
 namespace llvm {
 void initializeNVVMReflectPass(PassRegistry&);
+void initializeGenericToNVVMPass(PassRegistry&);
 }
 
 extern "C" void LLVMInitializeNVPTXTarget() {
@@ -62,6 +63,7 @@ extern "C" void LLVMInitializeNVPTXTarget() {
   // FIXME: This pass is really intended to be invoked during IR optimization,
   // but it's very NVPTX-specific.
   initializeNVVMReflectPass(*PassRegistry::getPassRegistry());
+  initializeGenericToNVVMPass(*PassRegistry::getPassRegistry());
 }
 
 NVPTXTargetMachine::NVPTXTargetMachine(
@@ -100,6 +102,7 @@ public:
     return getTM<NVPTXTargetMachine>();
   }
 
+  virtual void addIRPasses();
   virtual bool addInstSelector();
   virtual bool addPreRegAlloc();
 };
@@ -110,6 +113,11 @@ TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) {
   return PassConfig;
 }
 
+void NVPTXPassConfig::addIRPasses() {
+  TargetPassConfig::addIRPasses();
+  addPass(createGenericToNVVMPass());
+}
+
 bool NVPTXPassConfig::addInstSelector() {
   addPass(createLowerAggrCopies());
   addPass(createSplitBBatBarPass());
diff --git a/lib/Target/NVPTX/NVVMReflect.cpp b/lib/Target/NVPTX/NVVMReflect.cpp
index 0ad62ce..3cc324b 100644
--- a/lib/Target/NVPTX/NVVMReflect.cpp
+++ b/lib/Target/NVPTX/NVVMReflect.cpp
@@ -14,6 +14,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "NVPTX.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringMap.h"
@@ -40,7 +41,7 @@ using namespace llvm;
 namespace llvm { void initializeNVVMReflectPass(PassRegistry &); }
 
 namespace {
-class LLVM_LIBRARY_VISIBILITY NVVMReflect : public ModulePass {
+class NVVMReflect : public ModulePass {
 private:
   StringMap<int> VarMap;
   typedef DenseMap<std::string, int>::iterator VarMapIter;
@@ -48,9 +49,18 @@ private:
 
 public:
   static char ID;
-  NVVMReflect() : ModulePass(ID) {
+  NVVMReflect() : ModulePass(ID), ReflectFunction(0) {
+    initializeNVVMReflectPass(*PassRegistry::getPassRegistry());
     VarMap.clear();
-    ReflectFunction = 0;
+  }
+
+  NVVMReflect(const StringMap<int> &Mapping)
+  : ModulePass(ID), ReflectFunction(0) {
+    initializeNVVMReflectPass(*PassRegistry::getPassRegistry());
+    for (StringMap<int>::const_iterator I = Mapping.begin(), E = Mapping.end();
+         I != E; ++I) {
+      VarMap[(*I).getKey()] = (*I).getValue();
+    }
   }
 
   void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); }
@@ -60,6 +70,14 @@ public:
 };
 }
 
+ModulePass *llvm::createNVVMReflectPass() {
+  return new NVVMReflect();
+}
+
+ModulePass *llvm::createNVVMReflectPass(const StringMap<int>& Mapping) {
+  return new NVVMReflect(Mapping);
+}
+
 static cl::opt<bool>
 NVVMReflectEnabled("nvvm-reflect-enable", cl::init(true),
                    cl::desc("NVVM reflection, enabled by default"));
diff --git a/lib/Target/PowerPC/AsmParser/CMakeLists.txt b/lib/Target/PowerPC/AsmParser/CMakeLists.txt
new file mode 100644
index 0000000..3aa59c0
--- /dev/null
+++ b/lib/Target/PowerPC/AsmParser/CMakeLists.txt
@@ -0,0 +1,8 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/..
+                     ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMPowerPCAsmParser
+  PPCAsmParser.cpp
+  )
+
+add_dependencies(LLVMPowerPCAsmParser PowerPCCommonTableGen)
diff --git a/lib/Target/PowerPC/AsmParser/LLVMBuild.txt b/lib/Target/PowerPC/AsmParser/LLVMBuild.txt
new file mode 100644
index 0000000..bd08c13
--- /dev/null
+++ b/lib/Target/PowerPC/AsmParser/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/PowerPC/AsmParser/LLVMBuild.txt --------------*- Conf -*--===;
+;
+;                     The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+;   http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = PowerPCAsmParser
+parent = PowerPC
+required_libraries = PowerPCInfo MC MCParser Support
+add_to_library_groups = PowerPC
diff --git a/lib/Target/PowerPC/AsmParser/Makefile b/lib/Target/PowerPC/AsmParser/Makefile
new file mode 100644
index 0000000..c8a8915
--- /dev/null
+++ b/lib/Target/PowerPC/AsmParser/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/PowerPC/AsmParser/Makefile ----------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMPowerPCAsmParser
+
+# Hack: we need to include 'main' PowerPC target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
new file mode 100644
index 0000000..f2cb8b8
--- /dev/null
+++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -0,0 +1,739 @@
+//===-- PPCAsmParser.cpp - Parse PowerPC asm to MCInst instructions ---------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/PPCMCTargetDesc.h"
+#include "llvm/MC/MCTargetAsmParser.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+
+static const unsigned RRegs[32] = {     // R0-R31; used for GPRC operands
+  PPC::R0,  PPC::R1,  PPC::R2,  PPC::R3,
+  PPC::R4,  PPC::R5,  PPC::R6,  PPC::R7,
+  PPC::R8,  PPC::R9,  PPC::R10, PPC::R11,
+  PPC::R12, PPC::R13, PPC::R14, PPC::R15,
+  PPC::R16, PPC::R17, PPC::R18, PPC::R19,
+  PPC::R20, PPC::R21, PPC::R22, PPC::R23,
+  PPC::R24, PPC::R25, PPC::R26, PPC::R27,
+  PPC::R28, PPC::R29, PPC::R30, PPC::R31
+};
+static const unsigned RRegsNoR0[32] = { // as RRegs, but index 0 is PPC::ZERO
+  PPC::ZERO,
+            PPC::R1,  PPC::R2,  PPC::R3,
+  PPC::R4,  PPC::R5,  PPC::R6,  PPC::R7,
+  PPC::R8,  PPC::R9,  PPC::R10, PPC::R11,
+  PPC::R12, PPC::R13, PPC::R14, PPC::R15,
+  PPC::R16, PPC::R17, PPC::R18, PPC::R19,
+  PPC::R20, PPC::R21, PPC::R22, PPC::R23,
+  PPC::R24, PPC::R25, PPC::R26, PPC::R27,
+  PPC::R28, PPC::R29, PPC::R30, PPC::R31
+};
+static const unsigned XRegs[32] = {     // X0-X31; used for G8RC operands
+  PPC::X0,  PPC::X1,  PPC::X2,  PPC::X3,
+  PPC::X4,  PPC::X5,  PPC::X6,  PPC::X7,
+  PPC::X8,  PPC::X9,  PPC::X10, PPC::X11,
+  PPC::X12, PPC::X13, PPC::X14, PPC::X15,
+  PPC::X16, PPC::X17, PPC::X18, PPC::X19,
+  PPC::X20, PPC::X21, PPC::X22, PPC::X23,
+  PPC::X24, PPC::X25, PPC::X26, PPC::X27,
+  PPC::X28, PPC::X29, PPC::X30, PPC::X31
+};
+static const unsigned XRegsNoX0[32] = { // as XRegs, but index 0 is PPC::ZERO8
+  PPC::ZERO8,
+            PPC::X1,  PPC::X2,  PPC::X3,
+  PPC::X4,  PPC::X5,  PPC::X6,  PPC::X7,
+  PPC::X8,  PPC::X9,  PPC::X10, PPC::X11,
+  PPC::X12, PPC::X13, PPC::X14, PPC::X15,
+  PPC::X16, PPC::X17, PPC::X18, PPC::X19,
+  PPC::X20, PPC::X21, PPC::X22, PPC::X23,
+  PPC::X24, PPC::X25, PPC::X26, PPC::X27,
+  PPC::X28, PPC::X29, PPC::X30, PPC::X31
+};
+static const unsigned FRegs[32] = {     // F0-F31; used for F4RC/F8RC operands
+  PPC::F0,  PPC::F1,  PPC::F2,  PPC::F3,
+  PPC::F4,  PPC::F5,  PPC::F6,  PPC::F7,
+  PPC::F8,  PPC::F9,  PPC::F10, PPC::F11,
+  PPC::F12, PPC::F13, PPC::F14, PPC::F15,
+  PPC::F16, PPC::F17, PPC::F18, PPC::F19,
+  PPC::F20, PPC::F21, PPC::F22, PPC::F23,
+  PPC::F24, PPC::F25, PPC::F26, PPC::F27,
+  PPC::F28, PPC::F29, PPC::F30, PPC::F31
+};
+static const unsigned VRegs[32] = {     // V0-V31; used for VRRC operands
+  PPC::V0,  PPC::V1,  PPC::V2,  PPC::V3,
+  PPC::V4,  PPC::V5,  PPC::V6,  PPC::V7,
+  PPC::V8,  PPC::V9,  PPC::V10, PPC::V11,
+  PPC::V12, PPC::V13, PPC::V14, PPC::V15,
+  PPC::V16, PPC::V17, PPC::V18, PPC::V19,
+  PPC::V20, PPC::V21, PPC::V22, PPC::V23,
+  PPC::V24, PPC::V25, PPC::V26, PPC::V27,
+  PPC::V28, PPC::V29, PPC::V30, PPC::V31
+};
+static const unsigned CRBITRegs[32] = { // LT/GT/EQ/UN bit of each of CR0-CR7
+  PPC::CR0LT, PPC::CR0GT, PPC::CR0EQ, PPC::CR0UN,
+  PPC::CR1LT, PPC::CR1GT, PPC::CR1EQ, PPC::CR1UN,
+  PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN,
+  PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN,
+  PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN,
+  PPC::CR5LT, PPC::CR5GT, PPC::CR5EQ, PPC::CR5UN,
+  PPC::CR6LT, PPC::CR6GT, PPC::CR6EQ, PPC::CR6UN,
+  PPC::CR7LT, PPC::CR7GT, PPC::CR7EQ, PPC::CR7UN
+};
+static const unsigned CRRegs[8] = {     // CR0-CR7; CRRC and CR bit-mask operands
+  PPC::CR0, PPC::CR1, PPC::CR2, PPC::CR3,
+  PPC::CR4, PPC::CR5, PPC::CR6, PPC::CR7
+};
+
+struct PPCOperand;
+
+class PPCAsmParser : public MCTargetAsmParser {
+  MCSubtargetInfo &STI;
+  MCAsmParser &Parser;
+  bool IsPPC64; // true iff the subtarget's triple has arch Triple::ppc64
+
+  MCAsmParser &getParser() const { return Parser; }
+  MCAsmLexer &getLexer() const { return Parser.getLexer(); }
+
+  void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); }
+  bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
+
+  bool isPPC64() const { return IsPPC64; }
+
+  bool MatchRegisterName(const AsmToken &Tok,
+                         unsigned &RegNo, int64_t &IntVal);
+
+  virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
+
+  bool ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+  bool ParseDirectiveWord(unsigned Size, SMLoc L);
+  bool ParseDirectiveTC(unsigned Size, SMLoc L);
+
+  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+                               SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+                               MCStreamer &Out, unsigned &ErrorInfo,
+                               bool MatchingInlineAsm);
+
+  void ProcessInstruction(MCInst &Inst,
+                          const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
+
+  /// @name Auto-generated Match Functions
+  /// @{
+
+#define GET_ASSEMBLER_HEADER
+#include "PPCGenAsmMatcher.inc"
+
+  /// @}
+
+
+public:
+  PPCAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser)
+    : MCTargetAsmParser(), STI(_STI), Parser(_Parser) {
+    // Determine 64-bit vs. 32-bit mode from the subtarget's target triple.
+    Triple TheTriple(STI.getTargetTriple());
+    IsPPC64 = TheTriple.getArch() == Triple::ppc64;
+    // Initialize the set of available features from the subtarget feature bits.
+    setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+  }
+
+  virtual bool ParseInstruction(ParseInstructionInfo &Info,
+                                StringRef Name, SMLoc NameLoc,
+                                SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+  virtual bool ParseDirective(AsmToken DirectiveID);
+};
+
+/// PPCOperand - Instances of this class represent a parsed operand of a
+/// PowerPC machine instruction.
+struct PPCOperand : public MCParsedAsmOperand {
+  enum KindTy {
+    Token,
+    Immediate,
+    Expression
+  } Kind;
+
+  SMLoc StartLoc, EndLoc;
+  bool IsPPC64;
+
+  struct TokOp {
+    const char *Data;
+    unsigned Length;
+  };
+
+  struct ImmOp {
+    int64_t Val;
+  };
+
+  struct ExprOp {
+    const MCExpr *Val;
+  };
+
+  union {
+    struct TokOp Tok;
+    struct ImmOp Imm;
+    struct ExprOp Expr;
+  };
+
+  PPCOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
+public:
+  PPCOperand(const PPCOperand &o) : MCParsedAsmOperand() {
+    Kind = o.Kind;
+    StartLoc = o.StartLoc;
+    EndLoc = o.EndLoc;
+    IsPPC64 = o.IsPPC64;
+    switch (Kind) {
+    case Token:
+      Tok = o.Tok;
+      break;
+    case Immediate:
+      Imm = o.Imm;
+      break;
+    case Expression:
+      Expr = o.Expr;
+      break;
+    }
+  }
+
+  /// getStartLoc - Get the location of the first token of this operand.
+  SMLoc getStartLoc() const { return StartLoc; }
+
+  /// getEndLoc - Get the location of the last token of this operand.
+  SMLoc getEndLoc() const { return EndLoc; }
+
+  /// isPPC64 - True if this operand is for an instruction in 64-bit mode.
+  bool isPPC64() const { return IsPPC64; }
+
+  int64_t getImm() const {
+    assert(Kind == Immediate && "Invalid access!");
+    return Imm.Val;
+  }
+
+  const MCExpr *getExpr() const {
+    assert(Kind == Expression && "Invalid access!");
+    return Expr.Val;
+  }
+
+  unsigned getReg() const {
+    assert(isRegNumber() && "Invalid access!");
+    return (unsigned) Imm.Val;
+  }
+
+  unsigned getCCReg() const {
+    assert(isCCRegNumber() && "Invalid access!");
+    return (unsigned) Imm.Val;
+  }
+
+  unsigned getCRBitMask() const {
+    assert(isCRBitMask() && "Invalid access!");
+    return 7 - CountTrailingZeros_32(Imm.Val);
+  }
+
+  bool isToken() const { return Kind == Token; }
+  bool isImm() const { return Kind == Immediate || Kind == Expression; }
+  bool isU5Imm() const { return Kind == Immediate && isUInt<5>(getImm()); }
+  bool isS5Imm() const { return Kind == Immediate && isInt<5>(getImm()); }
+  bool isU6Imm() const { return Kind == Immediate && isUInt<6>(getImm()); }
+  bool isU16Imm() const { return Kind == Expression ||
+                                 (Kind == Immediate && isUInt<16>(getImm())); }
+  bool isS16Imm() const { return Kind == Expression ||
+                                 (Kind == Immediate && isInt<16>(getImm())); }
+  bool isS16ImmX4() const { return Kind == Expression ||
+                                   (Kind == Immediate && isInt<16>(getImm()) &&
+                                    (getImm() & 3) == 0); }
+  bool isRegNumber() const { return Kind == Immediate && isUInt<5>(getImm()); }
+  bool isCCRegNumber() const { return Kind == Immediate &&
+                                      isUInt<3>(getImm()); }
+  bool isCRBitMask() const { return Kind == Immediate && isUInt<8>(getImm()) &&
+                                    isPowerOf2_32(getImm()); }
+  bool isMem() const { return false; }
+  bool isReg() const { return false; }
+
+  void addRegOperands(MCInst &Inst, unsigned N) const {
+    llvm_unreachable("addRegOperands");
+  }
+
+  void addRegGPRCOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::CreateReg(RRegs[getReg()]));
+  }
+
+  void addRegGPRCNoR0Operands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::CreateReg(RRegsNoR0[getReg()]));
+  }
+
+  void addRegG8RCOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::CreateReg(XRegs[getReg()]));
+  }
+
+  void addRegG8RCNoX0Operands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::CreateReg(XRegsNoX0[getReg()]));
+  }
+
+  void addRegGxRCOperands(MCInst &Inst, unsigned N) const {
+    if (isPPC64())
+      addRegG8RCOperands(Inst, N);
+    else
+      addRegGPRCOperands(Inst, N);
+  }
+
+  void addRegGxRCNoR0Operands(MCInst &Inst, unsigned N) const {
+    if (isPPC64())
+      addRegG8RCNoX0Operands(Inst, N);
+    else
+      addRegGPRCNoR0Operands(Inst, N);
+  }
+
+  void addRegF4RCOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::CreateReg(FRegs[getReg()]));
+  }
+
+  void addRegF8RCOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::CreateReg(FRegs[getReg()]));
+  }
+
+  void addRegVRRCOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::CreateReg(VRegs[getReg()]));
+  }
+
+  void addRegCRBITRCOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::CreateReg(CRBITRegs[getReg()]));
+  }
+
+  void addRegCRRCOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::CreateReg(CRRegs[getCCReg()]));
+  }
+
+  void addCRBitMaskOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::CreateReg(CRRegs[getCRBitMask()]));
+  }
+
+  void addImmOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    if (Kind == Immediate)
+      Inst.addOperand(MCOperand::CreateImm(getImm()));
+    else
+      Inst.addOperand(MCOperand::CreateExpr(getExpr()));
+  }
+
+  void addDispRIOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    if (Kind == Immediate)
+      Inst.addOperand(MCOperand::CreateImm(getImm()));
+    else
+      Inst.addOperand(MCOperand::CreateExpr(getExpr()));
+  }
+
+  void addDispRIXOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    if (Kind == Immediate)
+      Inst.addOperand(MCOperand::CreateImm(getImm() / 4));
+    else
+      Inst.addOperand(MCOperand::CreateExpr(getExpr()));
+  }
+
+  StringRef getToken() const {
+    assert(Kind == Token && "Invalid access!");
+    return StringRef(Tok.Data, Tok.Length);
+  }
+
+  virtual void print(raw_ostream &OS) const;
+
+  static PPCOperand *CreateToken(StringRef Str, SMLoc S, bool IsPPC64) {
+    PPCOperand *Op = new PPCOperand(Token);
+    Op->Tok.Data = Str.data();
+    Op->Tok.Length = Str.size();
+    Op->StartLoc = S;
+    Op->EndLoc = S;
+    Op->IsPPC64 = IsPPC64;
+    return Op;
+  }
+
+  static PPCOperand *CreateImm(int64_t Val, SMLoc S, SMLoc E, bool IsPPC64) {
+    PPCOperand *Op = new PPCOperand(Immediate);
+    Op->Imm.Val = Val;
+    Op->StartLoc = S;
+    Op->EndLoc = E;
+    Op->IsPPC64 = IsPPC64;
+    return Op;
+  }
+
+  static PPCOperand *CreateExpr(const MCExpr *Val,
+                                SMLoc S, SMLoc E, bool IsPPC64) {
+    PPCOperand *Op = new PPCOperand(Expression);
+    Op->Expr.Val = Val;
+    Op->StartLoc = S;
+    Op->EndLoc = E;
+    Op->IsPPC64 = IsPPC64;
+    return Op;
+  }
+};
+
+} // end anonymous namespace.
+
+void PPCOperand::print(raw_ostream &OS) const {
+  switch (Kind) {
+  case Token:
+    OS << "'" << getToken() << "'";
+    break;
+  case Immediate:
+    OS << getImm();
+    break;
+  case Expression:
+    getExpr()->print(OS);
+    break;
+  }
+}
+
+
+void PPCAsmParser::
+ProcessInstruction(MCInst &Inst,
+                   const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+  switch (Inst.getOpcode()) { // rewrite SLWI/SRWI/SLDI/SRDI; others untouched
+  case PPC::SLWI: { // SLWI a,b,N  ->  RLWINM a,b,N,0,31-N
+    MCInst TmpInst;
+    int64_t N = Inst.getOperand(2).getImm();
+    TmpInst.setOpcode(PPC::RLWINM);
+    TmpInst.addOperand(Inst.getOperand(0));
+    TmpInst.addOperand(Inst.getOperand(1));
+    TmpInst.addOperand(MCOperand::CreateImm(N));
+    TmpInst.addOperand(MCOperand::CreateImm(0));
+    TmpInst.addOperand(MCOperand::CreateImm(31 - N));
+    Inst = TmpInst;
+    break;
+  }
+  case PPC::SRWI: { // SRWI a,b,N  ->  RLWINM a,b,32-N,N,31
+    MCInst TmpInst;
+    int64_t N = Inst.getOperand(2).getImm();
+    TmpInst.setOpcode(PPC::RLWINM);
+    TmpInst.addOperand(Inst.getOperand(0));
+    TmpInst.addOperand(Inst.getOperand(1));
+    TmpInst.addOperand(MCOperand::CreateImm(32 - N));
+    TmpInst.addOperand(MCOperand::CreateImm(N));
+    TmpInst.addOperand(MCOperand::CreateImm(31));
+    Inst = TmpInst;
+    break;
+  }
+  case PPC::SLDI: { // SLDI a,b,N  ->  RLDICR a,b,N,63-N
+    MCInst TmpInst;
+    int64_t N = Inst.getOperand(2).getImm();
+    TmpInst.setOpcode(PPC::RLDICR);
+    TmpInst.addOperand(Inst.getOperand(0));
+    TmpInst.addOperand(Inst.getOperand(1));
+    TmpInst.addOperand(MCOperand::CreateImm(N));
+    TmpInst.addOperand(MCOperand::CreateImm(63 - N));
+    Inst = TmpInst;
+    break;
+  }
+  case PPC::SRDI: { // SRDI a,b,N  ->  RLDICL a,b,64-N,N
+    MCInst TmpInst;
+    int64_t N = Inst.getOperand(2).getImm();
+    TmpInst.setOpcode(PPC::RLDICL);
+    TmpInst.addOperand(Inst.getOperand(0));
+    TmpInst.addOperand(Inst.getOperand(1));
+    TmpInst.addOperand(MCOperand::CreateImm(64 - N));
+    TmpInst.addOperand(MCOperand::CreateImm(N));
+    Inst = TmpInst;
+    break;
+  }
+  }
+}
+
+/// Match \p Operands against the generated instruction table.  On success
+/// the instruction is post-processed, given location \p IDLoc, emitted to
+/// \p Out, and false is returned.  On a match failure a diagnostic is
+/// reported through Error() and its result is returned.
+bool PPCAsmParser::
+MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+                        SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+                        MCStreamer &Out, unsigned &ErrorInfo,
+                        bool MatchingInlineAsm) {
+  MCInst Inst;
+  const unsigned MatchResult =
+    MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm);
+
+  if (MatchResult == Match_Success) {
+    // Post-process instructions (typically extended mnemonics)
+    ProcessInstruction(Inst, Operands);
+    Inst.setLoc(IDLoc);
+    Out.EmitInstruction(Inst);
+    return false;
+  }
+  if (MatchResult == Match_MissingFeature)
+    return Error(IDLoc, "instruction use requires an option to be enabled");
+  if (MatchResult == Match_MnemonicFail)
+    return Error(IDLoc, "unrecognized instruction mnemonic");
+  if (MatchResult != Match_InvalidOperand)
+    llvm_unreachable("Implement any new match types added!");
+  // Invalid operand: point at the offending operand when it is known.
+  if (ErrorInfo == ~0U)
+    return Error(IDLoc, "invalid operand for instruction");
+  if (ErrorInfo >= Operands.size())
+    return Error(IDLoc, "too few operands for instruction");
+  SMLoc Where = static_cast<PPCOperand*>(Operands[ErrorInfo])->getStartLoc();
+  return Error(Where == SMLoc() ? IDLoc : Where,
+               "invalid operand for instruction");
+}
+
+/// Try to interpret \p Tok as a register name, ignoring case.  Recognises
+/// "lr", "ctr", rN/fN/vN with N < 32 and crN with N < 8.  On a match sets
+/// \p RegNo and \p IntVal (the number; 8 for lr, 9 for ctr) and returns
+/// false.  Returns true when the token is not a register name.
+bool PPCAsmParser::
+MatchRegisterName(const AsmToken &Tok, unsigned &RegNo, int64_t &IntVal) {
+  if (!Tok.is(AsmToken::Identifier))
+    return true;
+
+  const StringRef Name = Tok.getString();
+  const StringRef Lead1 = Name.substr(0, 1);
+  if (Name.equals_lower("lr")) {
+    RegNo = isPPC64()? PPC::LR8 : PPC::LR;
+    IntVal = 8;
+  } else if (Name.equals_lower("ctr")) {
+    RegNo = isPPC64()? PPC::CTR8 : PPC::CTR;
+    IntVal = 9;
+  } else if (Lead1.equals_lower("r") &&
+             !Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) {
+    RegNo = isPPC64()? XRegs[IntVal] : RRegs[IntVal];
+  } else if (Lead1.equals_lower("f") &&
+             !Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) {
+    RegNo = FRegs[IntVal];
+  } else if (Lead1.equals_lower("v") &&
+             !Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) {
+    RegNo = VRegs[IntVal];
+  } else if (Name.substr(0, 2).equals_lower("cr") &&
+             !Name.substr(2).getAsInteger(10, IntVal) && IntVal < 8) {
+    RegNo = CRRegs[IntVal];
+  } else {
+    return true; // Not a register name.
+  }
+  return false;
+}
+
+/// Parse the current token as a register name.  \p StartLoc and \p EndLoc
+/// receive the token's bounds; \p RegNo is zeroed first and set on a match.
+/// Consumes the token and returns false on a match, else reports an error.
+bool PPCAsmParser::
+ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) {
+  const AsmToken &Cur = Parser.getTok();
+  StartLoc = Cur.getLoc();
+  EndLoc = Cur.getEndLoc();
+  RegNo = 0;
+  int64_t RegNum; // Required by MatchRegisterName; unused here.
+  if (MatchRegisterName(Cur, RegNo, RegNum))
+    return Error(StartLoc, "invalid register name");
+  Parser.Lex(); // Eat identifier token.
+  return false;
+}
+
+/// Parse one operand (a '%' register, or an expression optionally followed
+/// by a "(reg)" base register) into \p Operands; returns true on error.
+bool PPCAsmParser::
+ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+  SMLoc S = Parser.getTok().getLoc();
+  SMLoc E;
+  const MCExpr *EVal;
+  PPCOperand *Op;
+
+  // Attempt to parse the next token as an immediate
+  switch (getLexer().getKind()) {
+  // Special handling for register names.  These are interpreted
+  // as immediates corresponding to the register number.
+  case AsmToken::Percent:
+    Parser.Lex(); // Eat the '%'.
+    unsigned RegNo;
+    int64_t IntVal;
+    if (!MatchRegisterName(Parser.getTok(), RegNo, IntVal)) {
+      Parser.Lex(); // Eat the identifier token.
+      E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+      Op = PPCOperand::CreateImm(IntVal, S, E, isPPC64());
+      Operands.push_back(Op);
+      return false;
+    }
+    return Error(S, "invalid register name");
+
+  // All other expressions
+  case AsmToken::LParen:
+  case AsmToken::Plus:
+  case AsmToken::Minus:
+  case AsmToken::Integer:
+  case AsmToken::Identifier:
+  case AsmToken::Dot:
+  case AsmToken::Dollar:
+    if (!getParser().parseExpression(EVal))
+      break;
+    /* fall through */
+  default:
+    return Error(S, "unknown operand");
+  }
+
+  // The operand is consumed, so it ends just before the current token.
+  E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+  if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(EVal))
+    Op = PPCOperand::CreateImm(CE->getValue(), S, E, isPPC64());
+  else
+    Op = PPCOperand::CreateExpr(EVal, S, E, isPPC64());
+  Operands.push_back(Op);
+
+  // Check for D-form memory operands
+  if (getLexer().is(AsmToken::LParen)) {
+    Parser.Lex(); // Eat the '('.
+    S = Parser.getTok().getLoc();
+    int64_t IntVal;
+    switch (getLexer().getKind()) {
+    case AsmToken::Percent:
+      Parser.Lex(); // Eat the '%'.
+      unsigned RegNo;
+      if (MatchRegisterName(Parser.getTok(), RegNo, IntVal))
+        return Error(S, "invalid register name");
+      Parser.Lex(); // Eat the identifier token.
+      break;
+    case AsmToken::Integer:
+      if (getParser().parseAbsoluteExpression(IntVal) ||
+          IntVal < 0 || IntVal > 31)
+        return Error(S, "invalid register number");
+      break;
+    default:
+      return Error(S, "invalid memory operand");
+    }
+
+    if (getLexer().isNot(AsmToken::RParen))
+      return Error(Parser.getTok().getLoc(), "missing ')'");
+    E = Parser.getTok().getLoc();
+    Parser.Lex(); // Eat the ')'.
+
+    Op = PPCOperand::CreateImm(IntVal, S, E, isPPC64());
+    Operands.push_back(Op);
+  }
+
+  return false;
+}
+
+/// Parse an instruction mnemonic followed by its operands.
+///
+/// \p Name is split at its first '.': the part before it and, if a '.' is
+/// present, the remainder starting at the '.' are pushed as separate tokens
+/// to match what TableGen is doing.  Comma-separated operands are then
+/// parsed with ParseOperand; returns true if one of them fails to parse.
+bool PPCAsmParser::
+ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
+                 SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+  const size_t DotPos = Name.find('.');
+  const bool HasDot = DotPos != StringRef::npos;
+
+  // The first operand is the token for the instruction name.
+  Operands.push_back(
+    PPCOperand::CreateToken(Name.substr(0, DotPos), NameLoc, isPPC64()));
+  if (HasDot) {
+    // Everything from the '.' onwards forms a second token.
+    SMLoc DotLoc = SMLoc::getFromPointer(NameLoc.getPointer() + DotPos);
+    Operands.push_back(
+      PPCOperand::CreateToken(Name.substr(DotPos), DotLoc, isPPC64()));
+  }
+
+  // If there are no more operands then finish
+  if (getLexer().is(AsmToken::EndOfStatement))
+    return false;
+
+  // Parse operands for as long as each one is followed by a comma.
+  for (;;) {
+    if (ParseOperand(Operands))
+      return true;
+    if (getLexer().isNot(AsmToken::Comma))
+      break;
+    getLexer().Lex(); // Consume the comma token
+  }
+  return false;
+}
+
+/// Handle the PPC-specific directives ".word" (4-byte values) and ".tc"
+/// (8-byte entries on PPC64, else 4-byte); returns true for anything else.
+bool PPCAsmParser::ParseDirective(AsmToken DirectiveID) {
+  const StringRef Directive = DirectiveID.getIdentifier();
+  const SMLoc Loc = DirectiveID.getLoc();
+  if (Directive == ".word") return ParseDirectiveWord(4, Loc);
+  if (Directive == ".tc") return ParseDirectiveTC(isPPC64()? 8 : 4, Loc);
+  return true;
+}
+
+/// ParseDirectiveWord
+///  ::= .word [ expression (, expression)* ]
+/// Emits each expression as a \p Size byte value; \p L locates diagnostics.
+bool PPCAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
+  bool ExpectValue = getLexer().isNot(AsmToken::EndOfStatement);
+  while (ExpectValue) {
+    const MCExpr *Value;
+    if (getParser().parseExpression(Value))
+      return true;
+    getParser().getStreamer().EmitValue(Value, Size);
+    // After a value comes the end of the statement or a comma.
+    if (getLexer().is(AsmToken::EndOfStatement)) {
+      ExpectValue = false;
+    } else if (getLexer().isNot(AsmToken::Comma)) {
+      return Error(L, "unexpected token in directive");
+    } else {
+      Parser.Lex(); // Eat the comma; another value must follow.
+    }
+  }
+
+  Parser.Lex(); // Eat the end of statement.
+  return false;
+}
+
+/// ParseDirectiveTC
+///  ::= .tc [ symbol (, expression)* ]
+/// Aligns to \p Size bytes and emits the expressions as \p Size byte values.
+bool PPCAsmParser::ParseDirectiveTC(unsigned Size, SMLoc L) {
+  // Skip TC symbol, which is only used with XCOFF.
+  while (getLexer().isNot(AsmToken::Comma)) {
+    if (getLexer().is(AsmToken::EndOfStatement))
+      return Error(L, "unexpected token in directive");
+    Parser.Lex();
+  }
+  Parser.Lex(); // Eat the comma that ends the symbol.
+
+  // Align to word size.
+  getParser().getStreamer().EmitValueToAlignment(Size);
+
+  return ParseDirectiveWord(Size, L); // Emit expressions.
+}
+
+/// Force static initialization: register PPCAsmParser for both PPC targets.
+extern "C" void LLVMInitializePowerPCAsmParser() {
+  RegisterMCAsmParser<PPCAsmParser> ForPPC32(ThePPC32Target);
+  RegisterMCAsmParser<PPCAsmParser> ForPPC64(ThePPC64Target);
+}
+
+#define GET_REGISTER_MATCHER
+#define GET_MATCHER_IMPLEMENTATION
+#include "PPCGenAsmMatcher.inc"
diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt
index 6036428..71803cd 100644
--- a/lib/Target/PowerPC/CMakeLists.txt
+++ b/lib/Target/PowerPC/CMakeLists.txt
@@ -1,6 +1,7 @@
 set(LLVM_TARGET_DEFINITIONS PPC.td)
 
 tablegen(LLVM PPCGenAsmWriter.inc -gen-asm-writer)
+tablegen(LLVM PPCGenAsmMatcher.inc -gen-asm-matcher)
 tablegen(LLVM PPCGenCodeEmitter.inc -gen-emitter)
 tablegen(LLVM PPCGenMCCodeEmitter.inc -gen-emitter -mc-emitter)
 tablegen(LLVM PPCGenRegisterInfo.inc -gen-register-info)
@@ -32,6 +33,7 @@ add_llvm_target(PowerPCCodeGen
 
 add_dependencies(LLVMPowerPCCodeGen intrinsics_gen)
 
+add_subdirectory(AsmParser)
 add_subdirectory(InstPrinter)
 add_subdirectory(TargetInfo)
 add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
index bacc108..93fca00 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -151,8 +151,8 @@ void PPCInstPrinter::printBranchOperand(const MCInst *MI, unsigned OpNo,
     return printOperand(MI, OpNo, O);
 
   // Branches can take an immediate operand.  This is used by the branch
-  // selection pass to print $+8, an eight byte displacement from the PC.
-  O << "$+";
+  // selection pass to print .+8, an eight byte displacement from the PC.
+  O << ".+";
   printAbsAddrOperand(MI, OpNo, O);
 }
 
diff --git a/lib/Target/PowerPC/LLVMBuild.txt b/lib/Target/PowerPC/LLVMBuild.txt
index 95fac54..7b3e843 100644
--- a/lib/Target/PowerPC/LLVMBuild.txt
+++ b/lib/Target/PowerPC/LLVMBuild.txt
@@ -16,7 +16,7 @@
 ;===------------------------------------------------------------------------===;
 
 [common]
-subdirectories = InstPrinter MCTargetDesc TargetInfo
+subdirectories = AsmParser InstPrinter MCTargetDesc TargetInfo
 
 [component_0]
 type = TargetGroup
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
index 84e4175..7a84723 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
@@ -77,6 +77,9 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
     case PPC::fixup_ppc_br24:
       Type = ELF::R_PPC_REL24;
       break;
+    case PPC::fixup_ppc_brcond14:
+      Type = ELF::R_PPC_REL14;
+      break;
     case FK_Data_4:
     case FK_PCRel_4:
       Type = ELF::R_PPC_REL32;
@@ -104,7 +107,8 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
       case MCSymbolRefExpr::VK_PPC_DTPREL16_HA:
         Type = ELF::R_PPC64_DTPREL16_HA;
         break;
-      case MCSymbolRefExpr::VK_None:
+      case MCSymbolRefExpr::VK_PPC_GAS_HA16:
+      case MCSymbolRefExpr::VK_PPC_DARWIN_HA16:
         Type = ELF::R_PPC_ADDR16_HA;
 	break;
       case MCSymbolRefExpr::VK_PPC_TOC16_HA:
@@ -131,6 +135,10 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
         Type = ELF::R_PPC64_DTPREL16_LO;
         break;
       case MCSymbolRefExpr::VK_None:
+        Type = ELF::R_PPC_ADDR16;
+        break;
+      case MCSymbolRefExpr::VK_PPC_GAS_LO16:
+      case MCSymbolRefExpr::VK_PPC_DARWIN_LO16:
         Type = ELF::R_PPC_ADDR16_LO;
 	break;
       case MCSymbolRefExpr::VK_PPC_TOC_ENTRY:
@@ -153,6 +161,10 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
       case MCSymbolRefExpr::VK_None:
         Type = ELF::R_PPC64_ADDR16_DS;
         break;
+      case MCSymbolRefExpr::VK_PPC_GAS_LO16:
+      case MCSymbolRefExpr::VK_PPC_DARWIN_LO16:
+        Type = ELF::R_PPC64_ADDR16_LO_DS;
+        break;
       case MCSymbolRefExpr::VK_PPC_TOC_ENTRY:
         Type = ELF::R_PPC64_TOC16_DS;
 	break;
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp
index d84eb9c..853e505 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp
@@ -29,3 +29,18 @@ PPC::Predicate PPC::InvertPredicate(PPC::Predicate Opcode) {
   }
   llvm_unreachable("Unknown PPC branch opcode!");
 }
+
+// Map each predicate to the one to use when the two compared operands are
+// exchanged: LT <-> GT and GE <-> LE; all others map to themselves.
+PPC::Predicate PPC::getSwappedPredicate(PPC::Predicate Opcode) {
+  switch (Opcode) {
+  case PPC::PRED_LT: return PPC::PRED_GT;
+  case PPC::PRED_GT: return PPC::PRED_LT;
+  case PPC::PRED_GE: return PPC::PRED_LE;
+  case PPC::PRED_LE: return PPC::PRED_GE;
+  case PPC::PRED_EQ: case PPC::PRED_NE: case PPC::PRED_NU: case PPC::PRED_UN:
+    return Opcode;
+  }
+  llvm_unreachable("Unknown PPC branch opcode!");
+}
+
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
index ad2b018..444758c 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
@@ -37,6 +37,10 @@ namespace PPC {
   
   /// Invert the specified predicate.  != -> ==, < -> >=.
   Predicate InvertPredicate(Predicate Opcode);
+
+  /// Assuming the condition register is set by MI(a,b), return the predicate
+  /// to use once the instruction is changed to set it by MI(b,a) instead.
+  Predicate getSwappedPredicate(Predicate Opcode);
 }
 }
 
diff --git a/lib/Target/PowerPC/Makefile b/lib/Target/PowerPC/Makefile
index 1617b26..6666694 100644
--- a/lib/Target/PowerPC/Makefile
+++ b/lib/Target/PowerPC/Makefile
@@ -12,12 +12,12 @@ LIBRARYNAME = LLVMPowerPCCodeGen
 TARGET = PPC
 
 # Make sure that tblgen is run, first thing.
-BUILT_SOURCES = PPCGenRegisterInfo.inc \
+BUILT_SOURCES = PPCGenRegisterInfo.inc PPCGenAsmMatcher.inc \
                 PPCGenAsmWriter.inc  PPCGenCodeEmitter.inc \
                 PPCGenInstrInfo.inc PPCGenDAGISel.inc \
                 PPCGenSubtargetInfo.inc PPCGenCallingConv.inc \
                 PPCGenMCCodeEmitter.inc
 
-DIRS = InstPrinter TargetInfo MCTargetDesc
+DIRS = AsmParser InstPrinter TargetInfo MCTargetDesc
 
 include $(LEVEL)/Makefile.common
diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h
index 446b685..b4be51a 100644
--- a/lib/Target/PowerPC/PPC.h
+++ b/lib/Target/PowerPC/PPC.h
@@ -31,6 +31,7 @@ namespace llvm {
   class MCInst;
 
   FunctionPass *createPPCCTRLoops();
+  FunctionPass *createPPCEarlyReturnPass();
   FunctionPass *createPPCBranchSelectionPass();
   FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
   FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM,
@@ -40,7 +41,7 @@ namespace llvm {
 
   /// \brief Creates an PPC-specific Target Transformation Info pass.
   ImmutablePass *createPPCTargetTransformInfoPass(const PPCTargetMachine *TM);
-  
+
   namespace PPCII {
     
   /// Target Operand Flag enum.
diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td
index 3892162..eb73c67 100644
--- a/lib/Target/PowerPC/PPC.td
+++ b/lib/Target/PowerPC/PPC.td
@@ -95,6 +95,43 @@ def FeatureQPX       : SubtargetFeature<"qpx","HasQPX", "true",
 // VSX          p7                 vector-scalar instruction set
 
 //===----------------------------------------------------------------------===//
+// Classes used for relation maps.
+//===----------------------------------------------------------------------===//
+// RecFormRel - Filter class used to relate non-record-form instructions with
+// their record-form variants.
+class RecFormRel;
+
+//===----------------------------------------------------------------------===//
+// Relation Map Definitions.
+//===----------------------------------------------------------------------===//
+
+def getRecordFormOpcode : InstrMapping {
+  let FilterClass = "RecFormRel";
+  // Instructions with the same BaseName and Interpretation64Bit values
+  // form a row.
+  let RowFields = ["BaseName", "Interpretation64Bit"];
+  // Instructions with the same RC value form a column.
+  let ColFields = ["RC"];
+  // The key column holds the non-record-form instructions (RC=0).
+  let KeyCol = ["0"];
+  // The value column holds the record-form instructions (RC=1).
+  let ValueCols = [["1"]];
+}
+
+def getNonRecordFormOpcode : InstrMapping {
+  let FilterClass = "RecFormRel";
+  // Instructions with the same BaseName and Interpretation64Bit values
+  // form a row.
+  let RowFields = ["BaseName", "Interpretation64Bit"];
+  // Instructions with the same RC value form a column.
+  let ColFields = ["RC"];
+  // The key column holds the record-form instructions (RC=1).
+  let KeyCol = ["1"];
+  // The value column holds the non-record-form instructions (RC=0).
+  let ValueCols = [["0"]];
+}
+
+//===----------------------------------------------------------------------===//
 // Register File Description
 //===----------------------------------------------------------------------===//
 
@@ -216,7 +253,6 @@ def : ProcessorModel<"ppc64", G5Model,
                    FeatureFRSQRTE, FeatureSTFIWX,
                    Feature64Bit /*, Feature64BitRegs */]>;
 
-
 //===----------------------------------------------------------------------===//
 // Calling Conventions
 //===----------------------------------------------------------------------===//
@@ -232,9 +268,14 @@ def PPCAsmWriter : AsmWriter {
   bit isMCAsmWriter = 1;
 }
 
+def PPCAsmParser : AsmParser {
+  let ShouldEmitMatchRegisterName = 0; // MatchRegisterName is hand-written.
+}
+
 def PPC : Target {
   // Information about the instructions.
   let InstructionSet = PPCInstrInfo;
   
   let AssemblyWriters = [PPCAsmWriter];
+  let AssemblyParsers = [PPCAsmParser];
 }
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 96a9f0a..3c7cc4e 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -721,7 +721,7 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
     return AsmPrinter::EmitFunctionEntryLabel();
     
   // Emit an official procedure descriptor.
-  const MCSection *Current = OutStreamer.getCurrentSection();
+  MCSectionSubPair Current = OutStreamer.getCurrentSection();
   const MCSectionELF *Section = OutStreamer.getContext().getELFSection(".opd",
       ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC,
       SectionKind::getReadOnly());
@@ -741,7 +741,7 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
                         8/*size*/);
   // Emit a null environment pointer.
   OutStreamer.EmitIntValue(0, 8 /* size */);
-  OutStreamer.SwitchSection(Current);
+  OutStreamer.SwitchSection(Current.first, Current.second);
 
   MCSymbol *RealFnSym = OutContext.GetOrCreateSymbol(
                           ".L." + Twine(CurrentFnSym->getName()));
diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp
index bd1c378..3e608ca 100644
--- a/lib/Target/PowerPC/PPCBranchSelector.cpp
+++ b/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -112,15 +112,21 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
       unsigned MBBStartOffset = 0;
       for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
            I != E; ++I) {
-        if (I->getOpcode() != PPC::BCC || I->getOperand(2).isImm()) {
+        MachineBasicBlock *Dest = 0;
+        if (I->getOpcode() == PPC::BCC && !I->getOperand(2).isImm())
+          Dest = I->getOperand(2).getMBB();
+        else if ((I->getOpcode() == PPC::BDNZ8 || I->getOpcode() == PPC::BDNZ ||
+                  I->getOpcode() == PPC::BDZ8  || I->getOpcode() == PPC::BDZ) &&
+                 !I->getOperand(0).isImm())
+          Dest = I->getOperand(0).getMBB();
+
+        if (!Dest) {
           MBBStartOffset += TII->GetInstSizeInBytes(I);
           continue;
         }
         
         // Determine the offset from the current branch to the destination
         // block.
-        MachineBasicBlock *Dest = I->getOperand(2).getMBB();
-        
         int BranchSize;
         if (Dest->getNumber() <= MBB.getNumber()) {
           // If this is a backwards branch, the delta is the offset from the
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index 3244b90..c845909 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -223,9 +223,7 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
 
   // If we are a leaf function, and use up to 224 bytes of stack space,
   // don't have a frame pointer, calls, or dynamic alloca then we do not need
-  // to adjust the stack pointer (we fit in the Red Zone).  For 64-bit
-  // SVR4, we also require a stack frame if we need to spill the CR,
-  // since this spill area is addressed relative to the stack pointer.
+  // to adjust the stack pointer (we fit in the Red Zone).
   // The 32-bit SVR4 ABI has no Red Zone. However, it can still generate
   // stackless code if all local vars are reg-allocated.
   bool DisableRedZone = MF.getFunction()->getAttributes().
@@ -237,9 +235,6 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
       FrameSize <= 224 &&                          // Fits in red zone.
       !MFI->hasVarSizedObjects() &&                // No dynamic alloca.
       !MFI->adjustsStack() &&                      // No calls.
-      !(Subtarget.isPPC64() &&                     // No 64-bit SVR4 CRsave.
-	Subtarget.isSVR4ABI()
-	&& spillsCR(MF)) &&
       (!ALIGN_STACK || MaxAlign <= TargetAlign)) { // No special alignment.
     // No need for frame
     if (UpdateMF)
@@ -373,6 +368,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
   // Check if the link register (LR) must be saved.
   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
   bool MustSaveLR = FI->mustSaveLR();
+  const SmallVector<unsigned, 3> &MustSaveCRs = FI->getMustSaveCRs();
   // Do we have a frame pointer for this function?
   bool HasFP = hasFP(MF);
 
@@ -394,6 +390,13 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
     if (MustSaveLR)
       BuildMI(MBB, MBBI, dl, TII.get(PPC::MFLR8), PPC::X0);
 
+    if (!MustSaveCRs.empty()) {
+      MachineInstrBuilder MIB =
+        BuildMI(MBB, MBBI, dl, TII.get(PPC::MFCR8), PPC::X12);
+      for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
+        MIB.addReg(MustSaveCRs[i], RegState::ImplicitKill);
+    }
+
     if (HasFP)
       BuildMI(MBB, MBBI, dl, TII.get(PPC::STD))
         .addReg(PPC::X31)
@@ -405,6 +408,12 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
         .addReg(PPC::X0)
         .addImm(LROffset / 4)
         .addReg(PPC::X1);
+
+    if (!MustSaveCRs.empty())
+      BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8))
+        .addReg(PPC::X12, getKillRegState(true))
+        .addImm(8)
+        .addReg(PPC::X1);
   } else {
     if (MustSaveLR)
       BuildMI(MBB, MBBI, dl, TII.get(PPC::MFLR), PPC::R0);
@@ -417,6 +426,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
         .addImm(FPOffset)
         .addReg(PPC::R1);
 
+    assert(MustSaveCRs.empty() &&
+           "Prologue CR saving supported only in 64-bit mode");
+
     if (MustSaveLR)
       BuildMI(MBB, MBBI, dl, TII.get(PPC::STW))
         .addReg(PPC::R0)
@@ -580,7 +592,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
       // spilled CRs.
       if (Subtarget.isSVR4ABI()
 	  && (PPC::CR2 <= Reg && Reg <= PPC::CR4)
-	  && !spillsCR(MF))
+	  && MustSaveCRs.empty())
 	continue;
 
       // For 64-bit SVR4 when we have spilled CRs, the spill location
@@ -636,6 +648,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
   // Check if the link register (LR) has been saved.
   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
   bool MustSaveLR = FI->mustSaveLR();
+  const SmallVector<unsigned, 3> &MustSaveCRs = FI->getMustSaveCRs();
   // Do we have a frame pointer for this function?
   bool HasFP = hasFP(MF);
 
@@ -736,10 +749,19 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
       BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X0)
         .addImm(LROffset/4).addReg(PPC::X1);
 
+    if (!MustSaveCRs.empty())
+      BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), PPC::X12)
+        .addImm(8).addReg(PPC::X1);
+
     if (HasFP)
       BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X31)
         .addImm(FPOffset/4).addReg(PPC::X1);
 
+    if (!MustSaveCRs.empty())
+      for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
+        BuildMI(MBB, MBBI, dl, TII.get(PPC::MTCRF8), MustSaveCRs[i])
+          .addReg(PPC::X12, getKillRegState(i == e-1));
+
     if (MustSaveLR)
       BuildMI(MBB, MBBI, dl, TII.get(PPC::MTLR8)).addReg(PPC::X0);
   } else {
@@ -747,6 +769,9 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
       BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), PPC::R0)
           .addImm(LROffset).addReg(PPC::R1);
 
+    assert(MustSaveCRs.empty() &&
+           "Epilogue CR restoring supported only in 64-bit mode");
+
     if (HasFP)
       BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), PPC::R31)
           .addImm(FPOffset).addReg(PPC::R1);
@@ -1122,44 +1147,42 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
     *static_cast<const PPCInstrInfo*>(MF->getTarget().getInstrInfo());
   DebugLoc DL;
   bool CRSpilled = false;
+  MachineInstrBuilder CRMIB;
   
   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
     unsigned Reg = CSI[i].getReg();
     // CR2 through CR4 are the nonvolatile CR fields.
     bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4;
 
-    if (CRSpilled && IsCRField)
-      continue;
-
     // Add the callee-saved register as live-in; it's killed at the spill.
     MBB.addLiveIn(Reg);
 
+    if (CRSpilled && IsCRField) {
+      CRMIB.addReg(Reg, RegState::ImplicitKill);
+      continue;
+    }
+
     // Insert the spill to the stack frame.
     if (IsCRField) {
-      CRSpilled = true;
-      // The first time we see a CR field, store the whole CR into the
-      // save slot via GPR12 (available in the prolog for 32- and 64-bit).
+      PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
       if (Subtarget.isPPC64()) {
-	// 64-bit:  SP+8
-	MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::MFCR8), PPC::X12));
-	MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::STW8))
-			       .addReg(PPC::X12,
-				       getKillRegState(true))
-			       .addImm(8)
-			       .addReg(PPC::X1));
+        // The actual spill will happen at the start of the prologue.
+        FuncInfo->addMustSaveCR(Reg);
       } else {
+        CRSpilled = true;
+        FuncInfo->setSpillsCR();
+
 	// 32-bit:  FP-relative.  Note that we made sure CR2-CR4 all have
 	// the same frame index in PPCRegisterInfo::hasReservedSpillSlot.
-	MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12));
+	CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12)
+                  .addReg(Reg, RegState::ImplicitKill);
+
+	MBB.insert(MI, CRMIB);
 	MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW))
 					 .addReg(PPC::R12,
 						 getKillRegState(true)),
 					 CSI[i].getFrameIdx()));
       }
-      
-      // Record that we spill the CR in this function.
-      PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
-      FuncInfo->setSpillsCR();
     } else {
       const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
       TII.storeRegToStackSlot(MBB, MI, Reg, true,
@@ -1170,7 +1193,8 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
 }
 
 static void
-restoreCRs(bool isPPC64, bool CR2Spilled, bool CR3Spilled, bool CR4Spilled,
+restoreCRs(bool isPPC64, bool is31,
+           bool CR2Spilled, bool CR3Spilled, bool CR4Spilled,
 	   MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
 	   const std::vector<CalleeSavedInfo> &CSI, unsigned CSIIndex) {
 
@@ -1180,14 +1204,10 @@ restoreCRs(bool isPPC64, bool CR2Spilled, bool CR3Spilled, bool CR4Spilled,
   DebugLoc DL;
   unsigned RestoreOp, MoveReg;
 
-  if (isPPC64) {
-    // 64-bit:  SP+8
-    MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::LWZ8), PPC::X12)
-	       .addImm(8)
-	       .addReg(PPC::X1));
-    RestoreOp = PPC::MTCRF8;
-    MoveReg = PPC::X12;
-  } else {
+  if (isPPC64)
+    // This is handled during epilogue generation.
+    return;
+  else {
     // 32-bit:  FP-relative
     MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ),
 					     PPC::R12),
@@ -1297,7 +1317,9 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
       // least one CR register, restore all spilled CRs together.
       if ((CR2Spilled || CR3Spilled || CR4Spilled)
 	  && !(PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
-	restoreCRs(Subtarget.isPPC64(), CR2Spilled, CR3Spilled, CR4Spilled,
+        bool is31 = needsFP(*MF);
+        restoreCRs(Subtarget.isPPC64(), is31,
+                   CR2Spilled, CR3Spilled, CR4Spilled,
 		   MBB, I, CSI, CSIIndex);
 	CR2Spilled = CR3Spilled = CR4Spilled = false;
       }
@@ -1320,9 +1342,11 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
   }
 
   // If we haven't yet spilled the CRs, do so now.
-  if (CR2Spilled || CR3Spilled || CR4Spilled)
-    restoreCRs(Subtarget.isPPC64(), CR2Spilled, CR3Spilled, CR4Spilled,
+  if (CR2Spilled || CR3Spilled || CR4Spilled) {
+    bool is31 = needsFP(*MF); 
+    restoreCRs(Subtarget.isPPC64(), is31, CR2Spilled, CR3Spilled, CR4Spilled,
 	       MBB, I, CSI, CSIIndex);
+  }
 
   return true;
 }
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 95efc11..aed0fbb 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -457,7 +457,7 @@ SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) {
       SH &= 31;
       SDValue Ops[] = { Op0, Op1, getI32Imm(SH), getI32Imm(MB),
                           getI32Imm(ME) };
-      return CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops, 5);
+      return CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops);
     }
   }
   return 0;
@@ -780,7 +780,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
       }
       case ISD::SETGT: {
         SDValue Ops[] = { Op, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
-        Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops, 4),
+        Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops),
                      0);
         return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op,
                                     getI32Imm(1));
@@ -873,7 +873,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
 
   // Get the specified bit.
   SDValue Tmp =
-    SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops, 4), 0);
+    SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
   if (Inv) {
     assert(OtherCondIdx == -1 && "Can't have split plus negation");
     return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1));
@@ -885,7 +885,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
   // Get the other bit of the comparison.
   Ops[1] = getI32Imm((32-(3-OtherCondIdx)) & 31);
   SDValue OtherCond =
-    SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops, 4), 0);
+    SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
 
   return CurDAG->SelectNodeTo(N, PPC::OR, MVT::i32, Tmp, OtherCond);
 }
@@ -1079,7 +1079,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
       SDValue Ops[] = { Offset, Base, Chain };
       return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0),
                                     PPCLowering.getPointerTy(),
-                                    MVT::Other, Ops, 3);
+                                    MVT::Other, Ops);
     } else {
       unsigned Opcode;
       bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
@@ -1114,7 +1114,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
       SDValue Ops[] = { Base, Offset, Chain };
       return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0),
                                     PPCLowering.getPointerTy(),
-                                    MVT::Other, Ops, 3);
+                                    MVT::Other, Ops);
     }
   }
 
@@ -1163,7 +1163,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
         SDValue Ops[] = { N->getOperand(0).getOperand(0),
                             N->getOperand(0).getOperand(1),
                             getI32Imm(0), getI32Imm(MB),getI32Imm(ME) };
-        return CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops, 5);
+        return CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops);
       }
     }
 
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 16fc8a0..3fcafdc 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -71,6 +71,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
   : TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) {
   const PPCSubtarget *Subtarget = &TM.getSubtarget<PPCSubtarget>();
   PPCRegInfo = TM.getRegisterInfo();
+  PPCII = TM.getInstrInfo();
 
   setPow2DivIsCheap();
 
@@ -513,7 +514,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
   setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
 
   setBooleanContents(ZeroOrOneBooleanContent);
-  setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
+  // Altivec instructions set fields to all zeros or all ones.
+  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
 
   if (isPPC64) {
     setStackPointerRegisterToSaveRestore(PPC::X1);
@@ -4672,10 +4674,14 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
       !Op.getOperand(2).getValueType().isFloatingPoint())
     return Op;
 
-  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+  // We might be able to do better than this under some circumstances, but in
+  // general, fsel-based lowering of select is a finite-math-only optimization.
+  // For more information, see section F.3 of the 2.06 ISA specification.
+  if (!DAG.getTarget().Options.NoInfsFPMath ||
+      !DAG.getTarget().Options.NoNaNsFPMath)
+    return Op;
 
-  // Cannot handle SETEQ/SETNE.
-  if (CC == ISD::SETEQ || CC == ISD::SETNE) return Op;
+  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
 
   EVT ResVT = Op.getValueType();
   EVT CmpVT = Op.getOperand(0).getValueType();
@@ -4685,9 +4691,20 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
 
   // If the RHS of the comparison is a 0.0, we don't need to do the
   // subtraction at all.
+  SDValue Sel1;
   if (isFloatingPointZero(RHS))
     switch (CC) {
     default: break;       // SETUO etc aren't handled by fsel.
+    case ISD::SETNE:
+      std::swap(TV, FV);  // SETNE is SETEQ with TV/FV swapped; fall through
+    case ISD::SETEQ:
+      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
+        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
+      Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
+      if (Sel1.getValueType() == MVT::f32)   // Widen the inner select to f64
+        Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
+      return DAG.getNode(PPCISD::FSEL, dl, ResVT,
+                         DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
     case ISD::SETULT:
     case ISD::SETLT:
       std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
@@ -4710,30 +4727,41 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
   SDValue Cmp;
   switch (CC) {
   default: break;       // SETUO etc aren't handled by fsel.
+  case ISD::SETNE:
+    std::swap(TV, FV);  // SETNE is SETEQ with TV/FV swapped; fall through
+  case ISD::SETEQ:
+    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
+    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
+      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
+    Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
+    if (Sel1.getValueType() == MVT::f32)   // Widen the inner select to f64
+      Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
+    return DAG.getNode(PPCISD::FSEL, dl, ResVT,
+                       DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
   case ISD::SETULT:
   case ISD::SETLT:
     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
-      return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
+    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
   case ISD::SETOGE:
   case ISD::SETGE:
     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
-      return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
+    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
   case ISD::SETUGT:
   case ISD::SETGT:
     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
-      return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
+    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
   case ISD::SETOLE:
   case ISD::SETLE:
     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
-      return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
+    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
   }
   return Op;
 }
@@ -6239,29 +6267,13 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
 
   if (PPCSubTarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 ||
                                  MI->getOpcode() == PPC::SELECT_CC_I8)) {
-    unsigned OpCode = MI->getOpcode() == PPC::SELECT_CC_I8 ?
-                                         PPC::ISEL8 : PPC::ISEL;
-    unsigned SelectPred = MI->getOperand(4).getImm();
-    DebugLoc dl = MI->getDebugLoc();
+    SmallVector<MachineOperand, 2> Cond;
+    Cond.push_back(MI->getOperand(4));
+    Cond.push_back(MI->getOperand(1));
 
-    unsigned SubIdx;
-    bool SwapOps;
-    switch (SelectPred) {
-    default: llvm_unreachable("invalid predicate for isel");
-    case PPC::PRED_EQ: SubIdx = PPC::sub_eq; SwapOps = false; break;
-    case PPC::PRED_NE: SubIdx = PPC::sub_eq; SwapOps = true; break;
-    case PPC::PRED_LT: SubIdx = PPC::sub_lt; SwapOps = false; break;
-    case PPC::PRED_GE: SubIdx = PPC::sub_lt; SwapOps = true; break;
-    case PPC::PRED_GT: SubIdx = PPC::sub_gt; SwapOps = false; break;
-    case PPC::PRED_LE: SubIdx = PPC::sub_gt; SwapOps = true; break;
-    case PPC::PRED_UN: SubIdx = PPC::sub_un; SwapOps = false; break;
-    case PPC::PRED_NU: SubIdx = PPC::sub_un; SwapOps = true; break;
-    }
-
-    BuildMI(*BB, MI, dl, TII->get(OpCode), MI->getOperand(0).getReg())
-      .addReg(MI->getOperand(SwapOps? 3 : 2).getReg())
-      .addReg(MI->getOperand(SwapOps? 2 : 3).getReg())
-      .addReg(MI->getOperand(1).getReg(), 0, SubIdx);
+    DebugLoc dl = MI->getDebugLoc();
+    PPCII->insertSelect(*BB, MI, dl, MI->getOperand(0).getReg(), Cond,
+                        MI->getOperand(2).getReg(), MI->getOperand(3).getReg());
   } else if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
              MI->getOpcode() == PPC::SELECT_CC_I8 ||
              MI->getOpcode() == PPC::SELECT_CC_F4 ||
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 7157b70..423e983 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -16,6 +16,7 @@
 #define LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
 
 #include "PPC.h"
+#include "PPCInstrInfo.h"
 #include "PPCRegisterInfo.h"
 #include "PPCSubtarget.h"
 #include "llvm/CodeGen/SelectionDAG.h"
@@ -327,6 +328,7 @@ namespace llvm {
   class PPCTargetLowering : public TargetLowering {
     const PPCSubtarget &PPCSubTarget;
     const PPCRegisterInfo *PPCRegInfo;
+    const PPCInstrInfo *PPCII;
 
   public:
     explicit PPCTargetLowering(PPCTargetMachine &TM);
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index fa5b65f..bff4c23 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -17,17 +17,21 @@
 //
 def s16imm64 : Operand<i64> {
   let PrintMethod = "printS16ImmOperand";
+  let ParserMatchClass = PPCS16ImmAsmOperand;
 }
 def u16imm64 : Operand<i64> {
   let PrintMethod = "printU16ImmOperand";
+  let ParserMatchClass = PPCU16ImmAsmOperand;
 }
 def symbolHi64 : Operand<i64> {
   let PrintMethod = "printSymbolHi";
   let EncoderMethod = "getHA16Encoding";
+  let ParserMatchClass = PPCS16ImmAsmOperand;
 }
 def symbolLo64 : Operand<i64> {
   let PrintMethod = "printSymbolLo";
   let EncoderMethod = "getLO16Encoding";
+  let ParserMatchClass = PPCS16ImmAsmOperand;
 }
 def tocentry : Operand<iPTR> {
   let MIOperandInfo = (ops i64imm:$imm);
@@ -66,10 +70,17 @@ def HI48_64 : SDNodeXForm<imm, [{
 // Calls.
 //
 
+let Interpretation64Bit = 1 in {
 let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
-  let isBranch = 1, isIndirectBranch = 1, Uses = [CTR8] in
+  let isBranch = 1, isIndirectBranch = 1, Uses = [CTR8] in {
     def BCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>,
         Requires<[In64BitMode]>;
+
+    let isCodeGenOnly = 1 in
+    def BCCTR8 : XLForm_2_br<19, 528, 0, (outs), (ins pred:$cond),
+                             "b${cond:cc}ctr ${cond:reg}", BrB, []>,
+        Requires<[In64BitMode]>;
+  }
 }
 
 let Defs = [LR8] in
@@ -83,8 +94,17 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
     def BDNZ8 : BForm_1<16, 16, 0, 0, (outs), (ins condbrtarget:$dst),
                         "bdnz $dst">;
   }
+
+  let isReturn = 1, Defs = [CTR8], Uses = [CTR8, LR8, RM] in {
+    def BDZLR8  : XLForm_2_ext<19, 16, 18, 0, 0, (outs), (ins),
+                              "bdzlr", BrB, []>;
+    def BDNZLR8 : XLForm_2_ext<19, 16, 16, 0, 0, (outs), (ins),
+                              "bdnzlr", BrB, []>;
+  }
 }
 
+
+
 let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in {
   // Convenient aliases for call instructions
   let Uses = [RM] in {
@@ -116,9 +136,14 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in {
     def BCTRL8 : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins),
                               "bctrl", BrB, [(PPCbctrl)]>,
                  Requires<[In64BitMode]>;
+
+    let isCodeGenOnly = 1 in
+    def BCCTRL8 : XLForm_2_br<19, 528, 1, (outs), (ins pred:$cond),
+                              "b${cond:cc}ctrl ${cond:reg}", BrB, []>,
+        Requires<[In64BitMode]>;
   }
 }
-
+} // Interpretation64Bit
 
 // Calls
 def : Pat<(PPCcall (i64 tglobaladdr:$dst)),
@@ -135,45 +160,46 @@ def : Pat<(PPCcall_nop (i64 texternalsym:$dst)),
 let usesCustomInserter = 1 in {
   let Defs = [CR0] in {
     def ATOMIC_LOAD_ADD_I64 : Pseudo<
-      (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_ADD_I64",
+      (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_ADD_I64",
       [(set i64:$dst, (atomic_load_add_64 xoaddr:$ptr, i64:$incr))]>;
     def ATOMIC_LOAD_SUB_I64 : Pseudo<
-      (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_SUB_I64",
+      (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_SUB_I64",
       [(set i64:$dst, (atomic_load_sub_64 xoaddr:$ptr, i64:$incr))]>;
     def ATOMIC_LOAD_OR_I64 : Pseudo<
-      (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_OR_I64",
+      (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_OR_I64",
       [(set i64:$dst, (atomic_load_or_64 xoaddr:$ptr, i64:$incr))]>;
     def ATOMIC_LOAD_XOR_I64 : Pseudo<
-      (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_XOR_I64",
+      (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_XOR_I64",
       [(set i64:$dst, (atomic_load_xor_64 xoaddr:$ptr, i64:$incr))]>;
     def ATOMIC_LOAD_AND_I64 : Pseudo<
-      (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_AND_i64",
+      (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_AND_i64",
       [(set i64:$dst, (atomic_load_and_64 xoaddr:$ptr, i64:$incr))]>;
     def ATOMIC_LOAD_NAND_I64 : Pseudo<
-      (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_NAND_I64",
+      (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_NAND_I64",
       [(set i64:$dst, (atomic_load_nand_64 xoaddr:$ptr, i64:$incr))]>;
 
     def ATOMIC_CMP_SWAP_I64 : Pseudo<
-      (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$old, G8RC:$new), "#ATOMIC_CMP_SWAP_I64",
+      (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$old, g8rc:$new), "#ATOMIC_CMP_SWAP_I64",
       [(set i64:$dst, (atomic_cmp_swap_64 xoaddr:$ptr, i64:$old, i64:$new))]>;
 
     def ATOMIC_SWAP_I64 : Pseudo<
-      (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$new), "#ATOMIC_SWAP_I64",
+      (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$new), "#ATOMIC_SWAP_I64",
       [(set i64:$dst, (atomic_swap_64 xoaddr:$ptr, i64:$new))]>;
   }
 }
 
 // Instructions to support atomic operations
-def LDARX : XForm_1<31,  84, (outs G8RC:$rD), (ins memrr:$ptr),
+def LDARX : XForm_1<31,  84, (outs g8rc:$rD), (ins memrr:$ptr),
                    "ldarx $rD, $ptr", LdStLDARX,
                    [(set i64:$rD, (PPClarx xoaddr:$ptr))]>;
 
 let Defs = [CR0] in
-def STDCX : XForm_1<31, 214, (outs), (ins G8RC:$rS, memrr:$dst),
+def STDCX : XForm_1<31, 214, (outs), (ins g8rc:$rS, memrr:$dst),
                    "stdcx. $rS, $dst", LdStSTDCX,
                    [(PPCstcx i64:$rS, xoaddr:$dst)]>,
                    isDOT;
 
+let Interpretation64Bit = 1 in {
 let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
 def TCRETURNdi8 :Pseudo< (outs),
                         (ins calltarget:$dst, i32imm:$offset),
@@ -212,6 +238,7 @@ def TAILBA8   : IForm<18, 0, 0, (outs), (ins aaddr:$dst),
                   []>;
 
 }
+} // Interpretation64Bit
 
 def : Pat<(PPCtc_return (i64 tglobaladdr:$dst),  imm:$imm),
           (TCRETURNdi8 tglobaladdr:$dst, imm:$imm)>;
@@ -224,21 +251,25 @@ def : Pat<(PPCtc_return CTRRC8:$dst, imm:$imm),
 
 
 // 64-bit CR instructions
-def MTCRF8 : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins G8RC:$rS),
+let Interpretation64Bit = 1 in {
+let neverHasSideEffects = 1 in {
+def MTCRF8 : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins g8rc:$rS),
                       "mtcrf $FXM, $rS", BrMCRX>,
             PPC970_MicroCode, PPC970_Unit_CRU;
 
 let isCodeGenOnly = 1 in
-def MFCR8pseud: XFXForm_3<31, 19, (outs G8RC:$rT), (ins crbitm:$FXM),
+def MFCR8pseud: XFXForm_3<31, 19, (outs g8rc:$rT), (ins crbitm:$FXM),
                        "#MFCR8pseud", SprMFCR>,
             PPC970_MicroCode, PPC970_Unit_CRU;
-            
-def MFCR8 : XFXForm_3<31, 19, (outs G8RC:$rT), (ins),
+} // neverHasSideEffects = 1
+
+let neverHasSideEffects = 1 in
+def MFCR8 : XFXForm_3<31, 19, (outs g8rc:$rT), (ins),
                      "mfcr $rT", SprMFCR>,
                      PPC970_MicroCode, PPC970_Unit_CRU;
 
 let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
-  def EH_SjLj_SetJmp64  : Pseudo<(outs GPRC:$dst), (ins memr:$buf),
+  def EH_SjLj_SetJmp64  : Pseudo<(outs gprc:$dst), (ins memr:$buf),
                             "#EH_SJLJ_SETJMP64",
                             [(set i32:$dst, (PPCeh_sjlj_setjmp addr:$buf))]>,
                           Requires<[In64BitMode]>;
@@ -253,18 +284,18 @@ let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
 // 64-bit SPR manipulation instrs.
 
 let Uses = [CTR8] in {
-def MFCTR8 : XFXForm_1_ext<31, 339, 9, (outs G8RC:$rT), (ins),
+def MFCTR8 : XFXForm_1_ext<31, 339, 9, (outs g8rc:$rT), (ins),
                            "mfctr $rT", SprMFSPR>,
              PPC970_DGroup_First, PPC970_Unit_FXU;
 }
 let Pattern = [(PPCmtctr i64:$rS)], Defs = [CTR8] in {
-def MTCTR8 : XFXForm_7_ext<31, 467, 9, (outs), (ins G8RC:$rS),
+def MTCTR8 : XFXForm_7_ext<31, 467, 9, (outs), (ins g8rc:$rS),
                            "mtctr $rS", SprMTSPR>,
              PPC970_DGroup_First, PPC970_Unit_FXU;
 }
 
 let Pattern = [(set i64:$rT, readcyclecounter)] in
-def MFTB8 : XFXForm_1_ext<31, 339, 268, (outs G8RC:$rT), (ins),
+def MFTB8 : XFXForm_1_ext<31, 339, 268, (outs g8rc:$rT), (ins),
                           "mfspr $rT, 268", SprMFTB>,
             PPC970_DGroup_First, PPC970_Unit_FXU;
 // Note that encoding mftb using mfspr is now the preferred form,
@@ -273,252 +304,265 @@ def MFTB8 : XFXForm_1_ext<31, 339, 268, (outs G8RC:$rT), (ins),
 // the POWER3.
 
 let Defs = [X1], Uses = [X1] in
-def DYNALLOC8 : Pseudo<(outs G8RC:$result), (ins G8RC:$negsize, memri:$fpsi),"#DYNALLOC8",
+def DYNALLOC8 : Pseudo<(outs g8rc:$result), (ins g8rc:$negsize, memri:$fpsi),"#DYNALLOC8",
                        [(set i64:$result,
                              (PPCdynalloc i64:$negsize, iaddr:$fpsi))]>;
 
 let Defs = [LR8] in {
-def MTLR8  : XFXForm_7_ext<31, 467, 8, (outs), (ins G8RC:$rS),
+def MTLR8  : XFXForm_7_ext<31, 467, 8, (outs), (ins g8rc:$rS),
                            "mtlr $rS", SprMTSPR>,
              PPC970_DGroup_First, PPC970_Unit_FXU;
 }
 let Uses = [LR8] in {
-def MFLR8  : XFXForm_1_ext<31, 339, 8, (outs G8RC:$rT), (ins),
+def MFLR8  : XFXForm_1_ext<31, 339, 8, (outs g8rc:$rT), (ins),
                            "mflr $rT", SprMFSPR>,
              PPC970_DGroup_First, PPC970_Unit_FXU;
 }
+} // Interpretation64Bit
 
 //===----------------------------------------------------------------------===//
 // Fixed point instructions.
 //
 
 let PPC970_Unit = 1 in {  // FXU Operations.
+let Interpretation64Bit = 1 in {
+let neverHasSideEffects = 1 in {
 
 let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
-def LI8  : DForm_2_r0<14, (outs G8RC:$rD), (ins symbolLo64:$imm),
+def LI8  : DForm_2_r0<14, (outs g8rc:$rD), (ins symbolLo64:$imm),
                       "li $rD, $imm", IntSimple,
                       [(set i64:$rD, immSExt16:$imm)]>;
-def LIS8 : DForm_2_r0<15, (outs G8RC:$rD), (ins symbolHi64:$imm),
+def LIS8 : DForm_2_r0<15, (outs g8rc:$rD), (ins symbolHi64:$imm),
                       "lis $rD, $imm", IntSimple,
                       [(set i64:$rD, imm16ShiftedSExt:$imm)]>;
 }
 
 // Logical ops.
-def NAND8: XForm_6<31, 476, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
-                   "nand $rA, $rS, $rB", IntSimple,
-                   [(set i64:$rA, (not (and i64:$rS, i64:$rB)))]>;
-def AND8 : XForm_6<31,  28, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
-                   "and $rA, $rS, $rB", IntSimple,
-                   [(set i64:$rA, (and i64:$rS, i64:$rB))]>;
-def ANDC8: XForm_6<31,  60, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
-                   "andc $rA, $rS, $rB", IntSimple,
-                   [(set i64:$rA, (and i64:$rS, (not i64:$rB)))]>;
-def OR8  : XForm_6<31, 444, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
-                   "or $rA, $rS, $rB", IntSimple,
-                   [(set i64:$rA, (or i64:$rS, i64:$rB))]>;
-def NOR8 : XForm_6<31, 124, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
-                   "nor $rA, $rS, $rB", IntSimple,
-                   [(set i64:$rA, (not (or i64:$rS, i64:$rB)))]>;
-def ORC8 : XForm_6<31, 412, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
-                   "orc $rA, $rS, $rB", IntSimple,
-                   [(set i64:$rA, (or i64:$rS, (not i64:$rB)))]>;
-def EQV8 : XForm_6<31, 284, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
-                   "eqv $rA, $rS, $rB", IntSimple,
-                   [(set i64:$rA, (not (xor i64:$rS, i64:$rB)))]>;
-def XOR8 : XForm_6<31, 316, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB),
-                   "xor $rA, $rS, $rB", IntSimple,
-                   [(set i64:$rA, (xor i64:$rS, i64:$rB))]>;
+defm NAND8: XForm_6r<31, 476, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
+                     "nand", "$rA, $rS, $rB", IntSimple,
+                     [(set i64:$rA, (not (and i64:$rS, i64:$rB)))]>;
+defm AND8 : XForm_6r<31,  28, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
+                     "and", "$rA, $rS, $rB", IntSimple,
+                     [(set i64:$rA, (and i64:$rS, i64:$rB))]>;
+defm ANDC8: XForm_6r<31,  60, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
+                     "andc", "$rA, $rS, $rB", IntSimple,
+                     [(set i64:$rA, (and i64:$rS, (not i64:$rB)))]>;
+defm OR8  : XForm_6r<31, 444, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
+                     "or", "$rA, $rS, $rB", IntSimple,
+                     [(set i64:$rA, (or i64:$rS, i64:$rB))]>;
+defm NOR8 : XForm_6r<31, 124, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
+                     "nor", "$rA, $rS, $rB", IntSimple,
+                     [(set i64:$rA, (not (or i64:$rS, i64:$rB)))]>;
+defm ORC8 : XForm_6r<31, 412, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
+                     "orc", "$rA, $rS, $rB", IntSimple,
+                     [(set i64:$rA, (or i64:$rS, (not i64:$rB)))]>;
+defm EQV8 : XForm_6r<31, 284, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
+                     "eqv", "$rA, $rS, $rB", IntSimple,
+                     [(set i64:$rA, (not (xor i64:$rS, i64:$rB)))]>;
+defm XOR8 : XForm_6r<31, 316, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
+                     "xor", "$rA, $rS, $rB", IntSimple,
+                     [(set i64:$rA, (xor i64:$rS, i64:$rB))]>;
 
 // Logical ops with immediate.
-def ANDIo8  : DForm_4<28, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
+let Defs = [CR0] in {
+def ANDIo8  : DForm_4<28, (outs g8rc:$dst), (ins g8rc:$src1, u16imm:$src2),
                       "andi. $dst, $src1, $src2", IntGeneral,
                       [(set i64:$dst, (and i64:$src1, immZExt16:$src2))]>,
                       isDOT;
-def ANDISo8 : DForm_4<29, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
+def ANDISo8 : DForm_4<29, (outs g8rc:$dst), (ins g8rc:$src1, u16imm:$src2),
                      "andis. $dst, $src1, $src2", IntGeneral,
                     [(set i64:$dst, (and i64:$src1, imm16ShiftedZExt:$src2))]>,
                      isDOT;
-def ORI8    : DForm_4<24, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
+}
+def ORI8    : DForm_4<24, (outs g8rc:$dst), (ins g8rc:$src1, u16imm:$src2),
                       "ori $dst, $src1, $src2", IntSimple,
                       [(set i64:$dst, (or i64:$src1, immZExt16:$src2))]>;
-def ORIS8   : DForm_4<25, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
+def ORIS8   : DForm_4<25, (outs g8rc:$dst), (ins g8rc:$src1, u16imm:$src2),
                       "oris $dst, $src1, $src2", IntSimple,
                     [(set i64:$dst, (or i64:$src1, imm16ShiftedZExt:$src2))]>;
-def XORI8   : DForm_4<26, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
+def XORI8   : DForm_4<26, (outs g8rc:$dst), (ins g8rc:$src1, u16imm:$src2),
                       "xori $dst, $src1, $src2", IntSimple,
                       [(set i64:$dst, (xor i64:$src1, immZExt16:$src2))]>;
-def XORIS8  : DForm_4<27, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
+def XORIS8  : DForm_4<27, (outs g8rc:$dst), (ins g8rc:$src1, u16imm:$src2),
                       "xoris $dst, $src1, $src2", IntSimple,
                    [(set i64:$dst, (xor i64:$src1, imm16ShiftedZExt:$src2))]>;
 
-def ADD8  : XOForm_1<31, 266, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
-                     "add $rT, $rA, $rB", IntSimple,
-                     [(set i64:$rT, (add i64:$rA, i64:$rB))]>;
+defm ADD8  : XOForm_1r<31, 266, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+                       "add", "$rT, $rA, $rB", IntSimple,
+                       [(set i64:$rT, (add i64:$rA, i64:$rB))]>;
 // ADD8 has a special form: reg = ADD8(reg, sym@tls) for use by the
 // initial-exec thread-local storage model.
 let isCodeGenOnly = 1 in
-def ADD8TLS  : XOForm_1<31, 266, 0, (outs G8RC:$rT), (ins G8RC:$rA, tlsreg:$rB),
+def ADD8TLS  : XOForm_1<31, 266, 0, (outs g8rc:$rT), (ins g8rc:$rA, tlsreg:$rB),
                         "add $rT, $rA, $rB@tls", IntSimple,
                         [(set i64:$rT, (add i64:$rA, tglobaltlsaddr:$rB))]>;
                      
-let Defs = [CARRY] in {
-def ADDC8 : XOForm_1<31, 10, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
-                     "addc $rT, $rA, $rB", IntGeneral,
-                     [(set i64:$rT, (addc i64:$rA, i64:$rB))]>,
-                     PPC970_DGroup_Cracked;
-def ADDIC8 : DForm_2<12, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm),
+defm ADDC8 : XOForm_1rc<31, 10, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+                        "addc", "$rT, $rA, $rB", IntGeneral,
+                        [(set i64:$rT, (addc i64:$rA, i64:$rB))]>,
+                        PPC970_DGroup_Cracked;
+let Defs = [CARRY] in
+def ADDIC8 : DForm_2<12, (outs g8rc:$rD), (ins g8rc:$rA, s16imm64:$imm),
                      "addic $rD, $rA, $imm", IntGeneral,
                      [(set i64:$rD, (addc i64:$rA, immSExt16:$imm))]>;
-}
-def ADDI8  : DForm_2<14, (outs G8RC:$rD), (ins G8RC_NOX0:$rA, symbolLo64:$imm),
+def ADDI8  : DForm_2<14, (outs g8rc:$rD), (ins g8rc_nox0:$rA, symbolLo64:$imm),
                      "addi $rD, $rA, $imm", IntSimple,
                      [(set i64:$rD, (add i64:$rA, immSExt16:$imm))]>;
-def ADDIS8 : DForm_2<15, (outs G8RC:$rD), (ins G8RC_NOX0:$rA, symbolHi64:$imm),
+def ADDIS8 : DForm_2<15, (outs g8rc:$rD), (ins g8rc_nox0:$rA, symbolHi64:$imm),
                      "addis $rD, $rA, $imm", IntSimple,
                      [(set i64:$rD, (add i64:$rA, imm16ShiftedSExt:$imm))]>;
 
 let Defs = [CARRY] in {
-def SUBFIC8: DForm_2< 8, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm),
+def SUBFIC8: DForm_2< 8, (outs g8rc:$rD), (ins g8rc:$rA, s16imm64:$imm),
                      "subfic $rD, $rA, $imm", IntGeneral,
                      [(set i64:$rD, (subc immSExt16:$imm, i64:$rA))]>;
-def SUBFC8 : XOForm_1<31, 8, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
-                      "subfc $rT, $rA, $rB", IntGeneral,
-                      [(set i64:$rT, (subc i64:$rB, i64:$rA))]>,
-                      PPC970_DGroup_Cracked;
-}
-def SUBF8 : XOForm_1<31, 40, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
-                     "subf $rT, $rA, $rB", IntGeneral,
-                     [(set i64:$rT, (sub i64:$rB, i64:$rA))]>;
-def NEG8    : XOForm_3<31, 104, 0, (outs G8RC:$rT), (ins G8RC:$rA),
-                       "neg $rT, $rA", IntSimple,
-                       [(set i64:$rT, (ineg i64:$rA))]>;
-let Uses = [CARRY], Defs = [CARRY] in {
-def ADDE8   : XOForm_1<31, 138, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
-                       "adde $rT, $rA, $rB", IntGeneral,
-                       [(set i64:$rT, (adde i64:$rA, i64:$rB))]>;
-def ADDME8  : XOForm_3<31, 234, 0, (outs G8RC:$rT), (ins G8RC:$rA),
-                       "addme $rT, $rA", IntGeneral,
-                       [(set i64:$rT, (adde i64:$rA, -1))]>;
-def ADDZE8  : XOForm_3<31, 202, 0, (outs G8RC:$rT), (ins G8RC:$rA),
-                       "addze $rT, $rA", IntGeneral,
-                       [(set i64:$rT, (adde i64:$rA, 0))]>;
-def SUBFE8  : XOForm_1<31, 136, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
-                       "subfe $rT, $rA, $rB", IntGeneral,
-                       [(set i64:$rT, (sube i64:$rB, i64:$rA))]>;
-def SUBFME8 : XOForm_3<31, 232, 0, (outs G8RC:$rT), (ins G8RC:$rA),
-                       "subfme $rT, $rA", IntGeneral,
-                       [(set i64:$rT, (sube -1, i64:$rA))]>;
-def SUBFZE8 : XOForm_3<31, 200, 0, (outs G8RC:$rT), (ins G8RC:$rA),
-                       "subfze $rT, $rA", IntGeneral,
-                       [(set i64:$rT, (sube 0, i64:$rA))]>;
-}
-
-
-def MULHD : XOForm_1<31, 73, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
-                     "mulhd $rT, $rA, $rB", IntMulHW,
-                     [(set i64:$rT, (mulhs i64:$rA, i64:$rB))]>;
-def MULHDU : XOForm_1<31, 9, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
-                     "mulhdu $rT, $rA, $rB", IntMulHWU,
-                     [(set i64:$rT, (mulhu i64:$rA, i64:$rB))]>;
-
-def CMPD   : XForm_16_ext<31, 0, (outs CRRC:$crD), (ins G8RC:$rA, G8RC:$rB),
-                          "cmpd $crD, $rA, $rB", IntCompare>, isPPC64;
-def CMPLD  : XForm_16_ext<31, 32, (outs CRRC:$crD), (ins G8RC:$rA, G8RC:$rB),
-                          "cmpld $crD, $rA, $rB", IntCompare>, isPPC64;
-def CMPDI  : DForm_5_ext<11, (outs CRRC:$crD), (ins G8RC:$rA, s16imm:$imm),
-                         "cmpdi $crD, $rA, $imm", IntCompare>, isPPC64;
-def CMPLDI : DForm_6_ext<10, (outs CRRC:$dst), (ins G8RC:$src1, u16imm:$src2),
-                         "cmpldi $dst, $src1, $src2", IntCompare>, isPPC64;
-
-def SLD  : XForm_6<31,  27, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB),
-                   "sld $rA, $rS, $rB", IntRotateD,
-                   [(set i64:$rA, (PPCshl i64:$rS, i32:$rB))]>, isPPC64;
-def SRD  : XForm_6<31, 539, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB),
-                   "srd $rA, $rS, $rB", IntRotateD,
-                   [(set i64:$rA, (PPCsrl i64:$rS, i32:$rB))]>, isPPC64;
-let Defs = [CARRY] in {
-def SRAD : XForm_6<31, 794, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB),
-                   "srad $rA, $rS, $rB", IntRotateD,
-                   [(set i64:$rA, (PPCsra i64:$rS, i32:$rB))]>, isPPC64;
+defm SUBFC8 : XOForm_1r<31, 8, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+                        "subfc", "$rT, $rA, $rB", IntGeneral,
+                        [(set i64:$rT, (subc i64:$rB, i64:$rA))]>,
+                        PPC970_DGroup_Cracked;
+}
+defm SUBF8 : XOForm_1r<31, 40, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+                       "subf", "$rT, $rA, $rB", IntGeneral,
+                       [(set i64:$rT, (sub i64:$rB, i64:$rA))]>;
+defm NEG8    : XOForm_3r<31, 104, 0, (outs g8rc:$rT), (ins g8rc:$rA),
+                        "neg", "$rT, $rA", IntSimple,
+                        [(set i64:$rT, (ineg i64:$rA))]>;
+let Uses = [CARRY] in { // every def in this group implicitly reads CARRY (adde/sube patterns)
+defm ADDE8   : XOForm_1rc<31, 138, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+                          "adde", "$rT, $rA, $rB", IntGeneral,
+                          [(set i64:$rT, (adde i64:$rA, i64:$rB))]>;
+defm ADDME8  : XOForm_3rc<31, 234, 0, (outs g8rc:$rT), (ins g8rc:$rA),
+                          "addme", "$rT, $rA", IntGeneral,
+                          [(set i64:$rT, (adde i64:$rA, -1))]>;
+defm ADDZE8  : XOForm_3rc<31, 202, 0, (outs g8rc:$rT), (ins g8rc:$rA),
+                          "addze", "$rT, $rA", IntGeneral,
+                          [(set i64:$rT, (adde i64:$rA, 0))]>;
+defm SUBFE8  : XOForm_1rc<31, 136, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+                          "subfe", "$rT, $rA, $rB", IntGeneral,
+                          [(set i64:$rT, (sube i64:$rB, i64:$rA))]>;
+defm SUBFME8 : XOForm_3rc<31, 232, 0, (outs g8rc:$rT), (ins g8rc:$rA),
+                          "subfme", "$rT, $rA", IntGeneral,
+                          [(set i64:$rT, (sube -1, i64:$rA))]>;
+defm SUBFZE8 : XOForm_3rc<31, 200, 0, (outs g8rc:$rT), (ins g8rc:$rA),
+                          "subfze", "$rT, $rA", IntGeneral,
+                          [(set i64:$rT, (sube 0, i64:$rA))]>;
 }
-                   
-def EXTSB8 : XForm_11<31, 954, (outs G8RC:$rA), (ins G8RC:$rS),
-                      "extsb $rA, $rS", IntSimple,
-                      [(set i64:$rA, (sext_inreg i64:$rS, i8))]>;
-def EXTSH8 : XForm_11<31, 922, (outs G8RC:$rA), (ins G8RC:$rS),
-                      "extsh $rA, $rS", IntSimple,
-                      [(set i64:$rA, (sext_inreg i64:$rS, i16))]>;
-
-def EXTSW  : XForm_11<31, 986, (outs G8RC:$rA), (ins G8RC:$rS),
-                      "extsw $rA, $rS", IntSimple,
-                      [(set i64:$rA, (sext_inreg i64:$rS, i32))]>, isPPC64;
-def EXTSW_32_64 : XForm_11<31, 986, (outs G8RC:$rA), (ins GPRC:$rS),
-                      "extsw $rA, $rS", IntSimple,
-                      [(set i64:$rA, (sext i32:$rS))]>, isPPC64;
 
-let Defs = [CARRY] in {
-def SRADI  : XSForm_1<31, 413, (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH),
-                      "sradi $rA, $rS, $SH", IntRotateDI,
-                      [(set i64:$rA, (sra i64:$rS, (i32 imm:$SH)))]>, isPPC64;
+
+defm MULHD : XOForm_1r<31, 73, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+                       "mulhd", "$rT, $rA, $rB", IntMulHW,
+                       [(set i64:$rT, (mulhs i64:$rA, i64:$rB))]>;
+defm MULHDU : XOForm_1r<31, 9, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+                       "mulhdu", "$rT, $rA, $rB", IntMulHWU,
+                       [(set i64:$rT, (mulhu i64:$rA, i64:$rB))]>;
+}
+} // Interpretation64Bit
+
+let isCompare = 1, neverHasSideEffects = 1 in { // g8rc compares; defined without selection patterns
+  def CMPD   : XForm_16_ext<31, 0, (outs crrc:$crD), (ins g8rc:$rA, g8rc:$rB),
+                            "cmpd $crD, $rA, $rB", IntCompare>, isPPC64;
+  def CMPLD  : XForm_16_ext<31, 32, (outs crrc:$crD), (ins g8rc:$rA, g8rc:$rB),
+                            "cmpld $crD, $rA, $rB", IntCompare>, isPPC64;
+  def CMPDI  : DForm_5_ext<11, (outs crrc:$crD), (ins g8rc:$rA, s16imm:$imm),
+                           "cmpdi $crD, $rA, $imm", IntCompare>, isPPC64;
+  def CMPLDI : DForm_6_ext<10, (outs crrc:$dst), (ins g8rc:$src1, u16imm:$src2),
+                           "cmpldi $dst, $src1, $src2", IntCompare>, isPPC64;
 }
-def CNTLZD : XForm_11<31, 58, (outs G8RC:$rA), (ins G8RC:$rS),
-                      "cntlzd $rA, $rS", IntGeneral,
-                      [(set i64:$rA, (ctlz i64:$rS))]>;
-def POPCNTD : XForm_11<31, 506, (outs G8RC:$rA), (ins G8RC:$rS),
-                      "popcntd $rA, $rS", IntGeneral,
-                      [(set i64:$rA, (ctpop i64:$rS))]>;
+
+let neverHasSideEffects = 1 in { // shifts, sign extensions, bit counts, divides and multiply
+defm SLD  : XForm_6r<31,  27, (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB),
+                     "sld", "$rA, $rS, $rB", IntRotateD,
+                     [(set i64:$rA, (PPCshl i64:$rS, i32:$rB))]>, isPPC64;
+defm SRD  : XForm_6r<31, 539, (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB),
+                     "srd", "$rA, $rS, $rB", IntRotateD,
+                     [(set i64:$rA, (PPCsrl i64:$rS, i32:$rB))]>, isPPC64;
+defm SRAD : XForm_6rc<31, 794, (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB),
+                      "srad", "$rA, $rS, $rB", IntRotateD,
+                      [(set i64:$rA, (PPCsra i64:$rS, i32:$rB))]>, isPPC64;
+
+let Interpretation64Bit = 1 in { 
+defm EXTSB8 : XForm_11r<31, 954, (outs g8rc:$rA), (ins g8rc:$rS),
+                        "extsb", "$rA, $rS", IntSimple,
+                        [(set i64:$rA, (sext_inreg i64:$rS, i8))]>;
+defm EXTSH8 : XForm_11r<31, 922, (outs g8rc:$rA), (ins g8rc:$rS),
+                        "extsh", "$rA, $rS", IntSimple,
+                        [(set i64:$rA, (sext_inreg i64:$rS, i16))]>;
+} // Interpretation64Bit
+
+defm EXTSW  : XForm_11r<31, 986, (outs g8rc:$rA), (ins g8rc:$rS),
+                        "extsw", "$rA, $rS", IntSimple,
+                        [(set i64:$rA, (sext_inreg i64:$rS, i32))]>, isPPC64;
+let Interpretation64Bit = 1 in
+defm EXTSW_32_64 : XForm_11r<31, 986, (outs g8rc:$rA), (ins gprc:$rS),
+                             "extsw", "$rA, $rS", IntSimple,
+                             [(set i64:$rA, (sext i32:$rS))]>, isPPC64;
+
+defm SRADI  : XSForm_1rc<31, 413, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH),
+                         "sradi", "$rA, $rS, $SH", IntRotateDI,
+                         [(set i64:$rA, (sra i64:$rS, (i32 imm:$SH)))]>, isPPC64;
+defm CNTLZD : XForm_11r<31, 58, (outs g8rc:$rA), (ins g8rc:$rS),
+                        "cntlzd", "$rA, $rS", IntGeneral,
+                        [(set i64:$rA, (ctlz i64:$rS))]>;
+defm POPCNTD : XForm_11r<31, 506, (outs g8rc:$rA), (ins g8rc:$rS),
+                         "popcntd", "$rA, $rS", IntGeneral,
+                         [(set i64:$rA, (ctpop i64:$rS))]>;
 
 // popcntw also does a population count on the high 32 bits (storing the
 // results in the high 32-bits of the output). We'll ignore that here (which is
 // safe because we never separately use the high part of the 64-bit registers).
-def POPCNTW : XForm_11<31, 378, (outs GPRC:$rA), (ins GPRC:$rS),
-                      "popcntw $rA, $rS", IntGeneral,
-                      [(set i32:$rA, (ctpop i32:$rS))]>;
-
-def DIVD  : XOForm_1<31, 489, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
-                     "divd $rT, $rA, $rB", IntDivD,
-                     [(set i64:$rT, (sdiv i64:$rA, i64:$rB))]>, isPPC64,
-                     PPC970_DGroup_First, PPC970_DGroup_Cracked;
-def DIVDU : XOForm_1<31, 457, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
-                     "divdu $rT, $rA, $rB", IntDivD,
-                     [(set i64:$rT, (udiv i64:$rA, i64:$rB))]>, isPPC64,
-                     PPC970_DGroup_First, PPC970_DGroup_Cracked;
-def MULLD : XOForm_1<31, 233, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
-                     "mulld $rT, $rA, $rB", IntMulHD,
-                     [(set i64:$rT, (mul i64:$rA, i64:$rB))]>, isPPC64;
-
+defm POPCNTW : XForm_11r<31, 378, (outs gprc:$rA), (ins gprc:$rS),
+                         "popcntw", "$rA, $rS", IntGeneral,
+                         [(set i32:$rA, (ctpop i32:$rS))]>;
+
+defm DIVD  : XOForm_1r<31, 489, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+                       "divd", "$rT, $rA, $rB", IntDivD,
+                       [(set i64:$rT, (sdiv i64:$rA, i64:$rB))]>, isPPC64,
+                       PPC970_DGroup_First, PPC970_DGroup_Cracked;
+defm DIVDU : XOForm_1r<31, 457, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+                       "divdu", "$rT, $rA, $rB", IntDivD,
+                       [(set i64:$rT, (udiv i64:$rA, i64:$rB))]>, isPPC64,
+                       PPC970_DGroup_First, PPC970_DGroup_Cracked;
+defm MULLD : XOForm_1r<31, 233, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+                       "mulld", "$rT, $rA, $rB", IntMulHD,
+                       [(set i64:$rT, (mul i64:$rA, i64:$rB))]>, isPPC64;
+}  // neverHasSideEffects = 1
 
+let neverHasSideEffects = 1 in { // rotate instructions (rld*/rlwinm) and isel
 let isCommutable = 1 in {
-def RLDIMI : MDForm_1<30, 3,
-                      (outs G8RC:$rA), (ins G8RC:$rSi, G8RC:$rS, u6imm:$SH, u6imm:$MB),
-                      "rldimi $rA, $rS, $SH, $MB", IntRotateDI,
-                      []>, isPPC64, RegConstraint<"$rSi = $rA">,
-                      NoEncode<"$rSi">;
+defm RLDIMI : MDForm_1r<30, 3, (outs g8rc:$rA),
+                        (ins g8rc:$rSi, g8rc:$rS, u6imm:$SH, u6imm:$MBE),
+                        "rldimi", "$rA, $rS, $SH, $MBE", IntRotateDI,
+                        []>, isPPC64, RegConstraint<"$rSi = $rA">,
+                        NoEncode<"$rSi">;
 }
 
 // Rotate instructions.
-def RLDCL  : MDForm_1<30, 0,
-                      (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB, u6imm:$MBE),
-                      "rldcl $rA, $rS, $rB, $MBE", IntRotateD,
-                      []>, isPPC64;
-def RLDICL : MDForm_1<30, 0,
-                      (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$MBE),
-                      "rldicl $rA, $rS, $SH, $MBE", IntRotateDI,
-                      []>, isPPC64;
-def RLDICR : MDForm_1<30, 1,
-                      (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$MBE),
-                      "rldicr $rA, $rS, $SH, $MBE", IntRotateDI,
-                      []>, isPPC64;
-
-def RLWINM8 : MForm_2<21,
-                     (outs G8RC:$rA), (ins G8RC:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME),
-                     "rlwinm $rA, $rS, $SH, $MB, $ME", IntGeneral,
-                     []>;
-
+defm RLDCL  : MDSForm_1r<30, 8,
+                        (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB, u6imm:$MBE),
+                        "rldcl", "$rA, $rS, $rB, $MBE", IntRotateD,
+                        []>, isPPC64;
+defm RLDICL : MDForm_1r<30, 0,
+                        (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE),
+                        "rldicl", "$rA, $rS, $SH, $MBE", IntRotateDI,
+                        []>, isPPC64;
+defm RLDICR : MDForm_1r<30, 1,
+                        (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE),
+                        "rldicr", "$rA, $rS, $SH, $MBE", IntRotateDI,
+                        []>, isPPC64;
+
+let Interpretation64Bit = 1 in { // g8rc-operand variants of rlwinm and isel
+defm RLWINM8 : MForm_2r<21, (outs g8rc:$rA),
+                        (ins g8rc:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME),
+                        "rlwinm", "$rA, $rS, $SH, $MB, $ME", IntGeneral,
+                        []>;
+
+let isSelect = 1 in
 def ISEL8   : AForm_4<31, 15,
-                     (outs G8RC:$rT), (ins G8RC_NOX0:$rA, G8RC:$rB, CRBITRC:$cond),
+                     (outs g8rc:$rT), (ins g8rc_nox0:$rA, g8rc:$rB, crbitrc:$cond),
                      "isel $rT, $rA, $rB, $cond", IntGeneral,
                      []>;
+}  // Interpretation64Bit
+}  // neverHasSideEffects = 1
 }  // End FXU Operations.
 
 
@@ -529,39 +573,43 @@ def ISEL8   : AForm_4<31, 15,
 
 // Sign extending loads.
 let canFoldAsLoad = 1, PPC970_Unit = 2 in {
-def LHA8: DForm_1<42, (outs G8RC:$rD), (ins memri:$src),
+let Interpretation64Bit = 1 in
+def LHA8: DForm_1<42, (outs g8rc:$rD), (ins memri:$src),
                   "lha $rD, $src", LdStLHA,
                   [(set i64:$rD, (sextloadi16 iaddr:$src))]>,
                   PPC970_DGroup_Cracked;
-def LWA  : DSForm_1<58, 2, (outs G8RC:$rD), (ins memrix:$src),
+def LWA  : DSForm_1<58, 2, (outs g8rc:$rD), (ins memrix:$src),
                     "lwa $rD, $src", LdStLWA,
                     [(set i64:$rD,
                           (aligned4sextloadi32 ixaddr:$src))]>, isPPC64,
                     PPC970_DGroup_Cracked;
-def LHAX8: XForm_1<31, 343, (outs G8RC:$rD), (ins memrr:$src),
+let Interpretation64Bit = 1 in
+def LHAX8: XForm_1<31, 343, (outs g8rc:$rD), (ins memrr:$src),
                    "lhax $rD, $src", LdStLHA,
                    [(set i64:$rD, (sextloadi16 xaddr:$src))]>,
                    PPC970_DGroup_Cracked;
-def LWAX : XForm_1<31, 341, (outs G8RC:$rD), (ins memrr:$src),
+def LWAX : XForm_1<31, 341, (outs g8rc:$rD), (ins memrr:$src),
                    "lwax $rD, $src", LdStLHA,
                    [(set i64:$rD, (sextloadi32 xaddr:$src))]>, isPPC64,
                    PPC970_DGroup_Cracked;
 
 // Update forms.
-let mayLoad = 1 in {
-def LHAU8 : DForm_1<43, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
+let mayLoad = 1, neverHasSideEffects = 1 in {
+let Interpretation64Bit = 1 in
+def LHAU8 : DForm_1<43, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
                     (ins memri:$addr),
                     "lhau $rD, $addr", LdStLHAU,
                     []>, RegConstraint<"$addr.reg = $ea_result">,
                     NoEncode<"$ea_result">;
 // NO LWAU!
 
-def LHAUX8 : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
+let Interpretation64Bit = 1 in
+def LHAUX8 : XForm_1<31, 375, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
                     (ins memrr:$addr),
                     "lhaux $rD, $addr", LdStLHAU,
                     []>, RegConstraint<"$addr.ptrreg = $ea_result">,
                     NoEncode<"$ea_result">;
-def LWAUX : XForm_1<31, 373, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
+def LWAUX : XForm_1<31, 373, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
                     (ins memrr:$addr),
                     "lwaux $rD, $addr", LdStLHAU,
                     []>, RegConstraint<"$addr.ptrreg = $ea_result">,
@@ -569,87 +617,89 @@ def LWAUX : XForm_1<31, 373, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
 }
 }
 
+let Interpretation64Bit = 1 in {
 // Zero extending loads.
 let canFoldAsLoad = 1, PPC970_Unit = 2 in {
-def LBZ8 : DForm_1<34, (outs G8RC:$rD), (ins memri:$src),
+def LBZ8 : DForm_1<34, (outs g8rc:$rD), (ins memri:$src),
                   "lbz $rD, $src", LdStLoad,
                   [(set i64:$rD, (zextloadi8 iaddr:$src))]>;
-def LHZ8 : DForm_1<40, (outs G8RC:$rD), (ins memri:$src),
+def LHZ8 : DForm_1<40, (outs g8rc:$rD), (ins memri:$src),
                   "lhz $rD, $src", LdStLoad,
                   [(set i64:$rD, (zextloadi16 iaddr:$src))]>;
-def LWZ8 : DForm_1<32, (outs G8RC:$rD), (ins memri:$src),
+def LWZ8 : DForm_1<32, (outs g8rc:$rD), (ins memri:$src),
                   "lwz $rD, $src", LdStLoad,
                   [(set i64:$rD, (zextloadi32 iaddr:$src))]>, isPPC64;
 
-def LBZX8 : XForm_1<31,  87, (outs G8RC:$rD), (ins memrr:$src),
+def LBZX8 : XForm_1<31,  87, (outs g8rc:$rD), (ins memrr:$src),
                    "lbzx $rD, $src", LdStLoad,
                    [(set i64:$rD, (zextloadi8 xaddr:$src))]>;
-def LHZX8 : XForm_1<31, 279, (outs G8RC:$rD), (ins memrr:$src),
+def LHZX8 : XForm_1<31, 279, (outs g8rc:$rD), (ins memrr:$src),
                    "lhzx $rD, $src", LdStLoad,
                    [(set i64:$rD, (zextloadi16 xaddr:$src))]>;
-def LWZX8 : XForm_1<31,  23, (outs G8RC:$rD), (ins memrr:$src),
+def LWZX8 : XForm_1<31,  23, (outs g8rc:$rD), (ins memrr:$src),
                    "lwzx $rD, $src", LdStLoad,
                    [(set i64:$rD, (zextloadi32 xaddr:$src))]>;
                    
                    
 // Update forms.
-let mayLoad = 1 in {
-def LBZU8 : DForm_1<35, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
+let mayLoad = 1, neverHasSideEffects = 1 in {
+def LBZU8 : DForm_1<35, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
                     "lbzu $rD, $addr", LdStLoadUpd,
                     []>, RegConstraint<"$addr.reg = $ea_result">,
                     NoEncode<"$ea_result">;
-def LHZU8 : DForm_1<41, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
+def LHZU8 : DForm_1<41, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
                     "lhzu $rD, $addr", LdStLoadUpd,
                     []>, RegConstraint<"$addr.reg = $ea_result">,
                     NoEncode<"$ea_result">;
-def LWZU8 : DForm_1<33, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
+def LWZU8 : DForm_1<33, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
                     "lwzu $rD, $addr", LdStLoadUpd,
                     []>, RegConstraint<"$addr.reg = $ea_result">,
                     NoEncode<"$ea_result">;
 
-def LBZUX8 : XForm_1<31, 119, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
+def LBZUX8 : XForm_1<31, 119, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
                    (ins memrr:$addr),
                    "lbzux $rD, $addr", LdStLoadUpd,
                    []>, RegConstraint<"$addr.ptrreg = $ea_result">,
                    NoEncode<"$ea_result">;
-def LHZUX8 : XForm_1<31, 311, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
+def LHZUX8 : XForm_1<31, 311, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
                    (ins memrr:$addr),
                    "lhzux $rD, $addr", LdStLoadUpd,
                    []>, RegConstraint<"$addr.ptrreg = $ea_result">,
                    NoEncode<"$ea_result">;
-def LWZUX8 : XForm_1<31, 55, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
+def LWZUX8 : XForm_1<31, 55, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
                    (ins memrr:$addr),
                    "lwzux $rD, $addr", LdStLoadUpd,
                    []>, RegConstraint<"$addr.ptrreg = $ea_result">,
                    NoEncode<"$ea_result">;
 }
 }
+} // Interpretation64Bit
 
 
 // Full 8-byte loads.
 let canFoldAsLoad = 1, PPC970_Unit = 2 in {
-def LD   : DSForm_1<58, 0, (outs G8RC:$rD), (ins memrix:$src),
+def LD   : DSForm_1<58, 0, (outs g8rc:$rD), (ins memrix:$src),
                     "ld $rD, $src", LdStLD,
                     [(set i64:$rD, (aligned4load ixaddr:$src))]>, isPPC64;
 // The following three definitions are selected for small code model only.
 // Otherwise, we need to create two instructions to form a 32-bit offset,
 // so we have a custom matcher for TOC_ENTRY in PPCDAGToDAGIsel::Select().
-def LDtoc: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg),
+def LDtoc: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
                   "#LDtoc",
                   [(set i64:$rD,
                      (PPCtoc_entry tglobaladdr:$disp, i64:$reg))]>, isPPC64;
-def LDtocJTI: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg),
+def LDtocJTI: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
                   "#LDtocJTI",
                   [(set i64:$rD,
                      (PPCtoc_entry tjumptable:$disp, i64:$reg))]>, isPPC64;
-def LDtocCPT: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg),
+def LDtocCPT: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
                   "#LDtocCPT",
                   [(set i64:$rD,
                      (PPCtoc_entry tconstpool:$disp, i64:$reg))]>, isPPC64;
 
 let hasSideEffects = 1, isCodeGenOnly = 1 in {
 let RST = 2, DS = 2 in
-def LDinto_toc: DSForm_1a<58, 0, (outs), (ins G8RC:$reg),
+def LDinto_toc: DSForm_1a<58, 0, (outs), (ins g8rc:$reg),
                     "ld 2, 8($reg)", LdStLD,
                     [(PPCload_toc i64:$reg)]>, isPPC64;
                     
@@ -658,25 +708,26 @@ def LDtoc_restore : DSForm_1a<58, 0, (outs), (ins),
                     "ld 2, 40(1)", LdStLD,
                     [(PPCtoc_restore)]>, isPPC64;
 }
-def LDX  : XForm_1<31,  21, (outs G8RC:$rD), (ins memrr:$src),
+def LDX  : XForm_1<31,  21, (outs g8rc:$rD), (ins memrr:$src),
                    "ldx $rD, $src", LdStLD,
                    [(set i64:$rD, (load xaddr:$src))]>, isPPC64;
-def LDBRX : XForm_1<31,  532, (outs G8RC:$rD), (ins memrr:$src),
+def LDBRX : XForm_1<31,  532, (outs g8rc:$rD), (ins memrr:$src),
                    "ldbrx $rD, $src", LdStLoad,
                    [(set i64:$rD, (PPClbrx xoaddr:$src, i64))]>, isPPC64;
 
-let mayLoad = 1 in
-def LDU  : DSForm_1<58, 1, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrix:$addr),
+let mayLoad = 1, neverHasSideEffects = 1 in { // update-form 8-byte loads (ldu/ldux), both defined with empty patterns
+def LDU  : DSForm_1<58, 1, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memrix:$addr),
                     "ldu $rD, $addr", LdStLDU,
                     []>, RegConstraint<"$addr.reg = $ea_result">, isPPC64,
                     NoEncode<"$ea_result">;
 
-def LDUX : XForm_1<31, 53, (outs G8RC:$rD, ptr_rc_nor0:$ea_result),
+def LDUX : XForm_1<31, 53, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
                    (ins memrr:$addr),
                    "ldux $rD, $addr", LdStLDU,
                    []>, RegConstraint<"$addr.ptrreg = $ea_result">,
                    NoEncode<"$ea_result">, isPPC64;
 }
+} // canFoldAsLoad = 1, PPC970_Unit = 2 -- TODO confirm brace pairing against the full file
 
 def : Pat<(PPCload ixaddr:$src),
           (LD ixaddr:$src)>;
@@ -684,108 +735,111 @@ def : Pat<(PPCload xaddr:$src),
           (LDX xaddr:$src)>;
 
 // Support for medium and large code model.
-def ADDIStocHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, tocentry:$disp),
+def ADDIStocHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp),
                        "#ADDIStocHA",
                        [(set i64:$rD,
                          (PPCaddisTocHA i64:$reg, tglobaladdr:$disp))]>,
                        isPPC64;
-def LDtocL: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC_NOX0:$reg),
+def LDtocL: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc_nox0:$reg),
                    "#LDtocL",
                    [(set i64:$rD,
                      (PPCldTocL tglobaladdr:$disp, i64:$reg))]>, isPPC64;
-def ADDItocL: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, tocentry:$disp),
+def ADDItocL: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp),
                      "#ADDItocL",
                      [(set i64:$rD,
                        (PPCaddiTocL i64:$reg, tglobaladdr:$disp))]>, isPPC64;
 
 // Support for thread-local storage.
-def ADDISgotTprelHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolHi64:$disp),
+def ADDISgotTprelHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, symbolHi64:$disp),
                          "#ADDISgotTprelHA",
                          [(set i64:$rD,
                            (PPCaddisGotTprelHA i64:$reg,
                                                tglobaltlsaddr:$disp))]>,
                   isPPC64;
-def LDgotTprelL: Pseudo<(outs G8RC:$rD), (ins symbolLo64:$disp, G8RC_NOX0:$reg),
+def LDgotTprelL: Pseudo<(outs g8rc:$rD), (ins symbolLo64:$disp, g8rc_nox0:$reg),
                         "#LDgotTprelL",
                         [(set i64:$rD,
                           (PPCldGotTprelL tglobaltlsaddr:$disp, i64:$reg))]>,
                  isPPC64;
 def : Pat<(PPCaddTls i64:$in, tglobaltlsaddr:$g),
           (ADD8TLS $in, tglobaltlsaddr:$g)>;
-def ADDIStlsgdHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolHi64:$disp),
+def ADDIStlsgdHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, symbolHi64:$disp),
                          "#ADDIStlsgdHA",
                          [(set i64:$rD,
                            (PPCaddisTlsgdHA i64:$reg, tglobaltlsaddr:$disp))]>,
                   isPPC64;
-def ADDItlsgdL : Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolLo64:$disp),
+def ADDItlsgdL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, symbolLo64:$disp),
                        "#ADDItlsgdL",
                        [(set i64:$rD,
                          (PPCaddiTlsgdL i64:$reg, tglobaltlsaddr:$disp))]>,
                  isPPC64;
-def GETtlsADDR : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, tlsgd:$sym),
+def GETtlsADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
                         "#GETtlsADDR",
                         [(set i64:$rD,
                           (PPCgetTlsAddr i64:$reg, tglobaltlsaddr:$sym))]>,
                  isPPC64;
-def ADDIStlsldHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolHi64:$disp),
+def ADDIStlsldHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, symbolHi64:$disp),
                          "#ADDIStlsldHA",
                          [(set i64:$rD,
                            (PPCaddisTlsldHA i64:$reg, tglobaltlsaddr:$disp))]>,
                   isPPC64;
-def ADDItlsldL : Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolLo64:$disp),
+def ADDItlsldL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, symbolLo64:$disp),
                        "#ADDItlsldL",
                        [(set i64:$rD,
                          (PPCaddiTlsldL i64:$reg, tglobaltlsaddr:$disp))]>,
                  isPPC64;
-def GETtlsldADDR : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, tlsgd:$sym),
+def GETtlsldADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
                           "#GETtlsldADDR",
                           [(set i64:$rD,
                             (PPCgetTlsldAddr i64:$reg, tglobaltlsaddr:$sym))]>,
                    isPPC64;
-def ADDISdtprelHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolHi64:$disp),
+def ADDISdtprelHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, symbolHi64:$disp),
                           "#ADDISdtprelHA",
                           [(set i64:$rD,
                             (PPCaddisDtprelHA i64:$reg,
                                               tglobaltlsaddr:$disp))]>,
                    isPPC64;
-def ADDIdtprelL : Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolLo64:$disp),
+def ADDIdtprelL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, symbolLo64:$disp),
                          "#ADDIdtprelL",
                          [(set i64:$rD,
                            (PPCaddiDtprelL i64:$reg, tglobaltlsaddr:$disp))]>,
                   isPPC64;
 
 let PPC970_Unit = 2 in {
+let Interpretation64Bit = 1 in {
 // Truncating stores.                       
-def STB8 : DForm_1<38, (outs), (ins G8RC:$rS, memri:$src),
+def STB8 : DForm_1<38, (outs), (ins g8rc:$rS, memri:$src),
                    "stb $rS, $src", LdStStore,
                    [(truncstorei8 i64:$rS, iaddr:$src)]>;
-def STH8 : DForm_1<44, (outs), (ins G8RC:$rS, memri:$src),
+def STH8 : DForm_1<44, (outs), (ins g8rc:$rS, memri:$src),
                    "sth $rS, $src", LdStStore,
                    [(truncstorei16 i64:$rS, iaddr:$src)]>;
-def STW8 : DForm_1<36, (outs), (ins G8RC:$rS, memri:$src),
+def STW8 : DForm_1<36, (outs), (ins g8rc:$rS, memri:$src),
                    "stw $rS, $src", LdStStore,
                    [(truncstorei32 i64:$rS, iaddr:$src)]>;
-def STBX8 : XForm_8<31, 215, (outs), (ins G8RC:$rS, memrr:$dst),
+def STBX8 : XForm_8<31, 215, (outs), (ins g8rc:$rS, memrr:$dst),
                    "stbx $rS, $dst", LdStStore,
                    [(truncstorei8 i64:$rS, xaddr:$dst)]>,
                    PPC970_DGroup_Cracked;
-def STHX8 : XForm_8<31, 407, (outs), (ins G8RC:$rS, memrr:$dst),
+def STHX8 : XForm_8<31, 407, (outs), (ins g8rc:$rS, memrr:$dst),
                    "sthx $rS, $dst", LdStStore,
                    [(truncstorei16 i64:$rS, xaddr:$dst)]>,
                    PPC970_DGroup_Cracked;
-def STWX8 : XForm_8<31, 151, (outs), (ins G8RC:$rS, memrr:$dst),
+def STWX8 : XForm_8<31, 151, (outs), (ins g8rc:$rS, memrr:$dst),
                    "stwx $rS, $dst", LdStStore,
                    [(truncstorei32 i64:$rS, xaddr:$dst)]>,
                    PPC970_DGroup_Cracked;
+} // Interpretation64Bit
+
 // Normal 8-byte stores.
-def STD  : DSForm_1<62, 0, (outs), (ins G8RC:$rS, memrix:$dst),
+def STD  : DSForm_1<62, 0, (outs), (ins g8rc:$rS, memrix:$dst),
                     "std $rS, $dst", LdStSTD,
                     [(aligned4store i64:$rS, ixaddr:$dst)]>, isPPC64;
-def STDX  : XForm_8<31, 149, (outs), (ins G8RC:$rS, memrr:$dst),
+def STDX  : XForm_8<31, 149, (outs), (ins g8rc:$rS, memrr:$dst),
                    "stdx $rS, $dst", LdStSTD,
                    [(store i64:$rS, xaddr:$dst)]>, isPPC64,
                    PPC970_DGroup_Cracked;
-def STDBRX: XForm_8<31, 660, (outs), (ins G8RC:$rS, memrr:$dst),
+def STDBRX: XForm_8<31, 660, (outs), (ins g8rc:$rS, memrr:$dst),
                    "stdbrx $rS, $dst", LdStStore,
                    [(PPCstbrx i64:$rS, xoaddr:$dst, i64)]>, isPPC64,
                    PPC970_DGroup_Cracked;
@@ -793,33 +847,36 @@ def STDBRX: XForm_8<31, 660, (outs), (ins G8RC:$rS, memrr:$dst),
 
 // Stores with Update (pre-inc).
 let PPC970_Unit = 2, mayStore = 1 in {
-def STBU8 : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memri:$dst),
+let Interpretation64Bit = 1 in {
+def STBU8 : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memri:$dst),
                    "stbu $rS, $dst", LdStStoreUpd, []>,
                    RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
-def STHU8 : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memri:$dst),
+def STHU8 : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memri:$dst),
                    "sthu $rS, $dst", LdStStoreUpd, []>,
                    RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
-def STWU8 : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memri:$dst),
+def STWU8 : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memri:$dst),
                    "stwu $rS, $dst", LdStStoreUpd, []>,
                    RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
-def STDU : DSForm_1<62, 1, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrix:$dst),
+def STDU : DSForm_1<62, 1, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrix:$dst),
                    "stdu $rS, $dst", LdStSTDU, []>,
                    RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">,
                    isPPC64;
 
-def STBUX8: XForm_8<31, 247, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst),
+def STBUX8: XForm_8<31, 247, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrr:$dst),
                     "stbux $rS, $dst", LdStStoreUpd, []>,
                     RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
                     PPC970_DGroup_Cracked;
-def STHUX8: XForm_8<31, 439, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst),
+def STHUX8: XForm_8<31, 439, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrr:$dst),
                     "sthux $rS, $dst", LdStStoreUpd, []>,
                     RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
                     PPC970_DGroup_Cracked;
-def STWUX8: XForm_8<31, 183, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst),
+def STWUX8: XForm_8<31, 183, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrr:$dst),
                     "stwux $rS, $dst", LdStStoreUpd, []>,
                     RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
                     PPC970_DGroup_Cracked;
-def STDUX : XForm_8<31, 181, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst),
+} // Interpretation64Bit
+
+def STDUX : XForm_8<31, 181, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrr:$dst),
                     "stdux $rS, $dst", LdStSTDU, []>,
                     RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
                     PPC970_DGroup_Cracked, isPPC64;
@@ -852,29 +909,30 @@ def : Pat<(pre_store i64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
 //
 
 
-let PPC970_Unit = 3, Uses = [RM] in {  // FPU Operations.
-def FCFID  : XForm_26<63, 846, (outs F8RC:$frD), (ins F8RC:$frB),
-                      "fcfid $frD, $frB", FPGeneral,
-                      [(set f64:$frD, (PPCfcfid f64:$frB))]>, isPPC64;
-def FCTIDZ : XForm_26<63, 815, (outs F8RC:$frD), (ins F8RC:$frB),
-                      "fctidz $frD, $frB", FPGeneral,
-                      [(set f64:$frD, (PPCfctidz f64:$frB))]>, isPPC64;
-
-def FCFIDU  : XForm_26<63, 974, (outs F8RC:$frD), (ins F8RC:$frB),
-                      "fcfidu $frD, $frB", FPGeneral,
-                      [(set f64:$frD, (PPCfcfidu f64:$frB))]>, isPPC64;
-def FCFIDS  : XForm_26<59, 846, (outs F4RC:$frD), (ins F8RC:$frB),
-                      "fcfids $frD, $frB", FPGeneral,
-                      [(set f32:$frD, (PPCfcfids f64:$frB))]>, isPPC64;
-def FCFIDUS : XForm_26<59, 974, (outs F4RC:$frD), (ins F8RC:$frB),
-                      "fcfidus $frD, $frB", FPGeneral,
-                      [(set f32:$frD, (PPCfcfidus f64:$frB))]>, isPPC64;
-def FCTIDUZ : XForm_26<63, 943, (outs F8RC:$frD), (ins F8RC:$frB),
-                      "fctiduz $frD, $frB", FPGeneral,
-                      [(set f64:$frD, (PPCfctiduz f64:$frB))]>, isPPC64;
-def FCTIWUZ : XForm_26<63, 143, (outs F8RC:$frD), (ins F8RC:$frB),
-                      "fctiwuz $frD, $frB", FPGeneral,
-                      [(set f64:$frD, (PPCfctiwuz f64:$frB))]>, isPPC64;
+let PPC970_Unit = 3, neverHasSideEffects = 1,
+    Uses = [RM] in {  // FPU Operations.
+defm FCFID  : XForm_26r<63, 846, (outs f8rc:$frD), (ins f8rc:$frB),
+                        "fcfid", "$frD, $frB", FPGeneral,
+                        [(set f64:$frD, (PPCfcfid f64:$frB))]>, isPPC64;
+defm FCTIDZ : XForm_26r<63, 815, (outs f8rc:$frD), (ins f8rc:$frB),
+                        "fctidz", "$frD, $frB", FPGeneral,
+                        [(set f64:$frD, (PPCfctidz f64:$frB))]>, isPPC64;
+
+defm FCFIDU  : XForm_26r<63, 974, (outs f8rc:$frD), (ins f8rc:$frB),
+                        "fcfidu", "$frD, $frB", FPGeneral,
+                        [(set f64:$frD, (PPCfcfidu f64:$frB))]>, isPPC64;
+defm FCFIDS  : XForm_26r<59, 846, (outs f4rc:$frD), (ins f8rc:$frB),
+                        "fcfids", "$frD, $frB", FPGeneral,
+                        [(set f32:$frD, (PPCfcfids f64:$frB))]>, isPPC64;
+defm FCFIDUS : XForm_26r<59, 974, (outs f4rc:$frD), (ins f8rc:$frB),
+                        "fcfidus", "$frD, $frB", FPGeneral,
+                        [(set f32:$frD, (PPCfcfidus f64:$frB))]>, isPPC64;
+defm FCTIDUZ : XForm_26r<63, 943, (outs f8rc:$frD), (ins f8rc:$frB),
+                        "fctiduz", "$frD, $frB", FPGeneral,
+                        [(set f64:$frD, (PPCfctiduz f64:$frB))]>, isPPC64;
+defm FCTIWUZ : XForm_26r<63, 143, (outs f8rc:$frD), (ins f8rc:$frB),
+                        "fctiwuz", "$frD, $frB", FPGeneral,
+                        [(set f64:$frD, (PPCfctiwuz f64:$frB))]>, isPPC64;
 }
 
 
diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td
index a5ba4c8..cc9cf0a 100644
--- a/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -163,7 +163,7 @@ def vecspltisw : PatLeaf<(build_vector), [{
 
 // VA1a_Int_Ty - A VAForm_1a intrinsic definition of specific type.
 class VA1a_Int_Ty<bits<6> xo, string opc, Intrinsic IntID, ValueType Ty>
-  : VAForm_1a<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, VRRC:$vC),
+  : VAForm_1a<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, vrrc:$vC),
               !strconcat(opc, " $vD, $vA, $vB, $vC"), VecFP,
                        [(set Ty:$vD, (IntID Ty:$vA, Ty:$vB, Ty:$vC))]>;
 
@@ -171,7 +171,7 @@ class VA1a_Int_Ty<bits<6> xo, string opc, Intrinsic IntID, ValueType Ty>
 // inputs doesn't match the type of the output.
 class VA1a_Int_Ty2<bits<6> xo, string opc, Intrinsic IntID, ValueType OutTy,
                    ValueType InTy>
-  : VAForm_1a<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, VRRC:$vC),
+  : VAForm_1a<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, vrrc:$vC),
               !strconcat(opc, " $vD, $vA, $vB, $vC"), VecFP,
                        [(set OutTy:$vD, (IntID InTy:$vA, InTy:$vB, InTy:$vC))]>;
 
@@ -179,14 +179,14 @@ class VA1a_Int_Ty2<bits<6> xo, string opc, Intrinsic IntID, ValueType OutTy,
 // input types and an output type.
 class VA1a_Int_Ty3<bits<6> xo, string opc, Intrinsic IntID, ValueType OutTy,
                    ValueType In1Ty, ValueType In2Ty>
-  : VAForm_1a<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, VRRC:$vC),
+  : VAForm_1a<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, vrrc:$vC),
               !strconcat(opc, " $vD, $vA, $vB, $vC"), VecFP,
                        [(set OutTy:$vD,
                          (IntID In1Ty:$vA, In1Ty:$vB, In2Ty:$vC))]>;
 
 // VX1_Int_Ty - A VXForm_1 intrinsic definition of specific type.
 class VX1_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty>
-  : VXForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+  : VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
              !strconcat(opc, " $vD, $vA, $vB"), VecFP,
              [(set Ty:$vD, (IntID Ty:$vA, Ty:$vB))]>;
 
@@ -194,7 +194,7 @@ class VX1_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty>
 // inputs doesn't match the type of the output.
 class VX1_Int_Ty2<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy,
                   ValueType InTy>
-  : VXForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+  : VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
              !strconcat(opc, " $vD, $vA, $vB"), VecFP,
              [(set OutTy:$vD, (IntID InTy:$vA, InTy:$vB))]>;
 
@@ -202,13 +202,13 @@ class VX1_Int_Ty2<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy,
 // input types and an output type.
 class VX1_Int_Ty3<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy,
                   ValueType In1Ty, ValueType In2Ty>
-  : VXForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+  : VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
              !strconcat(opc, " $vD, $vA, $vB"), VecFP,
              [(set OutTy:$vD, (IntID In1Ty:$vA, In2Ty:$vB))]>;
 
 // VX2_Int_SP - A VXForm_2 intrinsic definition of vector single-precision type.
 class VX2_Int_SP<bits<11> xo, string opc, Intrinsic IntID>
-  : VXForm_2<xo, (outs VRRC:$vD), (ins VRRC:$vB),
+  : VXForm_2<xo, (outs vrrc:$vD), (ins vrrc:$vB),
              !strconcat(opc, " $vD, $vB"), VecFP,
              [(set v4f32:$vD, (IntID v4f32:$vB))]>;
 
@@ -216,7 +216,7 @@ class VX2_Int_SP<bits<11> xo, string opc, Intrinsic IntID>
 // inputs doesn't match the type of the output.
 class VX2_Int_Ty2<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy,
                   ValueType InTy>
-  : VXForm_2<xo, (outs VRRC:$vD), (ins VRRC:$vB),
+  : VXForm_2<xo, (outs vrrc:$vD), (ins vrrc:$vB),
              !strconcat(opc, " $vD, $vB"), VecFP,
              [(set OutTy:$vD, (IntID InTy:$vB))]>;
 
@@ -234,93 +234,93 @@ def DSSALL   : DSS_Form<822, (outs),
                         (ins u5imm:$ONE, u5imm:$ZERO0,u5imm:$ZERO1,u5imm:$ZERO2),
                         "dssall", LdStLoad /*FIXME*/, []>;
 def DST      : DSS_Form<342, (outs),
-                        (ins u5imm:$ZERO, u5imm:$STRM, GPRC:$rA, GPRC:$rB),
+                        (ins u5imm:$ZERO, u5imm:$STRM, gprc:$rA, gprc:$rB),
                         "dst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
 def DSTT     : DSS_Form<342, (outs),
-                        (ins u5imm:$ONE, u5imm:$STRM, GPRC:$rA, GPRC:$rB),
+                        (ins u5imm:$ONE, u5imm:$STRM, gprc:$rA, gprc:$rB),
                         "dstt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
 def DSTST    : DSS_Form<374, (outs),
-                        (ins u5imm:$ZERO, u5imm:$STRM, GPRC:$rA, GPRC:$rB),
+                        (ins u5imm:$ZERO, u5imm:$STRM, gprc:$rA, gprc:$rB),
                         "dstst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
 def DSTSTT   : DSS_Form<374, (outs),
-                        (ins u5imm:$ONE, u5imm:$STRM, GPRC:$rA, GPRC:$rB),
+                        (ins u5imm:$ONE, u5imm:$STRM, gprc:$rA, gprc:$rB),
                         "dststt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
 
 def DST64    : DSS_Form<342, (outs),
-                        (ins u5imm:$ZERO, u5imm:$STRM, G8RC:$rA, GPRC:$rB),
+                        (ins u5imm:$ZERO, u5imm:$STRM, g8rc:$rA, gprc:$rB),
                         "dst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
 def DSTT64   : DSS_Form<342, (outs),
-                        (ins u5imm:$ONE, u5imm:$STRM, G8RC:$rA, GPRC:$rB),
+                        (ins u5imm:$ONE, u5imm:$STRM, g8rc:$rA, gprc:$rB),
                         "dstt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
 def DSTST64  : DSS_Form<374, (outs),
-                        (ins u5imm:$ZERO, u5imm:$STRM, G8RC:$rA, GPRC:$rB),
+                        (ins u5imm:$ZERO, u5imm:$STRM, g8rc:$rA, gprc:$rB),
                         "dstst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
 def DSTSTT64 : DSS_Form<374, (outs),
-                        (ins u5imm:$ONE, u5imm:$STRM, G8RC:$rA, GPRC:$rB),
+                        (ins u5imm:$ONE, u5imm:$STRM, g8rc:$rA, gprc:$rB),
                         "dststt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
 }
 
-def MFVSCR : VXForm_4<1540, (outs VRRC:$vD), (ins),
+def MFVSCR : VXForm_4<1540, (outs vrrc:$vD), (ins),
                       "mfvscr $vD", LdStStore,
                       [(set v8i16:$vD, (int_ppc_altivec_mfvscr))]>; 
-def MTVSCR : VXForm_5<1604, (outs), (ins VRRC:$vB),
+def MTVSCR : VXForm_5<1604, (outs), (ins vrrc:$vB),
                       "mtvscr $vB", LdStLoad,
                       [(int_ppc_altivec_mtvscr v4i32:$vB)]>; 
 
 let canFoldAsLoad = 1, PPC970_Unit = 2 in {  // Loads.
-def LVEBX: XForm_1<31,   7, (outs VRRC:$vD), (ins memrr:$src),
+def LVEBX: XForm_1<31,   7, (outs vrrc:$vD), (ins memrr:$src),
                    "lvebx $vD, $src", LdStLoad,
                    [(set v16i8:$vD, (int_ppc_altivec_lvebx xoaddr:$src))]>;
-def LVEHX: XForm_1<31,  39, (outs VRRC:$vD), (ins memrr:$src),
+def LVEHX: XForm_1<31,  39, (outs vrrc:$vD), (ins memrr:$src),
                    "lvehx $vD, $src", LdStLoad,
                    [(set v8i16:$vD, (int_ppc_altivec_lvehx xoaddr:$src))]>;
-def LVEWX: XForm_1<31,  71, (outs VRRC:$vD), (ins memrr:$src),
+def LVEWX: XForm_1<31,  71, (outs vrrc:$vD), (ins memrr:$src),
                    "lvewx $vD, $src", LdStLoad,
                    [(set v4i32:$vD, (int_ppc_altivec_lvewx xoaddr:$src))]>;
-def LVX  : XForm_1<31, 103, (outs VRRC:$vD), (ins memrr:$src),
+def LVX  : XForm_1<31, 103, (outs vrrc:$vD), (ins memrr:$src),
                    "lvx $vD, $src", LdStLoad,
                    [(set v4i32:$vD, (int_ppc_altivec_lvx xoaddr:$src))]>;
-def LVXL : XForm_1<31, 359, (outs VRRC:$vD), (ins memrr:$src),
+def LVXL : XForm_1<31, 359, (outs vrrc:$vD), (ins memrr:$src),
                    "lvxl $vD, $src", LdStLoad,
                    [(set v4i32:$vD, (int_ppc_altivec_lvxl xoaddr:$src))]>;
 }
 
-def LVSL : XForm_1<31,   6, (outs VRRC:$vD), (ins memrr:$src),
+def LVSL : XForm_1<31,   6, (outs vrrc:$vD), (ins memrr:$src),
                    "lvsl $vD, $src", LdStLoad,
                    [(set v16i8:$vD, (int_ppc_altivec_lvsl xoaddr:$src))]>,
                    PPC970_Unit_LSU;
-def LVSR : XForm_1<31,  38, (outs VRRC:$vD), (ins memrr:$src),
+def LVSR : XForm_1<31,  38, (outs vrrc:$vD), (ins memrr:$src),
                    "lvsr $vD, $src", LdStLoad,
                    [(set v16i8:$vD, (int_ppc_altivec_lvsr xoaddr:$src))]>,
                    PPC970_Unit_LSU;
 
 let PPC970_Unit = 2 in {   // Stores.
-def STVEBX: XForm_8<31, 135, (outs), (ins VRRC:$rS, memrr:$dst),
+def STVEBX: XForm_8<31, 135, (outs), (ins vrrc:$rS, memrr:$dst),
                    "stvebx $rS, $dst", LdStStore,
                    [(int_ppc_altivec_stvebx v16i8:$rS, xoaddr:$dst)]>;
-def STVEHX: XForm_8<31, 167, (outs), (ins VRRC:$rS, memrr:$dst),
+def STVEHX: XForm_8<31, 167, (outs), (ins vrrc:$rS, memrr:$dst),
                    "stvehx $rS, $dst", LdStStore,
                    [(int_ppc_altivec_stvehx v8i16:$rS, xoaddr:$dst)]>;
-def STVEWX: XForm_8<31, 199, (outs), (ins VRRC:$rS, memrr:$dst),
+def STVEWX: XForm_8<31, 199, (outs), (ins vrrc:$rS, memrr:$dst),
                    "stvewx $rS, $dst", LdStStore,
                    [(int_ppc_altivec_stvewx v4i32:$rS, xoaddr:$dst)]>;
-def STVX  : XForm_8<31, 231, (outs), (ins VRRC:$rS, memrr:$dst),
+def STVX  : XForm_8<31, 231, (outs), (ins vrrc:$rS, memrr:$dst),
                    "stvx $rS, $dst", LdStStore,
                    [(int_ppc_altivec_stvx v4i32:$rS, xoaddr:$dst)]>;
-def STVXL : XForm_8<31, 487, (outs), (ins VRRC:$rS, memrr:$dst),
+def STVXL : XForm_8<31, 487, (outs), (ins vrrc:$rS, memrr:$dst),
                    "stvxl $rS, $dst", LdStStore,
                    [(int_ppc_altivec_stvxl v4i32:$rS, xoaddr:$dst)]>;
 }
 
 let PPC970_Unit = 5 in {  // VALU Operations.
 // VA-Form instructions.  3-input AltiVec ops.
-def VMADDFP : VAForm_1<46, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vC, VRRC:$vB),
+def VMADDFP : VAForm_1<46, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vC, vrrc:$vB),
                        "vmaddfp $vD, $vA, $vC, $vB", VecFP,
                        [(set v4f32:$vD,
                         (fma v4f32:$vA, v4f32:$vC, v4f32:$vB))]>;
 
 // FIXME: The fma+fneg pattern won't match because fneg is not legal.
-def VNMSUBFP: VAForm_1<47, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vC, VRRC:$vB),
+def VNMSUBFP: VAForm_1<47, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vC, vrrc:$vB),
                        "vnmsubfp $vD, $vA, $vC, $vB", VecFP,
                        [(set v4f32:$vD, (fneg (fma v4f32:$vA, v4f32:$vC,
                                                   (fneg v4f32:$vB))))]>; 
@@ -335,23 +335,23 @@ def VPERM      : VA1a_Int_Ty3<43, "vperm", int_ppc_altivec_vperm,
 def VSEL       : VA1a_Int_Ty<42, "vsel",  int_ppc_altivec_vsel, v4i32>;
 
 // Shuffles.
-def VSLDOI  : VAForm_2<44, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, u5imm:$SH),
+def VSLDOI  : VAForm_2<44, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, u5imm:$SH),
                        "vsldoi $vD, $vA, $vB, $SH", VecFP,
                        [(set v16i8:$vD, 
                          (vsldoi_shuffle:$SH v16i8:$vA, v16i8:$vB))]>;
 
 // VX-Form instructions.  AltiVec arithmetic ops.
-def VADDFP : VXForm_1<10, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+def VADDFP : VXForm_1<10, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
                       "vaddfp $vD, $vA, $vB", VecFP,
                       [(set v4f32:$vD, (fadd v4f32:$vA, v4f32:$vB))]>;
                       
-def VADDUBM : VXForm_1<0, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+def VADDUBM : VXForm_1<0, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
                       "vaddubm $vD, $vA, $vB", VecGeneral,
                       [(set v16i8:$vD, (add v16i8:$vA, v16i8:$vB))]>;
-def VADDUHM : VXForm_1<64, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+def VADDUHM : VXForm_1<64, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
                       "vadduhm $vD, $vA, $vB", VecGeneral,
                       [(set v8i16:$vD, (add v8i16:$vA, v8i16:$vB))]>;
-def VADDUWM : VXForm_1<128, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+def VADDUWM : VXForm_1<128, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
                       "vadduwm $vD, $vA, $vB", VecGeneral,
                       [(set v4i32:$vD, (add v4i32:$vA, v4i32:$vB))]>;
                       
@@ -364,27 +364,27 @@ def VADDUHS : VX1_Int_Ty<576, "vadduhs", int_ppc_altivec_vadduhs, v8i16>;
 def VADDUWS : VX1_Int_Ty<640, "vadduws", int_ppc_altivec_vadduws, v4i32>;
                              
                              
-def VAND : VXForm_1<1028, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+def VAND : VXForm_1<1028, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
                     "vand $vD, $vA, $vB", VecFP,
                     [(set v4i32:$vD, (and v4i32:$vA, v4i32:$vB))]>;
-def VANDC : VXForm_1<1092, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+def VANDC : VXForm_1<1092, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
                      "vandc $vD, $vA, $vB", VecFP,
                      [(set v4i32:$vD, (and v4i32:$vA,
                                            (vnot_ppc v4i32:$vB)))]>;
 
-def VCFSX  : VXForm_1<842, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
+def VCFSX  : VXForm_1<842, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
                       "vcfsx $vD, $vB, $UIMM", VecFP,
                       [(set v4f32:$vD,
                              (int_ppc_altivec_vcfsx v4i32:$vB, imm:$UIMM))]>;
-def VCFUX  : VXForm_1<778, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
+def VCFUX  : VXForm_1<778, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
                       "vcfux $vD, $vB, $UIMM", VecFP,
                       [(set v4f32:$vD,
                              (int_ppc_altivec_vcfux v4i32:$vB, imm:$UIMM))]>;
-def VCTSXS : VXForm_1<970, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
+def VCTSXS : VXForm_1<970, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
                       "vctsxs $vD, $vB, $UIMM", VecFP,
                       [(set v4i32:$vD,
                              (int_ppc_altivec_vctsxs v4f32:$vB, imm:$UIMM))]>;
-def VCTUXS : VXForm_1<906, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
+def VCTUXS : VXForm_1<906, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
                       "vctuxs $vD, $vB, $UIMM", VecFP,
                       [(set v4i32:$vD,
                              (int_ppc_altivec_vctuxs v4f32:$vB, imm:$UIMM))]>;
@@ -393,19 +393,19 @@ def VCTUXS : VXForm_1<906, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
 // to integer (fp_to_sint/fp_to_uint) conversions and integer
 // to floating-point (sint_to_fp/uint_to_fp) conversions.
 let VA = 0 in {
-def VCFSX_0 : VXForm_1<842, (outs VRRC:$vD), (ins VRRC:$vB),
+def VCFSX_0 : VXForm_1<842, (outs vrrc:$vD), (ins vrrc:$vB),
                        "vcfsx $vD, $vB, 0", VecFP,
                        [(set v4f32:$vD,
                              (int_ppc_altivec_vcfsx v4i32:$vB, 0))]>;
-def VCTUXS_0 : VXForm_1<906, (outs VRRC:$vD), (ins VRRC:$vB),
+def VCTUXS_0 : VXForm_1<906, (outs vrrc:$vD), (ins vrrc:$vB),
                         "vctuxs $vD, $vB, 0", VecFP,
                         [(set v4i32:$vD,
                                (int_ppc_altivec_vctuxs v4f32:$vB, 0))]>;
-def VCFUX_0 : VXForm_1<778, (outs VRRC:$vD), (ins VRRC:$vB),
+def VCFUX_0 : VXForm_1<778, (outs vrrc:$vD), (ins vrrc:$vB),
                        "vcfux $vD, $vB, 0", VecFP,
                        [(set v4f32:$vD,
                                (int_ppc_altivec_vcfux v4i32:$vB, 0))]>;
-def VCTSXS_0 : VXForm_1<970, (outs VRRC:$vD), (ins VRRC:$vB),
+def VCTSXS_0 : VXForm_1<970, (outs vrrc:$vD), (ins vrrc:$vB),
                       "vctsxs $vD, $vB, 0", VecFP,
                       [(set v4i32:$vD,
                              (int_ppc_altivec_vctsxs v4f32:$vB, 0))]>;
@@ -435,22 +435,22 @@ def VMINUB : VX1_Int_Ty< 514, "vminub", int_ppc_altivec_vminub, v16i8>;
 def VMINUH : VX1_Int_Ty< 578, "vminuh", int_ppc_altivec_vminuh, v8i16>;
 def VMINUW : VX1_Int_Ty< 642, "vminuw", int_ppc_altivec_vminuw, v4i32>;
 
-def VMRGHB : VXForm_1< 12, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+def VMRGHB : VXForm_1< 12, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
                       "vmrghb $vD, $vA, $vB", VecFP,
                       [(set v16i8:$vD, (vmrghb_shuffle v16i8:$vA, v16i8:$vB))]>;
-def VMRGHH : VXForm_1< 76, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+def VMRGHH : VXForm_1< 76, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
                       "vmrghh $vD, $vA, $vB", VecFP,
                       [(set v16i8:$vD, (vmrghh_shuffle v16i8:$vA, v16i8:$vB))]>;
-def VMRGHW : VXForm_1<140, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+def VMRGHW : VXForm_1<140, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
                       "vmrghw $vD, $vA, $vB", VecFP,
                       [(set v16i8:$vD, (vmrghw_shuffle v16i8:$vA, v16i8:$vB))]>;
-def VMRGLB : VXForm_1<268, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+def VMRGLB : VXForm_1<268, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
                       "vmrglb $vD, $vA, $vB", VecFP,
                       [(set v16i8:$vD, (vmrglb_shuffle v16i8:$vA, v16i8:$vB))]>;
-def VMRGLH : VXForm_1<332, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+def VMRGLH : VXForm_1<332, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
                       "vmrglh $vD, $vA, $vB", VecFP,
                       [(set v16i8:$vD, (vmrglh_shuffle v16i8:$vA, v16i8:$vB))]>;
-def VMRGLW : VXForm_1<396, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+def VMRGLW : VXForm_1<396, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
                       "vmrglw $vD, $vA, $vB", VecFP,
                       [(set v16i8:$vD, (vmrglw_shuffle v16i8:$vA, v16i8:$vB))]>;
 
@@ -491,18 +491,18 @@ def VRFIP     : VX2_Int_SP<650, "vrfip",     int_ppc_altivec_vrfip>;
 def VRFIZ     : VX2_Int_SP<586, "vrfiz",     int_ppc_altivec_vrfiz>;
 def VRSQRTEFP : VX2_Int_SP<330, "vrsqrtefp", int_ppc_altivec_vrsqrtefp>;
 
-def VSUBCUW : VX1_Int_Ty<74, "vsubcuw", int_ppc_altivec_vsubcuw, v4i32>;
+def VSUBCUW : VX1_Int_Ty<1408, "vsubcuw", int_ppc_altivec_vsubcuw, v4i32>;
 
-def VSUBFP  : VXForm_1<74, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+def VSUBFP  : VXForm_1<74, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
                       "vsubfp $vD, $vA, $vB", VecGeneral,
                       [(set v4f32:$vD, (fsub v4f32:$vA, v4f32:$vB))]>;
-def VSUBUBM : VXForm_1<1024, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+def VSUBUBM : VXForm_1<1024, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
                       "vsububm $vD, $vA, $vB", VecGeneral,
                       [(set v16i8:$vD, (sub v16i8:$vA, v16i8:$vB))]>;
-def VSUBUHM : VXForm_1<1088, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+def VSUBUHM : VXForm_1<1088, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
                       "vsubuhm $vD, $vA, $vB", VecGeneral,
                       [(set v8i16:$vD, (sub v8i16:$vA, v8i16:$vB))]>;
-def VSUBUWM : VXForm_1<1152, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+def VSUBUWM : VXForm_1<1152, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
                       "vsubuwm $vD, $vA, $vB", VecGeneral,
                       [(set v4i32:$vD, (sub v4i32:$vA, v4i32:$vB))]>;
                       
@@ -516,21 +516,21 @@ def VSUBUWS : VX1_Int_Ty<1664, "vsubuws" , int_ppc_altivec_vsubuws, v4i32>;
 def VSUMSWS : VX1_Int_Ty<1928, "vsumsws" , int_ppc_altivec_vsumsws, v4i32>;
 def VSUM2SWS: VX1_Int_Ty<1672, "vsum2sws", int_ppc_altivec_vsum2sws, v4i32>;
 
-def VSUM4SBS: VX1_Int_Ty3<1672, "vsum4sbs", int_ppc_altivec_vsum4sbs,
+def VSUM4SBS: VX1_Int_Ty3<1800, "vsum4sbs", int_ppc_altivec_vsum4sbs,
                           v4i32, v16i8, v4i32>;
 def VSUM4SHS: VX1_Int_Ty3<1608, "vsum4shs", int_ppc_altivec_vsum4shs,
                           v4i32, v8i16, v4i32>;
 def VSUM4UBS: VX1_Int_Ty3<1544, "vsum4ubs", int_ppc_altivec_vsum4ubs,
                           v4i32, v16i8, v4i32>;
 
-def VNOR : VXForm_1<1284, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+def VNOR : VXForm_1<1284, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
                     "vnor $vD, $vA, $vB", VecFP,
                     [(set v4i32:$vD, (vnot_ppc (or v4i32:$vA,
                                                    v4i32:$vB)))]>;
-def VOR : VXForm_1<1156, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+def VOR : VXForm_1<1156, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
                       "vor $vD, $vA, $vB", VecFP,
                       [(set v4i32:$vD, (or v4i32:$vA, v4i32:$vB))]>;
-def VXOR : VXForm_1<1220, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+def VXOR : VXForm_1<1220, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
                       "vxor $vD, $vA, $vB", VecFP,
                       [(set v4i32:$vD, (xor v4i32:$vA, v4i32:$vB))]>;
 
@@ -545,15 +545,15 @@ def VSLB   : VX1_Int_Ty< 260, "vslb", int_ppc_altivec_vslb, v16i8>;
 def VSLH   : VX1_Int_Ty< 324, "vslh", int_ppc_altivec_vslh, v8i16>;
 def VSLW   : VX1_Int_Ty< 388, "vslw", int_ppc_altivec_vslw, v4i32>;
 
-def VSPLTB : VXForm_1<524, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
+def VSPLTB : VXForm_1<524, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
                       "vspltb $vD, $vB, $UIMM", VecPerm,
                       [(set v16i8:$vD,
                         (vspltb_shuffle:$UIMM v16i8:$vB, (undef)))]>;
-def VSPLTH : VXForm_1<588, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
+def VSPLTH : VXForm_1<588, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
                       "vsplth $vD, $vB, $UIMM", VecPerm,
                       [(set v16i8:$vD,
                         (vsplth_shuffle:$UIMM v16i8:$vB, (undef)))]>;
-def VSPLTW : VXForm_1<652, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
+def VSPLTW : VXForm_1<652, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
                       "vspltw $vD, $vB, $UIMM", VecPerm,
                       [(set v16i8:$vD, 
                         (vspltw_shuffle:$UIMM v16i8:$vB, (undef)))]>;
@@ -569,13 +569,13 @@ def VSRH   : VX1_Int_Ty< 580, "vsrh" , int_ppc_altivec_vsrh , v8i16>;
 def VSRW   : VX1_Int_Ty< 644, "vsrw" , int_ppc_altivec_vsrw , v4i32>;
 
 
-def VSPLTISB : VXForm_3<780, (outs VRRC:$vD), (ins s5imm:$SIMM),
+def VSPLTISB : VXForm_3<780, (outs vrrc:$vD), (ins s5imm:$SIMM),
                        "vspltisb $vD, $SIMM", VecPerm,
                        [(set v16i8:$vD, (v16i8 vecspltisb:$SIMM))]>;
-def VSPLTISH : VXForm_3<844, (outs VRRC:$vD), (ins s5imm:$SIMM),
+def VSPLTISH : VXForm_3<844, (outs vrrc:$vD), (ins s5imm:$SIMM),
                        "vspltish $vD, $SIMM", VecPerm,
                        [(set v8i16:$vD, (v8i16 vecspltish:$SIMM))]>;
-def VSPLTISW : VXForm_3<908, (outs VRRC:$vD), (ins s5imm:$SIMM),
+def VSPLTISW : VXForm_3<908, (outs vrrc:$vD), (ins s5imm:$SIMM),
                        "vspltisw $vD, $SIMM", VecPerm,
                        [(set v4i32:$vD, (v4i32 vecspltisw:$SIMM))]>;
 
@@ -590,13 +590,13 @@ def VPKSWSS : VX1_Int_Ty2<462, "vpkswss", int_ppc_altivec_vpkswss,
                           v16i8, v4i32>;
 def VPKSWUS : VX1_Int_Ty2<334, "vpkswus", int_ppc_altivec_vpkswus,
                           v8i16, v4i32>;
-def VPKUHUM : VXForm_1<14, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+def VPKUHUM : VXForm_1<14, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
                        "vpkuhum $vD, $vA, $vB", VecFP,
                        [(set v16i8:$vD,
                          (vpkuhum_shuffle v16i8:$vA, v16i8:$vB))]>;
 def VPKUHUS : VX1_Int_Ty2<142, "vpkuhus", int_ppc_altivec_vpkuhus,
                           v16i8, v8i16>;
-def VPKUWUM : VXForm_1<78, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
+def VPKUWUM : VXForm_1<78, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
                        "vpkuwum $vD, $vA, $vB", VecFP,
                        [(set v16i8:$vD,
                          (vpkuwum_shuffle v16i8:$vA, v16i8:$vB))]>;
@@ -621,10 +621,10 @@ def VUPKLSH : VX2_Int_Ty2<718, "vupklsh", int_ppc_altivec_vupklsh,
 // Altivec Comparisons.
 
 class VCMP<bits<10> xo, string asmstr, ValueType Ty>
-  : VXRForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),asmstr,VecFPCompare,
+  : VXRForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),asmstr,VecFPCompare,
               [(set Ty:$vD, (Ty (PPCvcmp Ty:$vA, Ty:$vB, xo)))]>;
 class VCMPo<bits<10> xo, string asmstr, ValueType Ty>
-  : VXRForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),asmstr,VecFPCompare,
+  : VXRForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),asmstr,VecFPCompare,
               [(set Ty:$vD, (Ty (PPCvcmp_o Ty:$vA, Ty:$vB, xo)))]> {
   let Defs = [CR6];
   let RC = 1;
@@ -665,11 +665,11 @@ def VCMPGTUW  : VCMP <646, "vcmpgtuw $vD, $vA, $vB" , v4i32>;
 def VCMPGTUWo : VCMPo<646, "vcmpgtuw. $vD, $vA, $vB", v4i32>;
                       
 let isCodeGenOnly = 1 in
-def V_SET0 : VXForm_setzero<1220, (outs VRRC:$vD), (ins),
+def V_SET0 : VXForm_setzero<1220, (outs vrrc:$vD), (ins),
                       "vxor $vD, $vD, $vD", VecFP,
                       [(set v4i32:$vD, (v4i32 immAllZerosV))]>;
 let IMM=-1 in {
-def V_SETALLONES : VXForm_3<908, (outs VRRC:$vD), (ins),
+def V_SETALLONES : VXForm_3<908, (outs vrrc:$vD), (ins),
                       "vspltisw $vD, -1", VecFP,
                       [(set v4i32:$vD, (v4i32 immAllOnesV))]>;
 }
diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td
index 400b7e3..b6f4e85 100644
--- a/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/lib/Target/PowerPC/PPCInstrFormats.td
@@ -35,6 +35,15 @@ class I<bits<6> opcode, dag OOL, dag IOL, string asmstr, InstrItinClass itin>
   let TSFlags{1}   = PPC970_Single;
   let TSFlags{2}   = PPC970_Cracked;
   let TSFlags{5-3} = PPC970_Unit;
+
+  // Fields used for relation models.
+  string BaseName = "";
+
+  // For cases where multiple instruction definitions really represent the
+  // same underlying instruction but with one definition for 64-bit arguments
+  // and one for 32-bit arguments, this bit breaks the degeneracy between
+  // the two forms and allows TableGen to generate mapping tables.
+  bit Interpretation64Bit = 0;
 }
 
 class PPC970_DGroup_First   { bits<1> PPC970_First = 1;  }
@@ -80,6 +89,10 @@ class I2<bits<6> opcode1, bits<6> opcode2, dag OOL, dag IOL, string asmstr,
   let TSFlags{1}   = PPC970_Single;
   let TSFlags{2}   = PPC970_Cracked;
   let TSFlags{5-3} = PPC970_Unit;
+
+  // Fields used for relation models.
+  string BaseName = "";
+  bit Interpretation64Bit = 0;
 }
 
 // 1.7.1 I-Form
@@ -177,7 +190,12 @@ class DForm_1a<bits<6> opcode, dag OOL, dag IOL, string asmstr,
 
 class DForm_2<bits<6> opcode, dag OOL, dag IOL, string asmstr,
               InstrItinClass itin, list<dag> pattern>
-  : DForm_base<opcode, OOL, IOL, asmstr, itin, pattern>;
+  : DForm_base<opcode, OOL, IOL, asmstr, itin, pattern> {
+
+  // Even though ADDICo does not really have an RC bit, provide
+  // the declaration of one here so that isDOT has something to set.
+  bit RC = 0;
+}
 
 class DForm_2_r0<bits<6> opcode, dag OOL, dag IOL, string asmstr,
                  InstrItinClass itin, list<dag> pattern>
@@ -347,6 +365,12 @@ class XForm_1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
               InstrItinClass itin, list<dag> pattern> 
   : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern>;
 
+class XForm_1a<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+              InstrItinClass itin, list<dag> pattern>
+  : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+  let RST = 0;
+}
+
 class XForm_6<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
               InstrItinClass itin, list<dag> pattern> 
   : XForm_base_r3xo_swapped<opcode, xo, OOL, IOL, asmstr, itin> {
@@ -565,9 +589,9 @@ class XLForm_2_br<bits<6> opcode, bits<10> xo, bit lk,
   bits<7> BIBO;  // 2 bits of BI and 5 bits of BO.
   bits<3>  CR;
   
-  let BO = BIBO{2-6};
-  let BI{0-1} = BIBO{0-1};
-  let BI{2-4} = CR;
+  let BO = BIBO{4-0};
+  let BI{0-1} = BIBO{5-6};
+  let BI{2-4} = CR{0-2};
   let BH = 0;
 }
 
@@ -837,6 +861,25 @@ class MDForm_1<bits<6> opcode, bits<3> xo, dag OOL, dag IOL, string asmstr,
   let Inst{31}    = RC;
 }
 
+class MDSForm_1<bits<6> opcode, bits<4> xo, dag OOL, dag IOL, string asmstr,
+                InstrItinClass itin, list<dag> pattern>
+    : I<opcode, OOL, IOL, asmstr, itin> {
+  bits<5> RA;
+  bits<5> RS;
+  bits<5> RB;
+  bits<6> MBE;
+
+  let Pattern = pattern;
+
+  bit RC = 0;    // set by isDOT
+
+  let Inst{6-10}  = RS;
+  let Inst{11-15} = RA;
+  let Inst{16-20} = RB;
+  let Inst{21-26} = MBE{4,3,2,1,0,5};
+  let Inst{27-30} = xo;
+  let Inst{31}    = RC;
+}
 
 
 // E-1 VA-Form
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index 69c54ed..1fb17eb 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -18,8 +18,10 @@
 #include "PPCInstrBuilder.h"
 #include "PPCMachineFunctionInfo.h"
 #include "PPCTargetMachine.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -30,6 +32,7 @@
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/raw_ostream.h"
 
+#define GET_INSTRMAP_INFO
 #define GET_INSTRINFO_CTOR
 #include "PPCGenInstrInfo.inc"
 
@@ -39,6 +42,9 @@ static cl::
 opt<bool> DisableCTRLoopAnal("disable-ppc-ctrloop-analysis", cl::Hidden,
             cl::desc("Disable analysis for CTR loops"));
 
+static cl::opt<bool> DisableCmpOpt("disable-ppc-cmp-opt",
+cl::desc("Disable compare instruction optimization"), cl::Hidden);
+
 PPCInstrInfo::PPCInstrInfo(PPCTargetMachine &tm)
   : PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP),
     TM(tm), RI(*TM.getSubtargetImpl(), *this) {}
@@ -147,7 +153,8 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
   MachineFunction &MF = *MI->getParent()->getParent();
 
   // Normal instructions can be commuted the obvious way.
-  if (MI->getOpcode() != PPC::RLWIMI)
+  if (MI->getOpcode() != PPC::RLWIMI &&
+      MI->getOpcode() != PPC::RLWIMIo)
     return TargetInstrInfo::commuteInstruction(MI, NewMI);
 
   // Cannot commute if it has a non-zero rotate count.
@@ -417,6 +424,105 @@ PPCInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
   return 2;
 }
 
+// Select analysis.
+bool PPCInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
+                const SmallVectorImpl<MachineOperand> &Cond,
+                unsigned TrueReg, unsigned FalseReg,
+                int &CondCycles, int &TrueCycles, int &FalseCycles) const {
+  if (!TM.getSubtargetImpl()->hasISEL())
+    return false;
+
+  if (Cond.size() != 2)
+    return false;
+
+  // If this is really a bdnz-like condition, then it cannot be turned into a
+  // select.
+  if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)
+    return false;
+
+  // Check register classes.
+  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+  const TargetRegisterClass *RC =
+    RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
+  if (!RC)
+    return false;
+
+  // isel is for regular integer GPRs only.
+  if (!PPC::GPRCRegClass.hasSubClassEq(RC) &&
+      !PPC::G8RCRegClass.hasSubClassEq(RC))
+    return false;
+
+  // FIXME: These numbers are for the A2, how well they work for other cores is
+  // an open question. On the A2, the isel instruction has a 2-cycle latency
+  // but single-cycle throughput. These numbers are used in combination with
+  // the MispredictPenalty setting from the active SchedMachineModel.
+  CondCycles = 1;
+  TrueCycles = 1;
+  FalseCycles = 1;
+
+  return true;
+}
+
+void PPCInstrInfo::insertSelect(MachineBasicBlock &MBB,
+                                MachineBasicBlock::iterator MI, DebugLoc dl,
+                                unsigned DestReg,
+                                const SmallVectorImpl<MachineOperand> &Cond,
+                                unsigned TrueReg, unsigned FalseReg) const {
+  assert(Cond.size() == 2 &&
+         "PPC branch conditions have two components!");
+
+  assert(TM.getSubtargetImpl()->hasISEL() &&
+         "Cannot insert select on target without ISEL support");
+
+  // Get the register classes.
+  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+  const TargetRegisterClass *RC =
+    RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
+  assert(RC && "TrueReg and FalseReg must have overlapping register classes");
+  assert((PPC::GPRCRegClass.hasSubClassEq(RC) ||
+          PPC::G8RCRegClass.hasSubClassEq(RC)) &&
+         "isel is for regular integer GPRs only");
+
+  unsigned OpCode =
+    PPC::GPRCRegClass.hasSubClassEq(RC) ? PPC::ISEL : PPC::ISEL8;
+  unsigned SelectPred = Cond[0].getImm();
+
+  unsigned SubIdx;
+  bool SwapOps;
+  switch (SelectPred) {
+  default: llvm_unreachable("invalid predicate for isel");
+  case PPC::PRED_EQ: SubIdx = PPC::sub_eq; SwapOps = false; break;
+  case PPC::PRED_NE: SubIdx = PPC::sub_eq; SwapOps = true; break;
+  case PPC::PRED_LT: SubIdx = PPC::sub_lt; SwapOps = false; break;
+  case PPC::PRED_GE: SubIdx = PPC::sub_lt; SwapOps = true; break;
+  case PPC::PRED_GT: SubIdx = PPC::sub_gt; SwapOps = false; break;
+  case PPC::PRED_LE: SubIdx = PPC::sub_gt; SwapOps = true; break;
+  case PPC::PRED_UN: SubIdx = PPC::sub_un; SwapOps = false; break;
+  case PPC::PRED_NU: SubIdx = PPC::sub_un; SwapOps = true; break;
+  }
+
+  unsigned FirstReg =  SwapOps ? FalseReg : TrueReg,
+           SecondReg = SwapOps ? TrueReg  : FalseReg;
+
+  // The first input register of isel cannot be r0. If it is a member
+  // of a register class that can be r0, then copy it first (the
+  // register allocator should eliminate the copy).
+  if (MRI.getRegClass(FirstReg)->contains(PPC::R0) ||
+      MRI.getRegClass(FirstReg)->contains(PPC::X0)) {
+    const TargetRegisterClass *FirstRC =
+      MRI.getRegClass(FirstReg)->contains(PPC::X0) ?
+        &PPC::G8RC_NOX0RegClass : &PPC::GPRC_NOR0RegClass;
+    unsigned OldFirstReg = FirstReg;
+    FirstReg = MRI.createVirtualRegister(FirstRC);
+    BuildMI(MBB, MI, dl, get(TargetOpcode::COPY), FirstReg)
+      .addReg(OldFirstReg);
+  }
+
+  BuildMI(MBB, MI, dl, get(OpCode), DestReg)
+    .addReg(FirstReg).addReg(SecondReg)
+    .addReg(Cond[1].getReg(), 0, SubIdx);
+}
+
 void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator I, DebugLoc DL,
                                unsigned DestReg, unsigned SrcReg,
@@ -707,6 +813,555 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
   return false;
 }
 
+bool PPCInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
+                             unsigned Reg, MachineRegisterInfo *MRI) const {
+  // For some instructions, it is legal to fold ZERO into the RA register field.
+  // A zero immediate should always be loaded with a single li.
+  unsigned DefOpc = DefMI->getOpcode();
+  if (DefOpc != PPC::LI && DefOpc != PPC::LI8)
+    return false;
+  if (!DefMI->getOperand(1).isImm())
+    return false;
+  if (DefMI->getOperand(1).getImm() != 0)
+    return false;
+
+  // Note that we cannot here invert the arguments of an isel in order to fold
+  // a ZERO into what is presented as the second argument. All we have here
+  // is the condition bit, and that might come from a CR-logical bit operation.
+
+  const MCInstrDesc &UseMCID = UseMI->getDesc();
+
+  // Only fold into real machine instructions.
+  if (UseMCID.isPseudo())
+    return false;
+
+  unsigned UseIdx;
+  for (UseIdx = 0; UseIdx < UseMI->getNumOperands(); ++UseIdx)
+    if (UseMI->getOperand(UseIdx).isReg() &&
+        UseMI->getOperand(UseIdx).getReg() == Reg)
+      break;
+
+  assert(UseIdx < UseMI->getNumOperands() && "Cannot find Reg in UseMI");
+  assert(UseIdx < UseMCID.getNumOperands() && "No operand description for Reg");
+
+  const MCOperandInfo *UseInfo = &UseMCID.OpInfo[UseIdx];
+
+  // We can fold the zero if this register requires a GPRC_NOR0/G8RC_NOX0
+  // register (which might also be specified as a pointer class kind).
+  if (UseInfo->isLookupPtrRegClass()) {
+    if (UseInfo->RegClass /* Kind */ != 1)
+      return false;
+  } else {
+    if (UseInfo->RegClass != PPC::GPRC_NOR0RegClassID &&
+        UseInfo->RegClass != PPC::G8RC_NOX0RegClassID)
+      return false;
+  }
+
+  // Make sure this is not tied to an output register (or otherwise
+  // constrained). This is true for ST?UX registers, for example, which
+  // are tied to their output registers.
+  if (UseInfo->Constraints != 0)
+    return false;
+
+  unsigned ZeroReg;
+  if (UseInfo->isLookupPtrRegClass()) {
+    bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
+    ZeroReg = isPPC64 ? PPC::ZERO8 : PPC::ZERO;
+  } else {
+    ZeroReg = UseInfo->RegClass == PPC::G8RC_NOX0RegClassID ?
+              PPC::ZERO8 : PPC::ZERO;
+  }
+
+  bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
+  UseMI->getOperand(UseIdx).setReg(ZeroReg);
+
+  if (DeleteDef)
+    DefMI->eraseFromParent();
+
+  return true;
+}
+
+static bool MBBDefinesCTR(MachineBasicBlock &MBB) {
+  for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
+       I != IE; ++I)
+    if (I->definesRegister(PPC::CTR) || I->definesRegister(PPC::CTR8))
+      return true;
+  return false;
+}
+
+// We should make sure that, if we're going to predicate both sides of a
+// condition (a diamond), that both sides don't define the counter register. We
+// can predicate counter-decrement-based branches, but while that predicates
+// the branching, it does not predicate the counter decrement. If we tried to
+// merge the diamond into one predicated block, we'd decrement the counter
+// twice.
+bool PPCInstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
+                     unsigned NumT, unsigned ExtraT,
+                     MachineBasicBlock &FMBB,
+                     unsigned NumF, unsigned ExtraF,
+                     const BranchProbability &Probability) const {
+  return !(MBBDefinesCTR(TMBB) && MBBDefinesCTR(FMBB));
+}
+
+
+bool PPCInstrInfo::isPredicated(const MachineInstr *MI) const {
+  // The predicated branches are identified by their type, not really by the
+  // explicit presence of a predicate. Furthermore, some of them can be
+  // predicated more than once. Because if conversion won't try to predicate
+  // any instruction which already claims to be predicated (by returning true
+  // here), always return false. In doing so, we let isPredicable() be the
+  // final word on whether or not the instruction can be (further) predicated.
+
+  return false;
+}
+
+bool PPCInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
+  if (!MI->isTerminator())
+    return false;
+
+  // Conditional branch is a special case.
+  if (MI->isBranch() && !MI->isBarrier())
+    return true;
+
+  return !isPredicated(MI);
+}
+
+bool PPCInstrInfo::PredicateInstruction(
+                     MachineInstr *MI,
+                     const SmallVectorImpl<MachineOperand> &Pred) const {
+  unsigned OpC = MI->getOpcode();
+  if (OpC == PPC::BLR) {
+    if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) {
+      bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
+      MI->setDesc(get(Pred[0].getImm() ?
+                      (isPPC64 ? PPC::BDNZLR8 : PPC::BDNZLR) :
+                      (isPPC64 ? PPC::BDZLR8  : PPC::BDZLR)));
+    } else {
+      MI->setDesc(get(PPC::BCLR));
+      MachineInstrBuilder(*MI->getParent()->getParent(), MI)
+        .addImm(Pred[0].getImm())
+        .addReg(Pred[1].getReg());
+    }
+
+    return true;
+  } else if (OpC == PPC::B) {
+    if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) {
+      bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
+      MI->setDesc(get(Pred[0].getImm() ?
+                      (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
+                      (isPPC64 ? PPC::BDZ8  : PPC::BDZ)));
+    } else {
+      MachineBasicBlock *MBB = MI->getOperand(0).getMBB();
+      MI->RemoveOperand(0);
+
+      MI->setDesc(get(PPC::BCC));
+      MachineInstrBuilder(*MI->getParent()->getParent(), MI)
+        .addImm(Pred[0].getImm())
+        .addReg(Pred[1].getReg())
+        .addMBB(MBB);
+    }
+
+    return true;
+  } else if (OpC == PPC::BCTR  || OpC == PPC::BCTR8 ||
+             OpC == PPC::BCTRL || OpC == PPC::BCTRL8) {
+    if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR)
+      llvm_unreachable("Cannot predicate bctr[l] on the ctr register");
+
+    bool setLR = OpC == PPC::BCTRL || OpC == PPC::BCTRL8;
+    bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
+    MI->setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8 : PPC::BCCTR8) :
+                              (setLR ? PPC::BCCTRL  : PPC::BCCTR)));
+    MachineInstrBuilder(*MI->getParent()->getParent(), MI)
+      .addImm(Pred[0].getImm())
+      .addReg(Pred[1].getReg());
+    return true;
+  }
+
+  return false;
+}
+
+bool PPCInstrInfo::SubsumesPredicate(
+                     const SmallVectorImpl<MachineOperand> &Pred1,
+                     const SmallVectorImpl<MachineOperand> &Pred2) const {
+  assert(Pred1.size() == 2 && "Invalid PPC first predicate");
+  assert(Pred2.size() == 2 && "Invalid PPC second predicate");
+
+  if (Pred1[1].getReg() == PPC::CTR8 || Pred1[1].getReg() == PPC::CTR)
+    return false;
+  if (Pred2[1].getReg() == PPC::CTR8 || Pred2[1].getReg() == PPC::CTR)
+    return false;
+
+  PPC::Predicate P1 = (PPC::Predicate) Pred1[0].getImm();
+  PPC::Predicate P2 = (PPC::Predicate) Pred2[0].getImm();
+
+  if (P1 == P2)
+    return true;
+
+  // Does P1 subsume P2, e.g. GE subsumes GT.
+  if (P1 == PPC::PRED_LE &&
+      (P2 == PPC::PRED_LT || P2 == PPC::PRED_EQ))
+    return true;
+  if (P1 == PPC::PRED_GE &&
+      (P2 == PPC::PRED_GT || P2 == PPC::PRED_EQ))
+    return true;
+
+  return false;
+}
+
+bool PPCInstrInfo::DefinesPredicate(MachineInstr *MI,
+                                    std::vector<MachineOperand> &Pred) const {
+  // Note: At the present time, the contents of Pred from this function is
+  // unused by IfConversion. This implementation follows ARM by pushing the
+  // CR-defining operand. Because the 'DZ' and 'DNZ' count as types of
+  // predicate, instructions defining CTR or CTR8 are also included as
+  // predicate-defining instructions.
+
+  const TargetRegisterClass *RCs[] =
+    { &PPC::CRRCRegClass, &PPC::CRBITRCRegClass,
+      &PPC::CTRRCRegClass, &PPC::CTRRC8RegClass };
+
+  bool Found = false;
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    for (unsigned c = 0; c < array_lengthof(RCs) && !Found; ++c) {
+      const TargetRegisterClass *RC = RCs[c];
+      if (MO.isReg()) {
+        if (MO.isDef() && RC->contains(MO.getReg())) {
+          Pred.push_back(MO);
+          Found = true;
+        }
+      } else if (MO.isRegMask()) {
+        for (TargetRegisterClass::iterator I = RC->begin(),
+             IE = RC->end(); I != IE; ++I)
+          if (MO.clobbersPhysReg(*I)) {
+            Pred.push_back(MO);
+            Found = true;
+          }
+      }
+    }
+  }
+
+  return Found;
+}
+
+bool PPCInstrInfo::isPredicable(MachineInstr *MI) const {
+  unsigned OpC = MI->getOpcode();
+  switch (OpC) {
+  default:
+    return false;
+  case PPC::B:
+  case PPC::BLR:
+  case PPC::BCTR:
+  case PPC::BCTR8:
+  case PPC::BCTRL:
+  case PPC::BCTRL8:
+    return true;
+  }
+}
+
+bool PPCInstrInfo::analyzeCompare(const MachineInstr *MI,
+                                  unsigned &SrcReg, unsigned &SrcReg2,
+                                  int &Mask, int &Value) const {
+  switch (MI->getOpcode()) {
+  default: return false;
+  case PPC::CMPWI:
+  case PPC::CMPLWI:
+  case PPC::CMPDI:
+  case PPC::CMPLDI:
+    SrcReg = MI->getOperand(1).getReg();
+    SrcReg2 = 0;
+    Value = MI->getOperand(2).getImm();
+    Mask = 0xFFFF;
+    return true;
+  case PPC::CMPW:
+  case PPC::CMPLW:
+  case PPC::CMPD:
+  case PPC::CMPLD:
+  case PPC::FCMPUS:
+  case PPC::FCMPUD:
+    SrcReg = MI->getOperand(1).getReg();
+    SrcReg2 = MI->getOperand(2).getReg();
+    Value = 0;  // Register compares have no immediate, but Mask and Value are
+    Mask = 0;   // still written: optimizeCompareInstr takes both by value.
+    return true;
+  }
+}
+
+bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr,
+                                        unsigned SrcReg, unsigned SrcReg2,
+                                        int Mask, int Value,
+                                        const MachineRegisterInfo *MRI) const {
+  if (DisableCmpOpt)
+    return false;
+
+  int OpC = CmpInstr->getOpcode();
+  unsigned CRReg = CmpInstr->getOperand(0).getReg();
+
+  // FP record forms set CR1 based on the exception status bits, not a
+  // comparison with zero.
+  if (OpC == PPC::FCMPUS || OpC == PPC::FCMPUD)
+    return false;
+
+  // The record forms set the condition register based on a signed comparison
+  // with zero (so says the ISA manual). This is not as straightforward as it
+  // seems, however, because this is always a 64-bit comparison on PPC64, even
+  // for instructions that are 32-bit in nature (like slw for example).
+  // So, on PPC32, for unsigned comparisons, we can use the record forms only
+  // for equality checks (as those don't depend on the sign). On PPC64,
+  // we are restricted to equality for unsigned 64-bit comparisons and for
+  // signed 32-bit comparisons the applicability is more restricted.
+  bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
+  bool is32BitSignedCompare   = OpC ==  PPC::CMPWI || OpC == PPC::CMPW;
+  bool is32BitUnsignedCompare = OpC == PPC::CMPLWI || OpC == PPC::CMPLW;
+  bool is64BitUnsignedCompare = OpC == PPC::CMPLDI || OpC == PPC::CMPLD;
+
+  // Get the unique definition of SrcReg.
+  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
+  if (!MI) return false;
+  int MIOpC = MI->getOpcode();
+
+  bool equalityOnly = false;
+  bool noSub = false;
+  if (isPPC64) {
+    if (is32BitSignedCompare) {
+      // We can perform this optimization only if MI is sign-extending.
+      if (MIOpC == PPC::SRAW  || MIOpC == PPC::SRAWo ||
+          MIOpC == PPC::SRAWI || MIOpC == PPC::SRAWIo ||
+          MIOpC == PPC::EXTSB || MIOpC == PPC::EXTSBo ||
+          MIOpC == PPC::EXTSH || MIOpC == PPC::EXTSHo ||
+          MIOpC == PPC::EXTSW || MIOpC == PPC::EXTSWo) {
+        noSub = true;
+      } else
+        return false;
+    } else if (is32BitUnsignedCompare) {
+      // We can perform this optimization, equality only, if MI is
+      // zero-extending.
+      if (MIOpC == PPC::CNTLZW || MIOpC == PPC::CNTLZWo ||
+          MIOpC == PPC::SLW    || MIOpC == PPC::SLWo ||
+          MIOpC == PPC::SRW    || MIOpC == PPC::SRWo) {
+        noSub = true;
+        equalityOnly = true;
+      } else
+        return false;
+    } else
+      equalityOnly = is64BitUnsignedCompare;
+  } else
+    equalityOnly = is32BitUnsignedCompare;
+
+  if (equalityOnly) {
+    // We need to check the uses of the condition register in order to reject
+    // non-equality comparisons.
+    for (MachineRegisterInfo::use_iterator I = MRI->use_begin(CRReg),
+         IE = MRI->use_end(); I != IE; ++I) {
+      MachineInstr *UseMI = &*I;
+      if (UseMI->getOpcode() == PPC::BCC) {
+        unsigned Pred = UseMI->getOperand(0).getImm();
+        if (Pred == PPC::PRED_EQ || Pred == PPC::PRED_NE)
+          continue;
+
+        return false;
+      } else if (UseMI->getOpcode() == PPC::ISEL ||
+                 UseMI->getOpcode() == PPC::ISEL8) {
+        unsigned SubIdx = UseMI->getOperand(3).getSubReg();
+        if (SubIdx == PPC::sub_eq)
+          continue;
+
+        return false;
+      } else
+        return false;
+    }
+  }
+
+  // Get ready to iterate backward from CmpInstr.
+  MachineBasicBlock::iterator I = CmpInstr, E = MI,
+                              B = CmpInstr->getParent()->begin();
+
+  // Scan forward to find the first use of the compare.
+  for (MachineBasicBlock::iterator EL = CmpInstr->getParent()->end();
+       I != EL; ++I) {
+    bool FoundUse = false;
+    for (MachineRegisterInfo::use_iterator J = MRI->use_begin(CRReg),
+         JE = MRI->use_end(); J != JE; ++J)
+      if (&*J == &*I) {
+        FoundUse = true;
+        break;
+      }
+
+    if (FoundUse)
+      break;
+  }
+
+  // Early exit if we're at the beginning of the BB.
+  if (I == B) return false;
+
+  // There are two possible candidates which can be changed to set CR[01].
+  // One is MI, the other is a SUB instruction.
+  // For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1).
+  MachineInstr *Sub = NULL;
+  if (SrcReg2 != 0)
+    // MI is not a candidate for CMPrr.
+    MI = NULL;
+  // FIXME: Conservatively refuse to convert an instruction which isn't in the
+  // same BB as the comparison. This is to allow the check below to avoid calls
+  // (and other explicit clobbers); instead we should really check for these
+  // more explicitly (in at least a few predecessors).
+  else if (MI->getParent() != CmpInstr->getParent() || Value != 0) {
+    // PPC does not have a record-form SUBri.
+    return false;
+  }
+
+  // Search for Sub.
+  const TargetRegisterInfo *TRI = &getRegisterInfo();
+  --I;
+  for (; I != E && !noSub; --I) {
+    const MachineInstr &Instr = *I;
+    unsigned IOpC = Instr.getOpcode();
+
+    if (&*I != CmpInstr && (
+        Instr.modifiesRegister(PPC::CR0, TRI) ||
+        Instr.readsRegister(PPC::CR0, TRI)))
+      // This instruction modifies or uses the record condition register after
+      // the one we want to change. While we could do this transformation, it
+      // would likely not be profitable. This transformation removes one
+      // instruction, and so even forcing RA to generate one move probably
+      // makes it unprofitable.
+      return false;
+
+    // Check whether CmpInstr can be made redundant by the current instruction.
+    if ((OpC == PPC::CMPW || OpC == PPC::CMPLW ||
+         OpC == PPC::CMPD || OpC == PPC::CMPLD) &&
+        (IOpC == PPC::SUBF || IOpC == PPC::SUBF8) &&
+        ((Instr.getOperand(1).getReg() == SrcReg &&
+          Instr.getOperand(2).getReg() == SrcReg2) ||
+        (Instr.getOperand(1).getReg() == SrcReg2 &&
+         Instr.getOperand(2).getReg() == SrcReg))) {
+      Sub = &*I;
+      break;
+    }
+
+    if (I == B)
+      // Hit the start of the block before SrcReg's def or a matching subf.
+      return false;
+  }
+
+  // Return false if no candidates exist.
+  if (!MI && !Sub)
+    return false;
+
+  // The single candidate is called MI.
+  if (!MI) MI = Sub;
+
+  int NewOpC = -1;
+  MIOpC = MI->getOpcode();
+  if (MIOpC == PPC::ANDIo || MIOpC == PPC::ANDIo8)
+    NewOpC = MIOpC;
+  else {
+    NewOpC = PPC::getRecordFormOpcode(MIOpC);
+    if (NewOpC == -1 && PPC::getNonRecordFormOpcode(MIOpC) != -1)
+      NewOpC = MIOpC;
+  }
+
+  // FIXME: On the non-embedded POWER architectures, only some of the record
+  // forms are fast, and we should use only the fast ones.
+
+  // The defining instruction has a record form (or is already a record
+  // form). It is possible, however, that we'll need to reverse the condition
+  // code of the users.
+  if (NewOpC == -1)
+    return false;
+
+  SmallVector<std::pair<MachineOperand*, PPC::Predicate>, 4> PredsToUpdate;
+  SmallVector<std::pair<MachineOperand*, unsigned>, 4> SubRegsToUpdate;
+
+  // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based on CMP
+  // needs to be updated to be based on SUB.  Record the affected operands in
+  // PredsToUpdate and SubRegsToUpdate.  If it is safe to remove CmpInstr, the
+  // condition code of these operands will be modified.
+  bool ShouldSwap = false;
+  if (Sub) {
+    ShouldSwap = SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
+      Sub->getOperand(2).getReg() == SrcReg;
+
+    // The operands to subf are the opposite of sub, so only in the fixed-point
+    // case, invert the order.
+    ShouldSwap = !ShouldSwap;
+  }
+
+  if (ShouldSwap)
+    for (MachineRegisterInfo::use_iterator I = MRI->use_begin(CRReg),
+         IE = MRI->use_end(); I != IE; ++I) {
+      MachineInstr *UseMI = &*I;
+      if (UseMI->getOpcode() == PPC::BCC) {
+        PPC::Predicate Pred = (PPC::Predicate) UseMI->getOperand(0).getImm();
+        assert((!equalityOnly ||
+                Pred == PPC::PRED_EQ || Pred == PPC::PRED_NE) &&
+               "Invalid predicate for equality-only optimization");
+        PredsToUpdate.push_back(std::make_pair(&((*I).getOperand(0)),
+                                PPC::getSwappedPredicate(Pred)));
+      } else if (UseMI->getOpcode() == PPC::ISEL ||
+                 UseMI->getOpcode() == PPC::ISEL8) {
+        unsigned NewSubReg = UseMI->getOperand(3).getSubReg();
+        assert((!equalityOnly || NewSubReg == PPC::sub_eq) &&
+               "Invalid CR bit for equality-only optimization");
+
+        if (NewSubReg == PPC::sub_lt)
+          NewSubReg = PPC::sub_gt;
+        else if (NewSubReg == PPC::sub_gt)
+          NewSubReg = PPC::sub_lt;
+
+        SubRegsToUpdate.push_back(std::make_pair(&((*I).getOperand(3)),
+                                                 NewSubReg));
+      } else // We need to abort on a user we don't understand.
+        return false;
+    }
+
+  // Replace the compare with a copy into CRReg of the CR0 value set by the
+  // record-form instruction. If the instruction was not previously in
+  // record form, then set the kill flag on the CR.
+  CmpInstr->eraseFromParent();
+
+  MachineBasicBlock::iterator MII = MI;
+  BuildMI(*MI->getParent(), llvm::next(MII), MI->getDebugLoc(),
+          get(TargetOpcode::COPY), CRReg)
+    .addReg(PPC::CR0, MIOpC != NewOpC ? RegState::Kill : 0);
+
+  if (MIOpC != NewOpC) {
+    // We need to be careful here: we're replacing one instruction with
+    // another, and we need to make sure that we get all of the right
+    // implicit uses and defs. On the other hand, the caller may be holding
+    // an iterator to this instruction, and so we can't delete it (this is
+    // specifically the case if this is the instruction directly after the
+    // compare).
+
+    const MCInstrDesc &NewDesc = get(NewOpC);
+    MI->setDesc(NewDesc);
+
+    if (NewDesc.ImplicitDefs)
+      for (const uint16_t *ImpDefs = NewDesc.getImplicitDefs();
+           *ImpDefs; ++ImpDefs)
+        if (!MI->definesRegister(*ImpDefs))
+          MI->addOperand(*MI->getParent()->getParent(),
+                         MachineOperand::CreateReg(*ImpDefs, true, true));
+    if (NewDesc.ImplicitUses)
+      for (const uint16_t *ImpUses = NewDesc.getImplicitUses();
+           *ImpUses; ++ImpUses)
+        if (!MI->readsRegister(*ImpUses))
+          MI->addOperand(*MI->getParent()->getParent(),
+                         MachineOperand::CreateReg(*ImpUses, false, true));
+  }
+
+  // Modify the condition code of operands in PredsToUpdate/SubRegsToUpdate.
+  // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
+  // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
+  for (unsigned i = 0, e = PredsToUpdate.size(); i < e; i++)
+    PredsToUpdate[i].first->setImm(PredsToUpdate[i].second);
+
+  for (unsigned i = 0, e = SubRegsToUpdate.size(); i < e; i++)
+    SubRegsToUpdate[i].first->setSubReg(SubRegsToUpdate[i].second);
+
+  return true;
+}
+
 /// GetInstSize - Return the number of bytes of code the specified
 /// instruction may be.  This returns the maximum number of bytes.
 ///
@@ -729,3 +1384,152 @@ unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
     return 4; // PowerPC instructions are all 4 bytes
   }
 }
+
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "ppc-early-ret"
+STATISTIC(NumBCLR, "Number of early conditional returns");
+STATISTIC(NumBLR,  "Number of early returns");
+
+namespace llvm {
+  void initializePPCEarlyReturnPass(PassRegistry&);
+}
+
+namespace {
+  // PPCEarlyReturn pass - For simple functions without epilogue code, move
+  // returns up, and create conditional returns, to avoid unnecessary
+  // branch-to-blr sequences.
+  struct PPCEarlyReturn : public MachineFunctionPass {
+    static char ID;
+    PPCEarlyReturn() : MachineFunctionPass(ID) {
+      initializePPCEarlyReturnPass(*PassRegistry::getPassRegistry());
+    }
+
+    const PPCTargetMachine *TM;
+    const PPCInstrInfo *TII;
+
+protected:
+    // When ReturnMBB is only a blr, replace predecessor branches to it with
+    // blr/bclr. Returns true if any predecessor was modified.
+    bool processBlock(MachineBasicBlock &ReturnMBB) {
+      bool Changed = false;
+      MachineBasicBlock::iterator I = ReturnMBB.begin();
+      I = ReturnMBB.SkipPHIsAndLabels(I);
+
+      // The block must be essentially empty except for the blr.
+      if (I == ReturnMBB.end() || I->getOpcode() != PPC::BLR ||
+          I != ReturnMBB.getLastNonDebugInstr())
+        return Changed;
+
+      SmallVector<MachineBasicBlock*, 8> PredToRemove;
+      for (MachineBasicBlock::pred_iterator PI = ReturnMBB.pred_begin(),
+           PIE = ReturnMBB.pred_end(); PI != PIE; ++PI) {
+        bool OtherReference = false, BlockChanged = false;
+        // J is end() if the predecessor has no non-debug instructions.
+        for (MachineBasicBlock::iterator J = (*PI)->getLastNonDebugInstr();
+             J != (*PI)->end();) {
+          if (J->getOpcode() == PPC::B) {
+            if (J->getOperand(0).getMBB() == &ReturnMBB) {
+              // This is an unconditional branch to the return. Replace the
+              // branch with a blr.
+              BuildMI(**PI, J, J->getDebugLoc(), TII->get(PPC::BLR));
+              MachineBasicBlock::iterator K = J--;
+              K->eraseFromParent();
+              BlockChanged = true;
+              ++NumBLR;
+              continue;
+            }
+          } else if (J->getOpcode() == PPC::BCC) {
+            if (J->getOperand(2).getMBB() == &ReturnMBB) {
+              // This is a conditional branch to the return. Replace the branch
+              // with a bclr.
+              BuildMI(**PI, J, J->getDebugLoc(), TII->get(PPC::BCLR))
+                .addImm(J->getOperand(0).getImm())
+                .addReg(J->getOperand(1).getReg());
+              MachineBasicBlock::iterator K = J--;
+              K->eraseFromParent();
+              BlockChanged = true;
+              ++NumBCLR;
+              continue;
+            }
+          } else if (J->isBranch()) {
+            if (J->isIndirectBranch()) {
+              if (ReturnMBB.hasAddressTaken())
+                OtherReference = true;
+            } else
+              for (unsigned i = 0; i < J->getNumOperands(); ++i)
+                if (J->getOperand(i).isMBB() &&
+                    J->getOperand(i).getMBB() == &ReturnMBB)
+                  OtherReference = true;
+          } else if (!J->isTerminator() && !J->isDebugValue())
+            break;
+
+          if (J == (*PI)->begin())
+            break;
+
+          --J;
+        }
+
+        if ((*PI)->canFallThrough() && (*PI)->isLayoutSuccessor(&ReturnMBB))
+          OtherReference = true;
+
+        // Predecessors are stored in a vector and can't be removed here.
+        if (!OtherReference && BlockChanged)
+          PredToRemove.push_back(*PI);
+        Changed |= BlockChanged;
+      }
+
+      for (unsigned i = 0, ie = PredToRemove.size(); i != ie; ++i)
+        PredToRemove[i]->removeSuccessor(&ReturnMBB);
+
+      if (Changed && !ReturnMBB.hasAddressTaken()) {
+        // We now might be able to merge this blr-only block into its
+        // by-layout predecessor.
+        if (ReturnMBB.pred_size() == 1 &&
+            (*ReturnMBB.pred_begin())->isLayoutSuccessor(&ReturnMBB)) {
+          // Move the blr into the preceding block.
+          MachineBasicBlock &PrevMBB = **ReturnMBB.pred_begin();
+          PrevMBB.splice(PrevMBB.end(), &ReturnMBB, I);
+          PrevMBB.removeSuccessor(&ReturnMBB);
+        }
+
+        if (ReturnMBB.pred_empty())
+          ReturnMBB.eraseFromParent();
+      }
+
+      return Changed;
+    }
+
+public:
+    virtual bool runOnMachineFunction(MachineFunction &MF) {
+      TM = static_cast<const PPCTargetMachine *>(&MF.getTarget());
+      TII = TM->getInstrInfo();
+
+      bool Changed = false;
+
+      // If the function does not have at least two blocks, then there is
+      // nothing to do.
+      if (MF.size() < 2)
+        return Changed;
+
+      for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
+        MachineBasicBlock &B = *I++; 
+        if (processBlock(B))
+          Changed = true;
+      }
+
+      return Changed;
+    }
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+  };
+}
+
+INITIALIZE_PASS(PPCEarlyReturn, DEBUG_TYPE,
+                "PowerPC Early-Return Creation", false, false)
+
+char PPCEarlyReturn::ID = 0;
+FunctionPass*
+llvm::createPPCEarlyReturnPass() { return new PPCEarlyReturn(); }
+
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index 635e348..34a1a73 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -120,6 +120,17 @@ public:
                                 MachineBasicBlock *FBB,
                                 const SmallVectorImpl<MachineOperand> &Cond,
                                 DebugLoc DL) const;
+
+  // Select analysis.
+  virtual bool canInsertSelect(const MachineBasicBlock&,
+                               const SmallVectorImpl<MachineOperand> &Cond,
+                               unsigned, unsigned, int&, int&, int&) const;
+  virtual void insertSelect(MachineBasicBlock &MBB,
+                            MachineBasicBlock::iterator MI, DebugLoc DL,
+                            unsigned DstReg,
+                            const SmallVectorImpl<MachineOperand> &Cond,
+                            unsigned TrueReg, unsigned FalseReg) const;
+
   virtual void copyPhysReg(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator I, DebugLoc DL,
                            unsigned DestReg, unsigned SrcReg,
@@ -146,6 +157,66 @@ public:
   virtual
   bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
 
+  virtual bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
+                             unsigned Reg, MachineRegisterInfo *MRI) const;
+
+  // If-conversion by predication (only supported by some branch instructions).
+  // The if-conversion profitability checks defined inline below return true;
+  // it is always profitable to use the predicated branches.
+  virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB,
+                                   unsigned NumCycles, unsigned ExtraPredCycles,
+                                   const BranchProbability &Probability) const {
+    return true;
+  }
+
+  virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB,
+                                   unsigned NumT, unsigned ExtraT,
+                                   MachineBasicBlock &FMBB,
+                                   unsigned NumF, unsigned ExtraF,
+                                   const BranchProbability &Probability) const;
+
+  virtual bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
+                                         unsigned NumCycles,
+                                         const BranchProbability
+                                         &Probability) const {
+    return true;
+  }
+
+  virtual bool isProfitableToUnpredicate(MachineBasicBlock &TMBB,
+                                         MachineBasicBlock &FMBB) const {
+    return false;
+  }
+
+  // Predication support.
+  bool isPredicated(const MachineInstr *MI) const;
+
+  virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const;
+
+  virtual
+  bool PredicateInstruction(MachineInstr *MI,
+                            const SmallVectorImpl<MachineOperand> &Pred) const;
+
+  virtual
+  bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+                         const SmallVectorImpl<MachineOperand> &Pred2) const;
+
+  virtual bool DefinesPredicate(MachineInstr *MI,
+                                std::vector<MachineOperand> &Pred) const;
+
+  virtual bool isPredicable(MachineInstr *MI) const;
+
+  // Comparison optimization.
+
+
+  virtual bool analyzeCompare(const MachineInstr *MI,
+                              unsigned &SrcReg, unsigned &SrcReg2,
+                              int &Mask, int &Value) const;
+
+  virtual bool optimizeCompareInstr(MachineInstr *CmpInstr,
+                                    unsigned SrcReg, unsigned SrcReg2,
+                                    int Mask, int Value,
+                                    const MachineRegisterInfo *MRI) const;
+
   /// GetInstSize - Return the number of bytes of code the specified
   /// instruction may be.  This returns the maximum number of bytes.
   ///
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index ab90762..4763069 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -319,10 +319,7 @@ def unaligned4sextloadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{
 // PowerPC Flag Definitions.
 
 class isPPC64 { bit PPC64 = 1; }
-class isDOT   {
-  list<Register> Defs = [CR0];
-  bit RC  = 1;
-}
+class isDOT   { bit RC = 1; }
 
 class RegConstraint<string C> {
   string Constraints = C;
@@ -335,20 +332,111 @@ class NoEncode<string E> {
 //===----------------------------------------------------------------------===//
 // PowerPC Operand Definitions.
 
+// In the default PowerPC assembler syntax, registers are specified simply
+// by number, so they cannot be distinguished from immediate values (without
+// looking at the opcode).  This means that the default operand matching logic
+// for the asm parser does not work, and we need to specify custom matchers.
+// Since those can only be specified with RegisterOperand classes and not
+// directly on the RegisterClass, all instruction patterns used by the asm
+// parser need to use a RegisterOperand (instead of a RegisterClass) for
+// all their register operands.
+// For this purpose, we define one RegisterOperand for each RegisterClass,
+// using the same name as the class, just in lower case.
+
+def PPCRegGPRCAsmOperand : AsmOperandClass {
+  let Name = "RegGPRC"; let PredicateMethod = "isRegNumber";
+}
+def gprc : RegisterOperand<GPRC> {
+  let ParserMatchClass = PPCRegGPRCAsmOperand;
+}
+def PPCRegG8RCAsmOperand : AsmOperandClass {
+  let Name = "RegG8RC"; let PredicateMethod = "isRegNumber";
+}
+def g8rc : RegisterOperand<G8RC> {
+  let ParserMatchClass = PPCRegG8RCAsmOperand;
+}
+def PPCRegGPRCNoR0AsmOperand : AsmOperandClass {
+  let Name = "RegGPRCNoR0"; let PredicateMethod = "isRegNumber";
+}
+def gprc_nor0 : RegisterOperand<GPRC_NOR0> {
+  let ParserMatchClass = PPCRegGPRCNoR0AsmOperand;
+}
+def PPCRegG8RCNoX0AsmOperand : AsmOperandClass {
+  let Name = "RegG8RCNoX0"; let PredicateMethod = "isRegNumber";
+}
+def g8rc_nox0 : RegisterOperand<G8RC_NOX0> {
+  let ParserMatchClass = PPCRegG8RCNoX0AsmOperand;
+}
+def PPCRegF8RCAsmOperand : AsmOperandClass {
+  let Name = "RegF8RC"; let PredicateMethod = "isRegNumber";
+}
+def f8rc : RegisterOperand<F8RC> {
+  let ParserMatchClass = PPCRegF8RCAsmOperand;
+}
+def PPCRegF4RCAsmOperand : AsmOperandClass {
+  let Name = "RegF4RC"; let PredicateMethod = "isRegNumber";
+}
+def f4rc : RegisterOperand<F4RC> {
+  let ParserMatchClass = PPCRegF4RCAsmOperand;
+}
+def PPCRegVRRCAsmOperand : AsmOperandClass {
+  let Name = "RegVRRC"; let PredicateMethod = "isRegNumber";
+}
+def vrrc : RegisterOperand<VRRC> {
+  let ParserMatchClass = PPCRegVRRCAsmOperand;
+}
+def PPCRegCRBITRCAsmOperand : AsmOperandClass {
+  let Name = "RegCRBITRC"; let PredicateMethod = "isRegNumber";
+}
+def crbitrc : RegisterOperand<CRBITRC> {
+  let ParserMatchClass = PPCRegCRBITRCAsmOperand;
+}
+def PPCRegCRRCAsmOperand : AsmOperandClass {
+  let Name = "RegCRRC"; let PredicateMethod = "isCCRegNumber";
+}
+def crrc : RegisterOperand<CRRC> {
+  let ParserMatchClass = PPCRegCRRCAsmOperand;
+}
+
+def PPCS5ImmAsmOperand : AsmOperandClass {
+  let Name = "S5Imm"; let PredicateMethod = "isS5Imm";
+  let RenderMethod = "addImmOperands";
+}
 def s5imm   : Operand<i32> {
   let PrintMethod = "printS5ImmOperand";
+  let ParserMatchClass = PPCS5ImmAsmOperand;
+}
+def PPCU5ImmAsmOperand : AsmOperandClass {
+  let Name = "U5Imm"; let PredicateMethod = "isU5Imm";
+  let RenderMethod = "addImmOperands";
 }
 def u5imm   : Operand<i32> {
   let PrintMethod = "printU5ImmOperand";
+  let ParserMatchClass = PPCU5ImmAsmOperand;
+}
+def PPCU6ImmAsmOperand : AsmOperandClass {
+  let Name = "U6Imm"; let PredicateMethod = "isU6Imm";
+  let RenderMethod = "addImmOperands";
 }
 def u6imm   : Operand<i32> {
   let PrintMethod = "printU6ImmOperand";
+  let ParserMatchClass = PPCU6ImmAsmOperand;
+}
+def PPCS16ImmAsmOperand : AsmOperandClass {
+  let Name = "S16Imm"; let PredicateMethod = "isS16Imm";
+  let RenderMethod = "addImmOperands";
 }
 def s16imm  : Operand<i32> {
   let PrintMethod = "printS16ImmOperand";
+  let ParserMatchClass = PPCS16ImmAsmOperand;
+}
+def PPCU16ImmAsmOperand : AsmOperandClass {
+  let Name = "U16Imm"; let PredicateMethod = "isU16Imm";
+  let RenderMethod = "addImmOperands";
 }
 def u16imm  : Operand<i32> {
   let PrintMethod = "printU16ImmOperand";
+  let ParserMatchClass = PPCU16ImmAsmOperand;
 }
 def directbrtarget : Operand<OtherVT> {
   let PrintMethod = "printBranchOperand";
@@ -367,21 +455,49 @@ def aaddr : Operand<iPTR> {
 def symbolHi: Operand<i32> {
   let PrintMethod = "printSymbolHi";
   let EncoderMethod = "getHA16Encoding";
+  let ParserMatchClass = PPCS16ImmAsmOperand;
 }
 def symbolLo: Operand<i32> {
   let PrintMethod = "printSymbolLo";
   let EncoderMethod = "getLO16Encoding";
+  let ParserMatchClass = PPCS16ImmAsmOperand;
+}
+def PPCCRBitMaskOperand : AsmOperandClass {
+  let Name = "CRBitMask"; let PredicateMethod = "isCRBitMask";
 }
 def crbitm: Operand<i8> {
   let PrintMethod = "printcrbitm";
   let EncoderMethod = "get_crbitm_encoding";
+  let ParserMatchClass = PPCCRBitMaskOperand;
 }
 // Address operands
 // A version of ptr_rc which excludes R0 (or X0 in 64-bit mode).
-def ptr_rc_nor0 : PointerLikeRegClass<1>;
+def PPCRegGxRCNoR0Operand : AsmOperandClass {
+  let Name = "RegGxRCNoR0"; let PredicateMethod = "isRegNumber";
+}
+def ptr_rc_nor0 : Operand<iPTR>, PointerLikeRegClass<1> {
+  let ParserMatchClass = PPCRegGxRCNoR0Operand;
+}
+// A version of ptr_rc usable with the asm parser.
+def PPCRegGxRCOperand : AsmOperandClass {
+  let Name = "RegGxRC"; let PredicateMethod = "isRegNumber";
+}
+def ptr_rc_idx : Operand<iPTR>, PointerLikeRegClass<0> {
+  let ParserMatchClass = PPCRegGxRCOperand;
+}
 
-def dispRI : Operand<iPTR>;
-def dispRIX : Operand<iPTR>;
+def PPCDispRIOperand : AsmOperandClass {
+  let Name = "DispRI"; let PredicateMethod = "isS16Imm";
+}
+def dispRI : Operand<iPTR> {
+  let ParserMatchClass = PPCDispRIOperand;
+}
+def PPCDispRIXOperand : AsmOperandClass {
+  let Name = "DispRIX"; let PredicateMethod = "isS16ImmX4";
+}
+def dispRIX : Operand<iPTR> {
+  let ParserMatchClass = PPCDispRIXOperand;
+}
 
 def memri : Operand<iPTR> {
   let PrintMethod = "printMemRegImm";
@@ -390,7 +506,7 @@ def memri : Operand<iPTR> {
 }
 def memrr : Operand<iPTR> {
   let PrintMethod = "printMemRegReg";
-  let MIOperandInfo = (ops ptr_rc_nor0:$ptrreg, ptr_rc:$offreg);
+  let MIOperandInfo = (ops ptr_rc_nor0:$ptrreg, ptr_rc_idx:$offreg);
 }
 def memrix : Operand<iPTR> {   // memri where the imm is shifted 2 bits.
   let PrintMethod = "printMemRegImmShifted";
@@ -407,7 +523,7 @@ def memr : Operand<iPTR> {
 // PowerPC Predicate operand.
 def pred : Operand<OtherVT> {
   let PrintMethod = "printPredicateOperand";
-  let MIOperandInfo = (ops i32imm:$bibo, CRRC:$reg);
+  let MIOperandInfo = (ops i32imm:$bibo, crrc:$reg);
 }
 
 // Define PowerPC specific addressing mode.
@@ -430,6 +546,252 @@ def In64BitMode  : Predicate<"PPCSubTarget.isPPC64()">;
 def IsBookE  : Predicate<"PPCSubTarget.isBookE()">;
 
 //===----------------------------------------------------------------------===//
+// PowerPC Multiclass Definitions.
+
+multiclass XForm_6r<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+                    string asmbase, string asmstr, InstrItinClass itin,
+                    list<dag> pattern> {
+  let BaseName = asmbase in {  // shared by both defs below
+    def NAME : XForm_6<opcode, xo, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+                       pattern>, RecFormRel;  // plain form: has the pattern
+    let Defs = [CR0] in  // applies to the dot form only
+    def o    : XForm_6<opcode, xo, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+                       []>, isDOT, RecFormRel;  // dot form: no pattern
+  }
+}
+
+multiclass XForm_6rc<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+                     string asmbase, string asmstr, InstrItinClass itin,
+                     list<dag> pattern> {
+  let BaseName = asmbase in {  // shared by both defs below
+    let Defs = [CARRY] in  // plain form defines CARRY only
+    def NAME : XForm_6<opcode, xo, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+                       pattern>, RecFormRel;  // plain form: has the pattern
+    let Defs = [CARRY, CR0] in  // dot form defines CARRY and CR0
+    def o    : XForm_6<opcode, xo, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+                       []>, isDOT, RecFormRel;  // dot form: no pattern
+  }
+}
+
+multiclass XForm_10r<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+                    string asmbase, string asmstr, InstrItinClass itin,
+                    list<dag> pattern> {
+  let BaseName = asmbase in {  // shared by both defs below
+    def NAME : XForm_10<opcode, xo, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+                       pattern>, RecFormRel;  // plain form: has the pattern
+    let Defs = [CR0] in  // applies to the dot form only
+    def o    : XForm_10<opcode, xo, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+                       []>, isDOT, RecFormRel;  // dot form: no pattern
+  }
+}
+
+multiclass XForm_10rc<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+                      string asmbase, string asmstr, InstrItinClass itin,
+                      list<dag> pattern> {
+  let BaseName = asmbase in {  // shared by both defs below
+    let Defs = [CARRY] in  // plain form defines CARRY only
+    def NAME : XForm_10<opcode, xo, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+                       pattern>, RecFormRel;  // plain form: has the pattern
+    let Defs = [CARRY, CR0] in  // dot form defines CARRY and CR0
+    def o    : XForm_10<opcode, xo, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+                       []>, isDOT, RecFormRel;  // dot form: no pattern
+  }
+}
+
+multiclass XForm_11r<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+                    string asmbase, string asmstr, InstrItinClass itin,
+                    list<dag> pattern> {
+  let BaseName = asmbase in {  // shared by both defs below
+    def NAME : XForm_11<opcode, xo, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+                       pattern>, RecFormRel;  // plain form: has the pattern
+    let Defs = [CR0] in  // applies to the dot form only
+    def o    : XForm_11<opcode, xo, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+                       []>, isDOT, RecFormRel;  // dot form: no pattern
+  }
+}
+
+multiclass XOForm_1r<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
+                    string asmbase, string asmstr, InstrItinClass itin,
+                    list<dag> pattern> {
+  let BaseName = asmbase in {  // shared by both defs below
+    def NAME : XOForm_1<opcode, xo, oe, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+                       pattern>, RecFormRel;  // plain form: has the pattern
+    let Defs = [CR0] in  // applies to the dot form only
+    def o    : XOForm_1<opcode, xo, oe, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+                       []>, isDOT, RecFormRel;  // dot form: no pattern
+  }
+}
+
+multiclass XOForm_1rc<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
+                      string asmbase, string asmstr, InstrItinClass itin,
+                      list<dag> pattern> {
+  let BaseName = asmbase in {  // shared by both defs below
+    let Defs = [CARRY] in  // plain form defines CARRY only
+    def NAME : XOForm_1<opcode, xo, oe, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+                       pattern>, RecFormRel;  // plain form: has the pattern
+    let Defs = [CARRY, CR0] in  // dot form defines CARRY and CR0
+    def o    : XOForm_1<opcode, xo, oe, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+                       []>, isDOT, RecFormRel;  // dot form: no pattern
+  }
+}
+
+multiclass XOForm_3r<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
+                    string asmbase, string asmstr, InstrItinClass itin,
+                    list<dag> pattern> {
+  let BaseName = asmbase in {  // shared by both defs below
+    def NAME : XOForm_3<opcode, xo, oe, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+                       pattern>, RecFormRel;  // plain form: has the pattern
+    let Defs = [CR0] in  // applies to the dot form only
+    def o    : XOForm_3<opcode, xo, oe, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+                       []>, isDOT, RecFormRel;  // dot form: no pattern
+  }
+}
+
+multiclass XOForm_3rc<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
+                      string asmbase, string asmstr, InstrItinClass itin,
+                      list<dag> pattern> {
+  let BaseName = asmbase in {  // shared by both defs below
+    let Defs = [CARRY] in  // plain form defines CARRY only
+    def NAME : XOForm_3<opcode, xo, oe, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+                       pattern>, RecFormRel;  // plain form: has the pattern
+    let Defs = [CARRY, CR0] in  // dot form defines CARRY and CR0
+    def o    : XOForm_3<opcode, xo, oe, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+                       []>, isDOT, RecFormRel;  // dot form: no pattern
+  }
+}
+
+multiclass MForm_2r<bits<6> opcode, dag OOL, dag IOL,
+                    string asmbase, string asmstr, InstrItinClass itin,
+                    list<dag> pattern> {
+  let BaseName = asmbase in {  // shared by both defs below
+    def NAME : MForm_2<opcode, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+                       pattern>, RecFormRel;  // plain form: has the pattern
+    let Defs = [CR0] in  // applies to the dot form only
+    def o    : MForm_2<opcode, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+                       []>, isDOT, RecFormRel;  // dot form: no pattern
+  }
+}
+
+multiclass MDForm_1r<bits<6> opcode, bits<3> xo, dag OOL, dag IOL,
+                    string asmbase, string asmstr, InstrItinClass itin,
+                    list<dag> pattern> {
+  let BaseName = asmbase in {  // shared by both defs below
+    def NAME : MDForm_1<opcode, xo, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+                       pattern>, RecFormRel;  // plain form: has the pattern
+    let Defs = [CR0] in  // applies to the dot form only
+    def o    : MDForm_1<opcode, xo, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+                       []>, isDOT, RecFormRel;  // dot form: no pattern
+  }
+}
+
+multiclass MDSForm_1r<bits<6> opcode, bits<4> xo, dag OOL, dag IOL,
+                     string asmbase, string asmstr, InstrItinClass itin,
+                     list<dag> pattern> {
+  let BaseName = asmbase in {  // shared by both defs below
+    def NAME : MDSForm_1<opcode, xo, OOL, IOL,
+                        !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+                        pattern>, RecFormRel;  // plain form: has the pattern
+    let Defs = [CR0] in  // applies to the dot form only
+    def o    : MDSForm_1<opcode, xo, OOL, IOL,
+                        !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+                        []>, isDOT, RecFormRel;  // dot form: no pattern
+  }
+}
+
+multiclass XSForm_1rc<bits<6> opcode, bits<9> xo, dag OOL, dag IOL,
+                      string asmbase, string asmstr, InstrItinClass itin,
+                      list<dag> pattern> {
+  let BaseName = asmbase in {  // shared by both defs below
+    let Defs = [CARRY] in  // plain form defines CARRY only
+    def NAME : XSForm_1<opcode, xo, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+                       pattern>, RecFormRel;  // plain form: has the pattern
+    let Defs = [CARRY, CR0] in  // dot form defines CARRY and CR0
+    def o    : XSForm_1<opcode, xo, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+                       []>, isDOT, RecFormRel;  // dot form: no pattern
+  }
+}
+
+multiclass XForm_26r<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+                    string asmbase, string asmstr, InstrItinClass itin,
+                    list<dag> pattern> {
+  let BaseName = asmbase in {  // shared by both defs below
+    def NAME : XForm_26<opcode, xo, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+                       pattern>, RecFormRel;  // plain form: has the pattern
+    let Defs = [CR1] in  // dot form only; note it defines CR1
+    def o    : XForm_26<opcode, xo, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+                       []>, isDOT, RecFormRel;  // dot form: no pattern
+  }
+}
+
+multiclass AForm_1r<bits<6> opcode, bits<5> xo, dag OOL, dag IOL,
+                    string asmbase, string asmstr, InstrItinClass itin,
+                    list<dag> pattern> {
+  let BaseName = asmbase in {  // shared by both defs below
+    def NAME : AForm_1<opcode, xo, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+                       pattern>, RecFormRel;  // plain form: has the pattern
+    let Defs = [CR1] in  // dot form only; note it defines CR1
+    def o    : AForm_1<opcode, xo, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+                       []>, isDOT, RecFormRel;  // dot form: no pattern
+  }
+}
+
+multiclass AForm_2r<bits<6> opcode, bits<5> xo, dag OOL, dag IOL,
+                    string asmbase, string asmstr, InstrItinClass itin,
+                    list<dag> pattern> {
+  let BaseName = asmbase in {  // shared by both defs below
+    def NAME : AForm_2<opcode, xo, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+                       pattern>, RecFormRel;  // plain form: has the pattern
+    let Defs = [CR1] in  // dot form only; note it defines CR1
+    def o    : AForm_2<opcode, xo, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+                       []>, isDOT, RecFormRel;  // dot form: no pattern
+  }
+}
+
+multiclass AForm_3r<bits<6> opcode, bits<5> xo, dag OOL, dag IOL,
+                    string asmbase, string asmstr, InstrItinClass itin,
+                    list<dag> pattern> {
+  let BaseName = asmbase in {  // shared by both defs below
+    def NAME : AForm_3<opcode, xo, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+                       pattern>, RecFormRel;  // plain form: has the pattern
+    let Defs = [CR1] in  // dot form only; note it defines CR1
+    def o    : AForm_3<opcode, xo, OOL, IOL,
+                       !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+                       []>, isDOT, RecFormRel;  // dot form: no pattern
+  }
+}
+
+//===----------------------------------------------------------------------===//
 // PowerPC Instruction Definitions.
 
 // Pseudo-instructions:
@@ -442,12 +804,12 @@ def ADJCALLSTACKUP   : Pseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2), "#ADJCAL
                               [(callseq_end timm:$amt1, timm:$amt2)]>;
 }
 
-def UPDATE_VRSAVE    : Pseudo<(outs GPRC:$rD), (ins GPRC:$rS),
+def UPDATE_VRSAVE    : Pseudo<(outs gprc:$rD), (ins gprc:$rS),
                               "UPDATE_VRSAVE $rD, $rS", []>;
 }
 
 let Defs = [R1], Uses = [R1] in
-def DYNALLOC : Pseudo<(outs GPRC:$result), (ins GPRC:$negsize, memri:$fpsi), "#DYNALLOC",
+def DYNALLOC : Pseudo<(outs gprc:$result), (ins gprc:$negsize, memri:$fpsi), "#DYNALLOC",
                        [(set i32:$result,
                              (PPCdynalloc i32:$negsize, iaddr:$fpsi))]>;
                          
@@ -458,21 +820,21 @@ let usesCustomInserter = 1,    // Expanded after instruction selection.
   // Note that SELECT_CC_I4 and SELECT_CC_I8 use the no-r0 register classes
   // because either operand might become the first operand in an isel, and
   // that operand cannot be r0.
-  def SELECT_CC_I4 : Pseudo<(outs GPRC:$dst), (ins CRRC:$cond,
-                              GPRC_NOR0:$T, GPRC_NOR0:$F,
+  def SELECT_CC_I4 : Pseudo<(outs gprc:$dst), (ins crrc:$cond,
+                              gprc_nor0:$T, gprc_nor0:$F,
                               i32imm:$BROPC), "#SELECT_CC_I4",
                               []>;
-  def SELECT_CC_I8 : Pseudo<(outs G8RC:$dst), (ins CRRC:$cond,
-                              G8RC_NOX0:$T, G8RC_NOX0:$F,
+  def SELECT_CC_I8 : Pseudo<(outs g8rc:$dst), (ins crrc:$cond,
+                              g8rc_nox0:$T, g8rc_nox0:$F,
                               i32imm:$BROPC), "#SELECT_CC_I8",
                               []>;
-  def SELECT_CC_F4  : Pseudo<(outs F4RC:$dst), (ins CRRC:$cond, F4RC:$T, F4RC:$F,
+  def SELECT_CC_F4  : Pseudo<(outs f4rc:$dst), (ins crrc:$cond, f4rc:$T, f4rc:$F,
                               i32imm:$BROPC), "#SELECT_CC_F4",
                               []>;
-  def SELECT_CC_F8  : Pseudo<(outs F8RC:$dst), (ins CRRC:$cond, F8RC:$T, F8RC:$F,
+  def SELECT_CC_F8  : Pseudo<(outs f8rc:$dst), (ins crrc:$cond, f8rc:$T, f8rc:$F,
                               i32imm:$BROPC), "#SELECT_CC_F8",
                               []>;
-  def SELECT_CC_VRRC: Pseudo<(outs VRRC:$dst), (ins CRRC:$cond, VRRC:$T, VRRC:$F,
+  def SELECT_CC_VRRC: Pseudo<(outs vrrc:$dst), (ins crrc:$cond, vrrc:$T, vrrc:$F,
                               i32imm:$BROPC), "#SELECT_CC_VRRC",
                               []>;
 }
@@ -480,21 +842,26 @@ let usesCustomInserter = 1,    // Expanded after instruction selection.
 // SPILL_CR - Indicate that we're dumping the CR register, so we'll need to
 // scavenge a register for it.
 let mayStore = 1 in
-def SPILL_CR : Pseudo<(outs), (ins CRRC:$cond, memri:$F),
+def SPILL_CR : Pseudo<(outs), (ins crrc:$cond, memri:$F),
                      "#SPILL_CR", []>;
 
 // RESTORE_CR - Indicate that we're restoring the CR register (previously
 // spilled), so we'll need to scavenge a register for it.
 let mayLoad = 1 in
-def RESTORE_CR : Pseudo<(outs CRRC:$cond), (ins memri:$F),
+def RESTORE_CR : Pseudo<(outs crrc:$cond), (ins memri:$F),
                      "#RESTORE_CR", []>;
 
 let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
   let isReturn = 1, Uses = [LR, RM] in
     def BLR : XLForm_2_ext<19, 16, 20, 0, 0, (outs), (ins), "blr", BrB,
                            [(retflag)]>;
-  let isBranch = 1, isIndirectBranch = 1, Uses = [CTR] in
+  let isBranch = 1, isIndirectBranch = 1, Uses = [CTR] in {
     def BCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>;
+
+    let isCodeGenOnly = 1 in
+    def BCCTR : XLForm_2_br<19, 528, 0, (outs), (ins pred:$cond),
+                            "b${cond:cc}ctr ${cond:reg}", BrB, []>;
+  }
 }
 
 let Defs = [LR] in
@@ -511,10 +878,21 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
   // BCC represents an arbitrary conditional branch on a predicate.
   // FIXME: should be able to write a pattern for PPCcondbranch, but can't use
   // a two-value operand where a dag node expects two operands. :(
-  let isCodeGenOnly = 1 in
+  let isCodeGenOnly = 1 in {
     def BCC : BForm<16, 0, 0, (outs), (ins pred:$cond, condbrtarget:$dst),
                     "b${cond:cc} ${cond:reg}, $dst"
-                    /*[(PPCcondbranch CRRC:$crS, imm:$opc, bb:$dst)]*/>;
+                    /*[(PPCcondbranch crrc:$crS, imm:$opc, bb:$dst)]*/>;
+    let isReturn = 1, Uses = [LR, RM] in
+    def BCLR : XLForm_2_br<19, 16, 0, (outs), (ins pred:$cond),
+                           "b${cond:cc}lr ${cond:reg}", BrB, []>;
+
+    let isReturn = 1, Defs = [CTR], Uses = [CTR, LR, RM] in {
+      def BDZLR  : XLForm_2_ext<19, 16, 18, 0, 0, (outs), (ins),
+                             "bdzlr", BrB, []>;
+      def BDNZLR : XLForm_2_ext<19, 16, 16, 0, 0, (outs), (ins),
+                             "bdnzlr", BrB, []>;
+    }
+  }
 
   let Defs = [CTR], Uses = [CTR] in {
     def BDZ  : BForm_1<16, 18, 0, 0, (outs), (ins condbrtarget:$dst),
@@ -544,6 +922,10 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
     def BCTRL : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins),
                              "bctrl", BrB, [(PPCbctrl)]>,
                 Requires<[In32BitMode]>;
+
+    let isCodeGenOnly = 1 in
+    def BCCTRL : XLForm_2_br<19, 528, 1, (outs), (ins pred:$cond),
+                             "b${cond:cc}ctrl ${cond:reg}", BrB, []>;
   }
 }
 
@@ -589,7 +971,7 @@ def TAILBA   : IForm<18, 0, 0, (outs), (ins aaddr:$dst),
                   []>;
 
 let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
-  def EH_SjLj_SetJmp32  : Pseudo<(outs GPRC:$dst), (ins memr:$buf),
+  def EH_SjLj_SetJmp32  : Pseudo<(outs gprc:$dst), (ins memr:$buf),
                             "#EH_SJLJ_SETJMP32",
                             [(set i32:$dst, (PPCeh_sjlj_setjmp addr:$buf))]>,
                           Requires<[In32BitMode]>;
@@ -638,89 +1020,89 @@ def : Pat<(prefetch xoaddr:$dst, (i32 0), imm, (i32 1)),
 let usesCustomInserter = 1 in {
   let Defs = [CR0] in {
     def ATOMIC_LOAD_ADD_I8 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_ADD_I8",
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I8",
       [(set i32:$dst, (atomic_load_add_8 xoaddr:$ptr, i32:$incr))]>;
     def ATOMIC_LOAD_SUB_I8 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_SUB_I8",
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_SUB_I8",
       [(set i32:$dst, (atomic_load_sub_8 xoaddr:$ptr, i32:$incr))]>;
     def ATOMIC_LOAD_AND_I8 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_AND_I8",
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_AND_I8",
       [(set i32:$dst, (atomic_load_and_8 xoaddr:$ptr, i32:$incr))]>;
     def ATOMIC_LOAD_OR_I8 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_OR_I8",
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_OR_I8",
       [(set i32:$dst, (atomic_load_or_8 xoaddr:$ptr, i32:$incr))]>;
     def ATOMIC_LOAD_XOR_I8 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "ATOMIC_LOAD_XOR_I8",
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_XOR_I8",
       [(set i32:$dst, (atomic_load_xor_8 xoaddr:$ptr, i32:$incr))]>;
     def ATOMIC_LOAD_NAND_I8 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_NAND_I8",
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I8",
       [(set i32:$dst, (atomic_load_nand_8 xoaddr:$ptr, i32:$incr))]>;
     def ATOMIC_LOAD_ADD_I16 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_ADD_I16",
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I16",
       [(set i32:$dst, (atomic_load_add_16 xoaddr:$ptr, i32:$incr))]>;
     def ATOMIC_LOAD_SUB_I16 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_SUB_I16",
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_SUB_I16",
       [(set i32:$dst, (atomic_load_sub_16 xoaddr:$ptr, i32:$incr))]>;
     def ATOMIC_LOAD_AND_I16 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_AND_I16",
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_AND_I16",
       [(set i32:$dst, (atomic_load_and_16 xoaddr:$ptr, i32:$incr))]>;
     def ATOMIC_LOAD_OR_I16 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_OR_I16",
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_OR_I16",
       [(set i32:$dst, (atomic_load_or_16 xoaddr:$ptr, i32:$incr))]>;
     def ATOMIC_LOAD_XOR_I16 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_XOR_I16",
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_XOR_I16",
       [(set i32:$dst, (atomic_load_xor_16 xoaddr:$ptr, i32:$incr))]>;
     def ATOMIC_LOAD_NAND_I16 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_NAND_I16",
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I16",
       [(set i32:$dst, (atomic_load_nand_16 xoaddr:$ptr, i32:$incr))]>;
     def ATOMIC_LOAD_ADD_I32 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_ADD_I32",
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I32",
       [(set i32:$dst, (atomic_load_add_32 xoaddr:$ptr, i32:$incr))]>;
     def ATOMIC_LOAD_SUB_I32 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_SUB_I32",
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_SUB_I32",
       [(set i32:$dst, (atomic_load_sub_32 xoaddr:$ptr, i32:$incr))]>;
     def ATOMIC_LOAD_AND_I32 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_AND_I32",
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_AND_I32",
       [(set i32:$dst, (atomic_load_and_32 xoaddr:$ptr, i32:$incr))]>;
     def ATOMIC_LOAD_OR_I32 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_OR_I32",
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_OR_I32",
       [(set i32:$dst, (atomic_load_or_32 xoaddr:$ptr, i32:$incr))]>;
     def ATOMIC_LOAD_XOR_I32 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_XOR_I32",
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_XOR_I32",
       [(set i32:$dst, (atomic_load_xor_32 xoaddr:$ptr, i32:$incr))]>;
     def ATOMIC_LOAD_NAND_I32 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_NAND_I32",
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I32",
       [(set i32:$dst, (atomic_load_nand_32 xoaddr:$ptr, i32:$incr))]>;
 
     def ATOMIC_CMP_SWAP_I8 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "#ATOMIC_CMP_SWAP_I8",
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$old, gprc:$new), "#ATOMIC_CMP_SWAP_I8",
       [(set i32:$dst, (atomic_cmp_swap_8 xoaddr:$ptr, i32:$old, i32:$new))]>;
     def ATOMIC_CMP_SWAP_I16 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "#ATOMIC_CMP_SWAP_I16 $dst $ptr $old $new",
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$old, gprc:$new), "#ATOMIC_CMP_SWAP_I16 $dst $ptr $old $new",
       [(set i32:$dst, (atomic_cmp_swap_16 xoaddr:$ptr, i32:$old, i32:$new))]>;
     def ATOMIC_CMP_SWAP_I32 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "#ATOMIC_CMP_SWAP_I32 $dst $ptr $old $new",
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$old, gprc:$new), "#ATOMIC_CMP_SWAP_I32 $dst $ptr $old $new",
       [(set i32:$dst, (atomic_cmp_swap_32 xoaddr:$ptr, i32:$old, i32:$new))]>;
 
     def ATOMIC_SWAP_I8 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "#ATOMIC_SWAP_i8",
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$new), "#ATOMIC_SWAP_I8",
       [(set i32:$dst, (atomic_swap_8 xoaddr:$ptr, i32:$new))]>;
     def ATOMIC_SWAP_I16 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "#ATOMIC_SWAP_I16",
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$new), "#ATOMIC_SWAP_I16",
       [(set i32:$dst, (atomic_swap_16 xoaddr:$ptr, i32:$new))]>;
     def ATOMIC_SWAP_I32 : Pseudo<
-      (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "#ATOMIC_SWAP_I32",
+      (outs gprc:$dst), (ins memrr:$ptr, gprc:$new), "#ATOMIC_SWAP_I32",
       [(set i32:$dst, (atomic_swap_32 xoaddr:$ptr, i32:$new))]>;
   }
 }
 
 // Instructions to support atomic operations
-def LWARX : XForm_1<31,  20, (outs GPRC:$rD), (ins memrr:$src),
+def LWARX : XForm_1<31,  20, (outs gprc:$rD), (ins memrr:$src),
                    "lwarx $rD, $src", LdStLWARX,
                    [(set i32:$rD, (PPClarx xoaddr:$src))]>;
 
 let Defs = [CR0] in
-def STWCX : XForm_1<31, 150, (outs), (ins GPRC:$rS, memrr:$dst),
+def STWCX : XForm_1<31, 150, (outs), (ins gprc:$rS, memrr:$dst),
                    "stwcx. $rS, $dst", LdStSTWCX,
                    [(PPCstcx i32:$rS, xoaddr:$dst)]>,
                    isDOT;
@@ -734,93 +1116,93 @@ def TRAP  : XForm_24<31, 4, (outs), (ins), "trap", LdStLoad, [(trap)]>;
 
 // Unindexed (r+i) Loads. 
 let canFoldAsLoad = 1, PPC970_Unit = 2 in {
-def LBZ : DForm_1<34, (outs GPRC:$rD), (ins memri:$src),
+def LBZ : DForm_1<34, (outs gprc:$rD), (ins memri:$src),
                   "lbz $rD, $src", LdStLoad,
                   [(set i32:$rD, (zextloadi8 iaddr:$src))]>;
-def LHA : DForm_1<42, (outs GPRC:$rD), (ins memri:$src),
+def LHA : DForm_1<42, (outs gprc:$rD), (ins memri:$src),
                   "lha $rD, $src", LdStLHA,
                   [(set i32:$rD, (sextloadi16 iaddr:$src))]>,
                   PPC970_DGroup_Cracked;
-def LHZ : DForm_1<40, (outs GPRC:$rD), (ins memri:$src),
+def LHZ : DForm_1<40, (outs gprc:$rD), (ins memri:$src),
                   "lhz $rD, $src", LdStLoad,
                   [(set i32:$rD, (zextloadi16 iaddr:$src))]>;
-def LWZ : DForm_1<32, (outs GPRC:$rD), (ins memri:$src),
+def LWZ : DForm_1<32, (outs gprc:$rD), (ins memri:$src),
                   "lwz $rD, $src", LdStLoad,
                   [(set i32:$rD, (load iaddr:$src))]>;
 
-def LFS : DForm_1<48, (outs F4RC:$rD), (ins memri:$src),
+def LFS : DForm_1<48, (outs f4rc:$rD), (ins memri:$src),
                   "lfs $rD, $src", LdStLFD,
                   [(set f32:$rD, (load iaddr:$src))]>;
-def LFD : DForm_1<50, (outs F8RC:$rD), (ins memri:$src),
+def LFD : DForm_1<50, (outs f8rc:$rD), (ins memri:$src),
                   "lfd $rD, $src", LdStLFD,
                   [(set f64:$rD, (load iaddr:$src))]>;
 
 
 // Unindexed (r+i) Loads with Update (preinc).
-let mayLoad = 1 in {
-def LBZU : DForm_1<35, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
+let mayLoad = 1, neverHasSideEffects = 1 in {
+def LBZU : DForm_1<35, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
                    "lbzu $rD, $addr", LdStLoadUpd,
                    []>, RegConstraint<"$addr.reg = $ea_result">,
                    NoEncode<"$ea_result">;
 
-def LHAU : DForm_1<43, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
+def LHAU : DForm_1<43, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
                    "lhau $rD, $addr", LdStLHAU,
                    []>, RegConstraint<"$addr.reg = $ea_result">,
                    NoEncode<"$ea_result">;
 
-def LHZU : DForm_1<41, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
+def LHZU : DForm_1<41, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
                    "lhzu $rD, $addr", LdStLoadUpd,
                    []>, RegConstraint<"$addr.reg = $ea_result">,
                    NoEncode<"$ea_result">;
 
-def LWZU : DForm_1<33, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
+def LWZU : DForm_1<33, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
                    "lwzu $rD, $addr", LdStLoadUpd,
                    []>, RegConstraint<"$addr.reg = $ea_result">,
                    NoEncode<"$ea_result">;
 
-def LFSU : DForm_1<49, (outs F4RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
+def LFSU : DForm_1<49, (outs f4rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
                   "lfsu $rD, $addr", LdStLFDU,
                   []>, RegConstraint<"$addr.reg = $ea_result">,
                    NoEncode<"$ea_result">;
 
-def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
+def LFDU : DForm_1<51, (outs f8rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
                   "lfdu $rD, $addr", LdStLFDU,
                   []>, RegConstraint<"$addr.reg = $ea_result">,
                    NoEncode<"$ea_result">;
 
 
 // Indexed (r+r) Loads with Update (preinc).
-def LBZUX : XForm_1<31, 119, (outs GPRC:$rD, ptr_rc_nor0:$ea_result),
+def LBZUX : XForm_1<31, 119, (outs gprc:$rD, ptr_rc_nor0:$ea_result),
                    (ins memrr:$addr),
                    "lbzux $rD, $addr", LdStLoadUpd,
                    []>, RegConstraint<"$addr.ptrreg = $ea_result">,
                    NoEncode<"$ea_result">;
 
-def LHAUX : XForm_1<31, 375, (outs GPRC:$rD, ptr_rc_nor0:$ea_result),
+def LHAUX : XForm_1<31, 375, (outs gprc:$rD, ptr_rc_nor0:$ea_result),
                    (ins memrr:$addr),
                    "lhaux $rD, $addr", LdStLHAU,
                    []>, RegConstraint<"$addr.ptrreg = $ea_result">,
                    NoEncode<"$ea_result">;
 
-def LHZUX : XForm_1<31, 311, (outs GPRC:$rD, ptr_rc_nor0:$ea_result),
+def LHZUX : XForm_1<31, 311, (outs gprc:$rD, ptr_rc_nor0:$ea_result),
                    (ins memrr:$addr),
                    "lhzux $rD, $addr", LdStLoadUpd,
                    []>, RegConstraint<"$addr.ptrreg = $ea_result">,
                    NoEncode<"$ea_result">;
 
-def LWZUX : XForm_1<31, 55, (outs GPRC:$rD, ptr_rc_nor0:$ea_result),
+def LWZUX : XForm_1<31, 55, (outs gprc:$rD, ptr_rc_nor0:$ea_result),
                    (ins memrr:$addr),
                    "lwzux $rD, $addr", LdStLoadUpd,
                    []>, RegConstraint<"$addr.ptrreg = $ea_result">,
                    NoEncode<"$ea_result">;
 
-def LFSUX : XForm_1<31, 567, (outs F4RC:$rD, ptr_rc_nor0:$ea_result),
+def LFSUX : XForm_1<31, 567, (outs f4rc:$rD, ptr_rc_nor0:$ea_result),
                    (ins memrr:$addr),
                    "lfsux $rD, $addr", LdStLFDU,
                    []>, RegConstraint<"$addr.ptrreg = $ea_result">,
                    NoEncode<"$ea_result">;
 
-def LFDUX : XForm_1<31, 631, (outs F8RC:$rD, ptr_rc_nor0:$ea_result),
+def LFDUX : XForm_1<31, 631, (outs f8rc:$rD, ptr_rc_nor0:$ea_result),
                    (ins memrr:$addr),
                    "lfdux $rD, $addr", LdStLFDU,
                    []>, RegConstraint<"$addr.ptrreg = $ea_result">,
@@ -831,39 +1213,39 @@ def LFDUX : XForm_1<31, 631, (outs F8RC:$rD, ptr_rc_nor0:$ea_result),
 // Indexed (r+r) Loads.
 //
 let canFoldAsLoad = 1, PPC970_Unit = 2 in {
-def LBZX : XForm_1<31,  87, (outs GPRC:$rD), (ins memrr:$src),
+def LBZX : XForm_1<31,  87, (outs gprc:$rD), (ins memrr:$src),
                    "lbzx $rD, $src", LdStLoad,
                    [(set i32:$rD, (zextloadi8 xaddr:$src))]>;
-def LHAX : XForm_1<31, 343, (outs GPRC:$rD), (ins memrr:$src),
+def LHAX : XForm_1<31, 343, (outs gprc:$rD), (ins memrr:$src),
                    "lhax $rD, $src", LdStLHA,
                    [(set i32:$rD, (sextloadi16 xaddr:$src))]>,
                    PPC970_DGroup_Cracked;
-def LHZX : XForm_1<31, 279, (outs GPRC:$rD), (ins memrr:$src),
+def LHZX : XForm_1<31, 279, (outs gprc:$rD), (ins memrr:$src),
                    "lhzx $rD, $src", LdStLoad,
                    [(set i32:$rD, (zextloadi16 xaddr:$src))]>;
-def LWZX : XForm_1<31,  23, (outs GPRC:$rD), (ins memrr:$src),
+def LWZX : XForm_1<31,  23, (outs gprc:$rD), (ins memrr:$src),
                    "lwzx $rD, $src", LdStLoad,
                    [(set i32:$rD, (load xaddr:$src))]>;
                    
                    
-def LHBRX : XForm_1<31, 790, (outs GPRC:$rD), (ins memrr:$src),
+def LHBRX : XForm_1<31, 790, (outs gprc:$rD), (ins memrr:$src),
                    "lhbrx $rD, $src", LdStLoad,
                    [(set i32:$rD, (PPClbrx xoaddr:$src, i16))]>;
-def LWBRX : XForm_1<31,  534, (outs GPRC:$rD), (ins memrr:$src),
+def LWBRX : XForm_1<31,  534, (outs gprc:$rD), (ins memrr:$src),
                    "lwbrx $rD, $src", LdStLoad,
                    [(set i32:$rD, (PPClbrx xoaddr:$src, i32))]>;
 
-def LFSX   : XForm_25<31, 535, (outs F4RC:$frD), (ins memrr:$src),
+def LFSX   : XForm_25<31, 535, (outs f4rc:$frD), (ins memrr:$src),
                       "lfsx $frD, $src", LdStLFD,
                       [(set f32:$frD, (load xaddr:$src))]>;
-def LFDX   : XForm_25<31, 599, (outs F8RC:$frD), (ins memrr:$src),
+def LFDX   : XForm_25<31, 599, (outs f8rc:$frD), (ins memrr:$src),
                       "lfdx $frD, $src", LdStLFD,
                       [(set f64:$frD, (load xaddr:$src))]>;
 
-def LFIWAX : XForm_25<31, 855, (outs F8RC:$frD), (ins memrr:$src),
+def LFIWAX : XForm_25<31, 855, (outs f8rc:$frD), (ins memrr:$src),
                       "lfiwax $frD, $src", LdStLFD,
                       [(set f64:$frD, (PPClfiwax xoaddr:$src))]>;
-def LFIWZX : XForm_25<31, 887, (outs F8RC:$frD), (ins memrr:$src),
+def LFIWZX : XForm_25<31, 887, (outs f8rc:$frD), (ins memrr:$src),
                       "lfiwzx $frD, $src", LdStLFD,
                       [(set f64:$frD, (PPClfiwzx xoaddr:$src))]>;
 }
@@ -874,38 +1256,38 @@ def LFIWZX : XForm_25<31, 887, (outs F8RC:$frD), (ins memrr:$src),
 
 // Unindexed (r+i) Stores.
 let PPC970_Unit = 2 in {
-def STB  : DForm_1<38, (outs), (ins GPRC:$rS, memri:$src),
+def STB  : DForm_1<38, (outs), (ins gprc:$rS, memri:$src),
                    "stb $rS, $src", LdStStore,
                    [(truncstorei8 i32:$rS, iaddr:$src)]>;
-def STH  : DForm_1<44, (outs), (ins GPRC:$rS, memri:$src),
+def STH  : DForm_1<44, (outs), (ins gprc:$rS, memri:$src),
                    "sth $rS, $src", LdStStore,
                    [(truncstorei16 i32:$rS, iaddr:$src)]>;
-def STW  : DForm_1<36, (outs), (ins GPRC:$rS, memri:$src),
+def STW  : DForm_1<36, (outs), (ins gprc:$rS, memri:$src),
                    "stw $rS, $src", LdStStore,
                    [(store i32:$rS, iaddr:$src)]>;
-def STFS : DForm_1<52, (outs), (ins F4RC:$rS, memri:$dst),
+def STFS : DForm_1<52, (outs), (ins f4rc:$rS, memri:$dst),
                    "stfs $rS, $dst", LdStSTFD,
                    [(store f32:$rS, iaddr:$dst)]>;
-def STFD : DForm_1<54, (outs), (ins F8RC:$rS, memri:$dst),
+def STFD : DForm_1<54, (outs), (ins f8rc:$rS, memri:$dst),
                    "stfd $rS, $dst", LdStSTFD,
                    [(store f64:$rS, iaddr:$dst)]>;
 }
 
 // Unindexed (r+i) Stores with Update (preinc).
 let PPC970_Unit = 2, mayStore = 1 in {
-def STBU  : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memri:$dst),
+def STBU  : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst),
                     "stbu $rS, $dst", LdStStoreUpd, []>,
                     RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
-def STHU  : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memri:$dst),
+def STHU  : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst),
                     "sthu $rS, $dst", LdStStoreUpd, []>,
                     RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
-def STWU  : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memri:$dst),
+def STWU  : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst),
                     "stwu $rS, $dst", LdStStoreUpd, []>,
                     RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
-def STFSU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins F4RC:$rS, memri:$dst),
+def STFSU : DForm_1<53, (outs ptr_rc_nor0:$ea_res), (ins f4rc:$rS, memri:$dst),
                     "stfsu $rS, $dst", LdStSTFDU, []>,
                     RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
-def STFDU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins F8RC:$rS, memri:$dst),
+def STFDU : DForm_1<55, (outs ptr_rc_nor0:$ea_res), (ins f8rc:$rS, memri:$dst),
                     "stfdu $rS, $dst", LdStSTFDU, []>,
                     RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
 }
@@ -926,59 +1308,59 @@ def : Pat<(pre_store f64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
 
 // Indexed (r+r) Stores.
 let PPC970_Unit = 2 in {
-def STBX  : XForm_8<31, 215, (outs), (ins GPRC:$rS, memrr:$dst),
+def STBX  : XForm_8<31, 215, (outs), (ins gprc:$rS, memrr:$dst),
                    "stbx $rS, $dst", LdStStore,
                    [(truncstorei8 i32:$rS, xaddr:$dst)]>,
                    PPC970_DGroup_Cracked;
-def STHX  : XForm_8<31, 407, (outs), (ins GPRC:$rS, memrr:$dst),
+def STHX  : XForm_8<31, 407, (outs), (ins gprc:$rS, memrr:$dst),
                    "sthx $rS, $dst", LdStStore,
                    [(truncstorei16 i32:$rS, xaddr:$dst)]>,
                    PPC970_DGroup_Cracked;
-def STWX  : XForm_8<31, 151, (outs), (ins GPRC:$rS, memrr:$dst),
+def STWX  : XForm_8<31, 151, (outs), (ins gprc:$rS, memrr:$dst),
                    "stwx $rS, $dst", LdStStore,
                    [(store i32:$rS, xaddr:$dst)]>,
                    PPC970_DGroup_Cracked;
  
-def STHBRX: XForm_8<31, 918, (outs), (ins GPRC:$rS, memrr:$dst),
+def STHBRX: XForm_8<31, 918, (outs), (ins gprc:$rS, memrr:$dst),
                    "sthbrx $rS, $dst", LdStStore,
                    [(PPCstbrx i32:$rS, xoaddr:$dst, i16)]>,
                    PPC970_DGroup_Cracked;
-def STWBRX: XForm_8<31, 662, (outs), (ins GPRC:$rS, memrr:$dst),
+def STWBRX: XForm_8<31, 662, (outs), (ins gprc:$rS, memrr:$dst),
                    "stwbrx $rS, $dst", LdStStore,
                    [(PPCstbrx i32:$rS, xoaddr:$dst, i32)]>,
                    PPC970_DGroup_Cracked;
 
-def STFIWX: XForm_28<31, 983, (outs), (ins F8RC:$frS, memrr:$dst),
+def STFIWX: XForm_28<31, 983, (outs), (ins f8rc:$frS, memrr:$dst),
                      "stfiwx $frS, $dst", LdStSTFD,
                      [(PPCstfiwx f64:$frS, xoaddr:$dst)]>;
                      
-def STFSX : XForm_28<31, 663, (outs), (ins F4RC:$frS, memrr:$dst),
+def STFSX : XForm_28<31, 663, (outs), (ins f4rc:$frS, memrr:$dst),
                      "stfsx $frS, $dst", LdStSTFD,
                      [(store f32:$frS, xaddr:$dst)]>;
-def STFDX : XForm_28<31, 727, (outs), (ins F8RC:$frS, memrr:$dst),
+def STFDX : XForm_28<31, 727, (outs), (ins f8rc:$frS, memrr:$dst),
                      "stfdx $frS, $dst", LdStSTFD,
                      [(store f64:$frS, xaddr:$dst)]>;
 }
 
 // Indexed (r+r) Stores with Update (preinc).
 let PPC970_Unit = 2, mayStore = 1 in {
-def STBUX : XForm_8<31, 247, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memrr:$dst),
+def STBUX : XForm_8<31, 247, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memrr:$dst),
                     "stbux $rS, $dst", LdStStoreUpd, []>,
                     RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
                     PPC970_DGroup_Cracked;
-def STHUX : XForm_8<31, 439, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memrr:$dst),
+def STHUX : XForm_8<31, 439, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memrr:$dst),
                     "sthux $rS, $dst", LdStStoreUpd, []>,
                     RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
                     PPC970_DGroup_Cracked;
-def STWUX : XForm_8<31, 183, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memrr:$dst),
+def STWUX : XForm_8<31, 183, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memrr:$dst),
                     "stwux $rS, $dst", LdStStoreUpd, []>,
                     RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
                     PPC970_DGroup_Cracked;
-def STFSUX: XForm_8<31, 695, (outs ptr_rc_nor0:$ea_res), (ins F4RC:$rS, memrr:$dst),
+def STFSUX: XForm_8<31, 695, (outs ptr_rc_nor0:$ea_res), (ins f4rc:$rS, memrr:$dst),
                     "stfsux $rS, $dst", LdStSTFDU, []>,
                     RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
                     PPC970_DGroup_Cracked;
-def STFDUX: XForm_8<31, 759, (outs ptr_rc_nor0:$ea_res), (ins F8RC:$rS, memrr:$dst),
+def STFDUX: XForm_8<31, 759, (outs ptr_rc_nor0:$ea_res), (ins f8rc:$rS, memrr:$dst),
                     "stfdux $rS, $dst", LdStSTFDU, []>,
                     RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
                     PPC970_DGroup_Cracked;
@@ -1007,193 +1389,206 @@ def SYNC : XForm_24_sync<31, 598, (outs), (ins),
 //
 
 let PPC970_Unit = 1 in {  // FXU Operations.
-def ADDI   : DForm_2<14, (outs GPRC:$rD), (ins GPRC_NOR0:$rA, symbolLo:$imm),
+def ADDI   : DForm_2<14, (outs gprc:$rD), (ins gprc_nor0:$rA, symbolLo:$imm),
                      "addi $rD, $rA, $imm", IntSimple,
                      [(set i32:$rD, (add i32:$rA, immSExt16:$imm))]>;
-let Defs = [CARRY] in {
-def ADDIC  : DForm_2<12, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
+let BaseName = "addic" in {
+let Defs = [CARRY] in
+def ADDIC  : DForm_2<12, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm),
                      "addic $rD, $rA, $imm", IntGeneral,
                      [(set i32:$rD, (addc i32:$rA, immSExt16:$imm))]>,
-                     PPC970_DGroup_Cracked;
-def ADDICo : DForm_2<13, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
+                     RecFormRel, PPC970_DGroup_Cracked;
+let Defs = [CARRY, CR0] in
+def ADDICo : DForm_2<13, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm),
                      "addic. $rD, $rA, $imm", IntGeneral,
-                     []>;
+                     []>, isDOT, RecFormRel;
 }
-def ADDIS  : DForm_2<15, (outs GPRC:$rD), (ins GPRC_NOR0:$rA, symbolHi:$imm),
+def ADDIS  : DForm_2<15, (outs gprc:$rD), (ins gprc_nor0:$rA, symbolHi:$imm),
                      "addis $rD, $rA, $imm", IntSimple,
                      [(set i32:$rD, (add i32:$rA, imm16ShiftedSExt:$imm))]>;
 let isCodeGenOnly = 1 in
-def LA     : DForm_2<14, (outs GPRC:$rD), (ins GPRC_NOR0:$rA, symbolLo:$sym),
+def LA     : DForm_2<14, (outs gprc:$rD), (ins gprc_nor0:$rA, symbolLo:$sym),
                      "la $rD, $sym($rA)", IntGeneral,
                      [(set i32:$rD, (add i32:$rA,
                                           (PPClo tglobaladdr:$sym, 0)))]>;
-def MULLI  : DForm_2< 7, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
+def MULLI  : DForm_2< 7, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm),
                      "mulli $rD, $rA, $imm", IntMulLI,
                      [(set i32:$rD, (mul i32:$rA, immSExt16:$imm))]>;
-let Defs = [CARRY] in {
-def SUBFIC : DForm_2< 8, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm),
+let Defs = [CARRY] in
+def SUBFIC : DForm_2< 8, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm),
                      "subfic $rD, $rA, $imm", IntGeneral,
                      [(set i32:$rD, (subc immSExt16:$imm, i32:$rA))]>;
-}
 
 let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
-  def LI  : DForm_2_r0<14, (outs GPRC:$rD), (ins symbolLo:$imm),
+  def LI  : DForm_2_r0<14, (outs gprc:$rD), (ins symbolLo:$imm),
                        "li $rD, $imm", IntSimple,
                        [(set i32:$rD, immSExt16:$imm)]>;
-  def LIS : DForm_2_r0<15, (outs GPRC:$rD), (ins symbolHi:$imm),
+  def LIS : DForm_2_r0<15, (outs gprc:$rD), (ins symbolHi:$imm),
                        "lis $rD, $imm", IntSimple,
                        [(set i32:$rD, imm16ShiftedSExt:$imm)]>;
 }
 }
 
 let PPC970_Unit = 1 in {  // FXU Operations.
-def ANDIo : DForm_4<28, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
+let Defs = [CR0] in {
+def ANDIo : DForm_4<28, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
                     "andi. $dst, $src1, $src2", IntGeneral,
                     [(set i32:$dst, (and i32:$src1, immZExt16:$src2))]>,
                     isDOT;
-def ANDISo : DForm_4<29, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
+def ANDISo : DForm_4<29, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
                     "andis. $dst, $src1, $src2", IntGeneral,
                     [(set i32:$dst, (and i32:$src1, imm16ShiftedZExt:$src2))]>,
                     isDOT;
-def ORI   : DForm_4<24, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
+}
+def ORI   : DForm_4<24, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
                     "ori $dst, $src1, $src2", IntSimple,
                     [(set i32:$dst, (or i32:$src1, immZExt16:$src2))]>;
-def ORIS  : DForm_4<25, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
+def ORIS  : DForm_4<25, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
                     "oris $dst, $src1, $src2", IntSimple,
                     [(set i32:$dst, (or i32:$src1, imm16ShiftedZExt:$src2))]>;
-def XORI  : DForm_4<26, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
+def XORI  : DForm_4<26, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
                     "xori $dst, $src1, $src2", IntSimple,
                     [(set i32:$dst, (xor i32:$src1, immZExt16:$src2))]>;
-def XORIS : DForm_4<27, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2),
+def XORIS : DForm_4<27, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
                     "xoris $dst, $src1, $src2", IntSimple,
                     [(set i32:$dst, (xor i32:$src1, imm16ShiftedZExt:$src2))]>;
 def NOP   : DForm_4_zero<24, (outs), (ins), "nop", IntSimple,
                          []>;
-def CMPWI : DForm_5_ext<11, (outs CRRC:$crD), (ins GPRC:$rA, s16imm:$imm),
-                        "cmpwi $crD, $rA, $imm", IntCompare>;
-def CMPLWI : DForm_6_ext<10, (outs CRRC:$dst), (ins GPRC:$src1, u16imm:$src2),
-                         "cmplwi $dst, $src1, $src2", IntCompare>;
+let isCompare = 1, neverHasSideEffects = 1 in {
+  def CMPWI : DForm_5_ext<11, (outs crrc:$crD), (ins gprc:$rA, s16imm:$imm),
+                          "cmpwi $crD, $rA, $imm", IntCompare>;
+  def CMPLWI : DForm_6_ext<10, (outs crrc:$dst), (ins gprc:$src1, u16imm:$src2),
+                           "cmplwi $dst, $src1, $src2", IntCompare>;
+}
+}
+
+let PPC970_Unit = 1, neverHasSideEffects = 1 in {  // FXU Operations.
+defm NAND : XForm_6r<31, 476, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
+                     "nand", "$rA, $rS, $rB", IntSimple,
+                     [(set i32:$rA, (not (and i32:$rS, i32:$rB)))]>;
+defm AND  : XForm_6r<31,  28, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
+                     "and", "$rA, $rS, $rB", IntSimple,
+                     [(set i32:$rA, (and i32:$rS, i32:$rB))]>;
+defm ANDC : XForm_6r<31,  60, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
+                     "andc", "$rA, $rS, $rB", IntSimple,
+                     [(set i32:$rA, (and i32:$rS, (not i32:$rB)))]>;
+defm OR   : XForm_6r<31, 444, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
+                     "or", "$rA, $rS, $rB", IntSimple,
+                     [(set i32:$rA, (or i32:$rS, i32:$rB))]>;
+defm NOR  : XForm_6r<31, 124, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
+                     "nor", "$rA, $rS, $rB", IntSimple,
+                     [(set i32:$rA, (not (or i32:$rS, i32:$rB)))]>;
+defm ORC  : XForm_6r<31, 412, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
+                     "orc", "$rA, $rS, $rB", IntSimple,
+                     [(set i32:$rA, (or i32:$rS, (not i32:$rB)))]>;
+defm EQV  : XForm_6r<31, 284, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
+                     "eqv", "$rA, $rS, $rB", IntSimple,
+                     [(set i32:$rA, (not (xor i32:$rS, i32:$rB)))]>;
+defm XOR  : XForm_6r<31, 316, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
+                     "xor", "$rA, $rS, $rB", IntSimple,
+                     [(set i32:$rA, (xor i32:$rS, i32:$rB))]>;
+defm SLW  : XForm_6r<31,  24, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
+                     "slw", "$rA, $rS, $rB", IntGeneral,
+                     [(set i32:$rA, (PPCshl i32:$rS, i32:$rB))]>;
+defm SRW  : XForm_6r<31, 536, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
+                     "srw", "$rA, $rS, $rB", IntGeneral,
+                     [(set i32:$rA, (PPCsrl i32:$rS, i32:$rB))]>;
+defm SRAW : XForm_6rc<31, 792, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
+                      "sraw", "$rA, $rS, $rB", IntShift,
+                      [(set i32:$rA, (PPCsra i32:$rS, i32:$rB))]>;
 }
 
-
 let PPC970_Unit = 1 in {  // FXU Operations.
-def NAND : XForm_6<31, 476, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
-                   "nand $rA, $rS, $rB", IntSimple,
-                   [(set i32:$rA, (not (and i32:$rS, i32:$rB)))]>;
-def AND  : XForm_6<31,  28, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
-                   "and $rA, $rS, $rB", IntSimple,
-                   [(set i32:$rA, (and i32:$rS, i32:$rB))]>;
-def ANDC : XForm_6<31,  60, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
-                   "andc $rA, $rS, $rB", IntSimple,
-                   [(set i32:$rA, (and i32:$rS, (not i32:$rB)))]>;
-def OR   : XForm_6<31, 444, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
-                   "or $rA, $rS, $rB", IntSimple,
-                   [(set i32:$rA, (or i32:$rS, i32:$rB))]>;
-def NOR  : XForm_6<31, 124, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
-                   "nor $rA, $rS, $rB", IntSimple,
-                   [(set i32:$rA, (not (or i32:$rS, i32:$rB)))]>;
-def ORC  : XForm_6<31, 412, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
-                   "orc $rA, $rS, $rB", IntSimple,
-                   [(set i32:$rA, (or i32:$rS, (not i32:$rB)))]>;
-def EQV  : XForm_6<31, 284, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
-                   "eqv $rA, $rS, $rB", IntSimple,
-                   [(set i32:$rA, (not (xor i32:$rS, i32:$rB)))]>;
-def XOR  : XForm_6<31, 316, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
-                   "xor $rA, $rS, $rB", IntSimple,
-                   [(set i32:$rA, (xor i32:$rS, i32:$rB))]>;
-def SLW  : XForm_6<31,  24, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
-                   "slw $rA, $rS, $rB", IntGeneral,
-                   [(set i32:$rA, (PPCshl i32:$rS, i32:$rB))]>;
-def SRW  : XForm_6<31, 536, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
-                   "srw $rA, $rS, $rB", IntGeneral,
-                   [(set i32:$rA, (PPCsrl i32:$rS, i32:$rB))]>;
-let Defs = [CARRY] in {
-def SRAW : XForm_6<31, 792, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB),
-                   "sraw $rA, $rS, $rB", IntShift,
-                   [(set i32:$rA, (PPCsra i32:$rS, i32:$rB))]>;
+let neverHasSideEffects = 1 in {
+defm SRAWI : XForm_10rc<31, 824, (outs gprc:$rA), (ins gprc:$rS, u5imm:$SH),
+                        "srawi", "$rA, $rS, $SH", IntShift,
+                        [(set i32:$rA, (sra i32:$rS, (i32 imm:$SH)))]>;
+defm CNTLZW : XForm_11r<31,  26, (outs gprc:$rA), (ins gprc:$rS),
+                        "cntlzw", "$rA, $rS", IntGeneral,
+                        [(set i32:$rA, (ctlz i32:$rS))]>;
+defm EXTSB  : XForm_11r<31, 954, (outs gprc:$rA), (ins gprc:$rS),
+                        "extsb", "$rA, $rS", IntSimple,
+                        [(set i32:$rA, (sext_inreg i32:$rS, i8))]>;
+defm EXTSH  : XForm_11r<31, 922, (outs gprc:$rA), (ins gprc:$rS),
+                        "extsh", "$rA, $rS", IntSimple,
+                        [(set i32:$rA, (sext_inreg i32:$rS, i16))]>;
+}
+let isCompare = 1, neverHasSideEffects = 1 in {
+  def CMPW   : XForm_16_ext<31, 0, (outs crrc:$crD), (ins gprc:$rA, gprc:$rB),
+                            "cmpw $crD, $rA, $rB", IntCompare>;
+  def CMPLW  : XForm_16_ext<31, 32, (outs crrc:$crD), (ins gprc:$rA, gprc:$rB),
+                            "cmplw $crD, $rA, $rB", IntCompare>;
 }
 }
-
-let PPC970_Unit = 1 in {  // FXU Operations.
-let Defs = [CARRY] in {
-def SRAWI : XForm_10<31, 824, (outs GPRC:$rA), (ins GPRC:$rS, u5imm:$SH), 
-                     "srawi $rA, $rS, $SH", IntShift,
-                     [(set i32:$rA, (sra i32:$rS, (i32 imm:$SH)))]>;
-}
-def CNTLZW : XForm_11<31,  26, (outs GPRC:$rA), (ins GPRC:$rS),
-                      "cntlzw $rA, $rS", IntGeneral,
-                      [(set i32:$rA, (ctlz i32:$rS))]>;
-def EXTSB  : XForm_11<31, 954, (outs GPRC:$rA), (ins GPRC:$rS),
-                      "extsb $rA, $rS", IntSimple,
-                      [(set i32:$rA, (sext_inreg i32:$rS, i8))]>;
-def EXTSH  : XForm_11<31, 922, (outs GPRC:$rA), (ins GPRC:$rS),
-                      "extsh $rA, $rS", IntSimple,
-                      [(set i32:$rA, (sext_inreg i32:$rS, i16))]>;
-
-def CMPW   : XForm_16_ext<31, 0, (outs CRRC:$crD), (ins GPRC:$rA, GPRC:$rB),
-                          "cmpw $crD, $rA, $rB", IntCompare>;
-def CMPLW  : XForm_16_ext<31, 32, (outs CRRC:$crD), (ins GPRC:$rA, GPRC:$rB),
-                          "cmplw $crD, $rA, $rB", IntCompare>;
-}
 let PPC970_Unit = 3 in {  // FPU Operations.
 //def FCMPO  : XForm_17<63, 32, (outs CRRC:$crD), (ins FPRC:$fA, FPRC:$fB),
 //                      "fcmpo $crD, $fA, $fB", FPCompare>;
-def FCMPUS : XForm_17<63, 0, (outs CRRC:$crD), (ins F4RC:$fA, F4RC:$fB),
-                      "fcmpu $crD, $fA, $fB", FPCompare>;
-def FCMPUD : XForm_17<63, 0, (outs CRRC:$crD), (ins F8RC:$fA, F8RC:$fB),
-                      "fcmpu $crD, $fA, $fB", FPCompare>;
+let isCompare = 1, neverHasSideEffects = 1 in {
+  def FCMPUS : XForm_17<63, 0, (outs crrc:$crD), (ins f4rc:$fA, f4rc:$fB),
+                        "fcmpu $crD, $fA, $fB", FPCompare>;
+  def FCMPUD : XForm_17<63, 0, (outs crrc:$crD), (ins f8rc:$fA, f8rc:$fB),
+                        "fcmpu $crD, $fA, $fB", FPCompare>;
+}
 
 let Uses = [RM] in {
-  def FCTIWZ : XForm_26<63, 15, (outs F8RC:$frD), (ins F8RC:$frB),
-                        "fctiwz $frD, $frB", FPGeneral,
-                        [(set f64:$frD, (PPCfctiwz f64:$frB))]>;
+  let neverHasSideEffects = 1 in {
+  defm FCTIWZ : XForm_26r<63, 15, (outs f8rc:$frD), (ins f8rc:$frB),
+                          "fctiwz", "$frD, $frB", FPGeneral,
+                          [(set f64:$frD, (PPCfctiwz f64:$frB))]>;
 
-  def FRSP   : XForm_26<63, 12, (outs F4RC:$frD), (ins F8RC:$frB),
-                        "frsp $frD, $frB", FPGeneral,
-                        [(set f32:$frD, (fround f64:$frB))]>;
+  defm FRSP   : XForm_26r<63, 12, (outs f4rc:$frD), (ins f8rc:$frB),
+                          "frsp", "$frD, $frB", FPGeneral,
+                          [(set f32:$frD, (fround f64:$frB))]>;
 
   // The frin -> nearbyint mapping is valid only in fast-math mode.
-  def FRIND  : XForm_26<63, 392, (outs F8RC:$frD), (ins F8RC:$frB),
-                        "frin $frD, $frB", FPGeneral,
-                        [(set f64:$frD, (fnearbyint f64:$frB))]>;
-  def FRINS  : XForm_26<63, 392, (outs F4RC:$frD), (ins F4RC:$frB),
-                        "frin $frD, $frB", FPGeneral,
-                        [(set f32:$frD, (fnearbyint f32:$frB))]>;
+  let Interpretation64Bit = 1 in
+  defm FRIND  : XForm_26r<63, 392, (outs f8rc:$frD), (ins f8rc:$frB),
+                          "frin", "$frD, $frB", FPGeneral,
+                          [(set f64:$frD, (fnearbyint f64:$frB))]>;
+  defm FRINS  : XForm_26r<63, 392, (outs f4rc:$frD), (ins f4rc:$frB),
+                          "frin", "$frD, $frB", FPGeneral,
+                          [(set f32:$frD, (fnearbyint f32:$frB))]>;
+  }
 
   // These pseudos expand to rint but also set FE_INEXACT when the result does
   // not equal the argument.
   let usesCustomInserter = 1, Defs = [RM] in { // FIXME: Model FPSCR!
-    def FRINDrint : Pseudo<(outs F8RC:$frD), (ins F8RC:$frB),
+    def FRINDrint : Pseudo<(outs f8rc:$frD), (ins f8rc:$frB),
                             "#FRINDrint", [(set f64:$frD, (frint f64:$frB))]>;
-    def FRINSrint : Pseudo<(outs F4RC:$frD), (ins F4RC:$frB),
+    def FRINSrint : Pseudo<(outs f4rc:$frD), (ins f4rc:$frB),
                             "#FRINSrint", [(set f32:$frD, (frint f32:$frB))]>;
   }
 
-  def FRIPD  : XForm_26<63, 456, (outs F8RC:$frD), (ins F8RC:$frB),
-                        "frip $frD, $frB", FPGeneral,
-                        [(set f64:$frD, (fceil f64:$frB))]>;
-  def FRIPS  : XForm_26<63, 456, (outs F4RC:$frD), (ins F4RC:$frB),
-                        "frip $frD, $frB", FPGeneral,
-                        [(set f32:$frD, (fceil f32:$frB))]>;
-  def FRIZD  : XForm_26<63, 424, (outs F8RC:$frD), (ins F8RC:$frB),
-                        "friz $frD, $frB", FPGeneral,
-                        [(set f64:$frD, (ftrunc f64:$frB))]>;
-  def FRIZS  : XForm_26<63, 424, (outs F4RC:$frD), (ins F4RC:$frB),
-                        "friz $frD, $frB", FPGeneral,
-                        [(set f32:$frD, (ftrunc f32:$frB))]>;
-  def FRIMD  : XForm_26<63, 488, (outs F8RC:$frD), (ins F8RC:$frB),
-                        "frim $frD, $frB", FPGeneral,
-                        [(set f64:$frD, (ffloor f64:$frB))]>;
-  def FRIMS  : XForm_26<63, 488, (outs F4RC:$frD), (ins F4RC:$frB),
-                        "frim $frD, $frB", FPGeneral,
-                        [(set f32:$frD, (ffloor f32:$frB))]>;
-
-  def FSQRT  : XForm_26<63, 22, (outs F8RC:$frD), (ins F8RC:$frB),
-                        "fsqrt $frD, $frB", FPSqrt,
-                        [(set f64:$frD, (fsqrt f64:$frB))]>;
-  def FSQRTS : XForm_26<59, 22, (outs F4RC:$frD), (ins F4RC:$frB),
-                        "fsqrts $frD, $frB", FPSqrt,
-                        [(set f32:$frD, (fsqrt f32:$frB))]>;
+  let neverHasSideEffects = 1 in {
+  let Interpretation64Bit = 1 in
+  defm FRIPD  : XForm_26r<63, 456, (outs f8rc:$frD), (ins f8rc:$frB),
+                          "frip", "$frD, $frB", FPGeneral,
+                          [(set f64:$frD, (fceil f64:$frB))]>;
+  defm FRIPS  : XForm_26r<63, 456, (outs f4rc:$frD), (ins f4rc:$frB),
+                          "frip", "$frD, $frB", FPGeneral,
+                          [(set f32:$frD, (fceil f32:$frB))]>;
+  let Interpretation64Bit = 1 in
+  defm FRIZD  : XForm_26r<63, 424, (outs f8rc:$frD), (ins f8rc:$frB),
+                          "friz", "$frD, $frB", FPGeneral,
+                          [(set f64:$frD, (ftrunc f64:$frB))]>;
+  defm FRIZS  : XForm_26r<63, 424, (outs f4rc:$frD), (ins f4rc:$frB),
+                          "friz", "$frD, $frB", FPGeneral,
+                          [(set f32:$frD, (ftrunc f32:$frB))]>;
+  let Interpretation64Bit = 1 in
+  defm FRIMD  : XForm_26r<63, 488, (outs f8rc:$frD), (ins f8rc:$frB),
+                          "frim", "$frD, $frB", FPGeneral,
+                          [(set f64:$frD, (ffloor f64:$frB))]>;
+  defm FRIMS  : XForm_26r<63, 488, (outs f4rc:$frD), (ins f4rc:$frB),
+                          "frim", "$frD, $frB", FPGeneral,
+                          [(set f32:$frD, (ffloor f32:$frB))]>;
+
+  defm FSQRT  : XForm_26r<63, 22, (outs f8rc:$frD), (ins f8rc:$frB),
+                          "fsqrt", "$frD, $frB", FPSqrt,
+                          [(set f64:$frD, (fsqrt f64:$frB))]>;
+  defm FSQRTS : XForm_26r<59, 22, (outs f4rc:$frD), (ins f4rc:$frB),
+                          "fsqrts", "$frD, $frB", FPSqrt,
+                          [(set f32:$frD, (fsqrt f32:$frB))]>;
+  }
   }
 }
 
@@ -1201,69 +1596,74 @@ let Uses = [RM] in {
 /// often coalesced away and we don't want the dispatch group builder to think
 /// that they will fill slots (which could cause the load of a LSU reject to
 /// sneak into a d-group with a store).
-def FMR   : XForm_26<63, 72, (outs F4RC:$frD), (ins F4RC:$frB),
-                     "fmr $frD, $frB", FPGeneral,
-                     []>,  // (set f32:$frD, f32:$frB)
-                     PPC970_Unit_Pseudo;
+let neverHasSideEffects = 1 in
+defm FMR   : XForm_26r<63, 72, (outs f4rc:$frD), (ins f4rc:$frB),
+                       "fmr", "$frD, $frB", FPGeneral,
+                       []>,  // (set f32:$frD, f32:$frB)
+                       PPC970_Unit_Pseudo;
 
-let PPC970_Unit = 3 in {  // FPU Operations.
+let PPC970_Unit = 3, neverHasSideEffects = 1 in {  // FPU Operations.
 // These are artificially split into two different forms, for 4/8 byte FP.
-def FABSS  : XForm_26<63, 264, (outs F4RC:$frD), (ins F4RC:$frB),
-                      "fabs $frD, $frB", FPGeneral,
-                      [(set f32:$frD, (fabs f32:$frB))]>;
-def FABSD  : XForm_26<63, 264, (outs F8RC:$frD), (ins F8RC:$frB),
-                      "fabs $frD, $frB", FPGeneral,
-                      [(set f64:$frD, (fabs f64:$frB))]>;
-def FNABSS : XForm_26<63, 136, (outs F4RC:$frD), (ins F4RC:$frB),
-                      "fnabs $frD, $frB", FPGeneral,
-                      [(set f32:$frD, (fneg (fabs f32:$frB)))]>;
-def FNABSD : XForm_26<63, 136, (outs F8RC:$frD), (ins F8RC:$frB),
-                      "fnabs $frD, $frB", FPGeneral,
-                      [(set f64:$frD, (fneg (fabs f64:$frB)))]>;
-def FNEGS  : XForm_26<63, 40, (outs F4RC:$frD), (ins F4RC:$frB),
-                      "fneg $frD, $frB", FPGeneral,
-                      [(set f32:$frD, (fneg f32:$frB))]>;
-def FNEGD  : XForm_26<63, 40, (outs F8RC:$frD), (ins F8RC:$frB),
-                      "fneg $frD, $frB", FPGeneral,
-                      [(set f64:$frD, (fneg f64:$frB))]>;
+defm FABSS  : XForm_26r<63, 264, (outs f4rc:$frD), (ins f4rc:$frB),
+                        "fabs", "$frD, $frB", FPGeneral,
+                        [(set f32:$frD, (fabs f32:$frB))]>;
+let Interpretation64Bit = 1 in
+defm FABSD  : XForm_26r<63, 264, (outs f8rc:$frD), (ins f8rc:$frB),
+                        "fabs", "$frD, $frB", FPGeneral,
+                        [(set f64:$frD, (fabs f64:$frB))]>;
+defm FNABSS : XForm_26r<63, 136, (outs f4rc:$frD), (ins f4rc:$frB),
+                        "fnabs", "$frD, $frB", FPGeneral,
+                        [(set f32:$frD, (fneg (fabs f32:$frB)))]>;
+let Interpretation64Bit = 1 in
+defm FNABSD : XForm_26r<63, 136, (outs f8rc:$frD), (ins f8rc:$frB),
+                        "fnabs", "$frD, $frB", FPGeneral,
+                        [(set f64:$frD, (fneg (fabs f64:$frB)))]>;
+defm FNEGS  : XForm_26r<63, 40, (outs f4rc:$frD), (ins f4rc:$frB),
+                        "fneg", "$frD, $frB", FPGeneral,
+                        [(set f32:$frD, (fneg f32:$frB))]>;
+let Interpretation64Bit = 1 in
+defm FNEGD  : XForm_26r<63, 40, (outs f8rc:$frD), (ins f8rc:$frB),
+                        "fneg", "$frD, $frB", FPGeneral,
+                        [(set f64:$frD, (fneg f64:$frB))]>;
 
 // Reciprocal estimates.
-def FRE      : XForm_26<63, 24, (outs F8RC:$frD), (ins F8RC:$frB),
-                        "fre $frD, $frB", FPGeneral,
-                        [(set f64:$frD, (PPCfre f64:$frB))]>;
-def FRES     : XForm_26<59, 24, (outs F4RC:$frD), (ins F4RC:$frB),
-                        "fres $frD, $frB", FPGeneral,
-                        [(set f32:$frD, (PPCfre f32:$frB))]>;
-def FRSQRTE  : XForm_26<63, 26, (outs F8RC:$frD), (ins F8RC:$frB),
-                        "frsqrte $frD, $frB", FPGeneral,
-                        [(set f64:$frD, (PPCfrsqrte f64:$frB))]>;
-def FRSQRTES : XForm_26<59, 26, (outs F4RC:$frD), (ins F4RC:$frB),
-                        "frsqrtes $frD, $frB", FPGeneral,
-                        [(set f32:$frD, (PPCfrsqrte f32:$frB))]>;
+defm FRE      : XForm_26r<63, 24, (outs f8rc:$frD), (ins f8rc:$frB),
+                          "fre", "$frD, $frB", FPGeneral,
+                          [(set f64:$frD, (PPCfre f64:$frB))]>;
+defm FRES     : XForm_26r<59, 24, (outs f4rc:$frD), (ins f4rc:$frB),
+                          "fres", "$frD, $frB", FPGeneral,
+                          [(set f32:$frD, (PPCfre f32:$frB))]>;
+defm FRSQRTE  : XForm_26r<63, 26, (outs f8rc:$frD), (ins f8rc:$frB),
+                          "frsqrte", "$frD, $frB", FPGeneral,
+                          [(set f64:$frD, (PPCfrsqrte f64:$frB))]>;
+defm FRSQRTES : XForm_26r<59, 26, (outs f4rc:$frD), (ins f4rc:$frB),
+                          "frsqrtes", "$frD, $frB", FPGeneral,
+                          [(set f32:$frD, (PPCfrsqrte f32:$frB))]>;
 }
 
 // XL-Form instructions.  condition register logical ops.
 //
-def MCRF   : XLForm_3<19, 0, (outs CRRC:$BF), (ins CRRC:$BFA),
+let neverHasSideEffects = 1 in
+def MCRF   : XLForm_3<19, 0, (outs crrc:$BF), (ins crrc:$BFA),
                       "mcrf $BF, $BFA", BrMCR>,
              PPC970_DGroup_First, PPC970_Unit_CRU;
 
-def CREQV  : XLForm_1<19, 289, (outs CRBITRC:$CRD),
-                               (ins CRBITRC:$CRA, CRBITRC:$CRB),
+def CREQV  : XLForm_1<19, 289, (outs crbitrc:$CRD),
+                               (ins crbitrc:$CRA, crbitrc:$CRB),
                       "creqv $CRD, $CRA, $CRB", BrCR,
                       []>;
 
-def CROR  : XLForm_1<19, 449, (outs CRBITRC:$CRD),
-                               (ins CRBITRC:$CRA, CRBITRC:$CRB),
+def CROR  : XLForm_1<19, 449, (outs crbitrc:$CRD),
+                               (ins crbitrc:$CRA, crbitrc:$CRB),
                       "cror $CRD, $CRA, $CRB", BrCR,
                       []>;
 
 let isCodeGenOnly = 1 in {
-def CRSET  : XLForm_1_ext<19, 289, (outs CRBITRC:$dst), (ins),
+def CRSET  : XLForm_1_ext<19, 289, (outs crbitrc:$dst), (ins),
               "creqv $dst, $dst, $dst", BrCR,
               []>;
 
-def CRUNSET: XLForm_1_ext<19, 193, (outs CRBITRC:$dst), (ins),
+def CRUNSET: XLForm_1_ext<19, 193, (outs crbitrc:$dst), (ins),
               "crxor $dst, $dst, $dst", BrCR,
               []>;
 
@@ -1281,23 +1681,23 @@ def CR6UNSET: XLForm_1_ext<19, 193, (outs), (ins),
 // XFX-Form instructions.  Instructions that deal with SPRs.
 //
 let Uses = [CTR] in {
-def MFCTR : XFXForm_1_ext<31, 339, 9, (outs GPRC:$rT), (ins),
+def MFCTR : XFXForm_1_ext<31, 339, 9, (outs gprc:$rT), (ins),
                           "mfctr $rT", SprMFSPR>,
             PPC970_DGroup_First, PPC970_Unit_FXU;
 }
 let Defs = [CTR], Pattern = [(PPCmtctr i32:$rS)] in {
-def MTCTR : XFXForm_7_ext<31, 467, 9, (outs), (ins GPRC:$rS),
+def MTCTR : XFXForm_7_ext<31, 467, 9, (outs), (ins gprc:$rS),
                           "mtctr $rS", SprMTSPR>,
             PPC970_DGroup_First, PPC970_Unit_FXU;
 }
 
 let Defs = [LR] in {
-def MTLR  : XFXForm_7_ext<31, 467, 8, (outs), (ins GPRC:$rS),
+def MTLR  : XFXForm_7_ext<31, 467, 8, (outs), (ins gprc:$rS),
                           "mtlr $rS", SprMTSPR>,
             PPC970_DGroup_First, PPC970_Unit_FXU;
 }
 let Uses = [LR] in {
-def MFLR  : XFXForm_1_ext<31, 339, 8, (outs GPRC:$rT), (ins),
+def MFLR  : XFXForm_1_ext<31, 339, 8, (outs gprc:$rT), (ins),
                           "mflr $rT", SprMFSPR>,
             PPC970_DGroup_First, PPC970_Unit_FXU;
 }
@@ -1305,19 +1705,19 @@ def MFLR  : XFXForm_1_ext<31, 339, 8, (outs GPRC:$rT), (ins),
 // Move to/from VRSAVE: despite being a SPR, the VRSAVE register is renamed like
 // a GPR on the PPC970.  As such, copies in and out have the same performance
 // characteristics as an OR instruction.
-def MTVRSAVE : XFXForm_7_ext<31, 467, 256, (outs), (ins GPRC:$rS),
+def MTVRSAVE : XFXForm_7_ext<31, 467, 256, (outs), (ins gprc:$rS),
                              "mtspr 256, $rS", IntGeneral>,
                PPC970_DGroup_Single, PPC970_Unit_FXU;
-def MFVRSAVE : XFXForm_1_ext<31, 339, 256, (outs GPRC:$rT), (ins),
+def MFVRSAVE : XFXForm_1_ext<31, 339, 256, (outs gprc:$rT), (ins),
                              "mfspr $rT, 256", IntGeneral>,
                PPC970_DGroup_First, PPC970_Unit_FXU;
 
 let isCodeGenOnly = 1 in {
   def MTVRSAVEv : XFXForm_7_ext<31, 467, 256,
-                                (outs VRSAVERC:$reg), (ins GPRC:$rS),
+                                (outs VRSAVERC:$reg), (ins gprc:$rS),
                                 "mtspr 256, $rS", IntGeneral>,
                   PPC970_DGroup_Single, PPC970_Unit_FXU;
-  def MFVRSAVEv : XFXForm_1_ext<31, 339, 256, (outs GPRC:$rT),
+  def MFVRSAVEv : XFXForm_1_ext<31, 339, 256, (outs gprc:$rT),
                                 (ins VRSAVERC:$reg),
                                 "mfspr $rT, 256", IntGeneral>,
                   PPC970_DGroup_First, PPC970_Unit_FXU;
@@ -1335,7 +1735,8 @@ let mayLoad = 1 in
 def RESTORE_VRSAVE : Pseudo<(outs VRSAVERC:$vrsave), (ins memri:$F),
                      "#RESTORE_VRSAVE", []>;
 
-def MTCRF : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins GPRC:$rS),
+let neverHasSideEffects = 1 in {
+def MTCRF : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins gprc:$rS),
                       "mtcrf $FXM, $rS", BrMCRX>,
             PPC970_MicroCode, PPC970_Unit_CRU;
 
@@ -1350,21 +1751,23 @@ def MTCRF : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins GPRC:$rS),
 //
 // FIXME: Make this a real Pseudo instruction when the JIT switches to MC.
 let isCodeGenOnly = 1 in
-def MFCRpseud: XFXForm_3<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM),
+def MFCRpseud: XFXForm_3<31, 19, (outs gprc:$rT), (ins crbitm:$FXM),
                        "#MFCRpseud", SprMFCR>,
             PPC970_MicroCode, PPC970_Unit_CRU;
-            
-def MFCR : XFXForm_3<31, 19, (outs GPRC:$rT), (ins),
-                     "mfcr $rT", SprMFCR>,
-                     PPC970_MicroCode, PPC970_Unit_CRU;
 
-def MFOCRF: XFXForm_5a<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM),
+def MFOCRF: XFXForm_5a<31, 19, (outs gprc:$rT), (ins crbitm:$FXM),
                        "mfocrf $rT, $FXM", SprMFCR>,
             PPC970_DGroup_First, PPC970_Unit_CRU;
+} // neverHasSideEffects = 1
+
+let neverHasSideEffects = 1 in
+def MFCR : XFXForm_3<31, 19, (outs gprc:$rT), (ins),
+                     "mfcr $rT", SprMFCR>,
+                     PPC970_MicroCode, PPC970_Unit_CRU;
 
 // Pseudo instruction to perform FADD in round-to-zero mode.
 let usesCustomInserter = 1, Uses = [RM] in {
-  def FADDrtz: Pseudo<(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB), "",
+  def FADDrtz: Pseudo<(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB), "",
                       [(set f64:$FRT, (PPCfaddrtz f64:$FRA, f64:$FRB))]>;
 }
 
@@ -1377,123 +1780,118 @@ let Uses = [RM], Defs = [RM] in {
   def MTFSB1 : XForm_43<63, 38, (outs), (ins u5imm:$FM),
                         "mtfsb1 $FM", IntMTFSB0, []>,
                PPC970_DGroup_Single, PPC970_Unit_FPU;
-  def MTFSF  : XFLForm<63, 711, (outs), (ins i32imm:$FM, F8RC:$rT),
+  def MTFSF  : XFLForm<63, 711, (outs), (ins i32imm:$FM, f8rc:$rT),
                        "mtfsf $FM, $rT", IntMTFSB0, []>,
                PPC970_DGroup_Single, PPC970_Unit_FPU;
 }
 let Uses = [RM] in {
-  def MFFS   : XForm_42<63, 583, (outs F8RC:$rT), (ins), 
+  def MFFS   : XForm_42<63, 583, (outs f8rc:$rT), (ins),
                          "mffs $rT", IntMFFS,
                          [(set f64:$rT, (PPCmffs))]>,
                PPC970_DGroup_Single, PPC970_Unit_FPU;
 }
 
 
-let PPC970_Unit = 1 in {  // FXU Operations.
-
+let PPC970_Unit = 1, neverHasSideEffects = 1 in {  // FXU Operations.
 // XO-Form instructions.  Arithmetic instructions that can set overflow bit
 //
-def ADD4  : XOForm_1<31, 266, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
-                     "add $rT, $rA, $rB", IntSimple,
-                     [(set i32:$rT, (add i32:$rA, i32:$rB))]>;
-let Defs = [CARRY] in {
-def ADDC  : XOForm_1<31, 10, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
-                     "addc $rT, $rA, $rB", IntGeneral,
-                     [(set i32:$rT, (addc i32:$rA, i32:$rB))]>,
-                     PPC970_DGroup_Cracked;
-}
-def DIVW  : XOForm_1<31, 491, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
-                     "divw $rT, $rA, $rB", IntDivW,
-                     [(set i32:$rT, (sdiv i32:$rA, i32:$rB))]>,
-                     PPC970_DGroup_First, PPC970_DGroup_Cracked;
-def DIVWU : XOForm_1<31, 459, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
-                     "divwu $rT, $rA, $rB", IntDivW,
-                     [(set i32:$rT, (udiv i32:$rA, i32:$rB))]>,
-                     PPC970_DGroup_First, PPC970_DGroup_Cracked;
-def MULHW : XOForm_1<31, 75, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
-                     "mulhw $rT, $rA, $rB", IntMulHW,
-                     [(set i32:$rT, (mulhs i32:$rA, i32:$rB))]>;
-def MULHWU : XOForm_1<31, 11, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
-                     "mulhwu $rT, $rA, $rB", IntMulHWU,
-                     [(set i32:$rT, (mulhu i32:$rA, i32:$rB))]>;
-def MULLW : XOForm_1<31, 235, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
-                     "mullw $rT, $rA, $rB", IntMulHW,
-                     [(set i32:$rT, (mul i32:$rA, i32:$rB))]>;
-def SUBF  : XOForm_1<31, 40, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
-                     "subf $rT, $rA, $rB", IntGeneral,
-                     [(set i32:$rT, (sub i32:$rB, i32:$rA))]>;
-let Defs = [CARRY] in {
-def SUBFC : XOForm_1<31, 8, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
-                     "subfc $rT, $rA, $rB", IntGeneral,
-                     [(set i32:$rT, (subc i32:$rB, i32:$rA))]>,
-                     PPC970_DGroup_Cracked;
-}
-def NEG    : XOForm_3<31, 104, 0, (outs GPRC:$rT), (ins GPRC:$rA),
-                      "neg $rT, $rA", IntSimple,
-                      [(set i32:$rT, (ineg i32:$rA))]>;
-let Uses = [CARRY], Defs = [CARRY] in {
-def ADDE  : XOForm_1<31, 138, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
-                      "adde $rT, $rA, $rB", IntGeneral,
-                      [(set i32:$rT, (adde i32:$rA, i32:$rB))]>;
-def ADDME  : XOForm_3<31, 234, 0, (outs GPRC:$rT), (ins GPRC:$rA),
-                      "addme $rT, $rA", IntGeneral,
-                      [(set i32:$rT, (adde i32:$rA, -1))]>;
-def ADDZE  : XOForm_3<31, 202, 0, (outs GPRC:$rT), (ins GPRC:$rA),
-                      "addze $rT, $rA", IntGeneral,
-                      [(set i32:$rT, (adde i32:$rA, 0))]>;
-def SUBFE : XOForm_1<31, 136, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
-                      "subfe $rT, $rA, $rB", IntGeneral,
-                      [(set i32:$rT, (sube i32:$rB, i32:$rA))]>;
-def SUBFME : XOForm_3<31, 232, 0, (outs GPRC:$rT), (ins GPRC:$rA),
-                      "subfme $rT, $rA", IntGeneral,
-                      [(set i32:$rT, (sube -1, i32:$rA))]>;
-def SUBFZE : XOForm_3<31, 200, 0, (outs GPRC:$rT), (ins GPRC:$rA),
-                      "subfze $rT, $rA", IntGeneral,
-                      [(set i32:$rT, (sube 0, i32:$rA))]>;
+defm ADD4  : XOForm_1r<31, 266, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+                       "add", "$rT, $rA, $rB", IntSimple,
+                       [(set i32:$rT, (add i32:$rA, i32:$rB))]>;
+defm ADDC  : XOForm_1rc<31, 10, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+                        "addc", "$rT, $rA, $rB", IntGeneral,
+                        [(set i32:$rT, (addc i32:$rA, i32:$rB))]>,
+                        PPC970_DGroup_Cracked;
+defm DIVW  : XOForm_1r<31, 491, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+                       "divw", "$rT, $rA, $rB", IntDivW,
+                       [(set i32:$rT, (sdiv i32:$rA, i32:$rB))]>,
+                       PPC970_DGroup_First, PPC970_DGroup_Cracked;
+defm DIVWU : XOForm_1r<31, 459, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+                       "divwu", "$rT, $rA, $rB", IntDivW,
+                       [(set i32:$rT, (udiv i32:$rA, i32:$rB))]>,
+                       PPC970_DGroup_First, PPC970_DGroup_Cracked;
+defm MULHW : XOForm_1r<31, 75, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+                       "mulhw", "$rT, $rA, $rB", IntMulHW,
+                       [(set i32:$rT, (mulhs i32:$rA, i32:$rB))]>;
+defm MULHWU : XOForm_1r<31, 11, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+                        "mulhwu", "$rT, $rA, $rB", IntMulHWU,
+                        [(set i32:$rT, (mulhu i32:$rA, i32:$rB))]>;
+defm MULLW : XOForm_1r<31, 235, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+                       "mullw", "$rT, $rA, $rB", IntMulHW,
+                       [(set i32:$rT, (mul i32:$rA, i32:$rB))]>;
+defm SUBF  : XOForm_1r<31, 40, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+                       "subf", "$rT, $rA, $rB", IntGeneral,
+                       [(set i32:$rT, (sub i32:$rB, i32:$rA))]>;
+defm SUBFC : XOForm_1rc<31, 8, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+                        "subfc", "$rT, $rA, $rB", IntGeneral,
+                        [(set i32:$rT, (subc i32:$rB, i32:$rA))]>,
+                        PPC970_DGroup_Cracked;
+defm NEG    : XOForm_3r<31, 104, 0, (outs gprc:$rT), (ins gprc:$rA),
+                        "neg", "$rT, $rA", IntSimple,
+                        [(set i32:$rT, (ineg i32:$rA))]>;
+let Uses = [CARRY] in {
+defm ADDE  : XOForm_1rc<31, 138, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+                        "adde", "$rT, $rA, $rB", IntGeneral,
+                        [(set i32:$rT, (adde i32:$rA, i32:$rB))]>;
+defm ADDME  : XOForm_3rc<31, 234, 0, (outs gprc:$rT), (ins gprc:$rA),
+                         "addme", "$rT, $rA", IntGeneral,
+                         [(set i32:$rT, (adde i32:$rA, -1))]>;
+defm ADDZE  : XOForm_3rc<31, 202, 0, (outs gprc:$rT), (ins gprc:$rA),
+                         "addze", "$rT, $rA", IntGeneral,
+                         [(set i32:$rT, (adde i32:$rA, 0))]>;
+defm SUBFE : XOForm_1rc<31, 136, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+                        "subfe", "$rT, $rA, $rB", IntGeneral,
+                        [(set i32:$rT, (sube i32:$rB, i32:$rA))]>;
+defm SUBFME : XOForm_3rc<31, 232, 0, (outs gprc:$rT), (ins gprc:$rA),
+                         "subfme", "$rT, $rA", IntGeneral,
+                         [(set i32:$rT, (sube -1, i32:$rA))]>;
+defm SUBFZE : XOForm_3rc<31, 200, 0, (outs gprc:$rT), (ins gprc:$rA),
+                         "subfze", "$rT, $rA", IntGeneral,
+                         [(set i32:$rT, (sube 0, i32:$rA))]>;
 }
 }
 
 // A-Form instructions.  Most of the instructions executed in the FPU are of
 // this type.
 //
-let PPC970_Unit = 3 in {  // FPU Operations.
+let PPC970_Unit = 3, neverHasSideEffects = 1 in {  // FPU Operations.
 let Uses = [RM] in {
-  def FMADD : AForm_1<63, 29, 
-                      (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
-                      "fmadd $FRT, $FRA, $FRC, $FRB", FPFused,
+  defm FMADD : AForm_1r<63, 29,
+                      (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB),
+                      "fmadd", "$FRT, $FRA, $FRC, $FRB", FPFused,
                       [(set f64:$FRT, (fma f64:$FRA, f64:$FRC, f64:$FRB))]>;
-  def FMADDS : AForm_1<59, 29,
-                      (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
-                      "fmadds $FRT, $FRA, $FRC, $FRB", FPGeneral,
+  defm FMADDS : AForm_1r<59, 29,
+                      (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB),
+                      "fmadds", "$FRT, $FRA, $FRC, $FRB", FPGeneral,
                       [(set f32:$FRT, (fma f32:$FRA, f32:$FRC, f32:$FRB))]>;
-  def FMSUB : AForm_1<63, 28,
-                      (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
-                      "fmsub $FRT, $FRA, $FRC, $FRB", FPFused,
+  defm FMSUB : AForm_1r<63, 28,
+                      (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB),
+                      "fmsub", "$FRT, $FRA, $FRC, $FRB", FPFused,
                       [(set f64:$FRT,
                             (fma f64:$FRA, f64:$FRC, (fneg f64:$FRB)))]>;
-  def FMSUBS : AForm_1<59, 28,
-                      (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
-                      "fmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral,
+  defm FMSUBS : AForm_1r<59, 28,
+                      (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB),
+                      "fmsubs", "$FRT, $FRA, $FRC, $FRB", FPGeneral,
                       [(set f32:$FRT,
                             (fma f32:$FRA, f32:$FRC, (fneg f32:$FRB)))]>;
-  def FNMADD : AForm_1<63, 31,
-                      (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
-                      "fnmadd $FRT, $FRA, $FRC, $FRB", FPFused,
+  defm FNMADD : AForm_1r<63, 31,
+                      (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB),
+                      "fnmadd", "$FRT, $FRA, $FRC, $FRB", FPFused,
                       [(set f64:$FRT,
                             (fneg (fma f64:$FRA, f64:$FRC, f64:$FRB)))]>;
-  def FNMADDS : AForm_1<59, 31,
-                      (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
-                      "fnmadds $FRT, $FRA, $FRC, $FRB", FPGeneral,
+  defm FNMADDS : AForm_1r<59, 31,
+                      (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB),
+                      "fnmadds", "$FRT, $FRA, $FRC, $FRB", FPGeneral,
                       [(set f32:$FRT,
                             (fneg (fma f32:$FRA, f32:$FRC, f32:$FRB)))]>;
-  def FNMSUB : AForm_1<63, 30,
-                      (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
-                      "fnmsub $FRT, $FRA, $FRC, $FRB", FPFused,
+  defm FNMSUB : AForm_1r<63, 30,
+                      (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB),
+                      "fnmsub", "$FRT, $FRA, $FRC, $FRB", FPFused,
                       [(set f64:$FRT, (fneg (fma f64:$FRA, f64:$FRC,
                                                  (fneg f64:$FRB))))]>;
-  def FNMSUBS : AForm_1<59, 30,
-                      (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
-                      "fnmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral,
+  defm FNMSUBS : AForm_1r<59, 30,
+                      (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB),
+                      "fnmsubs", "$FRT, $FRA, $FRC, $FRB", FPGeneral,
                       [(set f32:$FRT, (fneg (fma f32:$FRA, f32:$FRC,
                                                  (fneg f32:$FRB))))]>;
 }
@@ -1501,53 +1899,56 @@ let Uses = [RM] in {
 // having 4 of these, force the comparison to always be an 8-byte double (code
 // should use an FMRSD if the input comparison value really wants to be a float)
 // and 4/8 byte forms for the result and operand type..
-def FSELD : AForm_1<63, 23,
-                    (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB),
-                    "fsel $FRT, $FRA, $FRC, $FRB", FPGeneral,
-                    [(set f64:$FRT, (PPCfsel f64:$FRA, f64:$FRC, f64:$FRB))]>;
-def FSELS : AForm_1<63, 23,
-                     (outs F4RC:$FRT), (ins F8RC:$FRA, F4RC:$FRC, F4RC:$FRB),
-                     "fsel $FRT, $FRA, $FRC, $FRB", FPGeneral,
-                    [(set f32:$FRT, (PPCfsel f64:$FRA, f32:$FRC, f32:$FRB))]>;
+let Interpretation64Bit = 1 in
+defm FSELD : AForm_1r<63, 23,
+                      (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB),
+                      "fsel", "$FRT, $FRA, $FRC, $FRB", FPGeneral,
+                      [(set f64:$FRT, (PPCfsel f64:$FRA, f64:$FRC, f64:$FRB))]>;
+defm FSELS : AForm_1r<63, 23,
+                      (outs f4rc:$FRT), (ins f8rc:$FRA, f4rc:$FRC, f4rc:$FRB),
+                      "fsel", "$FRT, $FRA, $FRC, $FRB", FPGeneral,
+                      [(set f32:$FRT, (PPCfsel f64:$FRA, f32:$FRC, f32:$FRB))]>;
 let Uses = [RM] in {
-  def FADD  : AForm_2<63, 21,
-                      (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
-                      "fadd $FRT, $FRA, $FRB", FPAddSub,
-                      [(set f64:$FRT, (fadd f64:$FRA, f64:$FRB))]>;
-  def FADDS : AForm_2<59, 21,
-                      (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB),
-                      "fadds $FRT, $FRA, $FRB", FPGeneral,
-                      [(set f32:$FRT, (fadd f32:$FRA, f32:$FRB))]>;
-  def FDIV  : AForm_2<63, 18,
-                      (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
-                      "fdiv $FRT, $FRA, $FRB", FPDivD,
-                      [(set f64:$FRT, (fdiv f64:$FRA, f64:$FRB))]>;
-  def FDIVS : AForm_2<59, 18,
-                      (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB),
-                      "fdivs $FRT, $FRA, $FRB", FPDivS,
-                      [(set f32:$FRT, (fdiv f32:$FRA, f32:$FRB))]>;
-  def FMUL  : AForm_3<63, 25,
-                      (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC),
-                      "fmul $FRT, $FRA, $FRC", FPFused,
-                      [(set f64:$FRT, (fmul f64:$FRA, f64:$FRC))]>;
-  def FMULS : AForm_3<59, 25,
-                      (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC),
-                      "fmuls $FRT, $FRA, $FRC", FPGeneral,
-                      [(set f32:$FRT, (fmul f32:$FRA, f32:$FRC))]>;
-  def FSUB  : AForm_2<63, 20,
-                      (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB),
-                      "fsub $FRT, $FRA, $FRB", FPAddSub,
-                      [(set f64:$FRT, (fsub f64:$FRA, f64:$FRB))]>;
-  def FSUBS : AForm_2<59, 20,
-                      (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB),
-                      "fsubs $FRT, $FRA, $FRB", FPGeneral,
-                      [(set f32:$FRT, (fsub f32:$FRA, f32:$FRB))]>;
+  defm FADD  : AForm_2r<63, 21,
+                        (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB),
+                        "fadd", "$FRT, $FRA, $FRB", FPAddSub,
+                        [(set f64:$FRT, (fadd f64:$FRA, f64:$FRB))]>;
+  defm FADDS : AForm_2r<59, 21,
+                        (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRB),
+                        "fadds", "$FRT, $FRA, $FRB", FPGeneral,
+                        [(set f32:$FRT, (fadd f32:$FRA, f32:$FRB))]>;
+  defm FDIV  : AForm_2r<63, 18,
+                        (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB),
+                        "fdiv", "$FRT, $FRA, $FRB", FPDivD,
+                        [(set f64:$FRT, (fdiv f64:$FRA, f64:$FRB))]>;
+  defm FDIVS : AForm_2r<59, 18,
+                        (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRB),
+                        "fdivs", "$FRT, $FRA, $FRB", FPDivS,
+                        [(set f32:$FRT, (fdiv f32:$FRA, f32:$FRB))]>;
+  defm FMUL  : AForm_3r<63, 25,
+                        (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC),
+                        "fmul", "$FRT, $FRA, $FRC", FPFused,
+                        [(set f64:$FRT, (fmul f64:$FRA, f64:$FRC))]>;
+  defm FMULS : AForm_3r<59, 25,
+                        (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC),
+                        "fmuls", "$FRT, $FRA, $FRC", FPGeneral,
+                        [(set f32:$FRT, (fmul f32:$FRA, f32:$FRC))]>;
+  defm FSUB  : AForm_2r<63, 20,
+                        (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB),
+                        "fsub", "$FRT, $FRA, $FRB", FPAddSub,
+                        [(set f64:$FRT, (fsub f64:$FRA, f64:$FRB))]>;
+  defm FSUBS : AForm_2r<59, 20,
+                        (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRB),
+                        "fsubs", "$FRT, $FRA, $FRB", FPGeneral,
+                        [(set f32:$FRT, (fsub f32:$FRA, f32:$FRB))]>;
   }
 }
 
+let neverHasSideEffects = 1 in {
 let PPC970_Unit = 1 in {  // FXU Operations.
+  let isSelect = 1 in
   def ISEL  : AForm_4<31, 15,
-                     (outs GPRC:$rT), (ins GPRC_NOR0:$rA, GPRC:$rB, CRBITRC:$cond),
+                     (outs gprc:$rT), (ins gprc_nor0:$rA, gprc:$rB, crbitrc:$cond),
                      "isel $rT, $rA, $rB, $cond", IntGeneral,
                      []>;
 }
@@ -1557,26 +1958,29 @@ let PPC970_Unit = 1 in {  // FXU Operations.
 //
 let isCommutable = 1 in {
 // RLWIMI can be commuted if the rotate amount is zero.
-def RLWIMI : MForm_2<20,
-                     (outs GPRC:$rA), (ins GPRC:$rSi, GPRC:$rS, u5imm:$SH, u5imm:$MB, 
-                      u5imm:$ME), "rlwimi $rA, $rS, $SH, $MB, $ME", IntRotate,
-                      []>, PPC970_DGroup_Cracked, RegConstraint<"$rSi = $rA">,
-                      NoEncode<"$rSi">;
+defm RLWIMI : MForm_2r<20, (outs gprc:$rA),
+                       (ins gprc:$rSi, gprc:$rS, u5imm:$SH, u5imm:$MB,
+                       u5imm:$ME), "rlwimi", "$rA, $rS, $SH, $MB, $ME", IntRotate,
+                       []>, PPC970_DGroup_Cracked, RegConstraint<"$rSi = $rA">,
+                       NoEncode<"$rSi">;
 }
+let BaseName = "rlwinm" in {
 def RLWINM : MForm_2<21,
-                     (outs GPRC:$rA), (ins GPRC:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME),
+                     (outs gprc:$rA), (ins gprc:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME),
                      "rlwinm $rA, $rS, $SH, $MB, $ME", IntGeneral,
-                     []>;
+                     []>, RecFormRel;
+let Defs = [CR0] in
 def RLWINMo : MForm_2<21,
-                     (outs GPRC:$rA), (ins GPRC:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME),
-                     "rlwinm. $rA, $rS, $SH, $MB, $ME", IntGeneral,
-                     []>, isDOT, PPC970_DGroup_Cracked;
-def RLWNM  : MForm_2<23,
-                     (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB, u5imm:$MB, u5imm:$ME),
-                     "rlwnm $rA, $rS, $rB, $MB, $ME", IntGeneral,
-                     []>;
+                      (outs gprc:$rA), (ins gprc:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME),
+                      "rlwinm. $rA, $rS, $SH, $MB, $ME", IntGeneral,
+                      []>, isDOT, RecFormRel, PPC970_DGroup_Cracked;
 }
-
+defm RLWNM  : MForm_2r<23, (outs gprc:$rA),
+                       (ins gprc:$rS, gprc:$rB, u5imm:$MB, u5imm:$ME),
+                       "rlwnm", "$rA, $rS, $rB, $MB, $ME", IntGeneral,
+                       []>;
+}
+} // neverHasSideEffects = 1
 
 //===----------------------------------------------------------------------===//
 // PowerPC Instruction Patterns
@@ -1693,14 +2097,6 @@ def : Pat<(f64 (extloadf32 xaddr:$src)),
 def : Pat<(f64 (fextend f32:$src)),
           (COPY_TO_REGCLASS $src, F8RC)>;
 
-// Memory barriers
-def : Pat<(membarrier (i32 imm /*ll*/),
-                      (i32 imm /*ls*/),
-                      (i32 imm /*sl*/),
-                      (i32 imm /*ss*/),
-                      (i32 imm /*device*/)),
-           (SYNC)>;
-
 def : Pat<(atomic_fence (imm), (imm)), (SYNC)>;
 
 // Additional FNMSUB patterns: -a*c + b == -(a*c - b)
@@ -1715,3 +2111,98 @@ def : Pat<(fma f32:$A, (fneg f32:$C), f32:$B),
 
 include "PPCInstrAltivec.td"
 include "PPCInstr64Bit.td"
+
+
+//===----------------------------------------------------------------------===//
+// PowerPC Instructions used for assembler/disassembler only
+//
+
+def ISYNC : XLForm_2_ext<19, 150, 0, 0, 0, (outs), (ins),
+                         "isync", SprISYNC, []>;
+
+def ICBI : XForm_1a<31, 982, (outs), (ins memrr:$src),
+                    "icbi $src", LdStICBI, []>;
+
+//===----------------------------------------------------------------------===//
+// PowerPC Assembler Instruction Aliases
+//
+
+// Pseudo-instructions for alternate assembly syntax (never used by codegen).
+// These are aliases that require C++ handling to convert to the target
+// instruction, while InstAliases can be handled directly by tblgen.
+class PPCAsmPseudo<string asm, dag iops>
+  : Instruction {
+  let Namespace = "PPC";
+  bit PPC64 = 0;  // Default value, override with isPPC64
+
+  let OutOperandList = (outs);
+  let InOperandList = iops;
+  let Pattern = [];
+  let AsmString = asm;
+  let isAsmParserOnly = 1;
+  let isPseudo = 1;
+}
+
+def : InstAlias<"mr $rA, $rB", (OR8 g8rc:$rA, g8rc:$rB, g8rc:$rB)>;
+
+def SLWI : PPCAsmPseudo<"slwi $rA, $rS, $n",
+                        (ins gprc:$rA, gprc:$rS, u5imm:$n)>;
+def SRWI : PPCAsmPseudo<"srwi $rA, $rS, $n",
+                        (ins gprc:$rA, gprc:$rS, u5imm:$n)>;
+def SLDI : PPCAsmPseudo<"sldi $rA, $rS, $n",
+                        (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>;
+def SRDI : PPCAsmPseudo<"srdi $rA, $rS, $n",
+                        (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>;
+
+def : InstAlias<"blt $cc, $dst", (BCC 12, crrc:$cc, condbrtarget:$dst)>;
+def : InstAlias<"bgt $cc, $dst", (BCC 44, crrc:$cc, condbrtarget:$dst)>;
+def : InstAlias<"beq $cc, $dst", (BCC 76, crrc:$cc, condbrtarget:$dst)>;
+def : InstAlias<"bun $cc, $dst", (BCC 108, crrc:$cc, condbrtarget:$dst)>;
+def : InstAlias<"bso $cc, $dst", (BCC 108, crrc:$cc, condbrtarget:$dst)>;
+def : InstAlias<"bge $cc, $dst", (BCC 4, crrc:$cc, condbrtarget:$dst)>;
+def : InstAlias<"bnl $cc, $dst", (BCC 4, crrc:$cc, condbrtarget:$dst)>;
+def : InstAlias<"ble $cc, $dst", (BCC 36, crrc:$cc, condbrtarget:$dst)>;
+def : InstAlias<"bng $cc, $dst", (BCC 36, crrc:$cc, condbrtarget:$dst)>;
+def : InstAlias<"bne $cc, $dst", (BCC 68, crrc:$cc, condbrtarget:$dst)>;
+def : InstAlias<"bnu $cc, $dst", (BCC 100, crrc:$cc, condbrtarget:$dst)>;
+def : InstAlias<"bns $cc, $dst", (BCC 100, crrc:$cc, condbrtarget:$dst)>;
+
+def : InstAlias<"bltlr $cc", (BCLR 12, crrc:$cc)>;
+def : InstAlias<"bgtlr $cc", (BCLR 44, crrc:$cc)>;
+def : InstAlias<"beqlr $cc", (BCLR 76, crrc:$cc)>;
+def : InstAlias<"bunlr $cc", (BCLR 108, crrc:$cc)>;
+def : InstAlias<"bsolr $cc", (BCLR 108, crrc:$cc)>;
+def : InstAlias<"bgelr $cc", (BCLR 4, crrc:$cc)>;
+def : InstAlias<"bnllr $cc", (BCLR 4, crrc:$cc)>;
+def : InstAlias<"blelr $cc", (BCLR 36, crrc:$cc)>;
+def : InstAlias<"bnglr $cc", (BCLR 36, crrc:$cc)>;
+def : InstAlias<"bnelr $cc", (BCLR 68, crrc:$cc)>;
+def : InstAlias<"bnulr $cc", (BCLR 100, crrc:$cc)>;
+def : InstAlias<"bnslr $cc", (BCLR 100, crrc:$cc)>;
+
+def : InstAlias<"bltctr $cc", (BCCTR 12, crrc:$cc)>;
+def : InstAlias<"bgtctr $cc", (BCCTR 44, crrc:$cc)>;
+def : InstAlias<"beqctr $cc", (BCCTR 76, crrc:$cc)>;
+def : InstAlias<"bunctr $cc", (BCCTR 108, crrc:$cc)>;
+def : InstAlias<"bsoctr $cc", (BCCTR 108, crrc:$cc)>;
+def : InstAlias<"bgectr $cc", (BCCTR 4, crrc:$cc)>;
+def : InstAlias<"bnlctr $cc", (BCCTR 4, crrc:$cc)>;
+def : InstAlias<"blectr $cc", (BCCTR 36, crrc:$cc)>;
+def : InstAlias<"bngctr $cc", (BCCTR 36, crrc:$cc)>;
+def : InstAlias<"bnectr $cc", (BCCTR 68, crrc:$cc)>;
+def : InstAlias<"bnuctr $cc", (BCCTR 100, crrc:$cc)>;
+def : InstAlias<"bnsctr $cc", (BCCTR 100, crrc:$cc)>;
+
+def : InstAlias<"bltctrl $cc", (BCCTRL 12, crrc:$cc)>;
+def : InstAlias<"bgtctrl $cc", (BCCTRL 44, crrc:$cc)>;
+def : InstAlias<"beqctrl $cc", (BCCTRL 76, crrc:$cc)>;
+def : InstAlias<"bunctrl $cc", (BCCTRL 108, crrc:$cc)>;
+def : InstAlias<"bsoctrl $cc", (BCCTRL 108, crrc:$cc)>;
+def : InstAlias<"bgectrl $cc", (BCCTRL 4, crrc:$cc)>;
+def : InstAlias<"bnlctrl $cc", (BCCTRL 4, crrc:$cc)>;
+def : InstAlias<"blectrl $cc", (BCCTRL 36, crrc:$cc)>;
+def : InstAlias<"bngctrl $cc", (BCCTRL 36, crrc:$cc)>;
+def : InstAlias<"bnectrl $cc", (BCCTRL 68, crrc:$cc)>;
+def : InstAlias<"bnuctrl $cc", (BCCTRL 100, crrc:$cc)>;
+def : InstAlias<"bnsctrl $cc", (BCCTRL 100, crrc:$cc)>;
+
diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp
index 9b0df3e..f8cf3a5 100644
--- a/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -14,6 +14,7 @@
 
 #include "PPC.h"
 #include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineModuleInfoImpls.h"
@@ -51,7 +52,14 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){
   // before we return the symbol.
   if (MO.getTargetFlags() == PPCII::MO_DARWIN_STUB) {
     Name += "$stub";
-    MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str());
+    const char *PGP = AP.MAI->getPrivateGlobalPrefix();
+    const char *Prefix = "";
+    if (!Name.startswith(PGP)) {
+      // http://llvm.org/bugs/show_bug.cgi?id=15763
+      // all stubs and lazy_ptrs should be local symbols, which need leading 'L'
+      Prefix = PGP;
+    }
+    MCSymbol *Sym = Ctx.GetOrCreateSymbol(Twine(Prefix) + Twine(Name));
     MachineModuleInfoImpl::StubValueTy &StubSym =
       getMachOMMI(AP).getFnStubEntry(Sym);
     if (StubSym.getPointer())
diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
index ee18ead..40d1f3a 100644
--- a/lib/Target/PowerPC/PPCMachineFunctionInfo.h
+++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -84,6 +84,11 @@ class PPCFunctionInfo : public MachineFunctionInfo {
   /// CRSpillFrameIndex - FrameIndex for CR spill slot for 32-bit SVR4.
   int CRSpillFrameIndex;
 
+  /// If any of CR[2-4] need to be saved in the prologue and restored in the
+  /// epilogue then they are added to this array. This is used for the
+  /// 64-bit SVR4 ABI.
+  SmallVector<unsigned, 3> MustSaveCRs;
+
 public:
   explicit PPCFunctionInfo(MachineFunction &MF) 
     : FramePointerSaveIndex(0),
@@ -154,6 +159,10 @@ public:
 
   int getCRSpillFrameIndex() const { return CRSpillFrameIndex; }
   void setCRSpillFrameIndex(int idx) { CRSpillFrameIndex = idx; }
+
+  const SmallVector<unsigned, 3> &
+    getMustSaveCRs() const { return MustSaveCRs; }
+  void addMustSaveCR(unsigned Reg) { MustSaveCRs.push_back(Reg); }
 };
 
 } // end of namespace llvm
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 1d61a3a..2be6324 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -76,6 +76,8 @@ PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST,
 const TargetRegisterClass *
 PPCRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind)
                                                                        const {
+  // Note that PPCInstrInfo::FoldImmediate also directly uses this Kind value
+  // when it checks for ZERO folding.
   if (Kind == 1) {
     if (Subtarget.isPPC64())
       return &PPC::G8RC_NOX0RegClass;
@@ -452,6 +454,33 @@ PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
   return false;
 }
 
+// Figure out if the offset in the instruction is shifted right two bits. This
+// is true for instructions like "STD", which the machine implicitly adds two
+// low zeros to.
+static bool usesIXAddr(const MachineInstr &MI) {
+  // Only the opcodes listed here keep their offset without its two low bits.
+  switch (MI.getOpcode()) {
+  case PPC::LWA:
+  case PPC::LD:
+  case PPC::STD:
+    return true;
+  default:
+    break;
+  }
+  return false;
+}
+
+// Return the OffsetOperandNo given the FIOperandNum (and the instruction).
+static unsigned getOffsetONFromFION(const MachineInstr &MI,
+                                    unsigned FIOperandNum) {
+  // For inline asm the offset operand directly precedes the frame index.
+  if (MI.isInlineAsm())
+    return FIOperandNum - 1;
+
+  // Otherwise take into account whether it's an add or mem instruction.
+  return (FIOperandNum == 2) ? 1 : 2;
+}
+
 void
 PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                      int SPAdj, unsigned FIOperandNum,
@@ -469,10 +498,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
   DebugLoc dl = MI.getDebugLoc();
 
-  // Take into account whether it's an add or mem instruction
-  unsigned OffsetOperandNo = (FIOperandNum == 2) ? 1 : 2;
-  if (MI.isInlineAsm())
-    OffsetOperandNo = FIOperandNum-1;
+  unsigned OffsetOperandNo = getOffsetONFromFION(MI, FIOperandNum);
 
   // Get the frame index.
   int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
@@ -514,17 +540,8 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                                 (is64Bit ? PPC::X1 : PPC::R1),
                                               false);
 
-  // Figure out if the offset in the instruction is shifted right two bits. This
-  // is true for instructions like "STD", which the machine implicitly adds two
-  // low zeros to.
-  bool isIXAddr = false;
-  switch (OpC) {
-  case PPC::LWA:
-  case PPC::LD:
-  case PPC::STD:
-    isIXAddr = true;
-    break;
-  }
+  // Figure out if the offset in the instruction is shifted right two bits.
+  bool isIXAddr = usesIXAddr(MI);
 
   // If the instruction is not present in ImmToIdxMap, then it has no immediate
   // form (and must be r+r).
@@ -616,3 +633,124 @@ unsigned PPCRegisterInfo::getEHExceptionRegister() const {
 unsigned PPCRegisterInfo::getEHHandlerRegister() const {
   return !Subtarget.isPPC64() ? PPC::R4 : PPC::X4;
 }
+
+/// Returns true if the instruction's frame index
+/// reference would be better served by a base register other than FP
+/// or SP. Used by LocalStackFrameAllocation to determine which frame index
+/// references it should create new base registers for.
+bool PPCRegisterInfo::
+needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
+  assert(Offset < 0 && "Local offset must be negative");
+
+  // It's the load/store FI references that cause issues, as it can be difficult
+  // to materialize the offset if it won't fit in the literal field. Estimate
+  // based on the size of the local frame and some conservative assumptions
+  // about the rest of the stack frame (note, this is pre-regalloc, so
+  // we don't know everything for certain yet) whether this offset is likely
+  // to be out of range of the immediate. Return true if so.
+
+  // We only generate virtual base registers for loads and stores that have
+  // an r+i form. Return false for everything else. Do this before reading
+  // the offset operand: only r+i forms are known to carry an immediate there.
+  unsigned OpC = MI->getOpcode();
+  if (!ImmToIdxMap.count(OpC))
+    return false;
+
+  // Don't generate a new virtual base register just to add zero to it.
+  if ((OpC == PPC::ADDI || OpC == PPC::ADDI8) &&
+      MI->getOperand(2).getImm() == 0)
+    return false;
+
+  unsigned FIOperandNum = 0;
+  while (!MI->getOperand(FIOperandNum).isFI()) {
+    ++FIOperandNum;
+    assert(FIOperandNum < MI->getNumOperands() &&
+           "Instr doesn't have FrameIndex operand!");
+  }
+
+  // Fold in the encoded displacement. IX-addr values lack their two low bits;
+  // multiply instead of shifting: left-shifting a negative value is undefined.
+  unsigned OffsetOperandNo = getOffsetONFromFION(*MI, FIOperandNum);
+  int64_t Imm = MI->getOperand(OffsetOperandNo).getImm();
+  Offset += usesIXAddr(*MI) ? Imm * 4 : Imm;
+
+  MachineBasicBlock &MBB = *MI->getParent();
+  MachineFunction &MF = *MBB.getParent();
+
+  const PPCFrameLowering *PPCFI =
+    static_cast<const PPCFrameLowering*>(MF.getTarget().getFrameLowering());
+  unsigned StackEst =
+    PPCFI->determineFrameLayout(MF, false, true);
+
+  // If we likely don't need a stack frame, then we probably don't need a
+  // virtual base register either.
+  if (!StackEst)
+    return false;
+
+  // Estimate an offset from the stack pointer.
+  // The incoming offset is relative to the SP at the start of the function,
+  // but when we access the local it'll be relative to the SP after local
+  // allocation, so adjust our SP-relative offset by that allocation size.
+  Offset += StackEst;
+
+  // The frame pointer will point to the end of the stack, so estimate the
+  // offset as the difference between the object offset and the FP location.
+  return !isFrameOffsetLegal(MI, Offset);
+}
+
+/// Emit, at the start of \p MBB, an add-immediate that defines \p BaseReg as
+/// the address of frame index \p FrameIdx plus \p Offset.
+void PPCRegisterInfo::
+materializeFrameBaseRegister(MachineBasicBlock *MBB,
+                             unsigned BaseReg, int FrameIdx,
+                             int64_t Offset) const {
+  MachineFunction &Fn = *MBB->getParent();
+  const MCInstrDesc &AddDesc =
+    TII.get(Subtarget.isPPC64() ? PPC::ADDI8 : PPC::ADDI);
+
+  // Borrow the debug location of the first instruction, if the block has one;
+  // a default-constructed DebugLoc means "unknown".
+  MachineBasicBlock::iterator InsertPt = MBB->begin();
+  DebugLoc Loc;
+  if (InsertPt != MBB->end())
+    Loc = InsertPt->getDebugLoc();
+  const TargetRegisterClass *RC = TII.getRegClass(AddDesc, 0, this, Fn);
+  Fn.getRegInfo().constrainRegClass(BaseReg, RC);
+  BuildMI(*MBB, InsertPt, Loc, AddDesc, BaseReg)
+    .addFrameIndex(FrameIdx).addImm(Offset);
+}
+
+/// Rewrite the frame index reference in instruction \p I to use \p BaseReg,
+/// folding \p Offset into the instruction's existing immediate.
+void
+PPCRegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I,
+                                   unsigned BaseReg, int64_t Offset) const {
+  MachineInstr &MI = *I;
+
+  unsigned FIOperandNum = 0;
+  while (!MI.getOperand(FIOperandNum).isFI()) {
+    ++FIOperandNum;
+    assert(FIOperandNum < MI.getNumOperands() &&
+           "Instr doesn't have FrameIndex operand!");
+  }
+
+  MI.getOperand(FIOperandNum).ChangeToRegister(BaseReg, false);
+  unsigned OffsetOperandNo = getOffsetONFromFION(MI, FIOperandNum);
+
+  // When the encoded offset is shifted right two bits, scale the immediate up
+  // before adding (by multiplying: left-shifting a negative value is
+  // undefined), then shift the sum back down for re-encoding.
+  int64_t Imm = MI.getOperand(OffsetOperandNo).getImm();
+  if (usesIXAddr(MI))
+    Offset = (Offset + Imm * 4) >> 2;
+  else
+    Offset += Imm;
+  MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset);
+}
+
+bool PPCRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
+                                         int64_t Offset) const {
+  if (MI->getOpcode() == PPC::DBG_VALUE) return true; // Always Reg+Imm.
+  return isInt<16>(Offset) && !(usesIXAddr(*MI) && (Offset & 3) != 0);
+}
+
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h
index 7e6683e..7a48b4b 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -61,6 +61,10 @@ public:
     return true;
   }
 
+  virtual bool requiresVirtualBaseRegisters(const MachineFunction &MF) const {
+    return true;
+  }
+
   void lowerDynamicAlloc(MachineBasicBlock::iterator II) const;
   void lowerCRSpilling(MachineBasicBlock::iterator II,
                        unsigned FrameIndex) const;
@@ -77,6 +81,15 @@ public:
                            int SPAdj, unsigned FIOperandNum,
                            RegScavenger *RS = NULL) const;
 
+  // Support for virtual base registers.
+  bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const;
+  void materializeFrameBaseRegister(MachineBasicBlock *MBB,
+                                    unsigned BaseReg, int FrameIdx,
+                                    int64_t Offset) const;
+  void resolveFrameIndex(MachineBasicBlock::iterator I,
+                         unsigned BaseReg, int64_t Offset) const;
+  bool isFrameOffsetLegal(const MachineInstr *MI, int64_t Offset) const;
+
   // Debug information queries.
   unsigned getFrameRegister(const MachineFunction &MF) const;
 
diff --git a/lib/Target/PowerPC/PPCScheduleA2.td b/lib/Target/PowerPC/PPCScheduleA2.td
index ae084aa..8d5838e 100644
--- a/lib/Target/PowerPC/PPCScheduleA2.td
+++ b/lib/Target/PowerPC/PPCScheduleA2.td
@@ -759,7 +759,7 @@ def PPCA2Model : SchedMachineModel {
   let LoadLatency = 6; // Optimistic load latency assuming bypass.
                        // This is overriden by OperandCycles if the
                        // Itineraries are queried instead.
-  let MispredictPenalty = 6;
+  let MispredictPenalty = 13;
 
   let Itineraries = PPCA2Itineraries;
 }
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index fe851c1..14dc794 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -86,8 +86,14 @@ public:
     return getTM<PPCTargetMachine>();
   }
 
+  const PPCSubtarget &getPPCSubtarget() const {
+    return *getPPCTargetMachine().getSubtargetImpl();
+  }
+
   virtual bool addPreRegAlloc();
+  virtual bool addILPOpts();
   virtual bool addInstSelector();
+  virtual bool addPreSched2();
   virtual bool addPreEmitPass();
 };
 } // namespace
@@ -103,13 +109,31 @@ bool PPCPassConfig::addPreRegAlloc() {
   return false;
 }
 
+bool PPCPassConfig::addILPOpts() {
+  // The early if-converter is only scheduled when the subtarget has ISEL.
+  if (!getPPCSubtarget().hasISEL())
+    return false;
+
+  addPass(&EarlyIfConverterID);
+  return true;
+}
+
 bool PPCPassConfig::addInstSelector() {
   // Install an instruction selector.
   addPass(createPPCISelDag(getPPCTargetMachine()));
   return false;
 }
 
+bool PPCPassConfig::addPreSched2() {
+  if (getOptLevel() != CodeGenOpt::None)
+    addPass(&IfConverterID);
+  // Returns true whether or not the if-converter was scheduled above.
+  return true;
+}
+
 bool PPCPassConfig::addPreEmitPass() {
+  if (getOptLevel() != CodeGenOpt::None)
+    addPass(createPPCEarlyReturnPass());
   // Must run branch selection immediately preceding the asm printer.
   addPass(createPPCBranchSelectionPass());
   return false;
diff --git a/lib/Target/PowerPC/README.txt b/lib/Target/PowerPC/README.txt
index cc2ff96..514f840 100644
--- a/lib/Target/PowerPC/README.txt
+++ b/lib/Target/PowerPC/README.txt
@@ -126,25 +126,6 @@ produced this with bdnz, the loop would be a single dispatch group.
 
 ===-------------------------------------------------------------------------===
 
-Compile:
-
-void foo(int *P) {
- if (P)  *P = 0;
-}
-
-into:
-
-_foo:
-        cmpwi cr0,r3,0
-        beqlr cr0
-        li r0,0
-        stw r0,0(r3)
-        blr
-
-This is effectively a simple form of predication.
-
-===-------------------------------------------------------------------------===
-
 Lump the constant pool for each function into ONE pic object, and reference
 pieces of it as offsets from the start.  For functions like this (contrived
 to have lots of constants obviously):
diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h
index 0b01433..9792bd8 100644
--- a/lib/Target/R600/AMDGPU.h
+++ b/lib/Target/R600/AMDGPU.h
@@ -24,6 +24,7 @@ class AMDGPUTargetMachine;
 FunctionPass* createR600KernelParametersPass(const DataLayout *TD);
 FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
 FunctionPass *createR600EmitClauseMarkers(TargetMachine &tm);
+FunctionPass *createR600Packetizer(TargetMachine &tm);
 FunctionPass *createR600ControlFlowFinalizer(TargetMachine &tm);
 
 // SI Passes
diff --git a/lib/Target/R600/AMDGPUAsmPrinter.cpp b/lib/Target/R600/AMDGPUAsmPrinter.cpp
index f600144..4c35ecf 100644
--- a/lib/Target/R600/AMDGPUAsmPrinter.cpp
+++ b/lib/Target/R600/AMDGPUAsmPrinter.cpp
@@ -19,9 +19,16 @@
 
 #include "AMDGPUAsmPrinter.h"
 #include "AMDGPU.h"
+#include "SIDefines.h"
 #include "SIMachineFunctionInfo.h"
 #include "SIRegisterInfo.h"
+#include "R600Defines.h"
+#include "R600MachineFunctionInfo.h"
+#include "R600RegisterInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/ELF.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
 
@@ -50,15 +57,82 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
   if (OutStreamer.hasRawTextSupport()) {
     OutStreamer.EmitRawText("@" + MF.getName() + ":");
   }
-  OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
+
+  const MCSectionELF *ConfigSection = getObjFileLowering().getContext()
+                                              .getELFSection(".AMDGPU.config",
+                                              ELF::SHT_PROGBITS, 0,
+                                              SectionKind::getReadOnly());
+  OutStreamer.SwitchSection(ConfigSection);
   if (STM.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
-    EmitProgramInfo(MF);
+    EmitProgramInfoSI(MF);
+  } else {
+    EmitProgramInfoR600(MF);
   }
+  OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
   EmitFunctionBody();
   return false;
 }
 
-void AMDGPUAsmPrinter::EmitProgramInfo(MachineFunction &MF) {
+void AMDGPUAsmPrinter::EmitProgramInfoR600(MachineFunction &MF) {
+  unsigned MaxGPR = 0;
+  bool killPixel = false;
+  const R600RegisterInfo * RI =
+                static_cast<const R600RegisterInfo*>(TM.getRegisterInfo());
+  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
+  const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
+
+  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+                                                  BB != BB_E; ++BB) {
+    MachineBasicBlock &MBB = *BB;
+    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+                                                    I != E; ++I) {
+      MachineInstr &MI = *I;
+      if (MI.getOpcode() == AMDGPU::KILLGT)
+        killPixel = true;
+      unsigned numOperands = MI.getNumOperands();
+      for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
+        MachineOperand & MO = MI.getOperand(op_idx);
+        if (!MO.isReg())
+          continue;
+        unsigned HWReg = RI->getEncodingValue(MO.getReg()) & 0xff;
+
+        // Registers with an encoding value > 127 aren't GPRs
+        if (HWReg > 127)
+          continue;
+        MaxGPR = std::max(MaxGPR, HWReg);
+      }
+    }
+  }
+
+  unsigned RsrcReg;
+  if (STM.device()->getGeneration() >= AMDGPUDeviceInfo::HD5XXX) {
+    // Evergreen / Northern Islands
+    switch (MFI->ShaderType) {
+    default: // Fall through
+    case ShaderType::COMPUTE:  RsrcReg = R_0288D4_SQ_PGM_RESOURCES_LS; break;
+    case ShaderType::GEOMETRY: RsrcReg = R_028878_SQ_PGM_RESOURCES_GS; break;
+    case ShaderType::PIXEL:    RsrcReg = R_028844_SQ_PGM_RESOURCES_PS; break;
+    case ShaderType::VERTEX:   RsrcReg = R_028860_SQ_PGM_RESOURCES_VS; break;
+    }
+  } else {
+    // R600 / R700
+    switch (MFI->ShaderType) {
+    default: // Fall through
+    case ShaderType::GEOMETRY: // Fall through
+    case ShaderType::COMPUTE:  // Fall through
+    case ShaderType::VERTEX:   RsrcReg = R_028868_SQ_PGM_RESOURCES_VS; break;
+    case ShaderType::PIXEL:    RsrcReg = R_028850_SQ_PGM_RESOURCES_PS; break;
+    }
+  }
+
+  OutStreamer.EmitIntValue(RsrcReg, 4);
+  OutStreamer.EmitIntValue(S_NUM_GPRS(MaxGPR + 1) |
+                           S_STACK_SIZE(MFI->StackSize), 4);
+  OutStreamer.EmitIntValue(R_02880C_DB_SHADER_CONTROL, 4);
+  OutStreamer.EmitIntValue(S_02880C_KILL_ENABLE(killPixel), 4);
+}
+
+void AMDGPUAsmPrinter::EmitProgramInfoSI(MachineFunction &MF) {
   unsigned MaxSGPR = 0;
   unsigned MaxVGPR = 0;
   bool VCCUsed = false;
@@ -107,6 +181,9 @@ void AMDGPUAsmPrinter::EmitProgramInfo(MachineFunction &MF) {
         } else if (AMDGPU::VReg_64RegClass.contains(reg)) {
           isSGPR = false;
           width = 2;
+        } else if (AMDGPU::VReg_96RegClass.contains(reg)) {
+          isSGPR = false;
+          width = 3;
         } else if (AMDGPU::SReg_128RegClass.contains(reg)) {
           isSGPR = true;
           width = 4;
@@ -139,7 +216,19 @@ void AMDGPUAsmPrinter::EmitProgramInfo(MachineFunction &MF) {
     MaxSGPR += 2;
   }
   SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>();
-  OutStreamer.EmitIntValue(MaxSGPR + 1, 4);
-  OutStreamer.EmitIntValue(MaxVGPR + 1, 4);
-  OutStreamer.EmitIntValue(MFI->PSInputAddr, 4);
+  unsigned RsrcReg;
+  switch (MFI->ShaderType) {
+  default: // Fall through
+  case ShaderType::COMPUTE:  RsrcReg = R_00B848_COMPUTE_PGM_RSRC1; break;
+  case ShaderType::GEOMETRY: RsrcReg = R_00B228_SPI_SHADER_PGM_RSRC1_GS; break;
+  case ShaderType::PIXEL:    RsrcReg = R_00B028_SPI_SHADER_PGM_RSRC1_PS; break;
+  case ShaderType::VERTEX:   RsrcReg = R_00B128_SPI_SHADER_PGM_RSRC1_VS; break;
+  }
+
+  OutStreamer.EmitIntValue(RsrcReg, 4);
+  OutStreamer.EmitIntValue(S_00B028_VGPRS(MaxVGPR / 4) | S_00B028_SGPRS(MaxSGPR / 8), 4);
+  if (MFI->ShaderType == ShaderType::PIXEL) {
+    OutStreamer.EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4);
+    OutStreamer.EmitIntValue(MFI->PSInputAddr, 4);
+  }
 }
diff --git a/lib/Target/R600/AMDGPUAsmPrinter.h b/lib/Target/R600/AMDGPUAsmPrinter.h
index 3812282..f425ef4 100644
--- a/lib/Target/R600/AMDGPUAsmPrinter.h
+++ b/lib/Target/R600/AMDGPUAsmPrinter.h
@@ -33,7 +33,8 @@ public:
 
   /// \brief Emit register usage information so that the GPU driver
   /// can correctly setup the GPU state.
-  void EmitProgramInfo(MachineFunction &MF);
+  void EmitProgramInfoR600(MachineFunction &MF);
+  void EmitProgramInfoSI(MachineFunction &MF);
 
   /// Implemented in AMDGPUMCInstLower.cpp
   virtual void EmitInstruction(const MachineInstr *MI);
diff --git a/lib/Target/R600/AMDGPUCallingConv.td b/lib/Target/R600/AMDGPUCallingConv.td
index 45ae37e..9c30515 100644
--- a/lib/Target/R600/AMDGPUCallingConv.td
+++ b/lib/Target/R600/AMDGPUCallingConv.td
@@ -32,8 +32,14 @@ def CC_SI : CallingConv<[
     VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
     VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
     VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31
-  ]>>>
+  ]>>>,
 
+  // This is the default for i64 values.
+  // XXX: We should change this once clang understands the CC_AMDGPU.
+  CCIfType<[i64], CCAssignToRegWithShadow<
+   [ SGPR0, SGPR2, SGPR4, SGPR6, SGPR8, SGPR10, SGPR12, SGPR14 ],
+   [ SGPR1, SGPR3, SGPR5, SGPR7, SGPR9, SGPR11, SGPR13, SGPR15 ]
+  >>
 ]>;
 
 def CC_AMDGPU : CallingConv<[
diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h
index f31b646..c2a79ea 100644
--- a/lib/Target/R600/AMDGPUISelLowering.h
+++ b/lib/Target/R600/AMDGPUISelLowering.h
@@ -116,6 +116,7 @@ enum {
   BRANCH_COND,
   // End AMDIL ISD Opcodes
   BITALIGN,
+  BUFFER_STORE,
   DWORDADDR,
   FRACT,
   FMAX,
diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td
index e740348..d2620b2 100644
--- a/lib/Target/R600/AMDGPUInstructions.td
+++ b/lib/Target/R600/AMDGPUInstructions.td
@@ -94,6 +94,7 @@ class Constants {
 int TWO_PI = 0x40c90fdb;
 int PI = 0x40490fdb;
 int TWO_PI_INV = 0x3e22f983;
+int FP_UINT_MAX_PLUS_1 = 0x4f800000;	// 1 << 32 in floating point encoding
 }
 def CONST : Constants;
 
@@ -115,21 +116,21 @@ class CLAMP <RegisterClass rc> : AMDGPUShaderInst <
   (outs rc:$dst),
   (ins rc:$src0),
   "CLAMP $dst, $src0",
-  [(set rc:$dst, (int_AMDIL_clamp rc:$src0, (f32 FP_ZERO), (f32 FP_ONE)))]
+  [(set f32:$dst, (int_AMDIL_clamp f32:$src0, (f32 FP_ZERO), (f32 FP_ONE)))]
 >;
 
 class FABS <RegisterClass rc> : AMDGPUShaderInst <
   (outs rc:$dst),
   (ins rc:$src0),
   "FABS $dst, $src0",
-  [(set rc:$dst, (fabs rc:$src0))]
+  [(set f32:$dst, (fabs f32:$src0))]
 >;
 
 class FNEG <RegisterClass rc> : AMDGPUShaderInst <
   (outs rc:$dst),
   (ins rc:$src0),
   "FNEG $dst, $src0",
-  [(set rc:$dst, (fneg rc:$src0))]
+  [(set f32:$dst, (fneg f32:$src0))]
 >;
 
 } // usesCustomInserter = 1
@@ -140,8 +141,7 @@ multiclass RegisterLoadStore <RegisterClass dstClass, Operand addrClass,
     (outs dstClass:$dst),
     (ins addrClass:$addr, i32imm:$chan),
     "RegisterLoad $dst, $addr",
-    [(set (i32 dstClass:$dst), (AMDGPUregister_load addrPat:$addr,
-                                                    (i32 timm:$chan)))]
+    [(set i32:$dst, (AMDGPUregister_load addrPat:$addr, (i32 timm:$chan)))]
   > {
     let isRegisterLoad = 1;
   }
@@ -150,7 +150,7 @@ multiclass RegisterLoadStore <RegisterClass dstClass, Operand addrClass,
     (outs),
     (ins dstClass:$val, addrClass:$addr, i32imm:$chan),
     "RegisterStore $val, $addr",
-    [(AMDGPUregister_store (i32 dstClass:$val), addrPat:$addr, (i32 timm:$chan))]
+    [(AMDGPUregister_store i32:$val, addrPat:$addr, (i32 timm:$chan))]
   > {
     let isRegisterStore = 1;
   }
@@ -161,105 +161,140 @@ multiclass RegisterLoadStore <RegisterClass dstClass, Operand addrClass,
 /* Generic helper patterns for intrinsics */
 /* -------------------------------------- */
 
-class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul,
-                  RegisterClass rc> : Pat <
-  (fpow rc:$src0, rc:$src1),
-  (exp_ieee (mul rc:$src1, (log_ieee rc:$src0)))
+class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul>
+  : Pat <
+  (fpow f32:$src0, f32:$src1),
+  (exp_ieee (mul f32:$src1, (log_ieee f32:$src0)))
 >;
 
 /* Other helper patterns */
 /* --------------------- */
 
 /* Extract element pattern */
-class Extract_Element <ValueType sub_type, ValueType vec_type,
-                     RegisterClass vec_class, int sub_idx, 
-                     SubRegIndex sub_reg>: Pat<
-  (sub_type (vector_extract (vec_type vec_class:$src), sub_idx)),
-  (EXTRACT_SUBREG vec_class:$src, sub_reg)
+class Extract_Element <ValueType sub_type, ValueType vec_type, int sub_idx, 
+                       SubRegIndex sub_reg>
+  : Pat<
+  (sub_type (vector_extract vec_type:$src, sub_idx)),
+  (EXTRACT_SUBREG $src, sub_reg)
 >;
 
 /* Insert element pattern */
 class Insert_Element <ValueType elem_type, ValueType vec_type,
-                      RegisterClass elem_class, RegisterClass vec_class,
-                      int sub_idx, SubRegIndex sub_reg> : Pat <
-
-  (vec_type (vector_insert (vec_type vec_class:$vec),
-                           (elem_type elem_class:$elem), sub_idx)),
-  (INSERT_SUBREG vec_class:$vec, elem_class:$elem, sub_reg)
+                      int sub_idx, SubRegIndex sub_reg>
+  : Pat <
+  (vector_insert vec_type:$vec, elem_type:$elem, sub_idx),
+  (INSERT_SUBREG $vec, $elem, sub_reg)
 >;
 
 // Vector Build pattern
-class Vector1_Build <ValueType vecType, RegisterClass vectorClass,
-                     ValueType elemType, RegisterClass elemClass> : Pat <
-  (vecType (build_vector (elemType elemClass:$src))),
-  (vecType elemClass:$src)
+class Vector1_Build <ValueType vecType, ValueType elemType,
+                     RegisterClass rc> : Pat <
+  (vecType (build_vector elemType:$src)),
+  (vecType (COPY_TO_REGCLASS $src, rc))
 >;
 
-class Vector2_Build <ValueType vecType, RegisterClass vectorClass,
-                     ValueType elemType, RegisterClass elemClass> : Pat <
-  (vecType (build_vector (elemType elemClass:$sub0), (elemType elemClass:$sub1))),
+class Vector2_Build <ValueType vecType, ValueType elemType> : Pat <
+  (vecType (build_vector elemType:$sub0, elemType:$sub1)),
   (INSERT_SUBREG (INSERT_SUBREG
-  (vecType (IMPLICIT_DEF)), elemClass:$sub0, sub0), elemClass:$sub1, sub1)
+    (vecType (IMPLICIT_DEF)), $sub0, sub0), $sub1, sub1)
 >;
 
-class Vector4_Build <ValueType vecType, RegisterClass vectorClass,
-                     ValueType elemType, RegisterClass elemClass> : Pat <
-  (vecType (build_vector (elemType elemClass:$x), (elemType elemClass:$y),
-                         (elemType elemClass:$z), (elemType elemClass:$w))),
+class Vector4_Build <ValueType vecType, ValueType elemType> : Pat <
+  (vecType (build_vector elemType:$x, elemType:$y, elemType:$z, elemType:$w)),
   (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
-  (vecType (IMPLICIT_DEF)), elemClass:$x, sub0), elemClass:$y, sub1),
-                            elemClass:$z, sub2), elemClass:$w, sub3)
+    (vecType (IMPLICIT_DEF)), $x, sub0), $y, sub1), $z, sub2), $w, sub3)
 >;
 
-class Vector8_Build <ValueType vecType, RegisterClass vectorClass,
-                     ValueType elemType, RegisterClass elemClass> : Pat <
-  (vecType (build_vector (elemType elemClass:$sub0), (elemType elemClass:$sub1),
-                         (elemType elemClass:$sub2), (elemType elemClass:$sub3),
-                         (elemType elemClass:$sub4), (elemType elemClass:$sub5),
-                         (elemType elemClass:$sub6), (elemType elemClass:$sub7))),
-  (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
+class Vector8_Build <ValueType vecType, ValueType elemType> : Pat <
+  (vecType (build_vector elemType:$sub0, elemType:$sub1,
+                         elemType:$sub2, elemType:$sub3,
+                         elemType:$sub4, elemType:$sub5,
+                         elemType:$sub6, elemType:$sub7)),
   (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
-  (vecType (IMPLICIT_DEF)), elemClass:$sub0, sub0), elemClass:$sub1, sub1),
-                            elemClass:$sub2, sub2), elemClass:$sub3, sub3),
-                            elemClass:$sub4, sub4), elemClass:$sub5, sub5),
-                            elemClass:$sub6, sub6), elemClass:$sub7, sub7)
+    (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
+    (vecType (IMPLICIT_DEF)), $sub0, sub0), $sub1, sub1),
+                              $sub2, sub2), $sub3, sub3),
+                              $sub4, sub4), $sub5, sub5),
+                              $sub6, sub6), $sub7, sub7)
 >;
 
-class Vector16_Build <ValueType vecType, RegisterClass vectorClass,
-                      ValueType elemType, RegisterClass elemClass> : Pat <
-  (vecType (build_vector (elemType elemClass:$sub0), (elemType elemClass:$sub1),
-                         (elemType elemClass:$sub2), (elemType elemClass:$sub3),
-                         (elemType elemClass:$sub4), (elemType elemClass:$sub5),
-                         (elemType elemClass:$sub6), (elemType elemClass:$sub7),
-                         (elemType elemClass:$sub8), (elemType elemClass:$sub9),
-                         (elemType elemClass:$sub10), (elemType elemClass:$sub11),
-                         (elemType elemClass:$sub12), (elemType elemClass:$sub13),
-                         (elemType elemClass:$sub14), (elemType elemClass:$sub15))),
-  (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
-  (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
-  (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
+class Vector16_Build <ValueType vecType, ValueType elemType> : Pat <
+  (vecType (build_vector elemType:$sub0, elemType:$sub1,
+                         elemType:$sub2, elemType:$sub3,
+                         elemType:$sub4, elemType:$sub5,
+                         elemType:$sub6, elemType:$sub7,
+                         elemType:$sub8, elemType:$sub9,
+                         elemType:$sub10, elemType:$sub11,
+                         elemType:$sub12, elemType:$sub13,
+                         elemType:$sub14, elemType:$sub15)),
   (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
-  (vecType (IMPLICIT_DEF)), elemClass:$sub0, sub0), elemClass:$sub1, sub1),
-                            elemClass:$sub2, sub2), elemClass:$sub3, sub3),
-                            elemClass:$sub4, sub4), elemClass:$sub5, sub5),
-                            elemClass:$sub6, sub6), elemClass:$sub7, sub7),
-                            elemClass:$sub8, sub8), elemClass:$sub9, sub9),
-                            elemClass:$sub10, sub10), elemClass:$sub11, sub11),
-                            elemClass:$sub12, sub12), elemClass:$sub13, sub13),
-                            elemClass:$sub14, sub14), elemClass:$sub15, sub15)
+    (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
+    (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
+    (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
+    (vecType (IMPLICIT_DEF)), $sub0, sub0), $sub1, sub1),
+                            $sub2, sub2), $sub3, sub3),
+                            $sub4, sub4), $sub5, sub5),
+                            $sub6, sub6), $sub7, sub7),
+                            $sub8, sub8), $sub9, sub9),
+                            $sub10, sub10), $sub11, sub11),
+                            $sub12, sub12), $sub13, sub13),
+                            $sub14, sub14), $sub15, sub15)
 >;
 
+// XXX: Convert to the new syntax and use COPY_TO_REGCLASS once the
+// DFAPacketizer can handle COPY instructions.
 // bitconvert pattern
 class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : Pat <
   (dt (bitconvert (st rc:$src0))),
   (dt rc:$src0)
 >;
 
+// XXX: Convert to the new syntax and use COPY_TO_REGCLASS once the
+// DFAPacketizer can handle COPY instructions.
 class DwordAddrPat<ValueType vt, RegisterClass rc> : Pat <
   (vt (AMDGPUdwordaddr (vt rc:$addr))),
   (vt rc:$addr)
 >;
 
+// BFI_INT patterns
+
+multiclass BFIPatterns <Instruction BFI_INT> {
+
+  // Definition from ISA doc:
+  // (y & x) | (z & ~x)
+  def : Pat <
+    (or (and i32:$y, i32:$x), (and i32:$z, (not i32:$x))),
+    (BFI_INT $x, $y, $z)
+  >;
+
+  // SHA-256 Ch function
+  // z ^ (x & (y ^ z))
+  def : Pat <
+    (xor i32:$z, (and i32:$x, (xor i32:$y, i32:$z))),
+    (BFI_INT $x, $y, $z)
+  >;
+
+}
+
+// SHA-256 Ma patterns
+
+// ((x & z) | (y & (x | z))) -> BFI_INT (XOR x, y), z, y
+class SHA256MaPattern <Instruction BFI_INT, Instruction XOR> : Pat <
+  (or (and i32:$x, i32:$z), (and i32:$y, (or i32:$x, i32:$z))),
+  (BFI_INT (XOR i32:$x, i32:$y), i32:$z, i32:$y)
+>;
+
+// Bitfield extract patterns
+
+def legalshift32 : ImmLeaf <i32, [{return Imm >=0 && Imm < 32;}]>;
+def bfemask : PatLeaf <(imm), [{return isMask_32(N->getZExtValue());}],
+                            SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(CountTrailingOnes_32(N->getZExtValue()), MVT::i32);}]>>;
+
+class BFEPattern <Instruction BFE> : Pat <
+  (and (srl i32:$x, legalshift32:$y), bfemask:$z),
+  (BFE $x, $y, $z)
+>;
+
 include "R600Instructions.td"
 
 include "SIInstrInfo.td"
diff --git a/lib/Target/R600/AMDGPUMachineFunction.cpp b/lib/Target/R600/AMDGPUMachineFunction.cpp
index 0223ec8..0461025 100644
--- a/lib/Target/R600/AMDGPUMachineFunction.cpp
+++ b/lib/Target/R600/AMDGPUMachineFunction.cpp
@@ -1,4 +1,5 @@
 #include "AMDGPUMachineFunction.h"
+#include "AMDGPU.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/Function.h"
 
@@ -8,6 +9,7 @@ const char *AMDGPUMachineFunction::ShaderTypeAttribute = "ShaderType";
 
 AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) :
     MachineFunctionInfo() {
+  ShaderType = ShaderType::COMPUTE;
   AttributeSet Set = MF.getFunction()->getAttributes();
   Attribute A = Set.getAttribute(AttributeSet::FunctionIndex,
                                  ShaderTypeAttribute);
diff --git a/lib/Target/R600/AMDGPUSubtarget.cpp b/lib/Target/R600/AMDGPUSubtarget.cpp
index 0f356a1..a7e1d7b 100644
--- a/lib/Target/R600/AMDGPUSubtarget.cpp
+++ b/lib/Target/R600/AMDGPUSubtarget.cpp
@@ -33,6 +33,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS) :
   DefaultSize[0] = 64;
   DefaultSize[1] = 1;
   DefaultSize[2] = 1;
+  HasVertexCache = false;
   ParseSubtargetFeatures(GPU, FS);
   DevName = GPU;
   Device = AMDGPUDeviceInfo::getDeviceFromName(DevName, this, Is64bit);
@@ -53,6 +54,10 @@ AMDGPUSubtarget::is64bit() const  {
   return Is64bit;
 }
 bool
+AMDGPUSubtarget::hasVertexCache() const {
+  return HasVertexCache;
+}
+bool
 AMDGPUSubtarget::isTargetELF() const {
   return false;
 }
diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h
index 1973fc6..b6501a4 100644
--- a/lib/Target/R600/AMDGPUSubtarget.h
+++ b/lib/Target/R600/AMDGPUSubtarget.h
@@ -36,6 +36,7 @@ private:
   bool Is32on64bit;
   bool DumpCode;
   bool R600ALUInst;
+  bool HasVertexCache;
 
   InstrItineraryData InstrItins;
 
@@ -48,6 +49,7 @@ public:
 
   bool isOverride(AMDGPUDeviceInfo::Caps) const;
   bool is64bit() const;
+  bool hasVertexCache() const;
 
   // Helper functions to simplify if statements
   bool isTargetELF() const;
diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp
index e7ea876..31fbf32 100644
--- a/lib/Target/R600/AMDGPUTargetMachine.cpp
+++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
@@ -115,7 +115,6 @@ AMDGPUPassConfig::addPreISel() {
 }
 
 bool AMDGPUPassConfig::addInstSelector() {
-  addPass(createAMDGPUPeepholeOpt(*TM));
   addPass(createAMDGPUISelDag(getAMDGPUTargetMachine()));
 
   const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
@@ -153,8 +152,9 @@ bool AMDGPUPassConfig::addPreEmitPass() {
     addPass(createAMDGPUCFGStructurizerPass(*TM));
     addPass(createR600EmitClauseMarkers(*TM));
     addPass(createR600ExpandSpecialInstrsPass(*TM));
-    addPass(createR600ControlFlowFinalizer(*TM));
     addPass(&FinalizeMachineBundlesID);
+    addPass(createR600Packetizer(*TM));
+    addPass(createR600ControlFlowFinalizer(*TM));
   } else {
     addPass(createSILowerControlFlowPass(*TM));
   }
diff --git a/lib/Target/R600/AMDILBase.td b/lib/Target/R600/AMDILBase.td
index c12cedc..e221110 100644
--- a/lib/Target/R600/AMDILBase.td
+++ b/lib/Target/R600/AMDILBase.td
@@ -74,6 +74,10 @@ def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst",
         "false",
         "Older version of ALU instructions encoding.">;
 
+def FeatureVertexCache : SubtargetFeature<"HasVertexCache",
+        "HasVertexCache",
+        "true",
+        "Specify use of dedicated vertex cache.">;
 
 //===----------------------------------------------------------------------===//
 // Register File, Calling Conv, Instruction Descriptions
diff --git a/lib/Target/R600/AMDILDeviceInfo.cpp b/lib/Target/R600/AMDILDeviceInfo.cpp
index 9605fbe..126514b 100644
--- a/lib/Target/R600/AMDILDeviceInfo.cpp
+++ b/lib/Target/R600/AMDILDeviceInfo.cpp
@@ -44,7 +44,7 @@ AMDGPUDevice* getDeviceFromName(const std::string &deviceName,
           " on 32bit pointers!");
 #endif
     return new AMDGPUEvergreenDevice(ptr);
-  } else if (deviceName == "redwood") {
+  } else if (deviceName == "redwood" || deviceName == "sumo") {
 #if DEBUG
     assert(!is64bit && "This device does not support 64bit pointers!");
     assert(!is64on32bit && "This device does not support 64bit"
@@ -79,7 +79,10 @@ AMDGPUDevice* getDeviceFromName(const std::string &deviceName,
           " on 32bit pointers!");
 #endif
     return new AMDGPUNIDevice(ptr);
-  } else if (deviceName == "SI") {
+  } else if (deviceName == "SI" ||
+             deviceName == "tahiti" || deviceName == "pitcairn" ||
+             deviceName == "verde"  || deviceName == "oland" ||
+             deviceName == "hainan") {
     return new AMDGPUSIDevice(ptr);
   } else {
 #if DEBUG
diff --git a/lib/Target/R600/AMDILISelDAGToDAG.cpp b/lib/Target/R600/AMDILISelDAGToDAG.cpp
index fa8f62d..ba75a44 100644
--- a/lib/Target/R600/AMDILISelDAGToDAG.cpp
+++ b/lib/Target/R600/AMDILISelDAGToDAG.cpp
@@ -191,6 +191,29 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
     return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
         RegSeqArgs, 2 * N->getNumOperands() + 1);
   }
+  case ISD::BUILD_PAIR: {
+    SDValue RC, SubReg0, SubReg1;
+    const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
+    if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
+      break;
+    }
+    if (N->getValueType(0) == MVT::i128) {
+      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, MVT::i32);
+      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, MVT::i32);
+      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, MVT::i32);
+    } else if (N->getValueType(0) == MVT::i64) {
+      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32);
+      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32);
+      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32);
+    } else {
+      llvm_unreachable("Unhandled value type for BUILD_PAIR");
+    }
+    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
+                            N->getOperand(1), SubReg1 };
+    return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
+                                  N->getDebugLoc(), N->getValueType(0), Ops);
+  }
+
   case ISD::ConstantFP:
   case ISD::Constant: {
     const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
diff --git a/lib/Target/R600/AMDILPeepholeOptimizer.cpp b/lib/Target/R600/AMDILPeepholeOptimizer.cpp
deleted file mode 100644
index 3a28038..0000000
--- a/lib/Target/R600/AMDILPeepholeOptimizer.cpp
+++ /dev/null
@@ -1,1215 +0,0 @@
-//===-- AMDILPeepholeOptimizer.cpp - AMDGPU Peephole optimizations ---------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-/// \file
-//==-----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "PeepholeOpt"
-#ifdef DEBUG
-#define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE))
-#else
-#define DEBUGME 0
-#endif
-
-#include "AMDILDevices.h"
-#include "AMDGPUInstrInfo.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Twine.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionAnalysis.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/MathExtras.h"
-
-#include <sstream>
-
-#if 0
-STATISTIC(PointerAssignments, "Number of dynamic pointer "
-    "assigments discovered");
-STATISTIC(PointerSubtract, "Number of pointer subtractions discovered");
-#endif
-
-using namespace llvm;
-// The Peephole optimization pass is used to do simple last minute optimizations
-// that are required for correct code or to remove redundant functions
-namespace {
-
-class OpaqueType;
-
-class LLVM_LIBRARY_VISIBILITY AMDGPUPeepholeOpt : public FunctionPass {
-public:
-  TargetMachine &TM;
-  static char ID;
-  AMDGPUPeepholeOpt(TargetMachine &tm);
-  ~AMDGPUPeepholeOpt();
-  const char *getPassName() const;
-  bool runOnFunction(Function &F);
-  bool doInitialization(Module &M);
-  bool doFinalization(Module &M);
-  void getAnalysisUsage(AnalysisUsage &AU) const;
-protected:
-private:
-  // Function to initiate all of the instruction level optimizations.
-  bool instLevelOptimizations(BasicBlock::iterator *inst);
-  // Quick check to see if we need to dump all of the pointers into the
-  // arena. If this is correct, then we set all pointers to exist in arena. This
-  // is a workaround for aliasing of pointers in a struct/union.
-  bool dumpAllIntoArena(Function &F);
-  // Because I don't want to invalidate any pointers while in the
-  // safeNestedForEachFunction. I push atomic conversions to a vector and handle
-  // it later. This function does the conversions if required.
-  void doAtomicConversionIfNeeded(Function &F);
-  // Because __amdil_is_constant cannot be properly evaluated if
-  // optimizations are disabled, the call's are placed in a vector
-  // and evaluated after the __amdil_image* functions are evaluated
-  // which should allow the __amdil_is_constant function to be
-  // evaluated correctly.
-  void doIsConstCallConversionIfNeeded();
-  bool mChanged;
-  bool mDebug;
-  bool mConvertAtomics;
-  CodeGenOpt::Level optLevel;
-  // Run a series of tests to see if we can optimize a CALL instruction.
-  bool optimizeCallInst(BasicBlock::iterator *bbb);
-  // A peephole optimization to optimize bit extract sequences.
-  bool optimizeBitExtract(Instruction *inst);
-  // A peephole optimization to optimize bit insert sequences.
-  bool optimizeBitInsert(Instruction *inst);
-  bool setupBitInsert(Instruction *base, 
-                      Instruction *&src, 
-                      Constant *&mask, 
-                      Constant *&shift);
-  // Expand the bit field insert instruction on versions of OpenCL that
-  // don't support it.
-  bool expandBFI(CallInst *CI);
-  // Expand the bit field mask instruction on version of OpenCL that 
-  // don't support it.
-  bool expandBFM(CallInst *CI);
-  // On 7XX and 8XX operations, we do not have 24 bit signed operations. So in
-  // this case we need to expand them. These functions check for 24bit functions
-  // and then expand.
-  bool isSigned24BitOps(CallInst *CI);
-  void expandSigned24BitOps(CallInst *CI);
-  // One optimization that can occur is that if the required workgroup size is
-  // specified then the result of get_local_size is known at compile time and
-  // can be returned accordingly.
-  bool isRWGLocalOpt(CallInst *CI);
-  // On northern island cards, the division is slightly less accurate than on
-  // previous generations, so we need to utilize a more accurate division. So we
-  // can translate the accurate divide to a normal divide on all other cards.
-  bool convertAccurateDivide(CallInst *CI);
-  void expandAccurateDivide(CallInst *CI);
-  // If the alignment is set incorrectly, it can produce really inefficient
-  // code. This checks for this scenario and fixes it if possible.
-  bool correctMisalignedMemOp(Instruction *inst);
-
-  // If we are in no opt mode, then we need to make sure that
-  // local samplers are properly propagated as constant propagation 
-  // doesn't occur and we need to know the value of kernel defined
-  // samplers at compile time.
-  bool propagateSamplerInst(CallInst *CI);
-
-  // Helper functions
-
-  // Group of functions that recursively calculate the size of a structure based
-  // on it's sub-types.
-  size_t getTypeSize(Type * const T, bool dereferencePtr = false);
-  size_t getTypeSize(StructType * const ST, bool dereferencePtr = false);
-  size_t getTypeSize(IntegerType * const IT, bool dereferencePtr = false);
-  size_t getTypeSize(FunctionType * const FT,bool dereferencePtr = false);
-  size_t getTypeSize(ArrayType * const AT, bool dereferencePtr = false);
-  size_t getTypeSize(VectorType * const VT, bool dereferencePtr = false);
-  size_t getTypeSize(PointerType * const PT, bool dereferencePtr = false);
-  size_t getTypeSize(OpaqueType * const OT, bool dereferencePtr = false);
-
-  LLVMContext *mCTX;
-  Function *mF;
-  const AMDGPUSubtarget *mSTM;
-  SmallVector< std::pair<CallInst *, Function *>, 16> atomicFuncs;
-  SmallVector<CallInst *, 16> isConstVec;
-}; // class AMDGPUPeepholeOpt
-  char AMDGPUPeepholeOpt::ID = 0;
-
-// A template function that has two levels of looping before calling the
-// function with a pointer to the current iterator.
-template<class InputIterator, class SecondIterator, class Function>
-Function safeNestedForEach(InputIterator First, InputIterator Last,
-                              SecondIterator S, Function F) {
-  for ( ; First != Last; ++First) {
-    SecondIterator sf, sl;
-    for (sf = First->begin(), sl = First->end();
-         sf != sl; )  {
-      if (!F(&sf)) {
-        ++sf;
-      } 
-    }
-  }
-  return F;
-}
-
-} // anonymous namespace
-
-namespace llvm {
-  FunctionPass *
-  createAMDGPUPeepholeOpt(TargetMachine &tm) {
-    return new AMDGPUPeepholeOpt(tm);
-  }
-} // llvm namespace
-
-AMDGPUPeepholeOpt::AMDGPUPeepholeOpt(TargetMachine &tm)
-  : FunctionPass(ID), TM(tm)  {
-  mDebug = DEBUGME;
-  optLevel = TM.getOptLevel();
-
-}
-
-AMDGPUPeepholeOpt::~AMDGPUPeepholeOpt()  {
-}
-
-const char *
-AMDGPUPeepholeOpt::getPassName() const  {
-  return "AMDGPU PeepHole Optimization Pass";
-}
-
-bool 
-containsPointerType(Type *Ty)  {
-  if (!Ty) {
-    return false;
-  }
-  switch(Ty->getTypeID()) {
-  default:
-    return false;
-  case Type::StructTyID: {
-    const StructType *ST = dyn_cast<StructType>(Ty);
-    for (StructType::element_iterator stb = ST->element_begin(),
-           ste = ST->element_end(); stb != ste; ++stb) {
-      if (!containsPointerType(*stb)) {
-        continue;
-      }
-      return true;
-    }
-    break;
-  }
-  case Type::VectorTyID:
-  case Type::ArrayTyID:
-    return containsPointerType(dyn_cast<SequentialType>(Ty)->getElementType());
-  case Type::PointerTyID:
-    return true;
-  };
-  return false;
-}
-
-bool 
-AMDGPUPeepholeOpt::dumpAllIntoArena(Function &F)  {
-  bool dumpAll = false;
-  for (Function::const_arg_iterator cab = F.arg_begin(),
-       cae = F.arg_end(); cab != cae; ++cab) {
-    const Argument *arg = cab;
-    const PointerType *PT = dyn_cast<PointerType>(arg->getType());
-    if (!PT) {
-      continue;
-    }
-    Type *DereferencedType = PT->getElementType();
-    if (!dyn_cast<StructType>(DereferencedType) 
-        ) {
-      continue;
-    }
-    if (!containsPointerType(DereferencedType)) {
-      continue;
-    }
-    // FIXME: Because a pointer inside of a struct/union may be aliased to
-    // another pointer we need to take the conservative approach and place all
-    // pointers into the arena until more advanced detection is implemented.
-    dumpAll = true;
-  }
-  return dumpAll;
-}
-void
-AMDGPUPeepholeOpt::doIsConstCallConversionIfNeeded() {
-  if (isConstVec.empty()) {
-    return;
-  }
-  for (unsigned x = 0, y = isConstVec.size(); x < y; ++x) {
-    CallInst *CI = isConstVec[x];
-    Constant *CV = dyn_cast<Constant>(CI->getOperand(0));
-    Type *aType = Type::getInt32Ty(*mCTX);
-    Value *Val = (CV != NULL) ? ConstantInt::get(aType, 1)
-      : ConstantInt::get(aType, 0);
-    CI->replaceAllUsesWith(Val);
-    CI->eraseFromParent();
-  }
-  isConstVec.clear();
-}
-void 
-AMDGPUPeepholeOpt::doAtomicConversionIfNeeded(Function &F)  {
-  // Don't do anything if we don't have any atomic operations.
-  if (atomicFuncs.empty()) {
-    return;
-  }
-  // Change the function name for the atomic if it is required
-  uint32_t size = atomicFuncs.size();
-  for (uint32_t x = 0; x < size; ++x) {
-    atomicFuncs[x].first->setOperand(
-        atomicFuncs[x].first->getNumOperands()-1, 
-        atomicFuncs[x].second);
-
-  }
-  mChanged = true;
-  if (mConvertAtomics) {
-    return;
-  }
-}
-
-bool 
-AMDGPUPeepholeOpt::runOnFunction(Function &MF)  {
-  mChanged = false;
-  mF = &MF;
-  mSTM = &TM.getSubtarget<AMDGPUSubtarget>();
-  if (mDebug) {
-    MF.dump();
-  }
-  mCTX = &MF.getType()->getContext();
-  mConvertAtomics = true;
-  safeNestedForEach(MF.begin(), MF.end(), MF.begin()->begin(),
-     std::bind1st(std::mem_fun(&AMDGPUPeepholeOpt::instLevelOptimizations),
-                  this));
-
-  doAtomicConversionIfNeeded(MF);
-  doIsConstCallConversionIfNeeded();
-
-  if (mDebug) {
-    MF.dump();
-  }
-  return mChanged;
-}
-
-bool 
-AMDGPUPeepholeOpt::optimizeCallInst(BasicBlock::iterator *bbb)  {
-  Instruction *inst = (*bbb);
-  CallInst *CI = dyn_cast<CallInst>(inst);
-  if (!CI) {
-    return false;
-  }
-  if (isSigned24BitOps(CI)) {
-    expandSigned24BitOps(CI);
-    ++(*bbb);
-    CI->eraseFromParent();
-    return true;
-  }
-  if (propagateSamplerInst(CI)) {
-    return false;
-  }
-  if (expandBFI(CI) || expandBFM(CI)) {
-    ++(*bbb);
-    CI->eraseFromParent();
-    return true;
-  }
-  if (convertAccurateDivide(CI)) {
-    expandAccurateDivide(CI);
-    ++(*bbb);
-    CI->eraseFromParent();
-    return true;
-  }
-
-  StringRef calleeName = CI->getOperand(CI->getNumOperands()-1)->getName();
-  if (calleeName.startswith("__amdil_is_constant")) {
-    // If we do not have optimizations, then this
-    // cannot be properly evaluated, so we add the
-    // call instruction to a vector and process
-    // them at the end of processing after the
-    // samplers have been correctly handled.
-    if (optLevel == CodeGenOpt::None) {
-      isConstVec.push_back(CI);
-      return false;
-    } else {
-      Constant *CV = dyn_cast<Constant>(CI->getOperand(0));
-      Type *aType = Type::getInt32Ty(*mCTX);
-      Value *Val = (CV != NULL) ? ConstantInt::get(aType, 1)
-        : ConstantInt::get(aType, 0);
-      CI->replaceAllUsesWith(Val);
-      ++(*bbb);
-      CI->eraseFromParent();
-      return true;
-    }
-  }
-
-  if (calleeName.equals("__amdil_is_asic_id_i32")) {
-    ConstantInt *CV = dyn_cast<ConstantInt>(CI->getOperand(0));
-    Type *aType = Type::getInt32Ty(*mCTX);
-    Value *Val = CV;
-    if (Val) {
-      Val = ConstantInt::get(aType, 
-          mSTM->device()->getDeviceFlag() & CV->getZExtValue());
-    } else {
-      Val = ConstantInt::get(aType, 0);
-    }
-    CI->replaceAllUsesWith(Val);
-    ++(*bbb);
-    CI->eraseFromParent();
-    return true;
-  }
-  Function *F = dyn_cast<Function>(CI->getOperand(CI->getNumOperands()-1));
-  if (!F) {
-    return false;
-  } 
-  if (F->getName().startswith("__atom") && !CI->getNumUses() 
-      && F->getName().find("_xchg") == StringRef::npos) {
-    std::string buffer(F->getName().str() + "_noret");
-    F = dyn_cast<Function>(
-          F->getParent()->getOrInsertFunction(buffer, F->getFunctionType()));
-    atomicFuncs.push_back(std::make_pair(CI, F));
-  }
-  
-  if (!mSTM->device()->isSupported(AMDGPUDeviceInfo::ArenaSegment)
-      && !mSTM->device()->isSupported(AMDGPUDeviceInfo::MultiUAV)) {
-    return false;
-  }
-  if (!mConvertAtomics) {
-    return false;
-  }
-  StringRef name = F->getName();
-  if (name.startswith("__atom") && name.find("_g") != StringRef::npos) {
-    mConvertAtomics = false;
-  }
-  return false;
-}
-
-bool
-AMDGPUPeepholeOpt::setupBitInsert(Instruction *base, 
-    Instruction *&src, 
-    Constant *&mask, 
-    Constant *&shift) {
-  if (!base) {
-    if (mDebug) {
-      dbgs() << "Null pointer passed into function.\n";
-    }
-    return false;
-  }
-  bool andOp = false;
-  if (base->getOpcode() == Instruction::Shl) {
-    shift = dyn_cast<Constant>(base->getOperand(1));
-  } else if (base->getOpcode() == Instruction::And) {
-    mask = dyn_cast<Constant>(base->getOperand(1));
-    andOp = true;
-  } else {
-    if (mDebug) {
-      dbgs() << "Failed setup with no Shl or And instruction on base opcode!\n";
-    }
-    // If the base is neither a Shl or a And, we don't fit any of the patterns above.
-    return false;
-  }
-  src = dyn_cast<Instruction>(base->getOperand(0));
-  if (!src) {
-    if (mDebug) {
-      dbgs() << "Failed setup since the base operand is not an instruction!\n";
-    }
-    return false;
-  }
-  // If we find an 'and' operation, then we don't need to
-  // find the next operation as we already know the
-  // bits that are valid at this point.
-  if (andOp) {
-    return true;
-  }
-  if (src->getOpcode() == Instruction::Shl && !shift) {
-    shift = dyn_cast<Constant>(src->getOperand(1));
-    src = dyn_cast<Instruction>(src->getOperand(0));
-  } else if (src->getOpcode() == Instruction::And && !mask) {
-    mask = dyn_cast<Constant>(src->getOperand(1));
-  }
-  if (!mask && !shift) {
-    if (mDebug) {
-      dbgs() << "Failed setup since both mask and shift are NULL!\n";
-    }
-    // Did not find a constant mask or a shift.
-    return false;
-  }
-  return true;
-}
-bool
-AMDGPUPeepholeOpt::optimizeBitInsert(Instruction *inst)  {
-  if (!inst) {
-    return false;
-  }
-  if (!inst->isBinaryOp()) {
-    return false;
-  }
-  if (inst->getOpcode() != Instruction::Or) {
-    return false;
-  }
-  if (optLevel == CodeGenOpt::None) {
-    return false;
-  }
-  // We want to do an optimization on a sequence of ops that in the end equals a
-  // single ISA instruction.
-  // The base pattern for this optimization is - ((A & B) << C) | ((D & E) << F)
-  // Some simplified versions of this pattern are as follows:
-  // (A & B) | (D & E) when B & E == 0 && C == 0 && F == 0
-  // ((A & B) << C) | (D & E) when B ^ E == 0 && (1 << C) >= E
-  // (A & B) | ((D & E) << F) when B ^ E == 0 && (1 << F) >= B
-  // (A & B) | (D << F) when (1 << F) >= B
-  // (A << C) | (D & E) when (1 << C) >= E
-  if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD4XXX) {
-    // The HD4XXX hardware doesn't support the ubit_insert instruction.
-    return false;
-  }
-  Type *aType = inst->getType();
-  bool isVector = aType->isVectorTy();
-  int numEle = 1;
-  // This optimization only works on 32bit integers.
-  if (aType->getScalarType()
-      != Type::getInt32Ty(inst->getContext())) {
-    return false;
-  }
-  if (isVector) {
-    const VectorType *VT = dyn_cast<VectorType>(aType);
-    numEle = VT->getNumElements();
-    // We currently cannot support more than 4 elements in a intrinsic and we
-    // cannot support Vec3 types.
-    if (numEle > 4 || numEle == 3) {
-      return false;
-    }
-  }
-  // TODO: Handle vectors.
-  if (isVector) {
-    if (mDebug) {
-      dbgs() << "!!! Vectors are not supported yet!\n";
-    }
-    return false;
-  }
-  Instruction *LHSSrc = NULL, *RHSSrc = NULL;
-  Constant *LHSMask = NULL, *RHSMask = NULL;
-  Constant *LHSShift = NULL, *RHSShift = NULL;
-  Instruction *LHS = dyn_cast<Instruction>(inst->getOperand(0));
-  Instruction *RHS = dyn_cast<Instruction>(inst->getOperand(1));
-  if (!setupBitInsert(LHS, LHSSrc, LHSMask, LHSShift)) {
-    if (mDebug) {
-      dbgs() << "Found an OR Operation that failed setup!\n";
-      inst->dump();
-      if (LHS) { LHS->dump(); }
-      if (LHSSrc) { LHSSrc->dump(); }
-      if (LHSMask) { LHSMask->dump(); }
-      if (LHSShift) { LHSShift->dump(); }
-    }
-    // There was an issue with the setup for BitInsert.
-    return false;
-  }
-  if (!setupBitInsert(RHS, RHSSrc, RHSMask, RHSShift)) {
-    if (mDebug) {
-      dbgs() << "Found an OR Operation that failed setup!\n";
-      inst->dump();
-      if (RHS) { RHS->dump(); }
-      if (RHSSrc) { RHSSrc->dump(); }
-      if (RHSMask) { RHSMask->dump(); }
-      if (RHSShift) { RHSShift->dump(); }
-    }
-    // There was an issue with the setup for BitInsert.
-    return false;
-  }
-  if (mDebug) {
-    dbgs() << "Found an OR operation that can possible be optimized to ubit insert!\n";
-    dbgs() << "Op:        "; inst->dump();
-    dbgs() << "LHS:       "; if (LHS) { LHS->dump(); } else { dbgs() << "(None)\n"; }
-    dbgs() << "LHS Src:   "; if (LHSSrc) { LHSSrc->dump(); } else { dbgs() << "(None)\n"; }
-    dbgs() << "LHS Mask:  "; if (LHSMask) { LHSMask->dump(); } else { dbgs() << "(None)\n"; }
-    dbgs() << "LHS Shift: "; if (LHSShift) { LHSShift->dump(); } else { dbgs() << "(None)\n"; }
-    dbgs() << "RHS:       "; if (RHS) { RHS->dump(); } else { dbgs() << "(None)\n"; }
-    dbgs() << "RHS Src:   "; if (RHSSrc) { RHSSrc->dump(); } else { dbgs() << "(None)\n"; }
-    dbgs() << "RHS Mask:  "; if (RHSMask) { RHSMask->dump(); } else { dbgs() << "(None)\n"; }
-    dbgs() << "RHS Shift: "; if (RHSShift) { RHSShift->dump(); } else { dbgs() << "(None)\n"; }
-  }
-  Constant *offset = NULL;
-  Constant *width = NULL;
-  uint32_t lhsMaskVal = 0, rhsMaskVal = 0;
-  uint32_t lhsShiftVal = 0, rhsShiftVal = 0;
-  uint32_t lhsMaskWidth = 0, rhsMaskWidth = 0;
-  uint32_t lhsMaskOffset = 0, rhsMaskOffset = 0;
-  lhsMaskVal = (LHSMask 
-      ? dyn_cast<ConstantInt>(LHSMask)->getZExtValue() : 0);
-  rhsMaskVal = (RHSMask 
-      ? dyn_cast<ConstantInt>(RHSMask)->getZExtValue() : 0);
-  lhsShiftVal = (LHSShift 
-      ? dyn_cast<ConstantInt>(LHSShift)->getZExtValue() : 0);
-  rhsShiftVal = (RHSShift 
-      ? dyn_cast<ConstantInt>(RHSShift)->getZExtValue() : 0);
-  lhsMaskWidth = lhsMaskVal ? CountPopulation_32(lhsMaskVal) : 32 - lhsShiftVal;
-  rhsMaskWidth = rhsMaskVal ? CountPopulation_32(rhsMaskVal) : 32 - rhsShiftVal;
-  lhsMaskOffset = lhsMaskVal ? CountTrailingZeros_32(lhsMaskVal) : lhsShiftVal;
-  rhsMaskOffset = rhsMaskVal ? CountTrailingZeros_32(rhsMaskVal) : rhsShiftVal;
-  // TODO: Handle the case of A & B | D & ~B(i.e. inverted masks).
-  if ((lhsMaskVal || rhsMaskVal) && !(lhsMaskVal ^ rhsMaskVal)) {
-    return false;
-  }
-  if (lhsMaskOffset >= (rhsMaskWidth + rhsMaskOffset)) {
-    offset = ConstantInt::get(aType, lhsMaskOffset, false);
-    width = ConstantInt::get(aType, lhsMaskWidth, false);
-    RHSSrc = RHS;
-    if (!isMask_32(lhsMaskVal) && !isShiftedMask_32(lhsMaskVal)) {
-      return false;
-    }
-    if (!LHSShift) {
-      LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
-          "MaskShr", LHS);
-    } else if (lhsShiftVal != lhsMaskOffset) {
-      LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
-          "MaskShr", LHS);
-    }
-    if (mDebug) {
-      dbgs() << "Optimizing LHS!\n";
-    }
-  } else if (rhsMaskOffset >= (lhsMaskWidth + lhsMaskOffset)) {
-    offset = ConstantInt::get(aType, rhsMaskOffset, false);
-    width = ConstantInt::get(aType, rhsMaskWidth, false);
-    LHSSrc = RHSSrc;
-    RHSSrc = LHS;
-    if (!isMask_32(rhsMaskVal) && !isShiftedMask_32(rhsMaskVal)) {
-      return false;
-    }
-    if (!RHSShift) {
-      LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
-          "MaskShr", RHS);
-    } else if (rhsShiftVal != rhsMaskOffset) {
-      LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset,
-          "MaskShr", RHS);
-    }
-    if (mDebug) {
-      dbgs() << "Optimizing RHS!\n";
-    }
-  } else {
-    if (mDebug) {
-      dbgs() << "Failed constraint 3!\n";
-    }
-    return false;
-  }
-  if (mDebug) {
-    dbgs() << "Width:  "; if (width) { width->dump(); } else { dbgs() << "(0)\n"; }
-    dbgs() << "Offset: "; if (offset) { offset->dump(); } else { dbgs() << "(0)\n"; }
-    dbgs() << "LHSSrc: "; if (LHSSrc) { LHSSrc->dump(); } else { dbgs() << "(0)\n"; }
-    dbgs() << "RHSSrc: "; if (RHSSrc) { RHSSrc->dump(); } else { dbgs() << "(0)\n"; }
-  }
-  if (!offset || !width) {
-    if (mDebug) {
-      dbgs() << "Either width or offset are NULL, failed detection!\n";
-    }
-    return false;
-  }
-  // Lets create the function signature.
-  std::vector<Type *> callTypes;
-  callTypes.push_back(aType);
-  callTypes.push_back(aType);
-  callTypes.push_back(aType);
-  callTypes.push_back(aType);
-  FunctionType *funcType = FunctionType::get(aType, callTypes, false);
-  std::string name = "__amdil_ubit_insert";
-  if (isVector) { name += "_v" + itostr(numEle) + "u32"; } else { name += "_u32"; }
-  Function *Func = 
-    dyn_cast<Function>(inst->getParent()->getParent()->getParent()->
-        getOrInsertFunction(StringRef(name), funcType));
-  Value *Operands[4] = {
-    width,
-    offset,
-    LHSSrc,
-    RHSSrc
-  };
-  CallInst *CI = CallInst::Create(Func, Operands, "BitInsertOpt");
-  if (mDebug) {
-    dbgs() << "Old Inst: ";
-    inst->dump();
-    dbgs() << "New Inst: ";
-    CI->dump();
-    dbgs() << "\n\n";
-  }
-  CI->insertBefore(inst);
-  inst->replaceAllUsesWith(CI);
-  return true;
-}
-
-bool 
-AMDGPUPeepholeOpt::optimizeBitExtract(Instruction *inst)  {
-  if (!inst) {
-    return false;
-  }
-  if (!inst->isBinaryOp()) {
-    return false;
-  }
-  if (inst->getOpcode() != Instruction::And) {
-    return false;
-  }
-  if (optLevel == CodeGenOpt::None) {
-    return false;
-  }
-  // We want to do some simple optimizations on Shift right/And patterns. The
-  // basic optimization is to turn (A >> B) & C where A is a 32bit type, B is a
-  // value smaller than 32 and C is a mask. If C is a constant value, then the
-  // following transformation can occur. For signed integers, it turns into the
-  // function call dst = __amdil_ibit_extract(log2(C), B, A) For unsigned
-  // integers, it turns into the function call dst =
-  // __amdil_ubit_extract(log2(C), B, A) The function __amdil_[u|i]bit_extract
-  // can be found in Section 7.9 of the ATI IL spec of the stream SDK for
-  // Evergreen hardware.
-  if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD4XXX) {
-    // This does not work on HD4XXX hardware.
-    return false;
-  }
-  Type *aType = inst->getType();
-  bool isVector = aType->isVectorTy();
-
-  // XXX Support vector types
-  if (isVector) {
-    return false;
-  }
-  int numEle = 1;
-  // This only works on 32bit integers
-  if (aType->getScalarType()
-      != Type::getInt32Ty(inst->getContext())) {
-    return false;
-  }
-  if (isVector) {
-    const VectorType *VT = dyn_cast<VectorType>(aType);
-    numEle = VT->getNumElements();
-    // We currently cannot support more than 4 elements in a intrinsic and we
-    // cannot support Vec3 types.
-    if (numEle > 4 || numEle == 3) {
-      return false;
-    }
-  }
-  BinaryOperator *ShiftInst = dyn_cast<BinaryOperator>(inst->getOperand(0));
-  // If the first operand is not a shift instruction, then we can return as it
-  // doesn't match this pattern.
-  if (!ShiftInst || !ShiftInst->isShift()) {
-    return false;
-  }
-  // If we are a shift left, then we need don't match this pattern.
-  if (ShiftInst->getOpcode() == Instruction::Shl) {
-    return false;
-  }
-  bool isSigned = ShiftInst->isArithmeticShift();
-  Constant *AndMask = dyn_cast<Constant>(inst->getOperand(1));
-  Constant *ShrVal = dyn_cast<Constant>(ShiftInst->getOperand(1));
-  // Lets make sure that the shift value and the and mask are constant integers.
-  if (!AndMask || !ShrVal) {
-    return false;
-  }
-  Constant *newMaskConst;
-  Constant *shiftValConst;
-  if (isVector) {
-    // Handle the vector case
-    std::vector<Constant *> maskVals;
-    std::vector<Constant *> shiftVals;
-    ConstantVector *AndMaskVec = dyn_cast<ConstantVector>(AndMask);
-    ConstantVector *ShrValVec = dyn_cast<ConstantVector>(ShrVal);
-    Type *scalarType = AndMaskVec->getType()->getScalarType();
-    assert(AndMaskVec->getNumOperands() ==
-           ShrValVec->getNumOperands() && "cannot have a "
-           "combination where the number of elements to a "
-           "shift and an and are different!");
-    for (size_t x = 0, y = AndMaskVec->getNumOperands(); x < y; ++x) {
-      ConstantInt *AndCI = dyn_cast<ConstantInt>(AndMaskVec->getOperand(x));
-      ConstantInt *ShiftIC = dyn_cast<ConstantInt>(ShrValVec->getOperand(x));
-      if (!AndCI || !ShiftIC) {
-        return false;
-      }
-      uint32_t maskVal = (uint32_t)AndCI->getZExtValue();
-      if (!isMask_32(maskVal)) {
-        return false;
-      }
-      maskVal = (uint32_t)CountTrailingOnes_32(maskVal);
-      uint32_t shiftVal = (uint32_t)ShiftIC->getZExtValue();
-      // If the mask or shiftval is greater than the bitcount, then break out.
-      if (maskVal >= 32 || shiftVal >= 32) {
-        return false;
-      }
-      // If the mask val is greater than the the number of original bits left
-      // then this optimization is invalid.
-      if (maskVal > (32 - shiftVal)) {
-        return false;
-      }
-      maskVals.push_back(ConstantInt::get(scalarType, maskVal, isSigned));
-      shiftVals.push_back(ConstantInt::get(scalarType, shiftVal, isSigned));
-    }
-    newMaskConst = ConstantVector::get(maskVals);
-    shiftValConst = ConstantVector::get(shiftVals);
-  } else {
-    // Handle the scalar case
-    uint32_t maskVal = (uint32_t)dyn_cast<ConstantInt>(AndMask)->getZExtValue();
-    // This must be a mask value where all lower bits are set to 1 and then any
-    // bit higher is set to 0.
-    if (!isMask_32(maskVal)) {
-      return false;
-    }
-    maskVal = (uint32_t)CountTrailingOnes_32(maskVal);
-    // Count the number of bits set in the mask, this is the width of the
-    // resulting bit set that is extracted from the source value.
-    uint32_t shiftVal = (uint32_t)dyn_cast<ConstantInt>(ShrVal)->getZExtValue();
-    // If the mask or shift val is greater than the bitcount, then break out.
-    if (maskVal >= 32 || shiftVal >= 32) {
-      return false;
-    }
-    // If the mask val is greater than the the number of original bits left then
-    // this optimization is invalid.
-    if (maskVal > (32 - shiftVal)) {
-      return false;
-    }
-    newMaskConst = ConstantInt::get(aType, maskVal, isSigned);
-    shiftValConst = ConstantInt::get(aType, shiftVal, isSigned);
-  }
-  // Lets create the function signature.
-  std::vector<Type *> callTypes;
-  callTypes.push_back(aType);
-  callTypes.push_back(aType);
-  callTypes.push_back(aType);
-  FunctionType *funcType = FunctionType::get(aType, callTypes, false);
-  std::string name = "llvm.AMDGPU.bit.extract.u32";
-  if (isVector) {
-    name += ".v" + itostr(numEle) + "i32";
-  } else {
-    name += ".";
-  }
-  // Lets create the function.
-  Function *Func = 
-    dyn_cast<Function>(inst->getParent()->getParent()->getParent()->
-                       getOrInsertFunction(StringRef(name), funcType));
-  Value *Operands[3] = {
-    ShiftInst->getOperand(0),
-    shiftValConst,
-    newMaskConst
-  };
-  // Lets create the Call with the operands
-  CallInst *CI = CallInst::Create(Func, Operands, "ByteExtractOpt");
-  CI->setDoesNotAccessMemory();
-  CI->insertBefore(inst);
-  inst->replaceAllUsesWith(CI);
-  return true;
-}
-
-bool
-AMDGPUPeepholeOpt::expandBFI(CallInst *CI) {
-  if (!CI) {
-    return false;
-  }
-  Value *LHS = CI->getOperand(CI->getNumOperands() - 1);
-  if (!LHS->getName().startswith("__amdil_bfi")) {
-    return false;
-  }
-  Type* type = CI->getOperand(0)->getType();
-  Constant *negOneConst = NULL;
-  if (type->isVectorTy()) {
-    std::vector<Constant *> negOneVals;
-    negOneConst = ConstantInt::get(CI->getContext(), 
-        APInt(32, StringRef("-1"), 10));
-    for (size_t x = 0,
-        y = dyn_cast<VectorType>(type)->getNumElements(); x < y; ++x) {
-      negOneVals.push_back(negOneConst);
-    }
-    negOneConst = ConstantVector::get(negOneVals);
-  } else {
-    negOneConst = ConstantInt::get(CI->getContext(), 
-        APInt(32, StringRef("-1"), 10));
-  }
-  // __amdil_bfi => (A & B) | (~A & C)
-  BinaryOperator *lhs = 
-    BinaryOperator::Create(Instruction::And, CI->getOperand(0),
-        CI->getOperand(1), "bfi_and", CI);
-  BinaryOperator *rhs =
-    BinaryOperator::Create(Instruction::Xor, CI->getOperand(0), negOneConst,
-        "bfi_not", CI);
-  rhs = BinaryOperator::Create(Instruction::And, rhs, CI->getOperand(2),
-      "bfi_and", CI);
-  lhs = BinaryOperator::Create(Instruction::Or, lhs, rhs, "bfi_or", CI);
-  CI->replaceAllUsesWith(lhs);
-  return true;
-}
-
-bool
-AMDGPUPeepholeOpt::expandBFM(CallInst *CI) {
-  if (!CI) {
-    return false;
-  }
-  Value *LHS = CI->getOperand(CI->getNumOperands() - 1);
-  if (!LHS->getName().startswith("__amdil_bfm")) {
-    return false;
-  }
-  // __amdil_bfm => ((1 << (src0 & 0x1F)) - 1) << (src1 & 0x1f)
-  Constant *newMaskConst = NULL;
-  Constant *newShiftConst = NULL;
-  Type* type = CI->getOperand(0)->getType();
-  if (type->isVectorTy()) {
-    std::vector<Constant*> newMaskVals, newShiftVals;
-    newMaskConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 0x1F);
-    newShiftConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 1);
-    for (size_t x = 0,
-        y = dyn_cast<VectorType>(type)->getNumElements(); x < y; ++x) {
-      newMaskVals.push_back(newMaskConst);
-      newShiftVals.push_back(newShiftConst);
-    }
-    newMaskConst = ConstantVector::get(newMaskVals);
-    newShiftConst = ConstantVector::get(newShiftVals);
-  } else {
-    newMaskConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 0x1F);
-    newShiftConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 1);
-  }
-  BinaryOperator *lhs =
-    BinaryOperator::Create(Instruction::And, CI->getOperand(0),
-        newMaskConst, "bfm_mask", CI);
-  lhs = BinaryOperator::Create(Instruction::Shl, newShiftConst,
-      lhs, "bfm_shl", CI);
-  lhs = BinaryOperator::Create(Instruction::Sub, lhs,
-      newShiftConst, "bfm_sub", CI);
-  BinaryOperator *rhs =
-    BinaryOperator::Create(Instruction::And, CI->getOperand(1),
-        newMaskConst, "bfm_mask", CI);
-  lhs = BinaryOperator::Create(Instruction::Shl, lhs, rhs, "bfm_shl", CI);
-  CI->replaceAllUsesWith(lhs);
-  return true;
-}
-
-bool
-AMDGPUPeepholeOpt::instLevelOptimizations(BasicBlock::iterator *bbb)  {
-  Instruction *inst = (*bbb);
-  if (optimizeCallInst(bbb)) {
-    return true;
-  }
-  if (optimizeBitExtract(inst)) {
-    return false;
-  }
-  if (optimizeBitInsert(inst)) {
-    return false;
-  }
-  if (correctMisalignedMemOp(inst)) {
-    return false;
-  }
-  return false;
-}
-bool
-AMDGPUPeepholeOpt::correctMisalignedMemOp(Instruction *inst) {
-  LoadInst *linst = dyn_cast<LoadInst>(inst);
-  StoreInst *sinst = dyn_cast<StoreInst>(inst);
-  unsigned alignment;
-  Type* Ty = inst->getType();
-  if (linst) {
-    alignment = linst->getAlignment();
-    Ty = inst->getType();
-  } else if (sinst) {
-    alignment = sinst->getAlignment();
-    Ty = sinst->getValueOperand()->getType();
-  } else {
-    return false;
-  }
-  unsigned size = getTypeSize(Ty);
-  if (size == alignment || size < alignment) {
-    return false;
-  }
-  if (!Ty->isStructTy()) {
-    return false;
-  }
-  if (alignment < 4) {
-    if (linst) {
-      linst->setAlignment(0);
-      return true;
-    } else if (sinst) {
-      sinst->setAlignment(0);
-      return true;
-    }
-  }
-  return false;
-}
-bool 
-AMDGPUPeepholeOpt::isSigned24BitOps(CallInst *CI)  {
-  if (!CI) {
-    return false;
-  }
-  Value *LHS = CI->getOperand(CI->getNumOperands() - 1);
-  std::string namePrefix = LHS->getName().substr(0, 14);
-  if (namePrefix != "__amdil_imad24" && namePrefix != "__amdil_imul24"
-      && namePrefix != "__amdil__imul24_high") {
-    return false;
-  }
-  if (mSTM->device()->usesHardware(AMDGPUDeviceInfo::Signed24BitOps)) {
-    return false;
-  }
-  return true;
-}
-
-void 
-AMDGPUPeepholeOpt::expandSigned24BitOps(CallInst *CI)  {
-  assert(isSigned24BitOps(CI) && "Must be a "
-      "signed 24 bit operation to call this function!");
-  Value *LHS = CI->getOperand(CI->getNumOperands()-1);
-  // On 7XX and 8XX we do not have signed 24bit, so we need to
-  // expand it to the following:
-  // imul24 turns into 32bit imul
-  // imad24 turns into 32bit imad
-  // imul24_high turns into 32bit imulhigh
-  if (LHS->getName().substr(0, 14) == "__amdil_imad24") {
-    Type *aType = CI->getOperand(0)->getType();
-    bool isVector = aType->isVectorTy();
-    int numEle = isVector ? dyn_cast<VectorType>(aType)->getNumElements() : 1;
-    std::vector<Type*> callTypes;
-    callTypes.push_back(CI->getOperand(0)->getType());
-    callTypes.push_back(CI->getOperand(1)->getType());
-    callTypes.push_back(CI->getOperand(2)->getType());
-    FunctionType *funcType =
-      FunctionType::get(CI->getOperand(0)->getType(), callTypes, false);
-    std::string name = "__amdil_imad";
-    if (isVector) {
-      name += "_v" + itostr(numEle) + "i32";
-    } else {
-      name += "_i32";
-    }
-    Function *Func = dyn_cast<Function>(
-                       CI->getParent()->getParent()->getParent()->
-                       getOrInsertFunction(StringRef(name), funcType));
-    Value *Operands[3] = {
-      CI->getOperand(0),
-      CI->getOperand(1),
-      CI->getOperand(2)
-    };
-    CallInst *nCI = CallInst::Create(Func, Operands, "imad24");
-    nCI->insertBefore(CI);
-    CI->replaceAllUsesWith(nCI);
-  } else if (LHS->getName().substr(0, 14) == "__amdil_imul24") {
-    BinaryOperator *mulOp =
-      BinaryOperator::Create(Instruction::Mul, CI->getOperand(0),
-          CI->getOperand(1), "imul24", CI);
-    CI->replaceAllUsesWith(mulOp);
-  } else if (LHS->getName().substr(0, 19) == "__amdil_imul24_high") {
-    Type *aType = CI->getOperand(0)->getType();
-
-    bool isVector = aType->isVectorTy();
-    int numEle = isVector ? dyn_cast<VectorType>(aType)->getNumElements() : 1;
-    std::vector<Type*> callTypes;
-    callTypes.push_back(CI->getOperand(0)->getType());
-    callTypes.push_back(CI->getOperand(1)->getType());
-    FunctionType *funcType =
-      FunctionType::get(CI->getOperand(0)->getType(), callTypes, false);
-    std::string name = "__amdil_imul_high";
-    if (isVector) {
-      name += "_v" + itostr(numEle) + "i32";
-    } else {
-      name += "_i32";
-    }
-    Function *Func = dyn_cast<Function>(
-                       CI->getParent()->getParent()->getParent()->
-                       getOrInsertFunction(StringRef(name), funcType));
-    Value *Operands[2] = {
-      CI->getOperand(0),
-      CI->getOperand(1)
-    };
-    CallInst *nCI = CallInst::Create(Func, Operands, "imul24_high");
-    nCI->insertBefore(CI);
-    CI->replaceAllUsesWith(nCI);
-  }
-}
-
-bool 
-AMDGPUPeepholeOpt::isRWGLocalOpt(CallInst *CI)  {
-  return (CI != NULL
-          && CI->getOperand(CI->getNumOperands() - 1)->getName() 
-          == "__amdil_get_local_size_int");
-}
-
-bool 
-AMDGPUPeepholeOpt::convertAccurateDivide(CallInst *CI)  {
-  if (!CI) {
-    return false;
-  }
-  if (mSTM->device()->getGeneration() == AMDGPUDeviceInfo::HD6XXX
-      && (mSTM->getDeviceName() == "cayman")) {
-    return false;
-  }
-  return CI->getOperand(CI->getNumOperands() - 1)->getName().substr(0, 20) 
-      == "__amdil_improved_div";
-}
-
-void 
-AMDGPUPeepholeOpt::expandAccurateDivide(CallInst *CI)  {
-  assert(convertAccurateDivide(CI)
-         && "expanding accurate divide can only happen if it is expandable!");
-  BinaryOperator *divOp =
-    BinaryOperator::Create(Instruction::FDiv, CI->getOperand(0),
-                           CI->getOperand(1), "fdiv32", CI);
-  CI->replaceAllUsesWith(divOp);
-}
-
-bool
-AMDGPUPeepholeOpt::propagateSamplerInst(CallInst *CI) {
-  if (optLevel != CodeGenOpt::None) {
-    return false;
-  }
-
-  if (!CI) {
-    return false;
-  }
-
-  unsigned funcNameIdx = 0;
-  funcNameIdx = CI->getNumOperands() - 1;
-  StringRef calleeName = CI->getOperand(funcNameIdx)->getName();
-  if (calleeName != "__amdil_image2d_read_norm"
-   && calleeName != "__amdil_image2d_read_unnorm"
-   && calleeName != "__amdil_image3d_read_norm"
-   && calleeName != "__amdil_image3d_read_unnorm") {
-    return false;
-  }
-
-  unsigned samplerIdx = 2;
-  samplerIdx = 1;
-  Value *sampler = CI->getOperand(samplerIdx);
-  LoadInst *lInst = dyn_cast<LoadInst>(sampler);
-  if (!lInst) {
-    return false;
-  }
-
-  if (lInst->getPointerAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
-    return false;
-  }
-
-  GlobalVariable *gv = dyn_cast<GlobalVariable>(lInst->getPointerOperand());
-  // If we are loading from what is not a global value, then we
-  // fail and return.
-  if (!gv) {
-    return false;
-  }
-
-  // If we don't have an initializer or we have an initializer and
-  // the initializer is not a 32bit integer, we fail.
-  if (!gv->hasInitializer() 
-      || !gv->getInitializer()->getType()->isIntegerTy(32)) {
-      return false;
-  }
-
-  // Now that we have the global variable initializer, lets replace
-  // all uses of the load instruction with the samplerVal and
-  // reparse the __amdil_is_constant() function.
-  Constant *samplerVal = gv->getInitializer();
-  lInst->replaceAllUsesWith(samplerVal);
-  return true;
-}
-
-bool 
-AMDGPUPeepholeOpt::doInitialization(Module &M)  {
-  return false;
-}
-
-bool 
-AMDGPUPeepholeOpt::doFinalization(Module &M)  {
-  return false;
-}
-
-void 
-AMDGPUPeepholeOpt::getAnalysisUsage(AnalysisUsage &AU) const  {
-  AU.addRequired<MachineFunctionAnalysis>();
-  FunctionPass::getAnalysisUsage(AU);
-  AU.setPreservesAll();
-}
-
-size_t AMDGPUPeepholeOpt::getTypeSize(Type * const T, bool dereferencePtr) {
-  size_t size = 0;
-  if (!T) {
-    return size;
-  }
-  switch (T->getTypeID()) {
-  case Type::X86_FP80TyID:
-  case Type::FP128TyID:
-  case Type::PPC_FP128TyID:
-  case Type::LabelTyID:
-    assert(0 && "These types are not supported by this backend");
-  default:
-  case Type::FloatTyID:
-  case Type::DoubleTyID:
-    size = T->getPrimitiveSizeInBits() >> 3;
-    break;
-  case Type::PointerTyID:
-    size = getTypeSize(dyn_cast<PointerType>(T), dereferencePtr);
-    break;
-  case Type::IntegerTyID:
-    size = getTypeSize(dyn_cast<IntegerType>(T), dereferencePtr);
-    break;
-  case Type::StructTyID:
-    size = getTypeSize(dyn_cast<StructType>(T), dereferencePtr);
-    break;
-  case Type::ArrayTyID:
-    size = getTypeSize(dyn_cast<ArrayType>(T), dereferencePtr);
-    break;
-  case Type::FunctionTyID:
-    size = getTypeSize(dyn_cast<FunctionType>(T), dereferencePtr);
-    break;
-  case Type::VectorTyID:
-    size = getTypeSize(dyn_cast<VectorType>(T), dereferencePtr);
-    break;
-  };
-  return size;
-}
-
-size_t AMDGPUPeepholeOpt::getTypeSize(StructType * const ST,
-    bool dereferencePtr) {
-  size_t size = 0;
-  if (!ST) {
-    return size;
-  }
-  Type *curType;
-  StructType::element_iterator eib;
-  StructType::element_iterator eie;
-  for (eib = ST->element_begin(), eie = ST->element_end(); eib != eie; ++eib) {
-    curType = *eib;
-    size += getTypeSize(curType, dereferencePtr);
-  }
-  return size;
-}
-
-size_t AMDGPUPeepholeOpt::getTypeSize(IntegerType * const IT,
-    bool dereferencePtr) {
-  return IT ? (IT->getBitWidth() >> 3) : 0;
-}
-
-size_t AMDGPUPeepholeOpt::getTypeSize(FunctionType * const FT,
-    bool dereferencePtr) {
-    assert(0 && "Should not be able to calculate the size of an function type");
-    return 0;
-}
-
-size_t AMDGPUPeepholeOpt::getTypeSize(ArrayType * const AT,
-    bool dereferencePtr) {
-  return (size_t)(AT ? (getTypeSize(AT->getElementType(),
-                                    dereferencePtr) * AT->getNumElements())
-                     : 0);
-}
-
-size_t AMDGPUPeepholeOpt::getTypeSize(VectorType * const VT,
-    bool dereferencePtr) {
-  return VT ? (VT->getBitWidth() >> 3) : 0;
-}
-
-size_t AMDGPUPeepholeOpt::getTypeSize(PointerType * const PT,
-    bool dereferencePtr) {
-  if (!PT) {
-    return 0;
-  }
-  Type *CT = PT->getElementType();
-  if (CT->getTypeID() == Type::StructTyID &&
-      PT->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) {
-    return getTypeSize(dyn_cast<StructType>(CT));
-  } else if (dereferencePtr) {
-    size_t size = 0;
-    for (size_t x = 0, y = PT->getNumContainedTypes(); x < y; ++x) {
-      size += getTypeSize(PT->getContainedType(x), dereferencePtr);
-    }
-    return size;
-  } else {
-    return 4;
-  }
-}
-
-size_t AMDGPUPeepholeOpt::getTypeSize(OpaqueType * const OT,
-    bool dereferencePtr) {
-  //assert(0 && "Should not be able to calculate the size of an opaque type");
-  return 4;
-}
diff --git a/lib/Target/R600/CMakeLists.txt b/lib/Target/R600/CMakeLists.txt
index 8efba58..97f0a40 100644
--- a/lib/Target/R600/CMakeLists.txt
+++ b/lib/Target/R600/CMakeLists.txt
@@ -21,7 +21,6 @@ add_llvm_target(R600CodeGen
   AMDILISelDAGToDAG.cpp
   AMDILISelLowering.cpp
   AMDILNIDevice.cpp
-  AMDILPeepholeOptimizer.cpp
   AMDILSIDevice.cpp
   AMDGPUAsmPrinter.cpp
   AMDGPUFrameLowering.cpp
@@ -42,6 +41,7 @@ add_llvm_target(R600CodeGen
   R600ISelLowering.cpp
   R600MachineFunctionInfo.cpp
   R600MachineScheduler.cpp
+  R600Packetizer.cpp
   R600RegisterInfo.cpp
   SIAnnotateControlFlow.cpp
   SIInsertWaits.cpp
diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
index 10547a5..303cdf2 100644
--- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
+++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
@@ -17,6 +17,7 @@ using namespace llvm;
 
 void AMDGPUInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
                              StringRef Annot) {
+  OS.flush();
   printInstruction(MI, OS);
 
   printAnnotation(OS, Annot);
@@ -67,11 +68,14 @@ void AMDGPUInstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo,
 }
 
 void AMDGPUInstPrinter::printIfSet(const MCInst *MI, unsigned OpNo,
-                                    raw_ostream &O, StringRef Asm) {
+                                   raw_ostream &O, StringRef Asm,
+                                   StringRef Default) {
   const MCOperand &Op = MI->getOperand(OpNo);
   assert(Op.isImm());
   if (Op.getImm() == 1) {
     O << Asm;
+  } else {
+    O << Default;
   }
 }
 
@@ -98,7 +102,7 @@ void AMDGPUInstPrinter::printLiteral(const MCInst *MI, unsigned OpNo,
 
 void AMDGPUInstPrinter::printLast(const MCInst *MI, unsigned OpNo,
                                   raw_ostream &O) {
-  printIfSet(MI, OpNo, O, " *");
+  printIfSet(MI, OpNo, O.indent(20 - O.GetNumBytesInBuffer()), "*", " ");
 }
 
 void AMDGPUInstPrinter::printNeg(const MCInst *MI, unsigned OpNo,
@@ -169,4 +173,41 @@ void AMDGPUInstPrinter::printSel(const MCInst *MI, unsigned OpNo,
     O << "." << chans[chan];
 }
 
+void AMDGPUInstPrinter::printBankSwizzle(const MCInst *MI, unsigned OpNo,
+                                         raw_ostream &O) {
+  int BankSwizzle = MI->getOperand(OpNo).getImm(); // immediate at OpNo
+  switch (BankSwizzle) { // values 1..5 each map to one "BS:VEC_*" string
+  case 1:
+    O << "BS:VEC_021";
+    break;
+  case 2:
+    O << "BS:VEC_120";
+    break;
+  case 3:
+    O << "BS:VEC_102";
+    break;
+  case 4:
+    O << "BS:VEC_201";
+    break;
+  case 5:
+    O << "BS:VEC_210";
+    break;
+  default: // any other value (including 0) prints nothing
+    break;
+  }
+  return;
+}
+
+void AMDGPUInstPrinter::printKCache(const MCInst *MI, unsigned OpNo,
+                                    raw_ostream &O) {
+  int KCacheMode = MI->getOperand(OpNo).getImm();
+  if (KCacheMode > 0) { // modes <= 0 print nothing
+    int KCacheBank = MI->getOperand(OpNo - 2).getImm();
+    O << "CB" << KCacheBank <<":"; // bank is read from operand OpNo - 2
+    int KCacheAddr = MI->getOperand(OpNo + 2).getImm();
+    int LineSize = (KCacheMode == 1)?16:32; // mode 1 -> 16, otherwise 32
+    O << KCacheAddr * 16 << "-" << KCacheAddr * 16 + LineSize; // start-end
+  }
+}
+
 #include "AMDGPUGenAsmWriter.inc"
diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
index 767a708..c6fd053 100644
--- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
+++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
@@ -35,7 +35,8 @@ private:
   void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
   void printInterpSlot(const MCInst *MI, unsigned OpNum, raw_ostream &O);
   void printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printIfSet(const MCInst *MI, unsigned OpNo, raw_ostream &O, StringRef Asm);
+  void printIfSet(const MCInst *MI, unsigned OpNo, raw_ostream &O,
+                  StringRef Asm, StringRef Default = "");
   void printAbs(const MCInst *MI, unsigned OpNo, raw_ostream &O);
   void printClamp(const MCInst *MI, unsigned OpNo, raw_ostream &O);
   void printLiteral(const MCInst *MI, unsigned OpNo, raw_ostream &O);
@@ -47,6 +48,8 @@ private:
   void printUpdatePred(const MCInst *MI, unsigned OpNo, raw_ostream &O);
   void printWrite(const MCInst *MI, unsigned OpNo, raw_ostream &O);
   void printSel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printBankSwizzle(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printKCache(const MCInst *MI, unsigned OpNo, raw_ostream &O);
 };
 
 } // End namespace llvm
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp
index 98fca43..a3397f3 100644
--- a/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp
@@ -44,7 +44,6 @@ public:
   AMDGPUAsmBackend(const Target &T)
     : MCAsmBackend() {}
 
-  virtual AMDGPUMCObjectWriter *createObjectWriter(raw_ostream &OS) const;
   virtual unsigned getNumFixupKinds() const { return 0; };
   virtual void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
                           uint64_t Value) const;
@@ -71,16 +70,6 @@ void AMDGPUMCObjectWriter::WriteObject(MCAssembler &Asm,
   }
 }
 
-MCAsmBackend *llvm::createAMDGPUAsmBackend(const Target &T, StringRef TT,
-                                           StringRef CPU) {
-  return new AMDGPUAsmBackend(T);
-}
-
-AMDGPUMCObjectWriter * AMDGPUAsmBackend::createObjectWriter(
-                                                        raw_ostream &OS) const {
-  return new AMDGPUMCObjectWriter(OS);
-}
-
 void AMDGPUAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
                                   unsigned DataSize, uint64_t Value) const {
 
@@ -88,3 +77,21 @@ void AMDGPUAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
   assert(Fixup.getKind() == FK_PCRel_4);
   *Dst = (Value - 4) / 4;
 }
+
+//===----------------------------------------------------------------------===//
+// ELFAMDGPUAsmBackend class
+//===----------------------------------------------------------------------===//
+
+class ELFAMDGPUAsmBackend : public AMDGPUAsmBackend { // ELF-writer variant
+public:
+  ELFAMDGPUAsmBackend(const Target &T) : AMDGPUAsmBackend(T) { }
+
+  MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+    return createAMDGPUELFObjectWriter(OS); // forwards OS to the ELF factory
+  }
+};
+
+MCAsmBackend *llvm::createAMDGPUAsmBackend(const Target &T, StringRef TT,
+                                           StringRef CPU) {
+  return new ELFAMDGPUAsmBackend(T); // TT and CPU are not used here
+}
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUELFObjectWriter.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUELFObjectWriter.cpp
new file mode 100644
index 0000000..48fac9f
--- /dev/null
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUELFObjectWriter.cpp
@@ -0,0 +1,39 @@
+//===-- AMDGPUELFObjectWriter.cpp - AMDGPU ELF Writer ---------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUMCTargetDesc.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+
+using namespace llvm;
+
+namespace {
+
+class AMDGPUELFObjectWriter : public MCELFObjectTargetWriter {
+public:
+  AMDGPUELFObjectWriter();
+protected:
+  virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
+                                bool IsPCRel, bool IsRelocWithSymbol,
+                                int64_t Addend) const {
+    llvm_unreachable("Not implemented");
+  }
+
+};
+
+
+} // End anonymous namespace
+
+AMDGPUELFObjectWriter::AMDGPUELFObjectWriter()
+  : MCELFObjectTargetWriter(false, 0, 0, false) { }
+
+MCObjectWriter *llvm::createAMDGPUELFObjectWriter(raw_ostream &OS) {
+  MCELFObjectTargetWriter *MOTW = new AMDGPUELFObjectWriter();
+  return createELFObjectWriter(MOTW, OS, true);
+}
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp
index b7cdd7c..2aae26a 100644
--- a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp
@@ -68,8 +68,6 @@ AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Target &T, StringRef &TT) : MCAsmInfo() {
   //===--- Dwarf Emission Directives -----------------------------------===//
   HasLEB128 = true;
   SupportsDebugInformation = true;
-  DwarfSectionOffsetDirective = ".offset";
-
 }
 
 const char*
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp
index 072ee49..61d70bb 100644
--- a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp
@@ -78,7 +78,7 @@ static MCCodeEmitter *createAMDGPUMCCodeEmitter(const MCInstrInfo &MCII,
   if (STI.getFeatureBits() & AMDGPU::Feature64BitPtr) {
     return createSIMCCodeEmitter(MCII, MRI, STI, Ctx);
   } else {
-    return createR600MCCodeEmitter(MCII, MRI, STI, Ctx);
+    return createR600MCCodeEmitter(MCII, MRI, STI);
   }
 }
 
@@ -88,7 +88,7 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
                                     MCCodeEmitter *_Emitter,
                                     bool RelaxAll,
                                     bool NoExecStack) {
-  return createPureStreamer(Ctx, MAB, _OS, _Emitter);
+  return createELFStreamer(Ctx, MAB, _OS, _Emitter, false, false);
 }
 
 extern "C" void LLVMInitializeR600TargetMC() {
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h
index 363a4af..abb0320 100644
--- a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h
@@ -23,16 +23,17 @@ class MCAsmBackend;
 class MCCodeEmitter;
 class MCContext;
 class MCInstrInfo;
+class MCObjectWriter;
 class MCRegisterInfo;
 class MCSubtargetInfo;
 class Target;
+class raw_ostream;
 
 extern Target TheAMDGPUTarget;
 
 MCCodeEmitter *createR600MCCodeEmitter(const MCInstrInfo &MCII,
                                        const MCRegisterInfo &MRI,
-                                       const MCSubtargetInfo &STI,
-                                       MCContext &Ctx);
+                                       const MCSubtargetInfo &STI);
 
 MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII,
                                      const MCRegisterInfo &MRI,
@@ -41,6 +42,8 @@ MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII,
 
 MCAsmBackend *createAMDGPUAsmBackend(const Target &T, StringRef TT,
                                      StringRef CPU);
+
+MCObjectWriter *createAMDGPUELFObjectWriter(raw_ostream &OS);
 } // End llvm namespace
 
 #define GET_REGINFO_ENUM
diff --git a/lib/Target/R600/MCTargetDesc/CMakeLists.txt b/lib/Target/R600/MCTargetDesc/CMakeLists.txt
index 37e714c..3ccdf42 100644
--- a/lib/Target/R600/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/R600/MCTargetDesc/CMakeLists.txt
@@ -1,6 +1,7 @@
 
 add_llvm_library(LLVMR600Desc
   AMDGPUAsmBackend.cpp
+  AMDGPUELFObjectWriter.cpp
   AMDGPUMCTargetDesc.cpp
   AMDGPUMCAsmInfo.cpp
   R600MCCodeEmitter.cpp
diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
index 927bcbd..cb4cf0c 100644
--- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -9,12 +9,8 @@
 //
 /// \file
 ///
-/// This code emitter outputs bytecode that is understood by the r600g driver
-/// in the Mesa [1] project.  The bytecode is very similar to the hardware's ISA,
-/// but it still needs to be run through a finalizer in order to be executed
-/// by the GPU.
-///
-/// [1] http://www.mesa3d.org/
+/// \brief The R600 code emitter produces machine code that can be executed
+/// directly on the GPU device.
 //
 //===----------------------------------------------------------------------===//
 
@@ -30,9 +26,6 @@
 #include "llvm/Support/raw_ostream.h"
 #include <stdio.h>
 
-#define SRC_BYTE_COUNT 11
-#define DST_BYTE_COUNT 5
-
 using namespace llvm;
 
 namespace {
@@ -43,13 +36,12 @@ class R600MCCodeEmitter : public AMDGPUMCCodeEmitter {
   const MCInstrInfo &MCII;
   const MCRegisterInfo &MRI;
   const MCSubtargetInfo &STI;
-  MCContext &Ctx;
 
 public:
 
   R600MCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri,
-                    const MCSubtargetInfo &sti, MCContext &ctx)
-    : MCII(mcii), MRI(mri), STI(sti), Ctx(ctx) { }
+                    const MCSubtargetInfo &sti)
+    : MCII(mcii), MRI(mri), STI(sti) { }
 
   /// \brief Encode the instruction and write it to the OS.
   virtual void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
@@ -60,30 +52,14 @@ public:
                                      SmallVectorImpl<MCFixup> &Fixups) const;
 private:
 
-  void EmitALUInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups,
-                    raw_ostream &OS) const;
-  void EmitSrc(const MCInst &MI, unsigned OpIdx, raw_ostream &OS) const;
-  void EmitSrcISA(const MCInst &MI, unsigned RegOpIdx, unsigned SelOpIdx,
-                    raw_ostream &OS) const;
-  void EmitDst(const MCInst &MI, raw_ostream &OS) const;
-  void EmitFCInstr(const MCInst &MI, raw_ostream &OS) const;
-
-  void EmitNullBytes(unsigned int byteCount, raw_ostream &OS) const;
-
   void EmitByte(unsigned int byte, raw_ostream &OS) const;
 
-  void EmitTwoBytes(uint32_t bytes, raw_ostream &OS) const;
-
   void Emit(uint32_t value, raw_ostream &OS) const;
   void Emit(uint64_t value, raw_ostream &OS) const;
 
   unsigned getHWRegChan(unsigned reg) const;
   unsigned getHWReg(unsigned regNo) const;
 
-  bool isFCOp(unsigned opcode) const;
-  bool isTexOp(unsigned opcode) const;
-  bool isFlagSet(const MCInst &MI, unsigned Operand, unsigned Flag) const;
-
 };
 
 } // End anonymous namespace
@@ -95,16 +71,6 @@ enum RegElement {
   ELEMENT_W
 };
 
-enum InstrTypes {
-  INSTR_ALU = 0,
-  INSTR_TEX,
-  INSTR_FC,
-  INSTR_NATIVE,
-  INSTR_VTX,
-  INSTR_EXPORT,
-  INSTR_CFALU
-};
-
 enum FCInstr {
   FC_IF_PREDICATE = 0,
   FC_ELSE,
@@ -132,355 +98,95 @@ enum TextureTypes {
 
 MCCodeEmitter *llvm::createR600MCCodeEmitter(const MCInstrInfo &MCII,
                                            const MCRegisterInfo &MRI,
-                                           const MCSubtargetInfo &STI,
-                                           MCContext &Ctx) {
-  return new R600MCCodeEmitter(MCII, MRI, STI, Ctx);
+                                           const MCSubtargetInfo &STI) {
+  return new R600MCCodeEmitter(MCII, MRI, STI);
 }
 
 void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
                                        SmallVectorImpl<MCFixup> &Fixups) const {
-  if (isFCOp(MI.getOpcode())){
-    EmitFCInstr(MI, OS);
-  } else if (MI.getOpcode() == AMDGPU::RETURN ||
+  const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
+  if (MI.getOpcode() == AMDGPU::RETURN ||
+    MI.getOpcode() == AMDGPU::FETCH_CLAUSE ||
+    MI.getOpcode() == AMDGPU::ALU_CLAUSE ||
     MI.getOpcode() == AMDGPU::BUNDLE ||
     MI.getOpcode() == AMDGPU::KILL) {
     return;
-  } else {
-    switch(MI.getOpcode()) {
-    case AMDGPU::STACK_SIZE: {
-      EmitByte(MI.getOperand(0).getImm(), OS);
-      break;
-    }
-    case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
-    case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
-      uint64_t inst = getBinaryCodeForInstr(MI, Fixups);
-      EmitByte(INSTR_NATIVE, OS);
-      Emit(inst, OS);
-      break;
-    }
-    case AMDGPU::CONSTANT_LOAD_eg:
-    case AMDGPU::VTX_READ_PARAM_8_eg:
-    case AMDGPU::VTX_READ_PARAM_16_eg:
-    case AMDGPU::VTX_READ_PARAM_32_eg:
-    case AMDGPU::VTX_READ_PARAM_128_eg:
-    case AMDGPU::VTX_READ_GLOBAL_8_eg:
-    case AMDGPU::VTX_READ_GLOBAL_32_eg:
-    case AMDGPU::VTX_READ_GLOBAL_128_eg:
-    case AMDGPU::TEX_VTX_CONSTBUF:
-    case AMDGPU::TEX_VTX_TEXBUF : {
-      uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups);
-      uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset
-
-      EmitByte(INSTR_VTX, OS);
-      Emit(InstWord01, OS);
-      Emit(InstWord2, OS);
-      break;
-    }
-    case AMDGPU::TEX_LD:
-    case AMDGPU::TEX_GET_TEXTURE_RESINFO:
-    case AMDGPU::TEX_SAMPLE:
-    case AMDGPU::TEX_SAMPLE_C:
-    case AMDGPU::TEX_SAMPLE_L:
-    case AMDGPU::TEX_SAMPLE_C_L:
-    case AMDGPU::TEX_SAMPLE_LB:
-    case AMDGPU::TEX_SAMPLE_C_LB:
-    case AMDGPU::TEX_SAMPLE_G:
-    case AMDGPU::TEX_SAMPLE_C_G:
-    case AMDGPU::TEX_GET_GRADIENTS_H:
-    case AMDGPU::TEX_GET_GRADIENTS_V:
-    case AMDGPU::TEX_SET_GRADIENTS_H:
-    case AMDGPU::TEX_SET_GRADIENTS_V: {
-      unsigned Opcode = MI.getOpcode();
-      bool HasOffsets = (Opcode == AMDGPU::TEX_LD);
-      unsigned OpOffset = HasOffsets ? 3 : 0;
-      int64_t Sampler = MI.getOperand(OpOffset + 3).getImm();
-      int64_t TextureType = MI.getOperand(OpOffset + 4).getImm();
-
-      uint32_t SrcSelect[4] = {0, 1, 2, 3};
-      uint32_t Offsets[3] = {0, 0, 0};
-      uint64_t CoordType[4] = {1, 1, 1, 1};
-
-      if (HasOffsets)
-        for (unsigned i = 0; i < 3; i++) {
-          int SignedOffset = MI.getOperand(i + 2).getImm();
-          Offsets[i] = (SignedOffset & 0x1F);
-        }
-          
-
-      if (TextureType == TEXTURE_RECT ||
-          TextureType == TEXTURE_SHADOWRECT) {
-        CoordType[ELEMENT_X] = 0;
-        CoordType[ELEMENT_Y] = 0;
-      }
-
-      if (TextureType == TEXTURE_1D_ARRAY ||
-          TextureType == TEXTURE_SHADOW1D_ARRAY) {
-        if (Opcode == AMDGPU::TEX_SAMPLE_C_L ||
-            Opcode == AMDGPU::TEX_SAMPLE_C_LB) {
-          CoordType[ELEMENT_Y] = 0;
-        } else {
-          CoordType[ELEMENT_Z] = 0;
-          SrcSelect[ELEMENT_Z] = ELEMENT_Y;
-        }
-      } else if (TextureType == TEXTURE_2D_ARRAY ||
-          TextureType == TEXTURE_SHADOW2D_ARRAY) {
-        CoordType[ELEMENT_Z] = 0;
+  } else if (IS_VTX(Desc)) {
+    uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups);
+    uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset
+    InstWord2 |= 1 << 19; // NOTE(review): presumably MEGA_FETCH; confirm
+
+    Emit(InstWord01, OS);
+    Emit(InstWord2, OS);
+    Emit((uint32_t) 0, OS); // Zero-filled fourth dword.
+  } else if (IS_TEX(Desc)) {
+    unsigned Opcode = MI.getOpcode();
+    bool HasOffsets = (Opcode == AMDGPU::TEX_LD);
+    unsigned OpOffset = HasOffsets ? 3 : 0;
+    int64_t Sampler = MI.getOperand(OpOffset + 3).getImm();
+    int64_t TextureType = MI.getOperand(OpOffset + 4).getImm();
+
+    uint32_t SrcSelect[4] = {0, 1, 2, 3};
+    uint32_t Offsets[3] = {0, 0, 0};
+    uint64_t CoordType[4] = {1, 1, 1, 1};
+
+    if (HasOffsets)
+      for (unsigned i = 0; i < 3; i++) {
+        int SignedOffset = MI.getOperand(i + 2).getImm();
+        Offsets[i] = (SignedOffset & 0x1F);
       }
 
-
-      if ((TextureType == TEXTURE_SHADOW1D ||
-          TextureType == TEXTURE_SHADOW2D ||
-          TextureType == TEXTURE_SHADOWRECT ||
-          TextureType == TEXTURE_SHADOW1D_ARRAY) &&
-          Opcode != AMDGPU::TEX_SAMPLE_C_L &&
-          Opcode != AMDGPU::TEX_SAMPLE_C_LB) {
-        SrcSelect[ELEMENT_W] = ELEMENT_Z;
-      }
-
-      uint64_t Word01 = getBinaryCodeForInstr(MI, Fixups) |
-          CoordType[ELEMENT_X] << 60 | CoordType[ELEMENT_Y] << 61 |
-          CoordType[ELEMENT_Z] << 62 | CoordType[ELEMENT_W] << 63;
-      uint32_t Word2 = Sampler << 15 | SrcSelect[ELEMENT_X] << 20 |
-          SrcSelect[ELEMENT_Y] << 23 | SrcSelect[ELEMENT_Z] << 26 |
-          SrcSelect[ELEMENT_W] << 29 | Offsets[0] << 0 | Offsets[1] << 5 |
-          Offsets[2] << 10;
-
-      EmitByte(INSTR_TEX, OS);
-      Emit(Word01, OS);
-      Emit(Word2, OS);
-      break;
-    }
-    case AMDGPU::EG_ExportSwz:
-    case AMDGPU::R600_ExportSwz:
-    case AMDGPU::EG_ExportBuf:
-    case AMDGPU::R600_ExportBuf: {
-      uint64_t Inst = getBinaryCodeForInstr(MI, Fixups);
-      EmitByte(INSTR_EXPORT, OS);
-      Emit(Inst, OS);
-      break;
-    }
-    case AMDGPU::CF_ALU:
-    case AMDGPU::CF_ALU_PUSH_BEFORE: {
-      uint64_t Inst = getBinaryCodeForInstr(MI, Fixups);
-      EmitByte(INSTR_CFALU, OS);
-      Emit(Inst, OS);
-      break;
-    }
-    case AMDGPU::CF_TC:
-    case AMDGPU::CF_VC:
-    case AMDGPU::CF_CALL_FS:
-      return;
-    case AMDGPU::WHILE_LOOP:
-    case AMDGPU::END_LOOP:
-    case AMDGPU::LOOP_BREAK:
-    case AMDGPU::CF_CONTINUE:
-    case AMDGPU::CF_JUMP:
-    case AMDGPU::CF_ELSE:
-    case AMDGPU::POP: {
-      uint64_t Inst = getBinaryCodeForInstr(MI, Fixups);
-      EmitByte(INSTR_NATIVE, OS);
-      Emit(Inst, OS);
-      break;
+    if (TextureType == TEXTURE_RECT ||
+        TextureType == TEXTURE_SHADOWRECT) {
+      CoordType[ELEMENT_X] = 0;
+      CoordType[ELEMENT_Y] = 0;
     }
-    default:
-      EmitALUInstr(MI, Fixups, OS);
-      break;
-    }
-  }
-}
-
-void R600MCCodeEmitter::EmitALUInstr(const MCInst &MI,
-                                     SmallVectorImpl<MCFixup> &Fixups,
-                                     raw_ostream &OS) const {
-  const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode());
-
-  // Emit instruction type
-  EmitByte(INSTR_ALU, OS);
-
-  uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups);
-
-  //older alu have different encoding for instructions with one or two src
-  //parameters.
-  if ((STI.getFeatureBits() & AMDGPU::FeatureR600ALUInst) &&
-      !(MCDesc.TSFlags & R600_InstFlag::OP3)) {
-    uint64_t ISAOpCode = InstWord01 & (0x3FFULL << 39);
-    InstWord01 &= ~(0x3FFULL << 39);
-    InstWord01 |= ISAOpCode << 1;
-  }
-
-  unsigned SrcNum = MCDesc.TSFlags & R600_InstFlag::OP3 ? 3 :
-      MCDesc.TSFlags & R600_InstFlag::OP2 ? 2 : 1;
-
-  EmitByte(SrcNum, OS);
-
-  const unsigned SrcOps[3][2] = {
-      {R600Operands::SRC0, R600Operands::SRC0_SEL},
-      {R600Operands::SRC1, R600Operands::SRC1_SEL},
-      {R600Operands::SRC2, R600Operands::SRC2_SEL}
-  };
 
-  for (unsigned SrcIdx = 0; SrcIdx < SrcNum; ++SrcIdx) {
-    unsigned RegOpIdx = R600Operands::ALUOpTable[SrcNum-1][SrcOps[SrcIdx][0]];
-    unsigned SelOpIdx = R600Operands::ALUOpTable[SrcNum-1][SrcOps[SrcIdx][1]];
-    EmitSrcISA(MI, RegOpIdx, SelOpIdx, OS);
-  }
-
-  Emit(InstWord01, OS);
-  return;
-}
-
-void R600MCCodeEmitter::EmitSrc(const MCInst &MI, unsigned OpIdx,
-                                raw_ostream &OS) const {
-  const MCOperand &MO = MI.getOperand(OpIdx);
-  union {
-    float f;
-    uint32_t i;
-  } Value;
-  Value.i = 0;
-  // Emit the source select (2 bytes).  For GPRs, this is the register index.
-  // For other potential instruction operands, (e.g. constant registers) the
-  // value of the source select is defined in the r600isa docs.
-  if (MO.isReg()) {
-    unsigned reg = MO.getReg();
-    EmitTwoBytes(getHWReg(reg), OS);
-    if (reg == AMDGPU::ALU_LITERAL_X) {
-      unsigned ImmOpIndex = MI.getNumOperands() - 1;
-      MCOperand ImmOp = MI.getOperand(ImmOpIndex);
-      if (ImmOp.isFPImm()) {
-        Value.f = ImmOp.getFPImm();
+    if (TextureType == TEXTURE_1D_ARRAY ||
+        TextureType == TEXTURE_SHADOW1D_ARRAY) {
+      if (Opcode == AMDGPU::TEX_SAMPLE_C_L ||
+          Opcode == AMDGPU::TEX_SAMPLE_C_LB) {
+        CoordType[ELEMENT_Y] = 0;
       } else {
-        assert(ImmOp.isImm());
-        Value.i = ImmOp.getImm();
+        CoordType[ELEMENT_Z] = 0;
+        SrcSelect[ELEMENT_Z] = ELEMENT_Y;
       }
+    } else if (TextureType == TEXTURE_2D_ARRAY ||
+        TextureType == TEXTURE_SHADOW2D_ARRAY) {
+      CoordType[ELEMENT_Z] = 0;
     }
-  } else {
-    // XXX: Handle other operand types.
-    EmitTwoBytes(0, OS);
-  }
-
-  // Emit the source channel (1 byte)
-  if (MO.isReg()) {
-    EmitByte(getHWRegChan(MO.getReg()), OS);
-  } else {
-    EmitByte(0, OS);
-  }
-
-  // XXX: Emit isNegated (1 byte)
-  if ((!(isFlagSet(MI, OpIdx, MO_FLAG_ABS)))
-      && (isFlagSet(MI, OpIdx, MO_FLAG_NEG) ||
-     (MO.isReg() &&
-      (MO.getReg() == AMDGPU::NEG_ONE || MO.getReg() == AMDGPU::NEG_HALF)))){
-    EmitByte(1, OS);
-  } else {
-    EmitByte(0, OS);
-  }
-
-  // Emit isAbsolute (1 byte)
-  if (isFlagSet(MI, OpIdx, MO_FLAG_ABS)) {
-    EmitByte(1, OS);
-  } else {
-    EmitByte(0, OS);
-  }
-
-  // XXX: Emit relative addressing mode (1 byte)
-  EmitByte(0, OS);
-
-  // Emit kc_bank, This will be adjusted later by r600_asm
-  EmitByte(0, OS);
 
-  // Emit the literal value, if applicable (4 bytes).
-  Emit(Value.i, OS);
 
-}
-
-void R600MCCodeEmitter::EmitSrcISA(const MCInst &MI, unsigned RegOpIdx,
-                                   unsigned SelOpIdx, raw_ostream &OS) const {
-  const MCOperand &RegMO = MI.getOperand(RegOpIdx);
-  const MCOperand &SelMO = MI.getOperand(SelOpIdx);
-
-  union {
-    float f;
-    uint32_t i;
-  } InlineConstant;
-  InlineConstant.i = 0;
-  // Emit source type (1 byte) and source select (4 bytes). For GPRs type is 0
-  // and select is 0 (GPR index is encoded in the instr encoding. For constants
-  // type is 1 and select is the original const select passed from the driver.
-  unsigned Reg = RegMO.getReg();
-  if (Reg == AMDGPU::ALU_CONST) {
-    EmitByte(1, OS);
-    uint32_t Sel = SelMO.getImm();
-    Emit(Sel, OS);
-  } else {
-    EmitByte(0, OS);
-    Emit((uint32_t)0, OS);
-  }
-
-  if (Reg == AMDGPU::ALU_LITERAL_X) {
-    unsigned ImmOpIndex = MI.getNumOperands() - 1;
-    MCOperand ImmOp = MI.getOperand(ImmOpIndex);
-    if (ImmOp.isFPImm()) {
-      InlineConstant.f = ImmOp.getFPImm();
-    } else {
-      assert(ImmOp.isImm());
-      InlineConstant.i = ImmOp.getImm();
+    if ((TextureType == TEXTURE_SHADOW1D ||
+        TextureType == TEXTURE_SHADOW2D ||
+        TextureType == TEXTURE_SHADOWRECT ||
+        TextureType == TEXTURE_SHADOW1D_ARRAY) &&
+        Opcode != AMDGPU::TEX_SAMPLE_C_L &&
+        Opcode != AMDGPU::TEX_SAMPLE_C_LB) {
+      SrcSelect[ELEMENT_W] = ELEMENT_Z;
     }
-  }
-
-  // Emit the literal value, if applicable (4 bytes).
-  Emit(InlineConstant.i, OS);
-}
-
-void R600MCCodeEmitter::EmitFCInstr(const MCInst &MI, raw_ostream &OS) const {
-
-  // Emit instruction type
-  EmitByte(INSTR_FC, OS);
 
-  // Emit SRC
-  unsigned NumOperands = MI.getNumOperands();
-  if (NumOperands > 0) {
-    assert(NumOperands == 1);
-    EmitSrc(MI, 0, OS);
+    uint64_t Word01 = getBinaryCodeForInstr(MI, Fixups) |
+        CoordType[ELEMENT_X] << 60 | CoordType[ELEMENT_Y] << 61 |
+        CoordType[ELEMENT_Z] << 62 | CoordType[ELEMENT_W] << 63;
+    uint32_t Word2 = Sampler << 15 | SrcSelect[ELEMENT_X] << 20 |
+        SrcSelect[ELEMENT_Y] << 23 | SrcSelect[ELEMENT_Z] << 26 |
+        SrcSelect[ELEMENT_W] << 29 | Offsets[0] << 0 | Offsets[1] << 5 |
+        Offsets[2] << 10;
+
+    Emit(Word01, OS);
+    Emit(Word2, OS);
+    Emit((uint32_t) 0, OS); // Zero-filled fourth dword.
   } else {
-    EmitNullBytes(SRC_BYTE_COUNT, OS);
-  }
-
-  // Emit FC Instruction
-  enum FCInstr instr;
-  switch (MI.getOpcode()) {
-  case AMDGPU::PREDICATED_BREAK:
-    instr = FC_BREAK_PREDICATE;
-    break;
-  case AMDGPU::CONTINUE:
-    instr = FC_CONTINUE;
-    break;
-  case AMDGPU::IF_PREDICATE_SET:
-    instr = FC_IF_PREDICATE;
-    break;
-  case AMDGPU::ELSE:
-    instr = FC_ELSE;
-    break;
-  case AMDGPU::ENDIF:
-    instr = FC_ENDIF;
-    break;
-  case AMDGPU::ENDLOOP:
-    instr = FC_ENDLOOP;
-    break;
-  case AMDGPU::WHILELOOP:
-    instr = FC_BGNLOOP;
-    break;
-  default:
-    abort();
-    break;
-  }
-  EmitByte(instr, OS);
-}
-
-void R600MCCodeEmitter::EmitNullBytes(unsigned int ByteCount,
-                                      raw_ostream &OS) const {
-
-  for (unsigned int i = 0; i < ByteCount; i++) {
-    EmitByte(0, OS);
+    uint64_t Inst = getBinaryCodeForInstr(MI, Fixups);
+    // Older ALUs encode one- and two-source instructions differently.
+    if ((STI.getFeatureBits() & AMDGPU::FeatureR600ALUInst) &&
+        (Desc.TSFlags & (R600_InstFlag::OP1 | R600_InstFlag::OP2))) {
+      uint64_t ISAOpCode = Inst & (0x3FFULL << 39);
+      Inst &= ~(0x3FFULL << 39);
+      Inst |= ISAOpCode << 1;
+    }
+    Emit(Inst, OS);
   }
 }
 
@@ -488,12 +194,6 @@ void R600MCCodeEmitter::EmitByte(unsigned int Byte, raw_ostream &OS) const {
   OS.write((uint8_t) Byte & 0xff);
 }
 
-void R600MCCodeEmitter::EmitTwoBytes(unsigned int Bytes,
-                                     raw_ostream &OS) const {
-  OS.write((uint8_t) (Bytes & 0xff));
-  OS.write((uint8_t) ((Bytes >> 8) & 0xff));
-}
-
 void R600MCCodeEmitter::Emit(uint32_t Value, raw_ostream &OS) const {
   for (unsigned i = 0; i < 4; i++) {
     OS.write((uint8_t) ((Value >> (8 * i)) & 0xff));
@@ -531,55 +231,4 @@ uint64_t R600MCCodeEmitter::getMachineOpValue(const MCInst &MI,
   }
 }
 
-//===----------------------------------------------------------------------===//
-// Encoding helper functions
-//===----------------------------------------------------------------------===//
-
-bool R600MCCodeEmitter::isFCOp(unsigned opcode) const {
-  switch(opcode) {
-  default: return false;
-  case AMDGPU::PREDICATED_BREAK:
-  case AMDGPU::CONTINUE:
-  case AMDGPU::IF_PREDICATE_SET:
-  case AMDGPU::ELSE:
-  case AMDGPU::ENDIF:
-  case AMDGPU::ENDLOOP:
-  case AMDGPU::WHILELOOP:
-    return true;
-  }
-}
-
-bool R600MCCodeEmitter::isTexOp(unsigned opcode) const {
-  switch(opcode) {
-  default: return false;
-  case AMDGPU::TEX_LD:
-  case AMDGPU::TEX_GET_TEXTURE_RESINFO:
-  case AMDGPU::TEX_SAMPLE:
-  case AMDGPU::TEX_SAMPLE_C:
-  case AMDGPU::TEX_SAMPLE_L:
-  case AMDGPU::TEX_SAMPLE_C_L:
-  case AMDGPU::TEX_SAMPLE_LB:
-  case AMDGPU::TEX_SAMPLE_C_LB:
-  case AMDGPU::TEX_SAMPLE_G:
-  case AMDGPU::TEX_SAMPLE_C_G:
-  case AMDGPU::TEX_GET_GRADIENTS_H:
-  case AMDGPU::TEX_GET_GRADIENTS_V:
-  case AMDGPU::TEX_SET_GRADIENTS_H:
-  case AMDGPU::TEX_SET_GRADIENTS_V:
-    return true;
-  }
-}
-
-bool R600MCCodeEmitter::isFlagSet(const MCInst &MI, unsigned Operand,
-                                  unsigned Flag) const {
-  const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode());
-  unsigned FlagIndex = GET_FLAG_OPERAND_IDX(MCDesc.TSFlags);
-  if (FlagIndex == 0) {
-    return false;
-  }
-  assert(MI.getOperand(FlagIndex).isImm());
-  return !!((MI.getOperand(FlagIndex).getImm() >>
-            (NUM_MO_FLAGS * Operand)) & Flag);
-}
-
 #include "AMDGPUGenMCCodeEmitter.inc"
diff --git a/lib/Target/R600/Processors.td b/lib/Target/R600/Processors.td
index 868810c..0cbe919 100644
--- a/lib/Target/R600/Processors.td
+++ b/lib/Target/R600/Processors.td
@@ -1,4 +1,4 @@
-//===-- Processors.td - TODO: Add brief description -------===//
+//===-- Processors.td - R600 Processor definitions ------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -6,25 +6,43 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// AMDIL processors supported.
-//
-//===----------------------------------------------------------------------===//
 
 class Proc<string Name, ProcessorItineraries itin, list<SubtargetFeature> Features>
 : Processor<Name, itin, Features>;
-def : Proc<"",           R600_EG_Itin, [FeatureR600ALUInst]>;
-def : Proc<"r600",       R600_EG_Itin, [FeatureR600ALUInst]>;
-def : Proc<"rv710",      R600_EG_Itin, []>;
-def : Proc<"rv730",      R600_EG_Itin, []>;
-def : Proc<"rv770",      R600_EG_Itin, [FeatureFP64]>;
-def : Proc<"cedar",      R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
-def : Proc<"redwood",    R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
-def : Proc<"juniper",    R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
-def : Proc<"cypress",    R600_EG_Itin, [FeatureByteAddress, FeatureImages, FeatureFP64]>;
-def : Proc<"barts",      R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
-def : Proc<"turks",      R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
-def : Proc<"caicos",     R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
-def : Proc<"cayman",     R600_EG_Itin, [FeatureByteAddress, FeatureImages, FeatureFP64]>;
-def : Proc<"SI", SI_Itin, [Feature64BitPtr]>;
-
+def : Proc<"",           R600_VLIW5_Itin,
+    [FeatureR600ALUInst, FeatureVertexCache]>;
+def : Proc<"r600",       R600_VLIW5_Itin,
+    [FeatureR600ALUInst , FeatureVertexCache]>;
+def : Proc<"rs880",      R600_VLIW5_Itin,
+    [FeatureR600ALUInst]>;
+def : Proc<"rv670",      R600_VLIW5_Itin,
+    [FeatureR600ALUInst, FeatureFP64, FeatureVertexCache]>;
+def : Proc<"rv710",      R600_VLIW5_Itin,
+    [FeatureVertexCache]>;
+def : Proc<"rv730",      R600_VLIW5_Itin,
+    [FeatureVertexCache]>;
+def : Proc<"rv770",      R600_VLIW5_Itin,
+    [FeatureFP64, FeatureVertexCache]>;
+def : Proc<"cedar",      R600_VLIW5_Itin,
+    [FeatureByteAddress, FeatureImages, FeatureVertexCache]>;
+def : Proc<"redwood",    R600_VLIW5_Itin,
+    [FeatureByteAddress, FeatureImages, FeatureVertexCache]>;
+def : Proc<"sumo",       R600_VLIW5_Itin,
+    [FeatureByteAddress, FeatureImages]>;
+def : Proc<"juniper",    R600_VLIW5_Itin,
+    [FeatureByteAddress, FeatureImages, FeatureVertexCache]>;
+def : Proc<"cypress",    R600_VLIW5_Itin,
+    [FeatureByteAddress, FeatureImages, FeatureFP64, FeatureVertexCache]>;
+def : Proc<"barts",      R600_VLIW5_Itin,
+    [FeatureByteAddress, FeatureImages, FeatureVertexCache]>;
+def : Proc<"turks",      R600_VLIW5_Itin,
+    [FeatureByteAddress, FeatureImages, FeatureVertexCache]>;
+def : Proc<"caicos",     R600_VLIW5_Itin,
+    [FeatureByteAddress, FeatureImages]>;
+def : Proc<"cayman",     R600_VLIW4_Itin, [FeatureByteAddress, FeatureImages, FeatureFP64]>;
+def : Proc<"SI",         SI_Itin, [Feature64BitPtr, FeatureFP64]>;
+def : Proc<"tahiti",     SI_Itin, [Feature64BitPtr, FeatureFP64]>;
+def : Proc<"pitcairn",   SI_Itin, [Feature64BitPtr, FeatureFP64]>;
+def : Proc<"verde",      SI_Itin, [Feature64BitPtr, FeatureFP64]>;
+def : Proc<"oland",      SI_Itin, [Feature64BitPtr, FeatureFP64]>;
+def : Proc<"hainan",     SI_Itin, [Feature64BitPtr, FeatureFP64]>;
diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp b/lib/Target/R600/R600ControlFlowFinalizer.cpp
index 3a6c7ea..ffe3414 100644
--- a/lib/Target/R600/R600ControlFlowFinalizer.cpp
+++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp
@@ -30,35 +30,27 @@ namespace llvm {
 class R600ControlFlowFinalizer : public MachineFunctionPass {
 
 private:
+  typedef std::pair<MachineInstr *, std::vector<MachineInstr *> > ClauseFile;
+
+  enum ControlFlowInstruction {
+    CF_TC,
+    CF_VC,
+    CF_CALL_FS,
+    CF_WHILE_LOOP,
+    CF_END_LOOP,
+    CF_LOOP_BREAK,
+    CF_LOOP_CONTINUE,
+    CF_JUMP,
+    CF_ELSE,
+    CF_POP,
+    CF_END
+  };
+
   static char ID;
   const R600InstrInfo *TII;
+  const R600RegisterInfo &TRI;
   unsigned MaxFetchInst;
-
-  bool isFetch(const MachineInstr *MI) const {
-    switch (MI->getOpcode()) {
-    case AMDGPU::TEX_VTX_CONSTBUF:
-    case AMDGPU::TEX_VTX_TEXBUF:
-    case AMDGPU::TEX_LD:
-    case AMDGPU::TEX_GET_TEXTURE_RESINFO:
-    case AMDGPU::TEX_GET_GRADIENTS_H:
-    case AMDGPU::TEX_GET_GRADIENTS_V:
-    case AMDGPU::TEX_SET_GRADIENTS_H:
-    case AMDGPU::TEX_SET_GRADIENTS_V:
-    case AMDGPU::TEX_SAMPLE:
-    case AMDGPU::TEX_SAMPLE_C:
-    case AMDGPU::TEX_SAMPLE_L:
-    case AMDGPU::TEX_SAMPLE_C_L:
-    case AMDGPU::TEX_SAMPLE_LB:
-    case AMDGPU::TEX_SAMPLE_C_LB:
-    case AMDGPU::TEX_SAMPLE_G:
-    case AMDGPU::TEX_SAMPLE_C_G:
-    case AMDGPU::TXD:
-    case AMDGPU::TXD_SHADOW:
-     return true;
-    default:
-      return false;
-    }
-  }
+  const AMDGPUSubtarget &ST;
 
   bool IsTrivialInst(MachineInstr *MI) const {
     switch (MI->getOpcode()) {
@@ -70,26 +62,226 @@ private:
     }
   }
 
-  MachineBasicBlock::iterator
-  MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
-      unsigned CfAddress) const {
+  /// Return the descriptor of the hardware control-flow opcode that
+  /// implements \p CFI on the current subtarget.
+  ///
+  /// Generation HD5XXX and newer selects the *_EG opcode, anything older the
+  /// *_R600 one. CF_END is special-cased: a device whose flag is
+  /// OCL_DEVICE_CAYMAN gets CF_END_CM instead.
+  const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const {
+    const bool IsEg = ST.device()->getGeneration() >= AMDGPUDeviceInfo::HD5XXX;
+    switch (CFI) {
+    // Fetch clauses and CALL_FS.
+    case CF_TC:
+      return TII->get(IsEg ? AMDGPU::CF_TC_EG : AMDGPU::CF_TC_R600);
+    case CF_VC:
+      return TII->get(IsEg ? AMDGPU::CF_VC_EG : AMDGPU::CF_VC_R600);
+    case CF_CALL_FS:
+      return TII->get(IsEg ? AMDGPU::CF_CALL_FS_EG : AMDGPU::CF_CALL_FS_R600);
+    // Loops.
+    case CF_WHILE_LOOP:
+      return TII->get(IsEg ? AMDGPU::WHILE_LOOP_EG : AMDGPU::WHILE_LOOP_R600);
+    case CF_END_LOOP:
+      return TII->get(IsEg ? AMDGPU::END_LOOP_EG : AMDGPU::END_LOOP_R600);
+    case CF_LOOP_BREAK:
+      return TII->get(IsEg ? AMDGPU::LOOP_BREAK_EG : AMDGPU::LOOP_BREAK_R600);
+    case CF_LOOP_CONTINUE:
+      return TII->get(IsEg ? AMDGPU::CF_CONTINUE_EG : AMDGPU::CF_CONTINUE_R600);
+    // Jump, else and pop.
+    case CF_JUMP:
+      return TII->get(IsEg ? AMDGPU::CF_JUMP_EG : AMDGPU::CF_JUMP_R600);
+    case CF_ELSE:
+      return TII->get(IsEg ? AMDGPU::CF_ELSE_EG : AMDGPU::CF_ELSE_R600);
+    case CF_POP:
+      return TII->get(IsEg ? AMDGPU::POP_EG : AMDGPU::POP_R600);
+    // End of program (emitted for RETURN).
+    case CF_END:
+      // OCL_DEVICE_CAYMAN devices use a dedicated opcode.
+      if (ST.device()->getDeviceFlag() == OCL_DEVICE_CAYMAN)
+        return TII->get(AMDGPU::CF_END_CM);
+      return TII->get(IsEg ? AMDGPU::CF_END_EG : AMDGPU::CF_END_R600);
+    }
+    // Only reached when CFI is not one of the enumerators above: keep the
+    // assertion (and, without assertions, the lookup of opcode 0).
+    unsigned Opcode = 0;
+    assert (Opcode && "No opcode selected");
+    return TII->get(Opcode);
+  }
+
+  /// \returns true, recording \p MI's registers in \p DstRegs / \p SrcRegs,
+  /// unless MI reads a recorded destination or writes a recorded source.
+  bool isCompatibleWithClause(const MachineInstr *MI,
+      std::set<unsigned> &DstRegs, std::set<unsigned> &SrcRegs) const {
+    // Start from 0 so an MI without a register def or use is handled
+    // deterministically instead of reading an uninitialized local.
+    unsigned DstMI = 0, SrcMI = 0;
+    for (MachineInstr::const_mop_iterator I = MI->operands_begin(),
+        E = MI->operands_end(); I != E; ++I) {
+      const MachineOperand &MO = *I;
+      if (!MO.isReg())
+        continue;
+      if (MO.isDef())
+        DstMI = MO.getReg();
+      if (MO.isUse()) {
+        unsigned Reg = MO.getReg();
+        SrcMI = AMDGPU::R600_Reg128RegClass.contains(Reg) ? Reg :
+            TRI.getMatchingSuperReg(Reg,
+                TRI.getSubRegFromChannel(TRI.getHWRegChan(Reg)),
+                &AMDGPU::R600_Reg128RegClass);
+      }
+    }
+    if (DstRegs.count(SrcMI) || SrcRegs.count(DstMI))
+      return false;
+    SrcRegs.insert(SrcMI);
+    DstRegs.insert(DstMI);
+    return true;
+  }
+
+  /// Collect the run of fetch instructions starting at \p I into one clause.
+  ClauseFile MakeFetchClause(MachineBasicBlock &MBB,
+      MachineBasicBlock::iterator &I) const {
     MachineBasicBlock::iterator ClauseHead = I;
+    std::vector<MachineInstr *> ClauseContent;
     unsigned AluInstCount = 0;
+    bool IsTex = TII->usesTextureCache(ClauseHead); // else a vertex-cache clause
+    std::set<unsigned> DstRegs, SrcRegs;
     for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
       if (IsTrivialInst(I))
         continue;
-      if (!isFetch(I))
+      if (AluInstCount >= MaxFetchInst) // stop once MaxFetchInst are collected
+        break;
+      if ((IsTex && !TII->usesTextureCache(I)) ||
+          (!IsTex && !TII->usesVertexCache(I)))
+        break;
+      if (!isCompatibleWithClause(I, DstRegs, SrcRegs))
         break;
       AluInstCount ++;
-      if (AluInstCount > MaxFetchInst)
+      ClauseContent.push_back(I);
+    }
+    MachineInstr *MIb = BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead),
+        getHWInstrDesc(IsTex?CF_TC:CF_VC))
+        .addImm(0) // ADDR
+        .addImm(AluInstCount - 1); // COUNT: number of clause instructions - 1
+    return ClauseFile(MIb, ClauseContent);
+  }
+
+  /// Rewrite each ALU_LITERAL_X register operand of \p MI to the literal
+  /// register (X, Y, Z or W) matching the position of MI's IMM operand value
+  /// in \p Lits; a value not yet in \p Lits is appended first.
+  void getLiteral(MachineInstr *MI, std::vector<int64_t> &Lits) const {
+    unsigned LiteralRegs[] = {
+      AMDGPU::ALU_LITERAL_X, AMDGPU::ALU_LITERAL_Y,
+      AMDGPU::ALU_LITERAL_Z, AMDGPU::ALU_LITERAL_W
+    };
+    for (unsigned OpIdx = 0, NumOps = MI->getNumOperands(); OpIdx < NumOps;
+         ++OpIdx) {
+      MachineOperand &MO = MI->getOperand(OpIdx);
+      if (!MO.isReg() || MO.getReg() != AMDGPU::ALU_LITERAL_X)
+        continue;
+      unsigned ImmIdx = TII->getOperandIdx(MI->getOpcode(), R600Operands::IMM);
+      int64_t Imm = MI->getOperand(ImmIdx).getImm();
+      // Slot is Imm's index in Lits, or Lits.size() when it is not there yet.
+      unsigned Slot = std::find(Lits.begin(), Lits.end(), Imm) - Lits.begin();
+      if (Slot == Lits.size()) {
+        // New value: it takes the next free literal register.
+        assert(Lits.size() < 4 && "Too many literals in Instruction Group");
+        MO.setReg(LiteralRegs[Slot]);
+        Lits.push_back(Imm);
+      } else {
+        MO.setReg(LiteralRegs[Slot]);
+      }
+    }
+  }
+
+  /// Build one LITERALS instruction per pair of \p Literals (0 pads an odd
+  /// tail); \returns the last one built, or \p InsertPos if there is none.
+  MachineBasicBlock::iterator insertLiterals(
+      MachineBasicBlock::iterator InsertPos,
+      const std::vector<unsigned> &Literals) const {
+    MachineBasicBlock *MBB = InsertPos->getParent();
+    for (unsigned Lo = 0, N = Literals.size(); Lo < N; Lo += 2) {
+      InsertPos = BuildMI(MBB, InsertPos->getDebugLoc(),
+          TII->get(AMDGPU::LITERALS))
+          .addImm(Literals[Lo])
+          .addImm(Lo + 1 < N ? Literals[Lo + 1] : 0);
+    }
+    return InsertPos;
+  }
+
+  ClauseFile
+  MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
+      const {
+    MachineBasicBlock::iterator ClauseHead = I; // caller passes the CF_ALU* head
+    std::vector<MachineInstr *> ClauseContent;
+    I++; // step past the clause head itself
+    for (MachineBasicBlock::instr_iterator E = MBB.instr_end(); I != E;) {
+      if (IsTrivialInst(I)) {
+        ++I;
+        continue;
+      }
+      if (!I->isBundle() && !TII->isALUInstr(I->getOpcode())) // clause ends
         break;
+      std::vector<int64_t> Literals; // literals of this instruction or bundle
+      if (I->isBundle()) {
+        MachineInstr *DeleteMI = I; // bundle header, erased once unpacked
+        MachineBasicBlock::instr_iterator BI = I.getInstrIterator();
+        while (++BI != E && BI->isBundledWithPred()) {
+          BI->unbundleFromPred(); // detach the instruction from the bundle
+          for (unsigned i = 0, e = BI->getNumOperands(); i != e; ++i) {
+            MachineOperand &MO = BI->getOperand(i);
+            if (MO.isReg() && MO.isInternalRead())
+              MO.setIsInternalRead(false);
+          }
+          getLiteral(BI, Literals);
+          ClauseContent.push_back(BI);
+        }
+        I = BI; // first instruction after the former bundle
+        DeleteMI->eraseFromParent();
+      } else {
+        getLiteral(I, Literals);
+        ClauseContent.push_back(I);
+        I++;
+      }
+      for (unsigned i = 0, e = Literals.size(); i < e; i+=2) { // two per LITERALS
+        unsigned literal0 = Literals[i];
+        unsigned literal2 = (i + 1 < e)?Literals[i + 1]:0; // 0 pads an odd tail
+        MachineInstr *MILit = BuildMI(MBB, I, I->getDebugLoc(),
+            TII->get(AMDGPU::LITERALS)) // NOTE(review): I is dereferenced above; confirm I != E
+            .addImm(literal0)
+            .addImm(literal2);
+        ClauseContent.push_back(MILit);
+      }
     }
-    BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead),
-        TII->get(AMDGPU::CF_TC))
-        .addImm(CfAddress) // ADDR
-        .addImm(AluInstCount); // COUNT
-    return I;
+    ClauseHead->getOperand(7).setImm(ClauseContent.size() - 1); // presumably COUNT; wraps if the clause is empty - TODO confirm
+    return ClauseFile(ClauseHead, ClauseContent);
   }
+
+  /// Add \p CfCount to the clause head's address, emit a FETCH_CLAUSE marker
+  /// and splice the clause body before \p InsertPos (2 CfCount per instr).
+  void EmitFetchClause(MachineBasicBlock::iterator InsertPos,
+      ClauseFile &Clause, unsigned &CfCount) {
+    CounterPropagateAddr(Clause.first, CfCount);
+    MachineBasicBlock *BB = Clause.first->getParent();
+    BuildMI(BB, InsertPos->getDebugLoc(), TII->get(AMDGPU::FETCH_CLAUSE))
+        .addImm(CfCount);
+    for (unsigned Idx = 0, End = Clause.second.size(); Idx != End; ++Idx)
+      BB->splice(InsertPos, BB, Clause.second[Idx]);
+    CfCount += 2 * Clause.second.size();
+  }
+
+  /// Add \p CfCount to the clause head's address, emit an ALU_CLAUSE marker
+  /// and splice the clause body before \p InsertPos (1 CfCount per instr).
+  void EmitALUClause(MachineBasicBlock::iterator InsertPos,
+      ClauseFile &Clause, unsigned &CfCount) {
+    CounterPropagateAddr(Clause.first, CfCount);
+    MachineBasicBlock *BB = Clause.first->getParent();
+    BuildMI(BB, InsertPos->getDebugLoc(), TII->get(AMDGPU::ALU_CLAUSE))
+        .addImm(CfCount);
+    for (unsigned Idx = 0, End = Clause.second.size(); Idx != End; ++Idx)
+      BB->splice(InsertPos, BB, Clause.second[Idx]);
+    CfCount += Clause.second.size();
+  }
+
   void CounterPropagateAddr(MachineInstr *MI, unsigned Addr) const {
     MI->getOperand(0).setImm(Addr + MI->getOperand(0).getImm());
   }
@@ -102,9 +294,27 @@ private:
     }
   }
 
+  /// \returns the stack size in units of four sub-entries, rounded up.
+  unsigned getHWStackSize(unsigned StackSubEntry, bool hasPush) const {
+    switch (ST.device()->getGeneration()) {
+    case AMDGPUDeviceInfo::HD4XXX:
+      StackSubEntry += hasPush ? 2 : 0;
+      break;
+    case AMDGPUDeviceInfo::HD5XXX:
+      StackSubEntry += hasPush ? 3 : 2; // HD6XXX allowance, +1 with a push
+      break;
+    case AMDGPUDeviceInfo::HD6XXX:
+      StackSubEntry += 2;
+      break;
+    }
+    return (StackSubEntry + 3)/4; // Need ceil value of StackSubEntry/4
+  }
+
 public:
   R600ControlFlowFinalizer(TargetMachine &tm) : MachineFunctionPass(ID),
-    TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) {
+    TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())),
+    TRI(TII->getRegisterInfo()),
+    ST(tm.getSubtarget<AMDGPUSubtarget>()) {
       const AMDGPUSubtarget &ST = tm.getSubtarget<AMDGPUSubtarget>();
       if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD4XXX)
         MaxFetchInst = 8;
@@ -115,6 +325,7 @@ public:
   virtual bool runOnMachineFunction(MachineFunction &MF) {
     unsigned MaxStack = 0;
     unsigned CurrentStack = 0;
+    bool HasPush = false;
     for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME;
         ++MB) {
       MachineBasicBlock &MBB = *MB;
@@ -124,14 +335,16 @@ public:
       R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
       if (MFI->ShaderType == 1) {
         BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
-            TII->get(AMDGPU::CF_CALL_FS));
+            getHWInstrDesc(CF_CALL_FS));
         CfCount++;
+        MaxStack = 1;
       }
+      std::vector<ClauseFile> FetchClauses, AluClauses;
       for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
           I != E;) {
-        if (isFetch(I)) {
+        if (TII->usesTextureCache(I) || TII->usesVertexCache(I)) {
           DEBUG(dbgs() << CfCount << ":"; I->dump(););
-          I = MakeFetchClause(MBB, I, 0);
+          FetchClauses.push_back(MakeFetchClause(MBB, I));
           CfCount++;
           continue;
         }
@@ -142,20 +355,25 @@ public:
         case AMDGPU::CF_ALU_PUSH_BEFORE:
           CurrentStack++;
           MaxStack = std::max(MaxStack, CurrentStack);
+          HasPush = true;
         case AMDGPU::CF_ALU:
+          I = MI;
+          AluClauses.push_back(MakeALUClause(MBB, I));
         case AMDGPU::EG_ExportBuf:
         case AMDGPU::EG_ExportSwz:
         case AMDGPU::R600_ExportBuf:
         case AMDGPU::R600_ExportSwz:
+        case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
+        case AMDGPU::RAT_WRITE_CACHELESS_128_eg:
           DEBUG(dbgs() << CfCount << ":"; MI->dump(););
           CfCount++;
           break;
         case AMDGPU::WHILELOOP: {
-          CurrentStack++;
+          CurrentStack+=4;
           MaxStack = std::max(MaxStack, CurrentStack);
           MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
-              TII->get(AMDGPU::WHILE_LOOP))
-              .addImm(2);
+              getHWInstrDesc(CF_WHILE_LOOP))
+              .addImm(1);
           std::pair<unsigned, std::set<MachineInstr *> > Pair(CfCount,
               std::set<MachineInstr *>());
           Pair.second.insert(MIb);
@@ -165,12 +383,12 @@ public:
           break;
         }
         case AMDGPU::ENDLOOP: {
-          CurrentStack--;
+          CurrentStack-=4;
           std::pair<unsigned, std::set<MachineInstr *> > Pair =
               LoopStack.back();
           LoopStack.pop_back();
           CounterPropagateAddr(Pair.second, CfCount);
-          BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::END_LOOP))
+          BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP))
               .addImm(Pair.first + 1);
           MI->eraseFromParent();
           CfCount++;
@@ -178,7 +396,7 @@ public:
         }
         case AMDGPU::IF_PREDICATE_SET: {
           MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
-              TII->get(AMDGPU::CF_JUMP))
+              getHWInstrDesc(CF_JUMP))
               .addImm(0)
               .addImm(0);
           IfThenElseStack.push_back(MIb);
@@ -192,7 +410,7 @@ public:
           IfThenElseStack.pop_back();
           CounterPropagateAddr(JumpInst, CfCount);
           MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
-              TII->get(AMDGPU::CF_ELSE))
+              getHWInstrDesc(CF_ELSE))
               .addImm(0)
               .addImm(1);
           DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
@@ -207,9 +425,10 @@ public:
           IfThenElseStack.pop_back();
           CounterPropagateAddr(IfOrElseInst, CfCount + 1);
           MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
-              TII->get(AMDGPU::POP))
+              getHWInstrDesc(CF_POP))
               .addImm(CfCount + 1)
               .addImm(1);
+          (void)MIb;
           DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
           MI->eraseFromParent();
           CfCount++;
@@ -218,13 +437,13 @@ public:
         case AMDGPU::PREDICATED_BREAK: {
           CurrentStack--;
           CfCount += 3;
-          BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_JUMP))
+          BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_JUMP))
               .addImm(CfCount)
               .addImm(1);
           MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
-              TII->get(AMDGPU::LOOP_BREAK))
+              getHWInstrDesc(CF_LOOP_BREAK))
               .addImm(0);
-          BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::POP))
+          BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_POP))
               .addImm(CfCount)
               .addImm(1);
           LoopStack.back().second.insert(MIb);
@@ -233,20 +452,31 @@ public:
         }
         case AMDGPU::CONTINUE: {
           MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
-              TII->get(AMDGPU::CF_CONTINUE))
+              getHWInstrDesc(CF_LOOP_CONTINUE))
               .addImm(0);
           LoopStack.back().second.insert(MIb);
           MI->eraseFromParent();
           CfCount++;
           break;
         }
+        case AMDGPU::RETURN: {
+          BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END));
+          CfCount++;
+          MI->eraseFromParent();
+          if (CfCount % 2) {
+            BuildMI(MBB, I, MBB.findDebugLoc(MI), TII->get(AMDGPU::PAD));
+            CfCount++;
+          }
+          for (unsigned i = 0, e = FetchClauses.size(); i < e; i++)
+            EmitFetchClause(I, FetchClauses[i], CfCount);
+          for (unsigned i = 0, e = AluClauses.size(); i < e; i++)
+            EmitALUClause(I, AluClauses[i], CfCount);
+        }
         default:
           break;
         }
       }
-      BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
-          TII->get(AMDGPU::STACK_SIZE))
-          .addImm(MaxStack);
+      MFI->StackSize = getHWStackSize(MaxStack, HasPush);
     }
 
     return false;
@@ -265,4 +495,3 @@ char R600ControlFlowFinalizer::ID = 0;
 llvm::FunctionPass *llvm::createR600ControlFlowFinalizer(TargetMachine &TM) {
   return new R600ControlFlowFinalizer(TM);
 }
-
diff --git a/lib/Target/R600/R600Defines.h b/lib/Target/R600/R600Defines.h
index 16cfcf5..36bfb18 100644
--- a/lib/Target/R600/R600Defines.h
+++ b/lib/Target/R600/R600Defines.h
@@ -39,7 +39,9 @@ namespace R600_InstFlag {
     //FlagOperand bits 7, 8
     NATIVE_OPERANDS = (1 << 9),
     OP1 = (1 << 10),
-    OP2 = (1 << 11)
+    OP2 = (1 << 11),
+    VTX_INST  = (1 << 12),
+    TEX_INST = (1 << 13)
   };
 }
 
@@ -52,6 +54,9 @@ namespace R600_InstFlag {
 #define GET_REG_CHAN(reg) ((reg) >> HW_CHAN_SHIFT)
 #define GET_REG_INDEX(reg) ((reg) & HW_REG_MASK)
 
+#define IS_VTX(desc) ((desc).TSFlags & R600_InstFlag::VTX_INST)
+#define IS_TEX(desc) ((desc).TSFlags & R600_InstFlag::TEX_INST)
+
 namespace R600Operands {
   enum Ops {
     DST,
@@ -78,6 +83,7 @@ namespace R600Operands {
     LAST,
     PRED_SEL,
     IMM,
+    BANK_SWIZZLE,
     COUNT
  };
 
@@ -85,13 +91,39 @@ namespace R600Operands {
 //            W        C     S  S  S  S     S  S  S  S     S  S  S
 //            R  O  D  L  S  R  R  R  R  S  R  R  R  R  S  R  R  R  L  P
 //   D  U     I  M  R  A  R  C  C  C  C  R  C  C  C  C  R  C  C  C  A  R  I
-//   S  E  U  T  O  E  M  C  0  0  0  0  C  1  1  1  1  C  2  2  2  S  E  M
-//   T  M  P  E  D  L  P  0  N  R  A  S  1  N  R  A  S  2  N  R  S  T  D  M
-    {0,-1,-1, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1,-1,-1,-1,10,11,12},
-    {0, 1, 2, 3, 4 ,5 ,6 ,7, 8, 9,10,11,12,13,14,15,16,-1,-1,-1,-1,17,18,19},
-    {0,-1,-1,-1,-1, 1, 2, 3, 4, 5,-1, 6, 7, 8, 9,-1,10,11,12,13,14,15,16,17}
+//   S  E  U  T  O  E  M  C  0  0  0  0  C  1  1  1  1  C  2  2  2  S  E  M  B
+//   T  M  P  E  D  L  P  0  N  R  A  S  1  N  R  A  S  2  N  R  S  T  D  M  S
+    {0,-1,-1, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1,-1,-1,-1,10,11,12,13},
+    {0, 1, 2, 3, 4 ,5 ,6 ,7, 8, 9,10,11,12,13,14,15,16,-1,-1,-1,-1,17,18,19,20},
+    {0,-1,-1,-1,-1, 1, 2, 3, 4, 5,-1, 6, 7, 8, 9,-1,10,11,12,13,14,15,16,17,18}
   };
 
 }
 
+//===----------------------------------------------------------------------===//
+// Config register definitions
+//===----------------------------------------------------------------------===//
+
+#define R_02880C_DB_SHADER_CONTROL                    0x02880C
+#define   S_02880C_KILL_ENABLE(x)                      (((x) & 0x1) << 6)
+
+// These fields are the same for all shader types and families.
+#define   S_NUM_GPRS(x)                         (((x) & 0xFF) << 0)
+#define   S_STACK_SIZE(x)                       (((x) & 0xFF) << 8)
+//===----------------------------------------------------------------------===//
+// R600, R700 Registers
+//===----------------------------------------------------------------------===//
+
+#define R_028850_SQ_PGM_RESOURCES_PS                 0x028850
+#define R_028868_SQ_PGM_RESOURCES_VS                 0x028868
+
+//===----------------------------------------------------------------------===//
+// Evergreen, Northern Islands Registers
+//===----------------------------------------------------------------------===//
+
+#define R_028844_SQ_PGM_RESOURCES_PS                 0x028844
+#define R_028860_SQ_PGM_RESOURCES_VS                 0x028860
+#define R_028878_SQ_PGM_RESOURCES_GS                 0x028878
+#define R_0288D4_SQ_PGM_RESOURCES_LS                 0x0288d4
+
 #endif // R600DEFINES_H_
diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
index 53e6e51..7252235 100644
--- a/lib/Target/R600/R600ISelLowering.cpp
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -43,11 +43,25 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
   setOperationAction(ISD::AND,  MVT::v4i32, Expand);
   setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand);
   setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand);
+  setOperationAction(ISD::MUL,  MVT::v2i32, Expand);
+  setOperationAction(ISD::MUL,  MVT::v4i32, Expand);
+  setOperationAction(ISD::OR, MVT::v4i32, Expand);
+  setOperationAction(ISD::OR, MVT::v2i32, Expand);
   setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand);
+  setOperationAction(ISD::SHL, MVT::v4i32, Expand);
+  setOperationAction(ISD::SHL, MVT::v2i32, Expand);
+  setOperationAction(ISD::SRL, MVT::v4i32, Expand);
+  setOperationAction(ISD::SRL, MVT::v2i32, Expand);
+  setOperationAction(ISD::SRA, MVT::v4i32, Expand);
+  setOperationAction(ISD::SRA, MVT::v2i32, Expand);
+  setOperationAction(ISD::SUB, MVT::v4i32, Expand);
+  setOperationAction(ISD::SUB, MVT::v2i32, Expand);
   setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Expand);
   setOperationAction(ISD::UDIV, MVT::v4i32, Expand);
   setOperationAction(ISD::UREM, MVT::v4i32, Expand);
   setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
+  setOperationAction(ISD::XOR, MVT::v4i32, Expand);
+  setOperationAction(ISD::XOR, MVT::v2i32, Expand);
 
   setOperationAction(ISD::BR_CC, MVT::i32, Expand);
   setOperationAction(ISD::BR_CC, MVT::f32, Expand);
@@ -70,6 +84,9 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
   setOperationAction(ISD::SELECT, MVT::i32, Custom);
   setOperationAction(ISD::SELECT, MVT::f32, Custom);
 
+  setOperationAction(ISD::VSELECT, MVT::v4i32, Expand);
+  setOperationAction(ISD::VSELECT, MVT::v2i32, Expand);
+
   // Legalize loads and stores to the private address space.
   setOperationAction(ISD::LOAD, MVT::i32, Custom);
   setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
@@ -93,6 +110,7 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
   setTargetDAGCombine(ISD::SELECT_CC);
 
   setBooleanContents(ZeroOrNegativeOneBooleanContent);
+  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
   setSchedulingPreference(Sched::VLIW);
 }
 
diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp
index b232188..37150c4 100644
--- a/lib/Target/R600/R600InstrInfo.cpp
+++ b/lib/Target/R600/R600InstrInfo.cpp
@@ -13,6 +13,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "R600InstrInfo.h"
+#include "AMDGPU.h"
 #include "AMDGPUSubtarget.h"
 #include "AMDGPUTargetMachine.h"
 #include "R600Defines.h"
@@ -29,7 +30,8 @@ using namespace llvm;
 
 R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm)
   : AMDGPUInstrInfo(tm),
-    RI(tm, *this)
+    RI(tm, *this),
+    ST(tm.getSubtarget<AMDGPUSubtarget>())
   { }
 
 const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const {
@@ -139,6 +141,33 @@ bool R600InstrInfo::isALUInstr(unsigned Opcode) const {
           (TargetFlags & R600_InstFlag::OP3));
 }
 
+bool R600InstrInfo::isTransOnly(unsigned Opcode) const {
+  return (get(Opcode).TSFlags & R600_InstFlag::TRANS_ONLY);
+}
+
+bool R600InstrInfo::isTransOnly(const MachineInstr *MI) const {
+  return isTransOnly(MI->getOpcode());
+}
+
+bool R600InstrInfo::usesVertexCache(unsigned Opcode) const {
+  return ST.hasVertexCache() && IS_VTX(get(Opcode));
+}
+
+bool R600InstrInfo::usesVertexCache(const MachineInstr *MI) const {
+  const R600MachineFunctionInfo *MFI = MI->getParent()->getParent()->getInfo<R600MachineFunctionInfo>();
+  return MFI->ShaderType != ShaderType::COMPUTE && usesVertexCache(MI->getOpcode());
+}
+
+bool R600InstrInfo::usesTextureCache(unsigned Opcode) const {
+  return (!ST.hasVertexCache() && IS_VTX(get(Opcode))) || IS_TEX(get(Opcode));
+}
+
+bool R600InstrInfo::usesTextureCache(const MachineInstr *MI) const {
+  const R600MachineFunctionInfo *MFI = MI->getParent()->getParent()->getInfo<R600MachineFunctionInfo>();
+  return (MFI->ShaderType == ShaderType::COMPUTE && usesVertexCache(MI->getOpcode())) ||
+         usesTextureCache(MI->getOpcode());
+}
+
 bool
 R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts)
     const {
@@ -183,10 +212,19 @@ R600InstrInfo::canBundle(const std::vector<MachineInstr *> &MIs) const {
       int SrcIdx = getOperandIdx(MI->getOpcode(), OpTable[j][0]);
       if (SrcIdx < 0)
         break;
-      if (MI->getOperand(SrcIdx).getReg() == AMDGPU::ALU_CONST) {
+      unsigned Reg = MI->getOperand(SrcIdx).getReg();
+      if (Reg == AMDGPU::ALU_CONST) {
         unsigned Const = MI->getOperand(
             getOperandIdx(MI->getOpcode(), OpTable[j][1])).getImm();
         Consts.push_back(Const);
+        continue;
+      }
+      if (AMDGPU::R600_KC0RegClass.contains(Reg) ||
+          AMDGPU::R600_KC1RegClass.contains(Reg)) {
+        unsigned Index = RI.getEncodingValue(Reg) & 0xff;
+        unsigned Chan = RI.getHWRegChan(Reg);
+        Consts.push_back((Index << 2) | Chan);
+        continue;
       }
     }
   }
@@ -684,7 +722,8 @@ MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MB
   //scheduling to the backend, we can change the default to 0.
   MIB.addImm(1)        // $last
       .addReg(AMDGPU::PRED_SEL_OFF) // $pred_sel
-      .addImm(0);        // $literal
+      .addImm(0)         // $literal
+      .addImm(0);        // $bank_swizzle
 
   return MIB;
 }
diff --git a/lib/Target/R600/R600InstrInfo.h b/lib/Target/R600/R600InstrInfo.h
index dbae900..babe4b8 100644
--- a/lib/Target/R600/R600InstrInfo.h
+++ b/lib/Target/R600/R600InstrInfo.h
@@ -33,6 +33,7 @@ namespace llvm {
   class R600InstrInfo : public AMDGPUInstrInfo {
   private:
   const R600RegisterInfo RI;
+  const AMDGPUSubtarget &ST;
 
   int getBranchInstr(const MachineOperand &op) const;
 
@@ -53,6 +54,14 @@ namespace llvm {
   /// \returns true if this \p Opcode represents an ALU instruction.
   bool isALUInstr(unsigned Opcode) const;
 
+  bool isTransOnly(unsigned Opcode) const;
+  bool isTransOnly(const MachineInstr *MI) const;
+
+  bool usesVertexCache(unsigned Opcode) const;
+  bool usesVertexCache(const MachineInstr *MI) const;
+  bool usesTextureCache(unsigned Opcode) const;
+  bool usesTextureCache(const MachineInstr *MI) const;
+
   bool fitsConstReadLimitations(const std::vector<unsigned>&) const;
   bool canBundle(const std::vector<MachineInstr *> &) const;
 
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
index 663b41a..8f47523 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -13,11 +13,12 @@
 
 include "R600Intrinsics.td"
 
-class InstR600 <bits<11> inst, dag outs, dag ins, string asm, list<dag> pattern,
+class InstR600 <dag outs, dag ins, string asm, list<dag> pattern,
                 InstrItinClass itin>
     : AMDGPUInst <outs, ins, asm, pattern> {
 
   field bits<64> Inst;
+  bit TransOnly = 0;
   bit Trig = 0;
   bit Op3 = 0;
   bit isVector = 0;
@@ -25,9 +26,9 @@ class InstR600 <bits<11> inst, dag outs, dag ins, string asm, list<dag> pattern,
   bit Op1 = 0;
   bit Op2 = 0;
   bit HasNativeOperands = 0;
+  bit VTXInst = 0;
+  bit TEXInst = 0;
 
-  bits<11> op_code = inst;
-  //let Inst = inst;
   let Namespace = "AMDGPU";
   let OutOperandList = outs;
   let InOperandList = ins;
@@ -35,6 +36,7 @@ class InstR600 <bits<11> inst, dag outs, dag ins, string asm, list<dag> pattern,
   let Pattern = pattern;
   let Itinerary = itin;
 
+  let TSFlags{0} = TransOnly;
   let TSFlags{4} = Trig;
   let TSFlags{5} = Op3;
 
@@ -45,11 +47,12 @@ class InstR600 <bits<11> inst, dag outs, dag ins, string asm, list<dag> pattern,
   let TSFlags{9} = HasNativeOperands;
   let TSFlags{10} = Op1;
   let TSFlags{11} = Op2;
+  let TSFlags{12} = VTXInst;
+  let TSFlags{13} = TEXInst;
 }
 
 class InstR600ISA <dag outs, dag ins, string asm, list<dag> pattern> :
-    AMDGPUInst <outs, ins, asm, pattern> {
-  field bits<64> Inst;
+    InstR600 <outs, ins, asm, pattern, NullALU> {
 
   let Namespace = "AMDGPU";
 }
@@ -74,6 +77,9 @@ class InstFlag<string PM = "printOperand", int Default = 0>
 def SEL : OperandWithDefaultOps <i32, (ops (i32 -1))> {
   let PrintMethod = "printSel";
 }
+def BANK_SWIZZLE : OperandWithDefaultOps <i32, (ops (i32 0))> {
+  let PrintMethod = "printBankSwizzle";
+}
 
 def LITERAL : InstFlag<"printLiteral">;
 
@@ -137,7 +143,7 @@ class R600ALU_Word1 {
   field bits<32> Word1;
 
   bits<11> dst;
-  bits<3>  bank_swizzle = 0;
+  bits<3>  bank_swizzle;
   bits<1>  dst_rel;
   bits<1>  clamp;
 
@@ -346,15 +352,15 @@ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
 // and R600InstrInfo::getOperandIdx().
 class R600_1OP <bits<11> inst, string opName, list<dag> pattern,
                 InstrItinClass itin = AnyALU> :
-    InstR600 <0,
-              (outs R600_Reg32:$dst),
+    InstR600 <(outs R600_Reg32:$dst),
               (ins WRITE:$write, OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
                    R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel,
-                   LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
+                   LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal,
+                   BANK_SWIZZLE:$bank_swizzle),
               !strconcat("  ", opName,
-                   "$clamp $dst$write$dst_rel$omod, "
+                   "$last$clamp $dst$write$dst_rel$omod, "
                    "$src0_neg$src0_abs$src0$src0_abs$src0_rel, "
-                   "$literal $pred_sel$last"),
+                   "$pred_sel $bank_swizzle"),
               pattern,
               itin>,
     R600ALU_Word0,
@@ -385,18 +391,18 @@ class R600_1OP_Helper <bits<11> inst, string opName, SDPatternOperator node,
 // R600InstrInfo::buildDefaultInstruction(), and R600InstrInfo::getOperandIdx().
 class R600_2OP <bits<11> inst, string opName, list<dag> pattern,
                 InstrItinClass itin = AnyALU> :
-  InstR600 <inst,
-          (outs R600_Reg32:$dst),
+  InstR600 <(outs R600_Reg32:$dst),
           (ins UEM:$update_exec_mask, UP:$update_pred, WRITE:$write,
                OMOD:$omod, REL:$dst_rel, CLAMP:$clamp,
                R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel,
                R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, ABS:$src1_abs, SEL:$src1_sel,
-               LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
+               LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal,
+               BANK_SWIZZLE:$bank_swizzle),
           !strconcat("  ", opName,
-                "$clamp $update_exec_mask$update_pred$dst$write$dst_rel$omod, "
+                "$last$clamp $update_exec_mask$update_pred$dst$write$dst_rel$omod, "
                 "$src0_neg$src0_abs$src0$src0_abs$src0_rel, "
                 "$src1_neg$src1_abs$src1$src1_abs$src1_rel, "
-                "$literal $pred_sel$last"),
+                "$pred_sel $bank_swizzle"),
           pattern,
           itin>,
     R600ALU_Word0,
@@ -423,18 +429,19 @@ class R600_2OP_Helper <bits<11> inst, string opName, SDPatternOperator node,
 // R600InstrInfo::getOperandIdx().
 class R600_3OP <bits<5> inst, string opName, list<dag> pattern,
                 InstrItinClass itin = AnyALU> :
-  InstR600 <0,
-          (outs R600_Reg32:$dst),
+  InstR600 <(outs R600_Reg32:$dst),
           (ins REL:$dst_rel, CLAMP:$clamp,
                R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, SEL:$src0_sel,
                R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, SEL:$src1_sel,
                R600_Reg32:$src2, NEG:$src2_neg, REL:$src2_rel, SEL:$src2_sel,
-               LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal),
-          !strconcat("  ", opName, "$clamp $dst$dst_rel, "
+               LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal,
+               BANK_SWIZZLE:$bank_swizzle),
+          !strconcat("  ", opName, "$last$clamp $dst$dst_rel, "
                              "$src0_neg$src0$src0_rel, "
                              "$src1_neg$src1$src1_rel, "
                              "$src2_neg$src2$src2_rel, "
-                             "$literal $pred_sel$last"),
+                             "$pred_sel"
+                             "$bank_swizzle"),
           pattern,
           itin>,
     R600ALU_Word0,
@@ -450,8 +457,7 @@ class R600_3OP <bits<5> inst, string opName, list<dag> pattern,
 
 class R600_REDUCTION <bits<11> inst, dag ins, string asm, list<dag> pattern,
                       InstrItinClass itin = VecALU> :
-  InstR600 <inst,
-          (outs R600_Reg32:$dst),
+  InstR600 <(outs R600_Reg32:$dst),
           ins,
           asm,
           pattern,
@@ -459,8 +465,7 @@ class R600_REDUCTION <bits<11> inst, dag ins, string asm, list<dag> pattern,
 
 class R600_TEX <bits<11> inst, string opName, list<dag> pattern,
                 InstrItinClass itin = AnyALU> :
-  InstR600 <inst,
-          (outs R600_Reg128:$DST_GPR),
+  InstR600 <(outs R600_Reg128:$DST_GPR),
           (ins R600_Reg128:$SRC_GPR, i32imm:$RESOURCE_ID, i32imm:$SAMPLER_ID, i32imm:$textureTarget),
           !strconcat(opName, "$DST_GPR, $SRC_GPR, $RESOURCE_ID, $SAMPLER_ID, $textureTarget"),
           pattern,
@@ -481,11 +486,14 @@ class R600_TEX <bits<11> inst, string opName, list<dag> pattern,
     let FETCH_WHOLE_QUAD = 0;
     let ALT_CONST = 0;
     let SAMPLER_INDEX_MODE = 0;
+    let RESOURCE_INDEX_MODE = 0;
 
     let COORD_TYPE_X = 0;
     let COORD_TYPE_Y = 0;
     let COORD_TYPE_Z = 0;
     let COORD_TYPE_W = 0;
+
+    let TEXInst = 1;
   }
 
 } // End mayLoad = 1, mayStore = 0, hasSideEffects = 0
@@ -738,7 +746,9 @@ multiclass SteamOutputExportPattern<Instruction ExportInst,
       4095, imm:$mask, buf3inst, 0)>;
 }
 
-let usesCustomInserter = 1 in {
+// Export instructions should not be duplicated by the TailDuplication pass
+// (which assumes that duplicable instructions are affected by the exec mask).
+let usesCustomInserter = 1, isNotDuplicable = 1 in {
 
 class ExportSwzInst : InstR600ISA<(
     outs),
@@ -805,12 +815,15 @@ class CF_ALU_WORD1 {
   let Word1{31} = BARRIER;
 }
 
+def KCACHE : InstFlag<"printKCache">;
+
 class ALU_CLAUSE<bits<4> inst, string OpName> : AMDGPUInst <(outs),
-(ins i32imm:$ADDR, i32imm:$KCACHE_BANK0, i32imm:$KCACHE_BANK1, i32imm:$KCACHE_MODE0, i32imm:$KCACHE_MODE1,
-i32imm:$KCACHE_ADDR0, i32imm:$KCACHE_ADDR1, i32imm:$COUNT),
+(ins i32imm:$ADDR, i32imm:$KCACHE_BANK0, i32imm:$KCACHE_BANK1,
+KCACHE:$KCACHE_MODE0, KCACHE:$KCACHE_MODE1,
+i32imm:$KCACHE_ADDR0, i32imm:$KCACHE_ADDR1,
+i32imm:$COUNT),
 !strconcat(OpName, " $COUNT, @$ADDR, "
-"KC0[CB$KCACHE_BANK0:$KCACHE_ADDR0-$KCACHE_ADDR0+32]"
-", KC1[CB$KCACHE_BANK1:$KCACHE_ADDR1-$KCACHE_ADDR1+32]"),
+"KC0[$KCACHE_MODE0], KC1[$KCACHE_MODE1]"),
 [] >, CF_ALU_WORD0, CF_ALU_WORD1 {
   field bits<64> Inst;
 
@@ -823,109 +836,139 @@ i32imm:$KCACHE_ADDR0, i32imm:$KCACHE_ADDR1, i32imm:$COUNT),
   let Inst{63-32} = Word1;
 }
 
-class CF_WORD0 {
+class CF_WORD0_R600 {
   field bits<32> Word0;
 
-  bits<24> ADDR;
-  bits<3> JUMPTABLE_SEL;
+  bits<32> ADDR;
 
-  let Word0{23-0} = ADDR;
-  let Word0{26-24} = JUMPTABLE_SEL;
+  let Word0 = ADDR;
 }
 
-class CF_WORD1 {
+class CF_WORD1_R600 {
   field bits<32> Word1;
 
   bits<3> POP_COUNT;
   bits<5> CF_CONST;
   bits<2> COND;
-  bits<6> COUNT;
+  bits<3> COUNT;
+  bits<6> CALL_COUNT;
+  bits<1> COUNT_3;
+  bits<1> END_OF_PROGRAM;
   bits<1> VALID_PIXEL_MODE;
-  bits<8> CF_INST;
+  bits<7> CF_INST;
+  bits<1> WHOLE_QUAD_MODE;
   bits<1> BARRIER;
 
   let Word1{2-0} = POP_COUNT;
   let Word1{7-3} = CF_CONST;
   let Word1{9-8} = COND;
-  let Word1{15-10} = COUNT;
-  let Word1{20} = VALID_PIXEL_MODE;
-  let Word1{29-22} = CF_INST;
+  let Word1{12-10} = COUNT;
+  let Word1{18-13} = CALL_COUNT;
+  let Word1{19} = COUNT_3;
+  let Word1{21} = END_OF_PROGRAM;
+  let Word1{22} = VALID_PIXEL_MODE;
+  let Word1{29-23} = CF_INST;
+  let Word1{30} = WHOLE_QUAD_MODE;
   let Word1{31} = BARRIER;
 }
 
-class CF_CLAUSE <bits<8> inst, dag ins, string AsmPrint> : AMDGPUInst <(outs),
-ins, AsmPrint, [] >, CF_WORD0, CF_WORD1 {
+class CF_CLAUSE_R600 <bits<7> inst, dag ins, string AsmPrint> : AMDGPUInst <(outs),
+ins, AsmPrint, [] >, CF_WORD0_R600, CF_WORD1_R600 {
   field bits<64> Inst;
 
   let CF_INST = inst;
   let BARRIER = 1;
-  let JUMPTABLE_SEL = 0;
   let CF_CONST = 0;
   let VALID_PIXEL_MODE = 0;
   let COND = 0;
+  let CALL_COUNT = 0;
+  let COUNT_3 = 0;
+  let END_OF_PROGRAM = 0;
+  let WHOLE_QUAD_MODE = 0;
 
   let Inst{31-0} = Word0;
   let Inst{63-32} = Word1;
 }
 
-def CF_TC : CF_CLAUSE<1, (ins i32imm:$ADDR, i32imm:$COUNT),
-"TEX $COUNT @$ADDR"> {
-  let POP_COUNT = 0;
-}
-
-def CF_VC : CF_CLAUSE<2, (ins i32imm:$ADDR, i32imm:$COUNT),
-"VTX $COUNT @$ADDR"> {
-  let POP_COUNT = 0;
-}
+class CF_WORD0_EG {
+  field bits<32> Word0;
 
-def WHILE_LOOP : CF_CLAUSE<6, (ins i32imm:$ADDR), "LOOP_START_DX10 @$ADDR"> {
-  let POP_COUNT = 0;
-  let COUNT = 0;
-}
+  bits<24> ADDR;
+  bits<3> JUMPTABLE_SEL;
 
-def END_LOOP : CF_CLAUSE<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> {
-  let POP_COUNT = 0;
-  let COUNT = 0;
+  let Word0{23-0} = ADDR;
+  let Word0{26-24} = JUMPTABLE_SEL;
 }
 
-def LOOP_BREAK : CF_CLAUSE<9, (ins i32imm:$ADDR), "LOOP_BREAK @$ADDR"> {
-  let POP_COUNT = 0;
-  let COUNT = 0;
-}
+class CF_WORD1_EG {
+  field bits<32> Word1;
 
-def CF_CONTINUE : CF_CLAUSE<8, (ins i32imm:$ADDR), "CONTINUE @$ADDR"> {
-  let POP_COUNT = 0;
-  let COUNT = 0;
-}
+  bits<3> POP_COUNT;
+  bits<5> CF_CONST;
+  bits<2> COND;
+  bits<6> COUNT;
+  bits<1> VALID_PIXEL_MODE;
+  bits<1> END_OF_PROGRAM;
+  bits<8> CF_INST;
+  bits<1> BARRIER;
 
-def CF_JUMP : CF_CLAUSE<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "JUMP @$ADDR POP:$POP_COUNT"> {
-  let COUNT = 0;
+  let Word1{2-0} = POP_COUNT;
+  let Word1{7-3} = CF_CONST;
+  let Word1{9-8} = COND;
+  let Word1{15-10} = COUNT;
+  let Word1{20} = VALID_PIXEL_MODE;
+  let Word1{21} = END_OF_PROGRAM;
+  let Word1{29-22} = CF_INST;
+  let Word1{31} = BARRIER;
 }
 
-def CF_ELSE : CF_CLAUSE<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "ELSE @$ADDR POP:$POP_COUNT"> {
-  let COUNT = 0;
-}
+class CF_CLAUSE_EG <bits<8> inst, dag ins, string AsmPrint> : AMDGPUInst <(outs),
+ins, AsmPrint, [] >, CF_WORD0_EG, CF_WORD1_EG {
+  field bits<64> Inst;
 
-def CF_CALL_FS : CF_CLAUSE<19, (ins), "CALL_FS"> {
-  let ADDR = 0;
-  let COUNT = 0;
-  let POP_COUNT = 0;
-}
+  let CF_INST = inst;
+  let BARRIER = 1;
+  let JUMPTABLE_SEL = 0;
+  let CF_CONST = 0;
+  let VALID_PIXEL_MODE = 0;
+  let COND = 0;
+  let END_OF_PROGRAM = 0;
 
-def POP : CF_CLAUSE<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "POP @$ADDR POP:$POP_COUNT"> {
-  let COUNT = 0;
+  let Inst{31-0} = Word0;
+  let Inst{63-32} = Word1;
 }
 
 def CF_ALU : ALU_CLAUSE<8, "ALU">;
 def CF_ALU_PUSH_BEFORE : ALU_CLAUSE<9, "ALU_PUSH_BEFORE">;
 
-def STACK_SIZE : AMDGPUInst <(outs),
-(ins i32imm:$num), "nstack $num", [] > {
+def FETCH_CLAUSE : AMDGPUInst <(outs),
+(ins i32imm:$addr), "Fetch clause starting at $addr:", [] > {
   field bits<8> Inst;
   bits<8> num;
   let Inst = num;
 }
 
+def ALU_CLAUSE : AMDGPUInst <(outs),
+(ins i32imm:$addr), "ALU clause starting at $addr:", [] > {
+  field bits<8> Inst;
+  bits<8> num;
+  let Inst = num;
+}
+
+def LITERALS : AMDGPUInst <(outs),
+(ins LITERAL:$literal1, LITERAL:$literal2), "$literal1, $literal2", [] > {
+  field bits<64> Inst;
+  bits<32> literal1;
+  bits<32> literal2;
+
+  let Inst{31-0} = literal1;
+  let Inst{63-32} = literal2;
+}
+
+def PAD : AMDGPUInst <(outs), (ins), "PAD", [] > {
+  field bits<64> Inst;
+}
+
 let Predicates = [isR600toCayman] in {
 
 //===----------------------------------------------------------------------===//
@@ -944,58 +987,42 @@ def MIN : R600_2OP_Helper <0x4, "MIN", AMDGPUfmin>;
 // XXX: Use the defs in TargetSelectionDAG.td instead of intrinsics.
 def SETE : R600_2OP <
   0x08, "SETE",
-  [(set R600_Reg32:$dst,
-   (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO,
-             COND_EQ))]
+  [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_EQ))]
 >;
 
 def SGT : R600_2OP <
   0x09, "SETGT",
-  [(set R600_Reg32:$dst,
-   (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO,
-              COND_GT))]
+  [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_GT))]
 >;
 
 def SGE : R600_2OP <
   0xA, "SETGE",
-  [(set R600_Reg32:$dst,
-   (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO,
-              COND_GE))]
+  [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_GE))]
 >;
 
 def SNE : R600_2OP <
   0xB, "SETNE",
-  [(set R600_Reg32:$dst,
-   (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO,
-    COND_NE))]
+  [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_NE))]
 >;
 
 def SETE_DX10 : R600_2OP <
   0xC, "SETE_DX10",
-  [(set R600_Reg32:$dst,
-   (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0),
-    COND_EQ))]
+  [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_EQ))]
 >;
 
 def SETGT_DX10 : R600_2OP <
   0xD, "SETGT_DX10",
-  [(set R600_Reg32:$dst,
-   (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0),
-    COND_GT))]
+  [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_GT))]
 >;
 
 def SETGE_DX10 : R600_2OP <
   0xE, "SETGE_DX10",
-  [(set R600_Reg32:$dst,
-   (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0),
-    COND_GE))]
+  [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_GE))]
 >;
 
 def SETNE_DX10 : R600_2OP <
   0xF, "SETNE_DX10",
-  [(set R600_Reg32:$dst,
-    (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, (i32 -1), (i32 0),
-     COND_NE))]
+  [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_NE))]
 >;
 
 def FRACT : R600_1OP_Helper <0x10, "FRACT", AMDGPUfract>;
@@ -1053,38 +1080,32 @@ def MIN_UINT : R600_2OP_Helper <0x39, "MIN_UINT", AMDGPUumin>;
 
 def SETE_INT : R600_2OP <
   0x3A, "SETE_INT",
-  [(set (i32 R600_Reg32:$dst),
-   (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETEQ))]
+  [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETEQ))]
 >;
 
 def SETGT_INT : R600_2OP <
   0x3B, "SETGT_INT",
-  [(set (i32 R600_Reg32:$dst),
-   (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETGT))]
+  [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETGT))]
 >;
 
 def SETGE_INT : R600_2OP <
   0x3C, "SETGE_INT",
-  [(set (i32 R600_Reg32:$dst),
-   (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETGE))]
+  [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETGE))]
 >;
 
 def SETNE_INT : R600_2OP <
   0x3D, "SETNE_INT",
-  [(set (i32 R600_Reg32:$dst),
-   (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETNE))]
+  [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETNE))]
 >;
 
 def SETGT_UINT : R600_2OP <
   0x3E, "SETGT_UINT",
-  [(set (i32 R600_Reg32:$dst),
-   (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUGT))]
+  [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETUGT))]
 >;
 
 def SETGE_UINT : R600_2OP <
   0x3F, "SETGE_UINT",
-  [(set (i32 R600_Reg32:$dst),
-    (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUGE))]
+  [(set i32:$dst, (selectcc i32:$src0, i32:$src1, -1, 0, SETUGE))]
 >;
 
 def PRED_SETE_INT : R600_2OP <0x42, "PRED_SETE_INT", []>;
@@ -1094,26 +1115,17 @@ def PRED_SETNE_INT : R600_2OP <0x45, "PRED_SETNE_INT", []>;
 
 def CNDE_INT : R600_3OP <
   0x1C, "CNDE_INT",
-  [(set (i32 R600_Reg32:$dst),
-   (selectcc (i32 R600_Reg32:$src0), 0,
-       (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2),
-       COND_EQ))]
+  [(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_EQ))]
 >;
 
 def CNDGE_INT : R600_3OP <
   0x1E, "CNDGE_INT",
-  [(set (i32 R600_Reg32:$dst),
-   (selectcc (i32 R600_Reg32:$src0), 0,
-       (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2),
-       COND_GE))]
+  [(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_GE))]
 >;
 
 def CNDGT_INT : R600_3OP <
   0x1D, "CNDGT_INT",
-  [(set (i32 R600_Reg32:$dst),
-   (selectcc (i32 R600_Reg32:$src0), 0,
-       (i32 R600_Reg32:$src1), (i32 R600_Reg32:$src2),
-       COND_GT))]
+  [(set i32:$dst, (selectcc i32:$src0, 0, i32:$src1, i32:$src2, COND_GT))]
 >;
 
 //===----------------------------------------------------------------------===//
@@ -1122,7 +1134,7 @@ def CNDGT_INT : R600_3OP <
 
 def TEX_LD : R600_TEX <
   0x03, "TEX_LD",
-  [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txf R600_Reg128:$SRC_GPR,
+  [(set v4f32:$DST_GPR, (int_AMDGPU_txf v4f32:$SRC_GPR,
       imm:$OFFSET_X, imm:$OFFSET_Y, imm:$OFFSET_Z, imm:$RESOURCE_ID,
       imm:$SAMPLER_ID, imm:$textureTarget))]
 > {
@@ -1135,19 +1147,19 @@ let InOperandList = (ins R600_Reg128:$SRC_GPR, i32imm:$OFFSET_X,
 
 def TEX_GET_TEXTURE_RESINFO : R600_TEX <
   0x04, "TEX_GET_TEXTURE_RESINFO",
-  [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txq R600_Reg128:$SRC_GPR,
+  [(set v4f32:$DST_GPR, (int_AMDGPU_txq v4f32:$SRC_GPR,
       imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
 >;
 
 def TEX_GET_GRADIENTS_H : R600_TEX <
   0x07, "TEX_GET_GRADIENTS_H",
-  [(set R600_Reg128:$DST_GPR, (int_AMDGPU_ddx R600_Reg128:$SRC_GPR,
+  [(set v4f32:$DST_GPR, (int_AMDGPU_ddx v4f32:$SRC_GPR,
       imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
 >;
 
 def TEX_GET_GRADIENTS_V : R600_TEX <
   0x08, "TEX_GET_GRADIENTS_V",
-  [(set R600_Reg128:$DST_GPR, (int_AMDGPU_ddy R600_Reg128:$SRC_GPR,
+  [(set v4f32:$DST_GPR, (int_AMDGPU_ddy v4f32:$SRC_GPR,
       imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
 >;
 
@@ -1163,37 +1175,37 @@ def TEX_SET_GRADIENTS_V : R600_TEX <
 
 def TEX_SAMPLE : R600_TEX <
   0x10, "TEX_SAMPLE",
-  [(set R600_Reg128:$DST_GPR, (int_AMDGPU_tex R600_Reg128:$SRC_GPR,
+  [(set v4f32:$DST_GPR, (int_AMDGPU_tex v4f32:$SRC_GPR,
       imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
 >;
 
 def TEX_SAMPLE_C : R600_TEX <
   0x18, "TEX_SAMPLE_C",
-  [(set R600_Reg128:$DST_GPR, (int_AMDGPU_tex R600_Reg128:$SRC_GPR,
+  [(set v4f32:$DST_GPR, (int_AMDGPU_tex v4f32:$SRC_GPR,
       imm:$RESOURCE_ID, imm:$SAMPLER_ID, TEX_SHADOW:$textureTarget))]
 >;
 
 def TEX_SAMPLE_L : R600_TEX <
   0x11, "TEX_SAMPLE_L",
-  [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txl R600_Reg128:$SRC_GPR,
+  [(set v4f32:$DST_GPR, (int_AMDGPU_txl v4f32:$SRC_GPR,
       imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
 >;
 
 def TEX_SAMPLE_C_L : R600_TEX <
   0x19, "TEX_SAMPLE_C_L",
-  [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txl R600_Reg128:$SRC_GPR,
+  [(set v4f32:$DST_GPR, (int_AMDGPU_txl v4f32:$SRC_GPR,
       imm:$RESOURCE_ID, imm:$SAMPLER_ID, TEX_SHADOW:$textureTarget))]
 >;
 
 def TEX_SAMPLE_LB : R600_TEX <
   0x12, "TEX_SAMPLE_LB",
-  [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txb R600_Reg128:$SRC_GPR,
+  [(set v4f32:$DST_GPR, (int_AMDGPU_txb v4f32:$SRC_GPR,
       imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))]
 >;
 
 def TEX_SAMPLE_C_LB : R600_TEX <
   0x1A, "TEX_SAMPLE_C_LB",
-  [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txb R600_Reg128:$SRC_GPR,
+  [(set v4f32:$DST_GPR, (int_AMDGPU_txb v4f32:$SRC_GPR,
       imm:$RESOURCE_ID, imm:$SAMPLER_ID, TEX_SHADOW:$textureTarget))]
 >;
 
@@ -1223,32 +1235,22 @@ class MULADD_Common <bits<5> inst> : R600_3OP <
 
 class MULADD_IEEE_Common <bits<5> inst> : R600_3OP <
   inst, "MULADD_IEEE",
-  [(set (f32 R600_Reg32:$dst),
-   (fadd (fmul R600_Reg32:$src0, R600_Reg32:$src1), R600_Reg32:$src2))]
+  [(set f32:$dst, (fadd (fmul f32:$src0, f32:$src1), f32:$src2))]
 >;
 
 class CNDE_Common <bits<5> inst> : R600_3OP <
   inst, "CNDE",
-  [(set R600_Reg32:$dst,
-   (selectcc (f32 R600_Reg32:$src0), FP_ZERO,
-       (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2),
-       COND_EQ))]
+  [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_EQ))]
 >;
 
 class CNDGT_Common <bits<5> inst> : R600_3OP <
   inst, "CNDGT",
-  [(set R600_Reg32:$dst,
-   (selectcc (f32 R600_Reg32:$src0), FP_ZERO,
-       (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2),
-       COND_GT))]
+  [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_GT))]
 >;
 
 class CNDGE_Common <bits<5> inst> : R600_3OP <
   inst, "CNDGE",
-  [(set R600_Reg32:$dst,
-   (selectcc (f32 R600_Reg32:$src0), FP_ZERO,
-       (f32 R600_Reg32:$src1), (f32 R600_Reg32:$src2),
-       COND_GE))]
+  [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_GE))]
 >;
 
 multiclass DOT4_Common <bits<11> inst> {
@@ -1256,7 +1258,7 @@ multiclass DOT4_Common <bits<11> inst> {
   def _pseudo : R600_REDUCTION <inst,
     (ins R600_Reg128:$src0, R600_Reg128:$src1),
     "DOT4 $dst $src0, $src1",
-    [(set R600_Reg32:$dst, (int_AMDGPU_dp4 R600_Reg128:$src0, R600_Reg128:$src1))]
+    [(set f32:$dst, (int_AMDGPU_dp4 v4f32:$src0, v4f32:$src1))]
   >;
 
   def _real : R600_2OP <inst, "DOT4", []>;
@@ -1266,11 +1268,10 @@ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
 multiclass CUBE_Common <bits<11> inst> {
 
   def _pseudo : InstR600 <
-    inst,
     (outs R600_Reg128:$dst),
     (ins R600_Reg128:$src),
     "CUBE $dst $src",
-    [(set R600_Reg128:$dst, (int_AMDGPU_cube R600_Reg128:$src))],
+    [(set v4f32:$dst, (int_AMDGPU_cube v4f32:$src))],
     VecALU
   > {
     let isPseudo = 1;
@@ -1282,23 +1283,38 @@ multiclass CUBE_Common <bits<11> inst> {
 
 class EXP_IEEE_Common <bits<11> inst> : R600_1OP_Helper <
   inst, "EXP_IEEE", fexp2
->;
+> {
+  let TransOnly = 1;
+  let Itinerary = TransALU;
+}
 
 class FLT_TO_INT_Common <bits<11> inst> : R600_1OP_Helper <
   inst, "FLT_TO_INT", fp_to_sint
->;
+> {
+  let TransOnly = 1;
+  let Itinerary = TransALU;
+}
 
 class INT_TO_FLT_Common <bits<11> inst> : R600_1OP_Helper <
   inst, "INT_TO_FLT", sint_to_fp
->;
+> {
+  let TransOnly = 1;
+  let Itinerary = TransALU;
+}
 
 class FLT_TO_UINT_Common <bits<11> inst> : R600_1OP_Helper <
   inst, "FLT_TO_UINT", fp_to_uint
->;
+> {
+  let TransOnly = 1;
+  let Itinerary = TransALU;
+}
 
 class UINT_TO_FLT_Common <bits<11> inst> : R600_1OP_Helper <
   inst, "UINT_TO_FLT", uint_to_fp
->;
+> {
+  let TransOnly = 1;
+  let Itinerary = TransALU;
+}
 
 class LOG_CLAMPED_Common <bits<11> inst> : R600_1OP <
   inst, "LOG_CLAMPED", []
@@ -1306,50 +1322,84 @@ class LOG_CLAMPED_Common <bits<11> inst> : R600_1OP <
 
 class LOG_IEEE_Common <bits<11> inst> : R600_1OP_Helper <
   inst, "LOG_IEEE", flog2
->;
+> {
+  let TransOnly = 1;
+  let Itinerary = TransALU;
+}
 
 class LSHL_Common <bits<11> inst> : R600_2OP_Helper <inst, "LSHL", shl>;
 class LSHR_Common <bits<11> inst> : R600_2OP_Helper <inst, "LSHR", srl>;
 class ASHR_Common <bits<11> inst> : R600_2OP_Helper <inst, "ASHR", sra>;
 class MULHI_INT_Common <bits<11> inst> : R600_2OP_Helper <
   inst, "MULHI_INT", mulhs
->;
+> {
+  let TransOnly = 1;
+  let Itinerary = TransALU;
+}
 class MULHI_UINT_Common <bits<11> inst> : R600_2OP_Helper <
   inst, "MULHI", mulhu
->;
+> {
+  let TransOnly = 1;
+  let Itinerary = TransALU;
+}
 class MULLO_INT_Common <bits<11> inst> : R600_2OP_Helper <
   inst, "MULLO_INT", mul
->;
-class MULLO_UINT_Common <bits<11> inst> : R600_2OP <inst, "MULLO_UINT", []>;
+> {
+  let TransOnly = 1;
+  let Itinerary = TransALU;
+}
+class MULLO_UINT_Common <bits<11> inst> : R600_2OP <inst, "MULLO_UINT", []> {
+  let TransOnly = 1;
+  let Itinerary = TransALU;
+}
 
 class RECIP_CLAMPED_Common <bits<11> inst> : R600_1OP <
   inst, "RECIP_CLAMPED", []
->;
+> {
+  let TransOnly = 1;
+  let Itinerary = TransALU;
+}
 
 class RECIP_IEEE_Common <bits<11> inst> : R600_1OP <
-  inst, "RECIP_IEEE", [(set R600_Reg32:$dst, (fdiv FP_ONE, R600_Reg32:$src0))]
->;
+  inst, "RECIP_IEEE", [(set f32:$dst, (fdiv FP_ONE, f32:$src0))]
+> {
+  let TransOnly = 1;
+  let Itinerary = TransALU;
+}
 
 class RECIP_UINT_Common <bits<11> inst> : R600_1OP_Helper <
   inst, "RECIP_UINT", AMDGPUurecip
->;
+> {
+  let TransOnly = 1;
+  let Itinerary = TransALU;
+}
 
 class RECIPSQRT_CLAMPED_Common <bits<11> inst> : R600_1OP_Helper <
   inst, "RECIPSQRT_CLAMPED", int_AMDGPU_rsq
->;
+> {
+  let TransOnly = 1;
+  let Itinerary = TransALU;
+}
 
 class RECIPSQRT_IEEE_Common <bits<11> inst> : R600_1OP <
   inst, "RECIPSQRT_IEEE", []
->;
+> {
+  let TransOnly = 1;
+  let Itinerary = TransALU;
+}
 
 class SIN_Common <bits<11> inst> : R600_1OP <
   inst, "SIN", []>{
   let Trig = 1;
+  let TransOnly = 1;
+  let Itinerary = TransALU;
 }
 
 class COS_Common <bits<11> inst> : R600_1OP <
   inst, "COS", []> {
   let Trig = 1;
+  let TransOnly = 1;
+  let Itinerary = TransALU;
 }
 
 //===----------------------------------------------------------------------===//
@@ -1358,19 +1408,20 @@ class COS_Common <bits<11> inst> : R600_1OP <
 
 multiclass DIV_Common <InstR600 recip_ieee> {
 def : Pat<
-  (int_AMDGPU_div R600_Reg32:$src0, R600_Reg32:$src1),
-  (MUL_IEEE R600_Reg32:$src0, (recip_ieee R600_Reg32:$src1))
+  (int_AMDGPU_div f32:$src0, f32:$src1),
+  (MUL_IEEE $src0, (recip_ieee $src1))
 >;
 
 def : Pat<
-  (fdiv R600_Reg32:$src0, R600_Reg32:$src1),
-  (MUL_IEEE R600_Reg32:$src0, (recip_ieee R600_Reg32:$src1))
+  (fdiv f32:$src0, f32:$src1),
+  (MUL_IEEE $src0, (recip_ieee $src1))
 >;
 }
 
-class TGSI_LIT_Z_Common <InstR600 mul_lit, InstR600 log_clamped, InstR600 exp_ieee> : Pat <
-  (int_TGSI_lit_z R600_Reg32:$src_x, R600_Reg32:$src_y, R600_Reg32:$src_w),
-  (exp_ieee (mul_lit (log_clamped (MAX R600_Reg32:$src_y, (f32 ZERO))), R600_Reg32:$src_w, R600_Reg32:$src_x))
+class TGSI_LIT_Z_Common <InstR600 mul_lit, InstR600 log_clamped, InstR600 exp_ieee>
+  : Pat <
+  (int_TGSI_lit_z f32:$src_x, f32:$src_y, f32:$src_w),
+  (exp_ieee (mul_lit (log_clamped (MAX $src_y, (f32 ZERO))), $src_w, $src_x))
 >;
 
 //===----------------------------------------------------------------------===//
@@ -1410,14 +1461,13 @@ let Predicates = [isR600] in {
   def RECIP_UINT_r600 : RECIP_UINT_Common <0x78>;
 
   defm DIV_r600 : DIV_Common<RECIP_IEEE_r600>;
-  def : POW_Common <LOG_IEEE_r600, EXP_IEEE_r600, MUL, R600_Reg32>;
+  def : POW_Common <LOG_IEEE_r600, EXP_IEEE_r600, MUL>;
   def TGSI_LIT_Z_r600 : TGSI_LIT_Z_Common<MUL_LIT_r600, LOG_CLAMPED_r600, EXP_IEEE_r600>;
 
-  def : Pat<(fsqrt R600_Reg32:$src),
-    (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_r600 R600_Reg32:$src))>;
+  def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_r600 $src))>;
 
   def R600_ExportSwz : ExportSwzInst {
-    let Word1{20-17} = 1; // BURST_COUNT
+    let Word1{20-17} = 0; // BURST_COUNT
     let Word1{21} = eop;
     let Word1{22} = 1; // VALID_PIXEL_MODE
     let Word1{30-23} = inst;
@@ -1426,25 +1476,77 @@ let Predicates = [isR600] in {
   defm : ExportPattern<R600_ExportSwz, 39>;
 
   def R600_ExportBuf : ExportBufInst {
-    let Word1{20-17} = 1; // BURST_COUNT
+    let Word1{20-17} = 0; // BURST_COUNT
     let Word1{21} = eop;
     let Word1{22} = 1; // VALID_PIXEL_MODE
     let Word1{30-23} = inst;
     let Word1{31} = 1; // BARRIER
   }
   defm : SteamOutputExportPattern<R600_ExportBuf, 0x20, 0x21, 0x22, 0x23>;
+
+  def CF_TC_R600 : CF_CLAUSE_R600<1, (ins i32imm:$ADDR, i32imm:$COUNT),
+  "TEX $COUNT @$ADDR"> {
+    let POP_COUNT = 0;
+  }
+  def CF_VC_R600 : CF_CLAUSE_R600<2, (ins i32imm:$ADDR, i32imm:$COUNT),
+  "VTX $COUNT @$ADDR"> {
+    let POP_COUNT = 0;
+  }
+  def WHILE_LOOP_R600 : CF_CLAUSE_R600<6, (ins i32imm:$ADDR),
+  "LOOP_START_DX10 @$ADDR"> {
+    let POP_COUNT = 0;
+    let COUNT = 0;
+  }
+  def END_LOOP_R600 : CF_CLAUSE_R600<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> {
+    let POP_COUNT = 0;
+    let COUNT = 0;
+  }
+  def LOOP_BREAK_R600 : CF_CLAUSE_R600<9, (ins i32imm:$ADDR),
+  "LOOP_BREAK @$ADDR"> {
+    let POP_COUNT = 0;
+    let COUNT = 0;
+  }
+  def CF_CONTINUE_R600 : CF_CLAUSE_R600<8, (ins i32imm:$ADDR),
+  "CONTINUE @$ADDR"> {
+    let POP_COUNT = 0;
+    let COUNT = 0;
+  }
+  def CF_JUMP_R600 : CF_CLAUSE_R600<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
+  "JUMP @$ADDR POP:$POP_COUNT"> {
+    let COUNT = 0;
+  }
+  def CF_ELSE_R600 : CF_CLAUSE_R600<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
+  "ELSE @$ADDR POP:$POP_COUNT"> {
+    let COUNT = 0;
+  }
+  def CF_CALL_FS_R600 : CF_CLAUSE_R600<19, (ins), "CALL_FS"> {
+    let ADDR = 0;
+    let COUNT = 0;
+    let POP_COUNT = 0;
+  }
+  def POP_R600 : CF_CLAUSE_R600<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
+  "POP @$ADDR POP:$POP_COUNT"> {
+    let COUNT = 0;
+  }
+  def CF_END_R600 : CF_CLAUSE_R600<0, (ins), "CF_END"> {
+    let COUNT = 0;
+    let POP_COUNT = 0;
+    let ADDR = 0;
+    let END_OF_PROGRAM = 1;
+  }
+
 }
 
 // Helper pattern for normalizing inputs to triginomic instructions for R700+
 // cards.
 class COS_PAT <InstR600 trig> : Pat<
-  (fcos R600_Reg32:$src),
-  (trig (MUL_IEEE (MOV_IMM_I32 CONST.TWO_PI_INV), R600_Reg32:$src))
+  (fcos f32:$src),
+  (trig (MUL_IEEE (MOV_IMM_I32 CONST.TWO_PI_INV), $src))
 >;
 
 class SIN_PAT <InstR600 trig> : Pat<
-  (fsin R600_Reg32:$src),
-  (trig (MUL_IEEE (MOV_IMM_I32 CONST.TWO_PI_INV), R600_Reg32:$src))
+  (fsin f32:$src),
+  (trig (MUL_IEEE (MOV_IMM_I32 CONST.TWO_PI_INV), $src))
 >;
 
 //===----------------------------------------------------------------------===//
@@ -1482,11 +1584,10 @@ def RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>;
 def SIN_eg : SIN_Common<0x8D>;
 def COS_eg : COS_Common<0x8E>;
 
-def : POW_Common <LOG_IEEE_eg, EXP_IEEE_eg, MUL, R600_Reg32>;
+def : POW_Common <LOG_IEEE_eg, EXP_IEEE_eg, MUL>;
 def : SIN_PAT <SIN_eg>;
 def : COS_PAT <COS_eg>;
-def : Pat<(fsqrt R600_Reg32:$src),
-  (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_eg R600_Reg32:$src))>;
+def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_eg $src))>;
 } // End Predicates = [isEG]
 
 //===----------------------------------------------------------------------===//
@@ -1510,15 +1611,17 @@ let Predicates = [isEGorCayman] in {
   // (16,8)           = (Input <<  8) >> 24  = (Input &  0xffffff)   >> 16
   // (24,8)           = (Input <<  0) >> 24  = (Input &  0xffffffff) >> 24
   def BFE_UINT_eg : R600_3OP <0x4, "BFE_UINT",
-    [(set R600_Reg32:$dst, (int_AMDIL_bit_extract_u32 R600_Reg32:$src0,
-                                                      R600_Reg32:$src1,
-                                                      R600_Reg32:$src2))],
+    [(set i32:$dst, (int_AMDIL_bit_extract_u32 i32:$src0, i32:$src1,
+                                               i32:$src2))],
     VecALU
   >;
+  def : BFEPattern <BFE_UINT_eg>;
+
+  def BFI_INT_eg : R600_3OP <0x06, "BFI_INT", [], VecALU>;
+  defm : BFIPatterns <BFI_INT_eg>;
 
   def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT",
-    [(set R600_Reg32:$dst, (AMDGPUbitalign R600_Reg32:$src0, R600_Reg32:$src1,
-                                          R600_Reg32:$src2))],
+    [(set i32:$dst, (AMDGPUbitalign i32:$src0, i32:$src1, i32:$src2))],
     VecALU
   >;
 
@@ -1563,14 +1666,15 @@ let hasSideEffects = 1 in {
   // XXX: Lowering SELECT_CC will sometimes generate fp_to_[su]int nodes,
   // which do not need to be truncated since the fp values are 0.0f or 1.0f.
   // We should look into handling these cases separately.
-  def : Pat<(fp_to_sint R600_Reg32:$src0),
-    (FLT_TO_INT_eg (TRUNC R600_Reg32:$src0))>;
+  def : Pat<(fp_to_sint f32:$src0), (FLT_TO_INT_eg (TRUNC $src0))>;
+
+  def : Pat<(fp_to_uint f32:$src0), (FLT_TO_UINT_eg (TRUNC $src0))>;
 
-  def : Pat<(fp_to_uint R600_Reg32:$src0),
-    (FLT_TO_UINT_eg (TRUNC R600_Reg32:$src0))>;
+  // SHA-256 Patterns
+  def : SHA256MaPattern <BFI_INT_eg, XOR_INT>;
 
   def EG_ExportSwz : ExportSwzInst {
-    let Word1{19-16} = 1; // BURST_COUNT
+    let Word1{19-16} = 0; // BURST_COUNT
     let Word1{20} = 1; // VALID_PIXEL_MODE
     let Word1{21} = eop;
     let Word1{29-22} = inst;
@@ -1580,7 +1684,7 @@ let hasSideEffects = 1 in {
   defm : ExportPattern<EG_ExportSwz, 83>;
 
   def EG_ExportBuf : ExportBufInst {
-    let Word1{19-16} = 1; // BURST_COUNT
+    let Word1{19-16} = 0; // BURST_COUNT
     let Word1{20} = 1; // VALID_PIXEL_MODE
     let Word1{21} = eop;
     let Word1{29-22} = inst;
@@ -1589,6 +1693,57 @@ let hasSideEffects = 1 in {
   }
   defm : SteamOutputExportPattern<EG_ExportBuf, 0x40, 0x41, 0x42, 0x43>;
 
+  def CF_TC_EG : CF_CLAUSE_EG<1, (ins i32imm:$ADDR, i32imm:$COUNT),
+  "TEX $COUNT @$ADDR"> {
+    let POP_COUNT = 0;
+  }
+  def CF_VC_EG : CF_CLAUSE_EG<2, (ins i32imm:$ADDR, i32imm:$COUNT),
+  "VTX $COUNT @$ADDR"> {
+    let POP_COUNT = 0;
+  }
+  def WHILE_LOOP_EG : CF_CLAUSE_EG<6, (ins i32imm:$ADDR),
+  "LOOP_START_DX10 @$ADDR"> {
+    let POP_COUNT = 0;
+    let COUNT = 0;
+  }
+  def END_LOOP_EG : CF_CLAUSE_EG<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> {
+    let POP_COUNT = 0;
+    let COUNT = 0;
+  }
+  def LOOP_BREAK_EG : CF_CLAUSE_EG<9, (ins i32imm:$ADDR),
+  "LOOP_BREAK @$ADDR"> {
+    let POP_COUNT = 0;
+    let COUNT = 0;
+  }
+  def CF_CONTINUE_EG : CF_CLAUSE_EG<8, (ins i32imm:$ADDR),
+  "CONTINUE @$ADDR"> {
+    let POP_COUNT = 0;
+    let COUNT = 0;
+  }
+  def CF_JUMP_EG : CF_CLAUSE_EG<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
+  "JUMP @$ADDR POP:$POP_COUNT"> {
+    let COUNT = 0;
+  }
+  def CF_ELSE_EG : CF_CLAUSE_EG<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
+  "ELSE @$ADDR POP:$POP_COUNT"> {
+    let COUNT = 0;
+  }
+  def CF_CALL_FS_EG : CF_CLAUSE_EG<19, (ins), "CALL_FS"> {
+    let ADDR = 0;
+    let COUNT = 0;
+    let POP_COUNT = 0;
+  }
+  def POP_EG : CF_CLAUSE_EG<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
+  "POP @$ADDR POP:$POP_COUNT"> {
+    let COUNT = 0;
+  }
+  def CF_END_EG :  CF_CLAUSE_EG<0, (ins), "CF_END"> {
+    let COUNT = 0;
+    let POP_COUNT = 0;
+    let ADDR = 0;
+    let END_OF_PROGRAM = 1;
+  }
+
 //===----------------------------------------------------------------------===//
 // Memory read/write instructions
 //===----------------------------------------------------------------------===//
@@ -1618,14 +1773,14 @@ class RAT_WRITE_CACHELESS_eg <dag ins, bits<4> comp_mask, string name,
 def RAT_WRITE_CACHELESS_32_eg : RAT_WRITE_CACHELESS_eg <
   (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
   0x1, "RAT_WRITE_CACHELESS_32_eg",
-  [(global_store (i32 R600_TReg32_X:$rw_gpr), R600_TReg32_X:$index_gpr)]
+  [(global_store i32:$rw_gpr, i32:$index_gpr)]
 >;
 
 //128-bit store
 def RAT_WRITE_CACHELESS_128_eg : RAT_WRITE_CACHELESS_eg <
   (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr, InstFlag:$eop),
   0xf, "RAT_WRITE_CACHELESS_128",
-  [(global_store (v4i32 R600_Reg128:$rw_gpr), R600_TReg32_X:$index_gpr)]
+  [(global_store v4i32:$rw_gpr, i32:$index_gpr)]
 >;
 
 class VTX_READ_eg <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
@@ -1679,6 +1834,8 @@ class VTX_READ_eg <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
   // VTX_WORD3 (Padding)
   //
   // Inst{127-96} = 0;
+
+  let VTXInst = 1;
 }
 
 class VTX_READ_8_eg <bits<8> buffer_id, list<dag> pattern>
@@ -1748,19 +1905,19 @@ class VTX_READ_128_eg <bits<8> buffer_id, list<dag> pattern>
 //===----------------------------------------------------------------------===//
 
 def VTX_READ_PARAM_8_eg : VTX_READ_8_eg <0,
-  [(set (i32 R600_TReg32_X:$dst), (load_param_zexti8 ADDRVTX_READ:$ptr))]
+  [(set i32:$dst, (load_param_zexti8 ADDRVTX_READ:$ptr))]
 >;
 
 def VTX_READ_PARAM_16_eg : VTX_READ_16_eg <0,
-  [(set (i32 R600_TReg32_X:$dst), (load_param_zexti16 ADDRVTX_READ:$ptr))]
+  [(set i32:$dst, (load_param_zexti16 ADDRVTX_READ:$ptr))]
 >;
 
 def VTX_READ_PARAM_32_eg : VTX_READ_32_eg <0,
-  [(set (i32 R600_TReg32_X:$dst), (load_param ADDRVTX_READ:$ptr))]
+  [(set i32:$dst, (load_param ADDRVTX_READ:$ptr))]
 >;
 
 def VTX_READ_PARAM_128_eg : VTX_READ_128_eg <0,
-  [(set (v4i32 R600_Reg128:$dst), (load_param ADDRVTX_READ:$ptr))]
+  [(set v4i32:$dst, (load_param ADDRVTX_READ:$ptr))]
 >;
 
 //===----------------------------------------------------------------------===//
@@ -1769,17 +1926,17 @@ def VTX_READ_PARAM_128_eg : VTX_READ_128_eg <0,
 
 // 8-bit reads
 def VTX_READ_GLOBAL_8_eg : VTX_READ_8_eg <1,
-  [(set (i32 R600_TReg32_X:$dst), (zextloadi8_global ADDRVTX_READ:$ptr))]
+  [(set i32:$dst, (zextloadi8_global ADDRVTX_READ:$ptr))]
 >;
 
 // 32-bit reads
 def VTX_READ_GLOBAL_32_eg : VTX_READ_32_eg <1,
-  [(set (i32 R600_TReg32_X:$dst), (global_load ADDRVTX_READ:$ptr))]
+  [(set i32:$dst, (global_load ADDRVTX_READ:$ptr))]
 >;
 
 // 128-bit reads
 def VTX_READ_GLOBAL_128_eg : VTX_READ_128_eg <1,
-  [(set (v4i32 R600_Reg128:$dst), (global_load ADDRVTX_READ:$ptr))]
+  [(set v4i32:$dst, (global_load ADDRVTX_READ:$ptr))]
 >;
 
 //===----------------------------------------------------------------------===//
@@ -1788,7 +1945,7 @@ def VTX_READ_GLOBAL_128_eg : VTX_READ_128_eg <1,
 //===----------------------------------------------------------------------===//
 
 def CONSTANT_LOAD_eg : VTX_READ_32_eg <1,
-  [(set (i32 R600_TReg32_X:$dst), (constant_load ADDRVTX_READ:$ptr))]
+  [(set i32:$dst, (constant_load ADDRVTX_READ:$ptr))]
 >;
 
 }
@@ -1818,22 +1975,27 @@ def SIN_cm : SIN_Common<0x8D>;
 def COS_cm : COS_Common<0x8E>;
 } // End isVector = 1
 
-def : POW_Common <LOG_IEEE_cm, EXP_IEEE_cm, MUL, R600_Reg32>;
+def : POW_Common <LOG_IEEE_cm, EXP_IEEE_cm, MUL>;
 def : SIN_PAT <SIN_cm>;
 def : COS_PAT <COS_cm>;
 
 defm DIV_cm : DIV_Common<RECIP_IEEE_cm>;
 
 // RECIP_UINT emulation for Cayman
+// The multiplication scales from [0,1] to the unsigned integer range
 def : Pat <
-  (AMDGPUurecip R600_Reg32:$src0),
-  (FLT_TO_UINT_eg (MUL_IEEE (RECIP_IEEE_cm (UINT_TO_FLT_eg R600_Reg32:$src0)),
-                            (MOV_IMM_I32 0x4f800000)))
+  (AMDGPUurecip i32:$src0),
+  (FLT_TO_UINT_eg (MUL_IEEE (RECIP_IEEE_cm (UINT_TO_FLT_eg $src0)),
+                            (MOV_IMM_I32 CONST.FP_UINT_MAX_PLUS_1)))
 >;
 
+  def CF_END_CM : CF_CLAUSE_EG<32, (ins), "CF_END"> {
+    let ADDR = 0;
+    let POP_COUNT = 0;
+    let COUNT = 0;
+  }
 
-def : Pat<(fsqrt R600_Reg32:$src),
-  (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm R600_Reg32:$src))>;
+def : Pat<(fsqrt f32:$src), (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm $src))>;
 
 } // End isCayman
 
@@ -1855,21 +2017,21 @@ def PREDICATED_BREAK : ILFormat<(outs), (ins GPRI32:$src),
 let isPseudo = 1 in {
 
 def PRED_X : InstR600 <
-  0, (outs R600_Predicate_Bit:$dst),
+  (outs R600_Predicate_Bit:$dst),
   (ins R600_Reg32:$src0, i32imm:$src1, i32imm:$flags),
   "", [], NullALU> {
   let FlagOperandIdx = 3;
 }
 
 let isTerminator = 1, isBranch = 1 in {
-def JUMP_COND : InstR600 <0x10,
+def JUMP_COND : InstR600 <
           (outs),
           (ins brtarget:$target, R600_Predicate_Bit:$p),
           "JUMP $target ($p)",
           [], AnyALU
   >;
 
-def JUMP : InstR600 <0x10,
+def JUMP : InstR600 <
           (outs),
           (ins brtarget:$target),
           "JUMP $target",
@@ -1896,20 +2058,28 @@ def MASK_WRITE : AMDGPUShaderInst <
 } // End mayLoad = 0, mayStore = 0, hasSideEffects = 1
 
 
-def TXD: AMDGPUShaderInst <
+def TXD: InstR600 <
   (outs R600_Reg128:$dst),
-  (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
+  (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2,
+       i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
   "TXD $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget",
-  [(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, imm:$resourceId, imm:$samplerId, imm:$textureTarget))]
->;
+  [(set v4f32:$dst, (int_AMDGPU_txd v4f32:$src0, v4f32:$src1, v4f32:$src2,
+                     imm:$resourceId, imm:$samplerId, imm:$textureTarget))],
+  NullALU > {
+  let TEXInst = 1;
+}
 
-def TXD_SHADOW: AMDGPUShaderInst <
+def TXD_SHADOW: InstR600 <
   (outs R600_Reg128:$dst),
-  (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
+  (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2,
+       i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget),
   "TXD_SHADOW $dst, $src0, $src1, $src2, $resourceId, $samplerId, $textureTarget",
-  [(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))]
->;
-
+  [(set v4f32:$dst, (int_AMDGPU_txd v4f32:$src0, v4f32:$src1, v4f32:$src2,
+        imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))],
+   NullALU
+> {
+  let TEXInst = 1;
+}
 } // End isPseudo = 1
 } // End usesCustomInserter = 1
 
@@ -1946,7 +2116,7 @@ def CONST_COPY : Instruction {
 
 def TEX_VTX_CONSTBUF :
   InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "VTX_READ_eg $dst, $ptr",
-      [(set R600_Reg128:$dst, (CONST_ADDRESS ADDRGA_VAR_OFFSET:$ptr, (i32 imm:$BUFFER_ID)))]>,
+      [(set v4i32:$dst, (CONST_ADDRESS ADDRGA_VAR_OFFSET:$ptr, (i32 imm:$BUFFER_ID)))]>,
   VTX_WORD1_GPR, VTX_WORD0 {
 
   let VC_INST = 0;
@@ -1995,11 +2165,12 @@ def TEX_VTX_CONSTBUF :
 // VTX_WORD3 (Padding)
 //
 // Inst{127-96} = 0;
+  let VTXInst = 1;
 }
 
 def TEX_VTX_TEXBUF:
   InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "TEX_VTX_EXPLICIT_READ $dst, $ptr",
-      [(set R600_Reg128:$dst, (int_R600_load_texbuf ADDRGA_VAR_OFFSET:$ptr, imm:$BUFFER_ID))]>,
+      [(set v4f32:$dst, (int_R600_load_texbuf ADDRGA_VAR_OFFSET:$ptr, imm:$BUFFER_ID))]>,
 VTX_WORD1_GPR, VTX_WORD0 {
 
 let VC_INST = 0;
@@ -2048,6 +2219,7 @@ let Inst{63-32} = Word1;
 // VTX_WORD3 (Padding)
 //
 // Inst{127-96} = 0;
+  let VTXInst = 1;
 }
 
 
@@ -2124,9 +2296,8 @@ let isTerminator=1 in {
 // CND*_INT Pattterns for f32 True / False values
 
 class CND_INT_f32 <InstR600 cnd, CondCode cc> : Pat <
-  (selectcc (i32 R600_Reg32:$src0), 0, (f32 R600_Reg32:$src1),
-                                            R600_Reg32:$src2, cc),
-  (cnd R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2)
+  (selectcc i32:$src0, 0, f32:$src1, f32:$src2, cc),
+  (cnd $src0, $src1, $src2)
 >;
 
 def : CND_INT_f32 <CNDE_INT,  SETEQ>;
@@ -2135,9 +2306,8 @@ def : CND_INT_f32 <CNDGE_INT, SETGE>;
 
 //CNDGE_INT extra pattern
 def : Pat <
-  (selectcc (i32 R600_Reg32:$src0), -1, (i32 R600_Reg32:$src1),
-                                        (i32 R600_Reg32:$src2), COND_GT),
-  (CNDGE_INT R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2)
+  (selectcc i32:$src0, -1, i32:$src1, i32:$src2, COND_GT),
+  (CNDGE_INT $src0, $src1, $src2)
 >;
 
 // KIL Patterns
@@ -2147,56 +2317,56 @@ def KILP : Pat <
 >;
 
 def KIL : Pat <
-  (int_AMDGPU_kill R600_Reg32:$src0),
-  (MASK_WRITE (KILLGT (f32 ZERO), (f32 R600_Reg32:$src0)))
+  (int_AMDGPU_kill f32:$src0),
+  (MASK_WRITE (KILLGT (f32 ZERO), $src0))
 >;
 
 // SGT Reverse args
 def : Pat <
-  (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, COND_LT),
-  (SGT R600_Reg32:$src1, R600_Reg32:$src0)
+  (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_LT),
+  (SGT $src1, $src0)
 >;
 
 // SGE Reverse args
 def : Pat <
-  (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, COND_LE),
-  (SGE R600_Reg32:$src1, R600_Reg32:$src0)
+  (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_LE),
+  (SGE $src1, $src0)
 >;
 
 // SETGT_DX10 reverse args
 def : Pat <
-  (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, COND_LT),
-  (SETGT_DX10 R600_Reg32:$src1, R600_Reg32:$src0)
+  (selectcc f32:$src0, f32:$src1, -1, 0, COND_LT),
+  (SETGT_DX10 $src1, $src0)
 >;
 
 // SETGE_DX10 reverse args
 def : Pat <
-  (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, COND_LE),
-  (SETGE_DX10 R600_Reg32:$src1, R600_Reg32:$src0)
+  (selectcc f32:$src0, f32:$src1, -1, 0, COND_LE),
+  (SETGE_DX10 $src1, $src0)
 >;
 
 // SETGT_INT reverse args
 def : Pat <
-  (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETLT),
-  (SETGT_INT R600_Reg32:$src1, R600_Reg32:$src0)
+  (selectcc i32:$src0, i32:$src1, -1, 0, SETLT),
+  (SETGT_INT $src1, $src0)
 >;
 
 // SETGE_INT reverse args
 def : Pat <
-  (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETLE),
-  (SETGE_INT R600_Reg32:$src1, R600_Reg32:$src0)
+  (selectcc i32:$src0, i32:$src1, -1, 0, SETLE),
+  (SETGE_INT $src1, $src0)
 >;
 
 // SETGT_UINT reverse args
 def : Pat <
-  (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETULT),
-  (SETGT_UINT R600_Reg32:$src1, R600_Reg32:$src0)
+  (selectcc i32:$src0, i32:$src1, -1, 0, SETULT),
+  (SETGT_UINT $src1, $src0)
 >;
 
 // SETGE_UINT reverse args
 def : Pat <
-  (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETULE),
-  (SETGE_UINT R600_Reg32:$src1, R600_Reg32:$src0)
+  (selectcc i32:$src0, i32:$src1, -1, 0, SETULE),
+  (SETGE_UINT $src1, $src0)
 >;
 
 // The next two patterns are special cases for handling 'true if ordered' and
@@ -2209,50 +2379,50 @@ def : Pat <
 
 //SETE - 'true if ordered'
 def : Pat <
-  (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, SETO),
-  (SETE R600_Reg32:$src0, R600_Reg32:$src1)
+  (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, SETO),
+  (SETE $src0, $src1)
 >;
 
 //SETE_DX10 - 'true if ordered'
 def : Pat <
-  (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETO),
-  (SETE_DX10 R600_Reg32:$src0, R600_Reg32:$src1)
+  (selectcc f32:$src0, f32:$src1, -1, 0, SETO),
+  (SETE_DX10 $src0, $src1)
 >;
 
 //SNE - 'true if unordered'
 def : Pat <
-  (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, SETUO),
-  (SNE R600_Reg32:$src0, R600_Reg32:$src1)
+  (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, SETUO),
+  (SNE $src0, $src1)
 >;
 
 //SETNE_DX10 - 'true if ordered'
 def : Pat <
-  (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUO),
-  (SETNE_DX10 R600_Reg32:$src0, R600_Reg32:$src1)
+  (selectcc f32:$src0, f32:$src1, -1, 0, SETUO),
+  (SETNE_DX10 $src0, $src1)
 >;
 
-def : Extract_Element <f32, v4f32, R600_Reg128, 0, sub0>;
-def : Extract_Element <f32, v4f32, R600_Reg128, 1, sub1>;
-def : Extract_Element <f32, v4f32, R600_Reg128, 2, sub2>;
-def : Extract_Element <f32, v4f32, R600_Reg128, 3, sub3>;
+def : Extract_Element <f32, v4f32, 0, sub0>;
+def : Extract_Element <f32, v4f32, 1, sub1>;
+def : Extract_Element <f32, v4f32, 2, sub2>;
+def : Extract_Element <f32, v4f32, 3, sub3>;
 
-def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 0, sub0>;
-def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 1, sub1>;
-def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 2, sub2>;
-def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 3, sub3>;
+def : Insert_Element <f32, v4f32, 0, sub0>;
+def : Insert_Element <f32, v4f32, 1, sub1>;
+def : Insert_Element <f32, v4f32, 2, sub2>;
+def : Insert_Element <f32, v4f32, 3, sub3>;
 
-def : Extract_Element <i32, v4i32, R600_Reg128, 0, sub0>;
-def : Extract_Element <i32, v4i32, R600_Reg128, 1, sub1>;
-def : Extract_Element <i32, v4i32, R600_Reg128, 2, sub2>;
-def : Extract_Element <i32, v4i32, R600_Reg128, 3, sub3>;
+def : Extract_Element <i32, v4i32, 0, sub0>;
+def : Extract_Element <i32, v4i32, 1, sub1>;
+def : Extract_Element <i32, v4i32, 2, sub2>;
+def : Extract_Element <i32, v4i32, 3, sub3>;
 
-def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 0, sub0>;
-def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 1, sub1>;
-def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 2, sub2>;
-def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 3, sub3>;
+def : Insert_Element <i32, v4i32, 0, sub0>;
+def : Insert_Element <i32, v4i32, 1, sub1>;
+def : Insert_Element <i32, v4i32, 2, sub2>;
+def : Insert_Element <i32, v4i32, 3, sub3>;
 
-def : Vector4_Build <v4f32, R600_Reg128, f32, R600_Reg32>;
-def : Vector4_Build <v4i32, R600_Reg128, i32, R600_Reg32>;
+def : Vector4_Build <v4f32, f32>;
+def : Vector4_Build <v4i32, i32>;
 
 // bitconvert patterns
 
diff --git a/lib/Target/R600/R600MachineFunctionInfo.h b/lib/Target/R600/R600MachineFunctionInfo.h
index 99c1f91..70fddbb 100644
--- a/lib/Target/R600/R600MachineFunctionInfo.h
+++ b/lib/Target/R600/R600MachineFunctionInfo.h
@@ -25,6 +25,7 @@ public:
   R600MachineFunctionInfo(const MachineFunction &MF);
   SmallVector<unsigned, 4> LiveOuts;
   std::vector<unsigned> IndirectRegs;
+  unsigned StackSize;
 };
 
 } // End llvm namespace
diff --git a/lib/Target/R600/R600Packetizer.cpp b/lib/Target/R600/R600Packetizer.cpp
new file mode 100644
index 0000000..cd7b7d0
--- /dev/null
+++ b/lib/Target/R600/R600Packetizer.cpp
@@ -0,0 +1,459 @@
+//===----- R600Packetizer.cpp - VLIW packetizer ---------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This pass implements instruction packetization for R600. It unsets the
+/// isLast bit of instructions inside a bundle and substitutes source registers
+/// with PreviousVector (PV) registers when applicable.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef R600PACKETIZER_CPP
+#define R600PACKETIZER_CPP
+
+#define DEBUG_TYPE "packets"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/CodeGen/DFAPacketizer.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "AMDGPU.h"
+#include "R600InstrInfo.h"
+
+namespace llvm {
+
+class R600Packetizer : public MachineFunctionPass {
+
+public:
+  static char ID;
+  R600Packetizer(const TargetMachine &TM) : MachineFunctionPass(ID) {}
+
+  void getAnalysisUsage(AnalysisUsage &AU) const {
+    AU.setPreservesCFG();
+    AU.addRequired<MachineDominatorTree>();
+    AU.addPreserved<MachineDominatorTree>();
+    AU.addRequired<MachineLoopInfo>();
+    AU.addPreserved<MachineLoopInfo>();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  const char *getPassName() const {
+    return "R600 Packetizer";
+  }
+
+  bool runOnMachineFunction(MachineFunction &Fn);
+};
+char R600Packetizer::ID = 0;
+
+class R600PacketizerList : public VLIWPacketizerList {
+
+private:
+  const R600InstrInfo *TII;
+  const R600RegisterInfo &TRI;
+
+  enum BankSwizzle {
+    ALU_VEC_012 = 0,
+    ALU_VEC_021,
+    ALU_VEC_120,
+    ALU_VEC_102,
+    ALU_VEC_201,
+    ALU_VEC_210
+  };
+
+  unsigned getSlot(const MachineInstr *MI) const {
+    return TRI.getHWRegChan(MI->getOperand(0).getReg());
+  }
+
+  /// \returns register to PV chan mapping for the bundle/single instruction
+  /// that immediately precedes I.
+  DenseMap<unsigned, unsigned> getPreviousVector(MachineBasicBlock::iterator I)
+      const {
+    DenseMap<unsigned, unsigned> Result;
+    I--;
+    if (!TII->isALUInstr(I->getOpcode()) && !I->isBundle())
+      return Result;
+    MachineBasicBlock::instr_iterator BI = I.getInstrIterator();
+    if (I->isBundle())
+      BI++;
+    do {
+      if (TII->isPredicated(BI))
+        continue;
+      if (TII->isTransOnly(BI))
+        continue;
+      int OperandIdx = TII->getOperandIdx(BI->getOpcode(), R600Operands::WRITE);
+      if (OperandIdx < 0)
+        continue;
+      if (BI->getOperand(OperandIdx).getImm() == 0)
+        continue;
+      unsigned Dst = BI->getOperand(0).getReg();
+      if (BI->getOpcode() == AMDGPU::DOT4_r600_real) {
+        Result[Dst] = AMDGPU::PV_X;
+        continue;
+      }
+      unsigned PVReg = 0;
+      switch (TRI.getHWRegChan(Dst)) {
+      case 0:
+        PVReg = AMDGPU::PV_X;
+        break;
+      case 1:
+        PVReg = AMDGPU::PV_Y;
+        break;
+      case 2:
+        PVReg = AMDGPU::PV_Z;
+        break;
+      case 3:
+        PVReg = AMDGPU::PV_W;
+        break;
+      default:
+        llvm_unreachable("Invalid Chan");
+      }
+      Result[Dst] = PVReg;
+    } while ((++BI)->isBundledWithPred());
+    return Result;
+  }
+
+  void substitutePV(MachineInstr *MI, const DenseMap<unsigned, unsigned> &PVs)
+      const {
+    R600Operands::Ops Ops[] = {
+      R600Operands::SRC0,
+      R600Operands::SRC1,
+      R600Operands::SRC2
+    };
+    for (unsigned i = 0; i < 3; i++) {
+      int OperandIdx = TII->getOperandIdx(MI->getOpcode(), Ops[i]);
+      if (OperandIdx < 0)
+        continue;
+      unsigned Src = MI->getOperand(OperandIdx).getReg();
+      const DenseMap<unsigned, unsigned>::const_iterator It = PVs.find(Src);
+      if (It != PVs.end())
+        MI->getOperand(OperandIdx).setReg(It->second);
+    }
+  }
+public:
+  // Ctor.
+  R600PacketizerList(MachineFunction &MF, MachineLoopInfo &MLI,
+                        MachineDominatorTree &MDT)
+  : VLIWPacketizerList(MF, MLI, MDT, true),
+    TII (static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo())),
+    TRI(TII->getRegisterInfo()) { }
+
+  // initPacketizerState - initialize some internal flags.
+  void initPacketizerState() { }
+
+  // ignorePseudoInstruction - Ignore bundling of pseudo instructions.
+  bool ignorePseudoInstruction(MachineInstr *MI, MachineBasicBlock *MBB) {
+    return false;
+  }
+
+  // isSoloInstruction - return true if instruction MI can not be packetized
+  // with any other instruction, which means that MI itself is a packet.
+  bool isSoloInstruction(MachineInstr *MI) {
+    if (TII->isVector(*MI))
+      return true;
+    if (!TII->isALUInstr(MI->getOpcode()))
+      return true;
+    if (TII->get(MI->getOpcode()).TSFlags & R600_InstFlag::TRANS_ONLY)
+      return true;
+    if (TII->isTransOnly(MI))
+      return true;
+    return false;
+  }
+
+  // isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ
+  // together.
+  bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
+    MachineInstr *MII = SUI->getInstr(), *MIJ = SUJ->getInstr();
+    if (getSlot(MII) <= getSlot(MIJ))
+      return false;
+    // Does MII and MIJ share the same pred_sel ?
+    int OpI = TII->getOperandIdx(MII->getOpcode(), R600Operands::PRED_SEL),
+        OpJ = TII->getOperandIdx(MIJ->getOpcode(), R600Operands::PRED_SEL);
+    unsigned PredI = (OpI > -1)?MII->getOperand(OpI).getReg():0,
+        PredJ = (OpJ > -1)?MIJ->getOperand(OpJ).getReg():0;
+    if (PredI != PredJ)
+      return false;
+    if (SUJ->isSucc(SUI)) {
+      for (unsigned i = 0, e = SUJ->Succs.size(); i < e; ++i) {
+        const SDep &Dep = SUJ->Succs[i];
+        if (Dep.getSUnit() != SUI)
+          continue;
+        if (Dep.getKind() == SDep::Anti)
+          continue;
+        if (Dep.getKind() == SDep::Output)
+          if (MII->getOperand(0).getReg() != MIJ->getOperand(0).getReg())
+            continue;
+        return false;
+      }
+    }
+    return true;
+  }
+
+  // isLegalToPruneDependencies - Is it legal to prune dependence between SUI
+  // and SUJ.
+  bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) {return false;}
+
+  void setIsLastBit(MachineInstr *MI, unsigned Bit) const {
+    unsigned LastOp = TII->getOperandIdx(MI->getOpcode(), R600Operands::LAST);
+    MI->getOperand(LastOp).setImm(Bit);
+  }
+
+  MachineBasicBlock::iterator addToPacket(MachineInstr *MI) {
+    CurrentPacketMIs.push_back(MI);
+    bool FitsConstLimits = TII->canBundle(CurrentPacketMIs);
+    DEBUG(
+      if (!FitsConstLimits) {
+        dbgs() << "Couldn't pack :\n";
+        MI->dump();
+        dbgs() << "with the following packets :\n";
+        for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) {
+          CurrentPacketMIs[i]->dump();
+          dbgs() << "\n";
+        }
+        dbgs() << "because of Consts read limitations\n";
+      });
+    const DenseMap<unsigned, unsigned> &PV =
+        getPreviousVector(CurrentPacketMIs.front());
+    bool FitsReadPortLimits = fitsReadPortLimitation(CurrentPacketMIs, PV);
+    DEBUG(
+      if (!FitsReadPortLimits) {
+        dbgs() << "Couldn't pack :\n";
+        MI->dump();
+        dbgs() << "with the following packets :\n";
+        for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) {
+          CurrentPacketMIs[i]->dump();
+          dbgs() << "\n";
+        }
+        dbgs() << "because of Read port limitations\n";
+      });
+    bool isBundlable = FitsConstLimits && FitsReadPortLimits;
+    CurrentPacketMIs.pop_back();
+    if (!isBundlable) {
+      endPacket(MI->getParent(), MI);
+      substitutePV(MI, getPreviousVector(MI));
+      return VLIWPacketizerList::addToPacket(MI);
+    }
+    if (!CurrentPacketMIs.empty())
+      setIsLastBit(CurrentPacketMIs.back(), 0);
+    substitutePV(MI, PV);
+    return VLIWPacketizerList::addToPacket(MI);
+  }
+private:
+  std::vector<std::pair<int, unsigned> >
+  ExtractSrcs(const MachineInstr *MI, const DenseMap<unsigned, unsigned> &PV)
+      const {
+    R600Operands::Ops Ops[] = {
+      R600Operands::SRC0,
+      R600Operands::SRC1,
+      R600Operands::SRC2
+    };
+    std::vector<std::pair<int, unsigned> > Result;
+    for (unsigned i = 0; i < 3; i++) {
+      int OperandIdx = TII->getOperandIdx(MI->getOpcode(), Ops[i]);
+      if (OperandIdx < 0){
+        Result.push_back(std::pair<int, unsigned>(-1,0));
+        continue;
+      }
+      unsigned Src = MI->getOperand(OperandIdx).getReg();
+      if (PV.find(Src) != PV.end()) {
+        Result.push_back(std::pair<int, unsigned>(-1,0));
+        continue;
+      }
+      unsigned Reg = TRI.getEncodingValue(Src) & 0xff;
+      if (Reg > 127) {
+        Result.push_back(std::pair<int, unsigned>(-1,0));
+        continue;
+      }
+      unsigned Chan = TRI.getHWRegChan(Src);
+      Result.push_back(std::pair<int, unsigned>(Reg, Chan));
+    }
+    return Result;
+  }
+
+  std::vector<std::pair<int, unsigned> >
+  Swizzle(std::vector<std::pair<int, unsigned> > Src,
+  BankSwizzle Swz) const {
+    switch (Swz) {
+    case ALU_VEC_012:
+      break;
+    case ALU_VEC_021:
+      std::swap(Src[1], Src[2]);
+      break;
+    case ALU_VEC_102:
+      std::swap(Src[0], Src[1]);
+      break;
+    case ALU_VEC_120:
+      std::swap(Src[0], Src[1]);
+      std::swap(Src[0], Src[2]);
+      break;
+    case ALU_VEC_201:
+      std::swap(Src[0], Src[2]);
+      std::swap(Src[0], Src[1]);
+      break;
+    case ALU_VEC_210:
+      std::swap(Src[0], Src[2]);
+      break;
+    }
+    return Src;
+  }
+
+  bool isLegal(const std::vector<MachineInstr *> &IG,
+      const std::vector<BankSwizzle> &Swz,
+      const DenseMap<unsigned, unsigned> &PV) const {
+    assert (Swz.size() == IG.size());
+    int Vector[4][3];
+    memset(Vector, -1, sizeof(Vector));
+    for (unsigned i = 0, e = IG.size(); i < e; i++) {
+      const std::vector<std::pair<int, unsigned> > &Srcs =
+          Swizzle(ExtractSrcs(IG[i], PV), Swz[i]);
+      for (unsigned j = 0; j < 3; j++) {
+        const std::pair<int, unsigned> &Src = Srcs[j];
+        if (Src.first < 0)
+          continue;
+        if (Vector[Src.second][j] < 0)
+          Vector[Src.second][j] = Src.first;
+        if (Vector[Src.second][j] != Src.first)
+          return false;
+      }
+    }
+    return true;
+  }
+
+  bool recursiveFitsFPLimitation(
+  std::vector<MachineInstr *> IG,
+  const DenseMap<unsigned, unsigned> &PV,
+  std::vector<BankSwizzle> &SwzCandidate,
+  std::vector<MachineInstr *> CurrentlyChecked)
+      const {
+    if (!isLegal(CurrentlyChecked, SwzCandidate, PV))
+      return false;
+    if (IG.size() == CurrentlyChecked.size()) {
+      return true;
+    }
+    BankSwizzle AvailableSwizzle[] = {
+      ALU_VEC_012,
+      ALU_VEC_021,
+      ALU_VEC_120,
+      ALU_VEC_102,
+      ALU_VEC_201,
+      ALU_VEC_210
+    };
+    CurrentlyChecked.push_back(IG[CurrentlyChecked.size()]);
+    for (unsigned i = 0; i < 6; i++) {
+      SwzCandidate.push_back(AvailableSwizzle[i]);
+      if (recursiveFitsFPLimitation(IG, PV, SwzCandidate, CurrentlyChecked))
+        return true;
+      SwzCandidate.pop_back();
+    }
+    return false;
+  }
+
+  bool fitsReadPortLimitation(
+  std::vector<MachineInstr *> IG,
+  const DenseMap<unsigned, unsigned> &PV)
+      const {
+    //Todo : support shared src0 - src1 operand
+    std::vector<BankSwizzle> SwzCandidate;
+    bool Result = recursiveFitsFPLimitation(IG, PV, SwzCandidate,
+        std::vector<MachineInstr *>());
+    if (!Result)
+      return false;
+    for (unsigned i = 0, e = IG.size(); i < e; i++) {
+      MachineInstr *MI = IG[i];
+      unsigned Op = TII->getOperandIdx(MI->getOpcode(),
+          R600Operands::BANK_SWIZZLE);
+      MI->getOperand(Op).setImm(SwzCandidate[i]);
+    }
+    return true;
+  }
+};
+
+bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) {
+  const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo();
+  MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
+  MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
+
+  // Instantiate the packetizer.
+  R600PacketizerList Packetizer(Fn, MLI, MDT);
+
+  // DFA state table should not be empty.
+  assert(Packetizer.getResourceTracker() && "Empty DFA table!");
+
+  //
+  // Loop over all basic blocks and remove KILL pseudo-instructions.
+  // These instructions confuse the dependence analysis. Consider:
+  // D0 = ...   (Insn 0)
+  // R0 = KILL R0, D0 (Insn 1)
+  // R0 = ... (Insn 2)
+  // Here, Insn 1 will result in the dependence graph not emitting an output
+  // dependence between Insn 0 and Insn 2. This can lead to incorrect
+  // packetization.
+  //
+  for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
+       MBB != MBBe; ++MBB) {
+    MachineBasicBlock::iterator End = MBB->end();
+    MachineBasicBlock::iterator MI = MBB->begin();
+    while (MI != End) {
+      if (MI->isKill()) {
+        MachineBasicBlock::iterator DeleteMI = MI;
+        ++MI; // step past the KILL before erasing it
+        MBB->erase(DeleteMI);
+        End = MBB->end(); // re-read end() after the erase
+        continue;
+      }
+      ++MI;
+    }
+  }
+
+  // Loop over all of the basic blocks.
+  for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
+       MBB != MBBe; ++MBB) {
+    // Find scheduling regions and schedule / packetize each region.
+    unsigned RemainingCount = MBB->size(); // NOTE(review): written, never read
+    for(MachineBasicBlock::iterator RegionEnd = MBB->end();
+        RegionEnd != MBB->begin();) {
+      // The next region starts above the previous region. Look backward in the
+      // instruction stream until we find the nearest boundary.
+      MachineBasicBlock::iterator I = RegionEnd;
+      for(;I != MBB->begin(); --I, --RemainingCount) {
+        if (TII->isSchedulingBoundary(llvm::prior(I), MBB, Fn))
+          break;
+      }
+      I = MBB->begin(); // NOTE(review): discards boundary found above?
+
+      // Skip empty scheduling regions.
+      if (I == RegionEnd) {
+        RegionEnd = llvm::prior(RegionEnd);
+        --RemainingCount;
+        continue;
+      }
+      // Skip regions with one instruction.
+      if (I == llvm::prior(RegionEnd)) {
+        RegionEnd = llvm::prior(RegionEnd);
+        continue;
+      }
+
+      Packetizer.PacketizeMIs(MBB, I, RegionEnd);
+      RegionEnd = I; // continue with the instructions above this region
+    }
+  }
+
+  return true; // unconditionally reports the function as modified
+
+}
+
+}
+
+llvm::FunctionPass *llvm::createR600Packetizer(TargetMachine &tm) {
+  return new R600Packetizer(tm);
+}
+
+#endif // R600PACKETIZER_CPP
diff --git a/lib/Target/R600/R600RegisterInfo.td b/lib/Target/R600/R600RegisterInfo.td
index 03f4976..bfc546b 100644
--- a/lib/Target/R600/R600RegisterInfo.td
+++ b/lib/Target/R600/R600RegisterInfo.td
@@ -88,8 +88,14 @@ def NEG_ONE : R600Reg<"-1.0", 249>;
 def ONE_INT : R600Reg<"1", 250>;
 def HALF : R600Reg<"0.5", 252>;
 def NEG_HALF : R600Reg<"-0.5", 252>;
-def ALU_LITERAL_X : R600Reg<"literal.x", 253>;
-def PV_X : R600Reg<"pv.x", 254>;
+def ALU_LITERAL_X : R600RegWithChan<"literal.x", 253, "X">;
+def ALU_LITERAL_Y : R600RegWithChan<"literal.y", 253, "Y">;
+def ALU_LITERAL_Z : R600RegWithChan<"literal.z", 253, "Z">;
+def ALU_LITERAL_W : R600RegWithChan<"literal.w", 253, "W">;
+def PV_X : R600RegWithChan<"PV.x", 254, "X">;
+def PV_Y : R600RegWithChan<"PV.y", 254, "Y">;
+def PV_Z : R600RegWithChan<"PV.z", 254, "Z">;
+def PV_W : R600RegWithChan<"PV.w", 254, "W">;
 def PREDICATE_BIT : R600Reg<"PredicateBit", 0>;
 def PRED_SEL_OFF: R600Reg<"Pred_sel_off", 0>;
 def PRED_SEL_ZERO : R600Reg<"Pred_sel_zero", 2>;
diff --git a/lib/Target/R600/R600Schedule.td b/lib/Target/R600/R600Schedule.td
index 7ede181..78a460a 100644
--- a/lib/Target/R600/R600Schedule.td
+++ b/lib/Target/R600/R600Schedule.td
@@ -24,7 +24,7 @@ def AnyALU : InstrItinClass;
 def VecALU : InstrItinClass;
 def TransALU : InstrItinClass;
 
-def R600_EG_Itin : ProcessorItineraries <
+def R600_VLIW5_Itin : ProcessorItineraries <
   [ALU_X, ALU_Y, ALU_Z, ALU_W, TRANS, ALU_NULL],
   [],
   [
@@ -34,3 +34,14 @@ def R600_EG_Itin : ProcessorItineraries <
     InstrItinData<NullALU, [InstrStage<1, [ALU_NULL]>]>
   ]
 >;
+
+def R600_VLIW4_Itin : ProcessorItineraries <
+  [ALU_X, ALU_Y, ALU_Z, ALU_W, ALU_NULL], // functional units (no TRANS unit)
+  [],                                     // no bypasses
+  [
+    InstrItinData<AnyALU, [InstrStage<1, [ALU_X, ALU_Y, ALU_Z, ALU_W]>]>,
+    InstrItinData<VecALU, [InstrStage<1, [ALU_X, ALU_Y, ALU_Z, ALU_W]>]>,
+    InstrItinData<TransALU, [InstrStage<1, [ALU_NULL]>]>,
+    InstrItinData<NullALU, [InstrStage<1, [ALU_NULL]>]>
+  ]
+>;
diff --git a/lib/Target/R600/SIDefines.h b/lib/Target/R600/SIDefines.h
new file mode 100644
index 0000000..716b093
--- /dev/null
+++ b/lib/Target/R600/SIDefines.h
@@ -0,0 +1,22 @@
+//===-- SIDefines.h - SI Helper Macros ----------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+#ifndef SIDEFINES_H_
+#define SIDEFINES_H_
+
+#define R_00B028_SPI_SHADER_PGM_RSRC1_PS                                0x00B028
+#define R_00B128_SPI_SHADER_PGM_RSRC1_VS                                0x00B128
+#define R_00B228_SPI_SHADER_PGM_RSRC1_GS                                0x00B228
+#define R_00B848_COMPUTE_PGM_RSRC1                                      0x00B848
+#define   S_00B028_VGPRS(x)                                           (((x) & 0x3F) << 0)
+#define   S_00B028_SGPRS(x)                                           (((x) & 0x0F) << 6)
+#define R_0286CC_SPI_PS_INPUT_ENA                                       0x0286CC
+
+#endif // SIDEFINES_H_
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index 6f0c307..6bd82a5 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -49,6 +49,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
 
   addRegisterClass(MVT::v4i32, &AMDGPU::VReg_128RegClass);
   addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass);
+  addRegisterClass(MVT::i128, &AMDGPU::SReg_128RegClass);
 
   addRegisterClass(MVT::v8i32, &AMDGPU::VReg_256RegClass);
   addRegisterClass(MVT::v8f32, &AMDGPU::VReg_256RegClass);
@@ -70,6 +71,10 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
   setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
 
   setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+
+  setOperationAction(ISD::STORE, MVT::i32, Custom);
+  setOperationAction(ISD::STORE, MVT::i64, Custom);
+
   setTargetDAGCombine(ISD::SELECT_CC);
 
   setTargetDAGCombine(ISD::SETCC);
@@ -234,6 +239,7 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
   case ISD::BRCOND: return LowerBRCOND(Op, DAG);
   case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+  case ISD::STORE: return LowerSTORE(Op, DAG);
   }
   return SDValue();
 }
@@ -332,6 +338,32 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
   return Chain;
 }
 
+#define RSRC_DATA_FORMAT 0xf00000000000ULL
+
+/// \brief Custom-lower a global-address STORE to an AMDGPUISD::BUFFER_STORE.
+SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
+  SDValue Chain = Op.getOperand(0);
+  SDValue Value = Op.getOperand(1);
+  SDValue VirtualAddress = Op.getOperand(2);
+  DebugLoc DL = Op.getDebugLoc();
+
+  if (StoreNode->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS) {
+    return SDValue(); // stores to other address spaces get an empty SDValue
+  }
+
+  SDValue SrcSrc = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128,
+                               DAG.getConstant(0, MVT::i64),
+                               DAG.getConstant(RSRC_DATA_FORMAT, MVT::i64));
+
+  SDValue Ops[2];
+  Ops[0] = DAG.getNode(AMDGPUISD::BUFFER_STORE, DL, MVT::Other, Chain,
+                       Value, SrcSrc, VirtualAddress);
+  Ops[1] = Chain;
+
+  return DAG.getMergeValues(Ops, 2, DL);
+}
+
 SDValue SITargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
   SDValue LHS = Op.getOperand(0);
   SDValue RHS = Op.getOperand(1);
@@ -424,9 +456,12 @@ int32_t SITargetLowering::analyzeImmediate(const SDNode *N) const {
     float F;
   } Imm;
 
-  if (const ConstantSDNode *Node = dyn_cast<ConstantSDNode>(N))
+  if (const ConstantSDNode *Node = dyn_cast<ConstantSDNode>(N)) {
+    if (Node->getZExtValue() >> 32) {
+        return -1;
+    }
     Imm.I = Node->getSExtValue();
-  else if (const ConstantFPSDNode *Node = dyn_cast<ConstantFPSDNode>(N))
+  } else if (const ConstantFPSDNode *Node = dyn_cast<ConstantFPSDNode>(N))
     Imm.F = Node->getValueAPF().convertToFloat();
   else
     return -1; // It isn't an immediate
@@ -534,8 +569,9 @@ void SITargetLowering::ensureSRegLimit(SelectionDAG &DAG, SDValue &Operand,
   Operand = SDValue(Node, 0);
 }
 
-SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
-                                          SelectionDAG &DAG) const {
+/// \brief Try to fold the Node's operands into the Node
+SDNode *SITargetLowering::foldOperands(MachineSDNode *Node,
+                                       SelectionDAG &DAG) const {
 
   // Original encoding (either e32 or e64)
   int Opcode = Node->getMachineOpcode();
@@ -666,5 +702,116 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
 
   // Create a complete new instruction
   return DAG.getMachineNode(Desc->Opcode, Node->getDebugLoc(),
-                            Node->getVTList(), Ops.data(), Ops.size());
+                            Node->getVTList(), Ops);
+}
+
+/// \brief Helper for adjustWritemask: sub0..sub3 -> lane 0..3, otherwise 0
+static unsigned SubIdx2Lane(unsigned Idx) {
+  switch (Idx) {
+  default: return 0;
+  case AMDGPU::sub0: return 0;
+  case AMDGPU::sub1: return 1;
+  case AMDGPU::sub2: return 2;
+  case AMDGPU::sub3: return 3;
+  }
+}
+
+/// \brief Shrink the writemask of a MIMG node to the lanes its users read
+void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
+                                       SelectionDAG &DAG) const {
+  SDNode *Users[4] = { };
+  unsigned Writemask = 0, Lane = 0;
+
+  // Record which lane each user of Node extracts
+  for (SDNode::use_iterator I = Node->use_begin(), E = Node->use_end();
+       I != E; ++I) {
+
+    // Abort if any user is something other than an EXTRACT_SUBREG
+    if (!I->isMachineOpcode() ||
+        I->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG)
+      return;
+    // Operand 1 of the user is the subregister index it extracts
+    Lane = SubIdx2Lane(I->getConstantOperandVal(1));
+
+    // Abort if we have more than one user per component
+    if (Users[Lane])
+      return;
+
+    Users[Lane] = *I;
+    Writemask |= 1 << Lane;
+  }
+
+  // Nothing to shrink if all four components are used
+  if (Writemask == 0xf)
+    return;
+
+  // Replace operand 0 with the new writemask; Node is updated for the caller
+  std::vector<SDValue> Ops;
+  Ops.push_back(DAG.getTargetConstant(Writemask, MVT::i32));
+  for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i)
+    Ops.push_back(Node->getOperand(i));
+  Node = (MachineSDNode*)DAG.UpdateNodeOperands(Node, Ops.data(), Ops.size());
+
+  // With a single lane left, replace its user with a copy to VReg_32
+  if (Writemask == (1U << Lane)) {
+    SDValue RC = DAG.getTargetConstant(AMDGPU::VReg_32RegClassID, MVT::i32);
+    SDNode *Copy = DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
+                                      DebugLoc(), Users[Lane]->getValueType(0),
+                                      SDValue(Node, 0), RC);
+    DAG.ReplaceAllUsesWith(Users[Lane], Copy);
+    return;
+  }
+
+  // Renumber the remaining users: used lanes get sub0, sub1, ... in lane order
+  for (unsigned i = 0, Idx = AMDGPU::sub0; i < 4; ++i) {
+
+    SDNode *User = Users[i];
+    if (!User)
+      continue;
+
+    SDValue Op = DAG.getTargetConstant(Idx, MVT::i32);
+    DAG.UpdateNodeOperands(User, User->getOperand(0), Op);
+    // Advance to the next subregister index only after one has been assigned
+    switch (Idx) {
+    default: break;
+    case AMDGPU::sub0: Idx = AMDGPU::sub1; break;
+    case AMDGPU::sub1: Idx = AMDGPU::sub2; break;
+    case AMDGPU::sub2: Idx = AMDGPU::sub3; break;
+    }
+  }
+}
+
+/// \brief Fold the instructions after selecting them
+SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
+                                          SelectionDAG &DAG) const {
+  // MIMG nodes get their writemask adjusted first; this may update Node
+  if (AMDGPU::isMIMG(Node->getMachineOpcode()) != -1)
+    adjustWritemask(Node, DAG);
+
+  return foldOperands(Node, DAG);
+}
+
+/// \brief For MIMG instructions, assign the register class of operand 0
+/// depending on the number of bits set in the writemask (operand 1)
+void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
+                                                     SDNode *Node) const {
+  if (AMDGPU::isMIMG(MI->getOpcode()) == -1)
+    return;
+  // Operand 0 is read as a register, operand 1 as the writemask immediate
+  unsigned VReg = MI->getOperand(0).getReg();
+  unsigned Writemask = MI->getOperand(1).getImm();
+  unsigned BitsSet = 0;
+  for (unsigned i = 0; i < 4; ++i)
+    BitsSet += Writemask & (1 << i) ? 1 : 0; // i.e. (mask & bit) ? 1 : 0
+  // 1-3 bits pick a narrower class; any other count leaves the class alone
+  const TargetRegisterClass *RC;
+  switch (BitsSet) {
+  default: return;
+  case 1:  RC = &AMDGPU::VReg_32RegClass; break;
+  case 2:  RC = &AMDGPU::VReg_64RegClass; break;
+  case 3:  RC = &AMDGPU::VReg_96RegClass; break;
+  }
+
+  MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+  MRI.setRegClass(VReg, RC);
 }
diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h
index 5ad2f40..de637be 100644
--- a/lib/Target/R600/SIISelLowering.h
+++ b/lib/Target/R600/SIISelLowering.h
@@ -24,6 +24,7 @@ class SITargetLowering : public AMDGPUTargetLowering {
   const SIInstrInfo * TII;
   const TargetRegisterInfo * TRI;
 
+  SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
 
@@ -33,6 +34,9 @@ class SITargetLowering : public AMDGPUTargetLowering {
   void ensureSRegLimit(SelectionDAG &DAG, SDValue &Operand, 
                        unsigned RegClass, bool &ScalarSlotUsed) const;
 
+  SDNode *foldOperands(MachineSDNode *N, SelectionDAG &DAG) const;
+  void adjustWritemask(MachineSDNode *&N, SelectionDAG &DAG) const;
+
 public:
   SITargetLowering(TargetMachine &tm);
 
@@ -49,6 +53,8 @@ public:
   virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
   virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   virtual SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const;
+  virtual void AdjustInstrPostInstrSelection(MachineInstr *MI,
+                                             SDNode *Node) const;
 
   int32_t analyzeImmediate(const SDNode *N) const;
 };
diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td
index 3891ddb..f737ddd 100644
--- a/lib/Target/R600/SIInstrFormats.td
+++ b/lib/Target/R600/SIInstrFormats.td
@@ -284,33 +284,33 @@ let Uses = [EXEC] in {
 class MUBUF <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
     Enc64<outs, ins, asm, pattern> {
 
-  bits<8> VDATA;
-  bits<12> OFFSET;
-  bits<1> OFFEN;
-  bits<1> IDXEN;
-  bits<1> GLC;
-  bits<1> ADDR64;
-  bits<1> LDS;
-  bits<8> VADDR;
-  bits<7> SRSRC;
-  bits<1> SLC;
-  bits<1> TFE;
-  bits<8> SOFFSET;
-
-  let Inst{11-0} = OFFSET;
-  let Inst{12} = OFFEN;
-  let Inst{13} = IDXEN;
-  let Inst{14} = GLC;
-  let Inst{15} = ADDR64;
-  let Inst{16} = LDS;
+  bits<12> offset;
+  bits<1> offen;
+  bits<1> idxen;
+  bits<1> glc;
+  bits<1> addr64;
+  bits<1> lds;
+  bits<8> vaddr;
+  bits<8> vdata;
+  bits<7> srsrc;
+  bits<1> slc;
+  bits<1> tfe;
+  bits<8> soffset;
+
+  let Inst{11-0} = offset;
+  let Inst{12} = offen;
+  let Inst{13} = idxen;
+  let Inst{14} = glc;
+  let Inst{15} = addr64;
+  let Inst{16} = lds;
   let Inst{24-18} = op;
   let Inst{31-26} = 0x38; //encoding
-  let Inst{39-32} = VADDR;
-  let Inst{47-40} = VDATA;
-  let Inst{52-48} = SRSRC{6-2};
-  let Inst{54} = SLC;
-  let Inst{55} = TFE;
-  let Inst{63-56} = SOFFSET;
+  let Inst{39-32} = vaddr;
+  let Inst{47-40} = vdata;
+  let Inst{52-48} = srsrc{6-2};
+  let Inst{54} = slc;
+  let Inst{55} = tfe;
+  let Inst{63-56} = soffset;
 
   let VM_CNT = 1;
   let EXP_CNT = 1;
diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp
index 0bfcef5..9a04c60 100644
--- a/lib/Target/R600/SIInstrInfo.cpp
+++ b/lib/Target/R600/SIInstrInfo.cpp
@@ -58,6 +58,10 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, 0
   };
 
+  const int16_t Sub0_2[] = {
+    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, 0
+  };
+
   const int16_t Sub0_1[] = {
     AMDGPU::sub0, AMDGPU::sub1, 0
   };
@@ -125,6 +129,11 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     Opcode = AMDGPU::V_MOV_B32_e32;
     SubIndices = Sub0_1;
 
+  } else if (AMDGPU::VReg_96RegClass.contains(DestReg)) {
+    assert(AMDGPU::VReg_96RegClass.contains(SrcReg));
+    Opcode = AMDGPU::V_MOV_B32_e32;
+    SubIndices = Sub0_2;
+
   } else if (AMDGPU::VReg_128RegClass.contains(DestReg)) {
     assert(AMDGPU::VReg_128RegClass.contains(SrcReg) ||
 	   AMDGPU::SReg_128RegClass.contains(SrcReg));
diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h
index d4e60e5..87eff4d 100644
--- a/lib/Target/R600/SIInstrInfo.h
+++ b/lib/Target/R600/SIInstrInfo.h
@@ -80,6 +80,7 @@ namespace AMDGPU {
   int getVOPe64(uint16_t Opcode);
   int getCommuteRev(uint16_t Opcode);
   int getCommuteOrig(uint16_t Opcode);
+  int isMIMG(uint16_t Opcode);
 
 } // End namespace AMDGPU
 
diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
index 617f0b8..c8aecb7 100644
--- a/lib/Target/R600/SIInstrInfo.td
+++ b/lib/Target/R600/SIInstrInfo.td
@@ -26,6 +26,10 @@ def HI32 : SDNodeXForm<imm, [{
   return CurDAG->getTargetConstant(N->getZExtValue() >> 32, MVT::i32);
 }]>;
 
+def SIbuffer_store : SDNode<"AMDGPUISD::BUFFER_STORE", // 0 results, 3 operands
+                           SDTypeProfile<0, 3, [SDTCisPtrTy<1>, SDTCisInt<2>]>,
+                           [SDNPHasChain, SDNPMayStore]>;
+
 def IMM8bitDWORD : ImmLeaf <
   i32, [{
     return (Imm & ~0x3FC) == 0;
@@ -255,14 +259,14 @@ multiclass VOPC_64 <bits<8> op, string opName,
 class VOP3_32 <bits<9> op, string opName, list<dag> pattern> : VOP3 <
   op, (outs VReg_32:$dst),
   (ins VSrc_32:$src0, VSrc_32:$src1, VSrc_32:$src2,
-   i32imm:$abs, i32imm:$clamp, i32imm:$omod, i32imm:$neg),
+   InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg),
   opName#" $dst, $src0, $src1, $src2, $abs, $clamp, $omod, $neg", pattern
 >, VOP <opName>;
 
 class VOP3_64 <bits<9> op, string opName, list<dag> pattern> : VOP3 <
   op, (outs VReg_64:$dst),
   (ins VSrc_64:$src0, VSrc_64:$src1, VSrc_64:$src2,
-   i32imm:$abs, i32imm:$clamp, i32imm:$omod, i32imm:$neg),
+   InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg),
   opName#" $dst, $src0, $src1, $src2, $abs, $clamp, $omod, $neg", pattern
 >, VOP <opName>;
 
@@ -285,17 +289,39 @@ class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBU
 
 class MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass> : MUBUF <
   op,
-  (outs regClass:$dst),
+  (outs regClass:$vdata),
   (ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64,
        i1imm:$lds, VReg_32:$vaddr, SReg_128:$srsrc, i1imm:$slc,
        i1imm:$tfe, SSrc_32:$soffset),
-  asm#" $dst, $offset, $offen, $idxen, $glc, $addr64, "
+  asm#" $vdata, $offset, $offen, $idxen, $glc, $addr64, "
      #"$lds, $vaddr, $srsrc, $slc, $tfe, $soffset",
   []> {
   let mayLoad = 1;
   let mayStore = 0;
 }
 
+class MUBUF_Store_Helper <bits<7> op, string name, RegisterClass vdataClass,
+                         ValueType VT> :
+    MUBUF <op, (outs), (ins vdataClass:$vdata, SReg_128:$srsrc, VReg_64:$vaddr),
+          name#" $vdata, $srsrc + $vaddr",
+          [(SIbuffer_store (VT vdataClass:$vdata), (i128 SReg_128:$srsrc),
+                                                    (i64 VReg_64:$vaddr))]> {
+  // Matches SIbuffer_store; the instruction has no outputs and only stores
+  let mayLoad = 0;
+  let mayStore = 1;
+
+  // Encoding: fields that are not instruction operands get fixed values
+  let offset = 0;
+  let offen = 0;
+  let idxen = 0;
+  let glc = 0;
+  let addr64 = 1;
+  let lds = 0;
+  let slc = 0;
+  let tfe = 0;
+  let soffset = 128; // ZERO
+}
+
 class MTBUF_Load_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF <
   op,
   (outs regClass:$dst),
@@ -309,7 +335,22 @@ class MTBUF_Load_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF
   let mayStore = 0;
 }
 
-class MIMG_Load_Helper <bits<7> op, string asm> : MIMG <
+class MIMG_NoSampler_Helper <bits<7> op, string asm> : MIMG <
+  op,
+  (outs VReg_128:$vdata),
+  (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128,
+       i1imm:$tfe, i1imm:$lwe, i1imm:$slc, unknown:$vaddr,
+       SReg_256:$srsrc),
+  asm#" $vdata, $dmask, $unorm, $glc, $da, $r128,"
+     #" $tfe, $lwe, $slc, $vaddr, $srsrc",
+  []> {
+  let SSAMP = 0; // this variant takes no sampler operand, so the field is fixed
+  let mayLoad = 1;
+  let mayStore = 0;
+  let hasPostISelHook = 1; // requests the AdjustInstrPostInstrSelection callback
+}
+
+class MIMG_Sampler_Helper <bits<7> op, string asm> : MIMG <
   op,
   (outs VReg_128:$vdata),
   (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128,
@@ -320,6 +361,7 @@ class MIMG_Load_Helper <bits<7> op, string asm> : MIMG <
   []> {
   let mayLoad = 1;
   let mayStore = 0;
+  let hasPostISelHook = 1;
 }
 
 //===----------------------------------------------------------------------===//
@@ -353,4 +395,13 @@ def getCommuteOrig : InstrMapping {
   let ValueCols = [["1"]];
 }
 
+// Test if the supplied opcode is an MIMG instruction (callers compare with -1)
+def isMIMG : InstrMapping {
+  let FilterClass = "MIMG"; // only instructions derived from MIMG are mapped
+  let RowFields = ["Inst"];
+  let ColFields = ["Size"];
+  let KeyCol = ["8"];
+  let ValueCols = [["8"]];
+}
+
 include "SIInstructions.td"
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index 4f734f9..0d50c5d 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -108,7 +108,7 @@ VGPR0 = V_CNDMASK VCC, VGPR0, VGPR1
 def S_CMPK_EQ_I32 : SOPK <
   0x00000003, (outs SCCReg:$dst), (ins SReg_32:$src0, i32imm:$src1),
   "S_CMPK_EQ_I32",
-  [(set SCCReg:$dst, (setcc SReg_32:$src0, imm:$src1, SETEQ))]
+  [(set i1:$dst, (setcc i32:$src0, imm:$src1, SETEQ))]
 >;
 */
 
@@ -408,8 +408,14 @@ def BUFFER_LOAD_DWORDX2 : MUBUF_Load_Helper <0x0000000d, "BUFFER_LOAD_DWORDX2",
 def BUFFER_LOAD_DWORDX4 : MUBUF_Load_Helper <0x0000000e, "BUFFER_LOAD_DWORDX4", VReg_128>;
 //def BUFFER_STORE_BYTE : MUBUF_ <0x00000018, "BUFFER_STORE_BYTE", []>;
 //def BUFFER_STORE_SHORT : MUBUF_ <0x0000001a, "BUFFER_STORE_SHORT", []>;
-//def BUFFER_STORE_DWORD : MUBUF_ <0x0000001c, "BUFFER_STORE_DWORD", []>;
-//def BUFFER_STORE_DWORDX2 : MUBUF_DWORDX2 <0x0000001d, "BUFFER_STORE_DWORDX2", []>;
+
+def BUFFER_STORE_DWORD : MUBUF_Store_Helper <
+  0x0000001c, "BUFFER_STORE_DWORD", VReg_32, i32
+>;
+
+def BUFFER_STORE_DWORDX2 : MUBUF_Store_Helper <
+  0x0000001d, "BUFFER_STORE_DWORDX2", VReg_64, i64
+>;
 //def BUFFER_STORE_DWORDX4 : MUBUF_DWORDX4 <0x0000001e, "BUFFER_STORE_DWORDX4", []>;
 //def BUFFER_ATOMIC_SWAP : MUBUF_ <0x00000030, "BUFFER_ATOMIC_SWAP", []>;
 //def BUFFER_ATOMIC_CMPSWAP : MUBUF_ <0x00000031, "BUFFER_ATOMIC_CMPSWAP", []>;
@@ -489,7 +495,7 @@ defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper <
 //def S_MEMTIME : SMRD_ <0x0000001e, "S_MEMTIME", []>;
 //def S_DCACHE_INV : SMRD_ <0x0000001f, "S_DCACHE_INV", []>;
 //def IMAGE_LOAD : MIMG_NoPattern_ <"IMAGE_LOAD", 0x00000000>;
-//def IMAGE_LOAD_MIP : MIMG_NoPattern_ <"IMAGE_LOAD_MIP", 0x00000001>;
+def IMAGE_LOAD_MIP : MIMG_NoSampler_Helper <0x00000001, "IMAGE_LOAD_MIP">;
 //def IMAGE_LOAD_PCK : MIMG_NoPattern_ <"IMAGE_LOAD_PCK", 0x00000002>;
 //def IMAGE_LOAD_PCK_SGN : MIMG_NoPattern_ <"IMAGE_LOAD_PCK_SGN", 0x00000003>;
 //def IMAGE_LOAD_MIP_PCK : MIMG_NoPattern_ <"IMAGE_LOAD_MIP_PCK", 0x00000004>;
@@ -498,7 +504,7 @@ defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper <
 //def IMAGE_STORE_MIP : MIMG_NoPattern_ <"IMAGE_STORE_MIP", 0x00000009>;
 //def IMAGE_STORE_PCK : MIMG_NoPattern_ <"IMAGE_STORE_PCK", 0x0000000a>;
 //def IMAGE_STORE_MIP_PCK : MIMG_NoPattern_ <"IMAGE_STORE_MIP_PCK", 0x0000000b>;
-//def IMAGE_GET_RESINFO : MIMG_NoPattern_ <"IMAGE_GET_RESINFO", 0x0000000e>;
+def IMAGE_GET_RESINFO : MIMG_NoSampler_Helper <0x0000000e, "IMAGE_GET_RESINFO">;
 //def IMAGE_ATOMIC_SWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_SWAP", 0x0000000f>;
 //def IMAGE_ATOMIC_CMPSWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_CMPSWAP", 0x00000010>;
 //def IMAGE_ATOMIC_ADD : MIMG_NoPattern_ <"IMAGE_ATOMIC_ADD", 0x00000011>;
@@ -516,20 +522,20 @@ defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper <
 //def IMAGE_ATOMIC_FCMPSWAP : MIMG_NoPattern_ <"IMAGE_ATOMIC_FCMPSWAP", 0x0000001d>;
 //def IMAGE_ATOMIC_FMIN : MIMG_NoPattern_ <"IMAGE_ATOMIC_FMIN", 0x0000001e>;
 //def IMAGE_ATOMIC_FMAX : MIMG_NoPattern_ <"IMAGE_ATOMIC_FMAX", 0x0000001f>;
-def IMAGE_SAMPLE : MIMG_Load_Helper <0x00000020, "IMAGE_SAMPLE">; 
+def IMAGE_SAMPLE : MIMG_Sampler_Helper <0x00000020, "IMAGE_SAMPLE">; 
 //def IMAGE_SAMPLE_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_CL", 0x00000021>;
-def IMAGE_SAMPLE_D : MIMG_Load_Helper <0x00000022, "IMAGE_SAMPLE_D">;
+def IMAGE_SAMPLE_D : MIMG_Sampler_Helper <0x00000022, "IMAGE_SAMPLE_D">;
 //def IMAGE_SAMPLE_D_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_D_CL", 0x00000023>;
-def IMAGE_SAMPLE_L : MIMG_Load_Helper <0x00000024, "IMAGE_SAMPLE_L">;
-def IMAGE_SAMPLE_B : MIMG_Load_Helper <0x00000025, "IMAGE_SAMPLE_B">;
+def IMAGE_SAMPLE_L : MIMG_Sampler_Helper <0x00000024, "IMAGE_SAMPLE_L">;
+def IMAGE_SAMPLE_B : MIMG_Sampler_Helper <0x00000025, "IMAGE_SAMPLE_B">;
 //def IMAGE_SAMPLE_B_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_B_CL", 0x00000026>;
 //def IMAGE_SAMPLE_LZ : MIMG_NoPattern_ <"IMAGE_SAMPLE_LZ", 0x00000027>;
-def IMAGE_SAMPLE_C : MIMG_Load_Helper <0x00000028, "IMAGE_SAMPLE_C">;
+def IMAGE_SAMPLE_C : MIMG_Sampler_Helper <0x00000028, "IMAGE_SAMPLE_C">;
 //def IMAGE_SAMPLE_C_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CL", 0x00000029>;
 //def IMAGE_SAMPLE_C_D : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D", 0x0000002a>;
 //def IMAGE_SAMPLE_C_D_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_D_CL", 0x0000002b>;
-def IMAGE_SAMPLE_C_L : MIMG_Load_Helper <0x0000002c, "IMAGE_SAMPLE_C_L">;
-def IMAGE_SAMPLE_C_B : MIMG_Load_Helper <0x0000002d, "IMAGE_SAMPLE_C_B">;
+def IMAGE_SAMPLE_C_L : MIMG_Sampler_Helper <0x0000002c, "IMAGE_SAMPLE_C_L">;
+def IMAGE_SAMPLE_C_B : MIMG_Sampler_Helper <0x0000002d, "IMAGE_SAMPLE_C_B">;
 //def IMAGE_SAMPLE_C_B_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_CL", 0x0000002e>;
 //def IMAGE_SAMPLE_C_LZ : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_LZ", 0x0000002f>;
 //def IMAGE_SAMPLE_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_O", 0x00000030>;
@@ -594,12 +600,14 @@ defm V_READFIRSTLANE_B32 : VOP1_32 <0x00000002, "V_READFIRSTLANE_B32", []>;
 //defm V_CVT_I32_F64 : VOP1_32 <0x00000003, "V_CVT_I32_F64", []>;
 //defm V_CVT_F64_I32 : VOP1_64 <0x00000004, "V_CVT_F64_I32", []>;
 defm V_CVT_F32_I32 : VOP1_32 <0x00000005, "V_CVT_F32_I32",
-  [(set VReg_32:$dst, (sint_to_fp VSrc_32:$src0))]
+  [(set f32:$dst, (sint_to_fp i32:$src0))]
+>;
+defm V_CVT_F32_U32 : VOP1_32 <0x00000006, "V_CVT_F32_U32",
+  [(set f32:$dst, (uint_to_fp i32:$src0))]
 >;
-//defm V_CVT_F32_U32 : VOP1_32 <0x00000006, "V_CVT_F32_U32", []>;
-//defm V_CVT_U32_F32 : VOP1_32 <0x00000007, "V_CVT_U32_F32", []>;
+defm V_CVT_U32_F32 : VOP1_32 <0x00000007, "V_CVT_U32_F32", []>;
 defm V_CVT_I32_F32 : VOP1_32 <0x00000008, "V_CVT_I32_F32",
-  [(set (i32 VReg_32:$dst), (fp_to_sint VSrc_32:$src0))]
+  [(set i32:$dst, (fp_to_sint f32:$src0))]
 >;
 defm V_MOV_FED_B32 : VOP1_32 <0x00000009, "V_MOV_FED_B32", []>;
 ////def V_CVT_F16_F32 : VOP1_F16 <0x0000000a, "V_CVT_F16_F32", []>;
@@ -616,35 +624,37 @@ defm V_MOV_FED_B32 : VOP1_32 <0x00000009, "V_MOV_FED_B32", []>;
 //defm V_CVT_U32_F64 : VOP1_32 <0x00000015, "V_CVT_U32_F64", []>;
 //defm V_CVT_F64_U32 : VOP1_64 <0x00000016, "V_CVT_F64_U32", []>;
 defm V_FRACT_F32 : VOP1_32 <0x00000020, "V_FRACT_F32",
-  [(set VReg_32:$dst, (AMDGPUfract VSrc_32:$src0))]
+  [(set f32:$dst, (AMDGPUfract f32:$src0))]
+>;
+defm V_TRUNC_F32 : VOP1_32 <0x00000021, "V_TRUNC_F32",
+  [(set f32:$dst, (int_AMDGPU_trunc f32:$src0))]
 >;
-defm V_TRUNC_F32 : VOP1_32 <0x00000021, "V_TRUNC_F32", []>;
 defm V_CEIL_F32 : VOP1_32 <0x00000022, "V_CEIL_F32",
-  [(set VReg_32:$dst, (fceil VSrc_32:$src0))]
+  [(set f32:$dst, (fceil f32:$src0))]
 >;
 defm V_RNDNE_F32 : VOP1_32 <0x00000023, "V_RNDNE_F32",
-  [(set VReg_32:$dst, (frint VSrc_32:$src0))]
+  [(set f32:$dst, (frint f32:$src0))]
 >;
 defm V_FLOOR_F32 : VOP1_32 <0x00000024, "V_FLOOR_F32",
-  [(set VReg_32:$dst, (ffloor VSrc_32:$src0))]
+  [(set f32:$dst, (ffloor f32:$src0))]
 >;
 defm V_EXP_F32 : VOP1_32 <0x00000025, "V_EXP_F32",
-  [(set VReg_32:$dst, (fexp2 VSrc_32:$src0))]
+  [(set f32:$dst, (fexp2 f32:$src0))]
 >;
 defm V_LOG_CLAMP_F32 : VOP1_32 <0x00000026, "V_LOG_CLAMP_F32", []>;
 defm V_LOG_F32 : VOP1_32 <0x00000027, "V_LOG_F32",
-  [(set VReg_32:$dst, (flog2 VSrc_32:$src0))]
+  [(set f32:$dst, (flog2 f32:$src0))]
 >;
 defm V_RCP_CLAMP_F32 : VOP1_32 <0x00000028, "V_RCP_CLAMP_F32", []>;
 defm V_RCP_LEGACY_F32 : VOP1_32 <0x00000029, "V_RCP_LEGACY_F32", []>;
 defm V_RCP_F32 : VOP1_32 <0x0000002a, "V_RCP_F32",
-  [(set VReg_32:$dst, (fdiv FP_ONE, VSrc_32:$src0))]
+  [(set f32:$dst, (fdiv FP_ONE, f32:$src0))]
 >;
 defm V_RCP_IFLAG_F32 : VOP1_32 <0x0000002b, "V_RCP_IFLAG_F32", []>;
 defm V_RSQ_CLAMP_F32 : VOP1_32 <0x0000002c, "V_RSQ_CLAMP_F32", []>;
 defm V_RSQ_LEGACY_F32 : VOP1_32 <
   0x0000002d, "V_RSQ_LEGACY_F32",
-  [(set VReg_32:$dst, (int_AMDGPU_rsq VSrc_32:$src0))]
+  [(set f32:$dst, (int_AMDGPU_rsq f32:$src0))]
 >;
 defm V_RSQ_F32 : VOP1_32 <0x0000002e, "V_RSQ_F32", []>;
 defm V_RCP_F64 : VOP1_64 <0x0000002f, "V_RCP_F64", []>;
@@ -787,14 +797,13 @@ def V_CNDMASK_B32_e64 : VOP3 <0x00000100, (outs VReg_32:$dst),
   (ins VSrc_32:$src0, VSrc_32:$src1, SSrc_64:$src2,
    InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg),
   "V_CNDMASK_B32_e64 $dst, $src0, $src1, $src2, $abs, $clamp, $omod, $neg",
-  [(set (i32 VReg_32:$dst), (select (i1 SSrc_64:$src2),
-   VSrc_32:$src1, VSrc_32:$src0))]
+  [(set i32:$dst, (select i1:$src2, i32:$src1, i32:$src0))]
 >;
 
 //f32 pattern for V_CNDMASK_B32_e64
 def : Pat <
-  (f32 (select (i1 SSrc_64:$src2), VSrc_32:$src1, VSrc_32:$src0)),
-  (V_CNDMASK_B32_e64 VSrc_32:$src0, VSrc_32:$src1, SSrc_64:$src2)
+  (f32 (select i1:$src2, f32:$src1, f32:$src0)),
+  (V_CNDMASK_B32_e64 $src0, $src1, $src2)
 >;
 
 defm V_READLANE_B32 : VOP2_32 <0x00000001, "V_READLANE_B32", []>;
@@ -802,11 +811,11 @@ defm V_WRITELANE_B32 : VOP2_32 <0x00000002, "V_WRITELANE_B32", []>;
 
 let isCommutable = 1 in {
 defm V_ADD_F32 : VOP2_32 <0x00000003, "V_ADD_F32",
-  [(set VReg_32:$dst, (fadd VSrc_32:$src0, VReg_32:$src1))]
+  [(set f32:$dst, (fadd f32:$src0, f32:$src1))]
 >;
 
 defm V_SUB_F32 : VOP2_32 <0x00000004, "V_SUB_F32",
-  [(set VReg_32:$dst, (fsub VSrc_32:$src0, VReg_32:$src1))]
+  [(set f32:$dst, (fsub f32:$src0, f32:$src1))]
 >;
 defm V_SUBREV_F32 : VOP2_32 <0x00000005, "V_SUBREV_F32", [], "V_SUB_F32">;
 } // End isCommutable = 1
@@ -817,11 +826,11 @@ let isCommutable = 1 in {
 
 defm V_MUL_LEGACY_F32 : VOP2_32 <
   0x00000007, "V_MUL_LEGACY_F32",
-  [(set VReg_32:$dst, (int_AMDGPU_mul VSrc_32:$src0, VReg_32:$src1))]
+  [(set f32:$dst, (int_AMDGPU_mul f32:$src0, f32:$src1))]
 >;
 
 defm V_MUL_F32 : VOP2_32 <0x00000008, "V_MUL_F32",
-  [(set VReg_32:$dst, (fmul VSrc_32:$src0, VReg_32:$src1))]
+  [(set f32:$dst, (fmul f32:$src0, f32:$src1))]
 >;
 
 } // End isCommutable = 1
@@ -834,43 +843,51 @@ defm V_MUL_F32 : VOP2_32 <0x00000008, "V_MUL_F32",
 let isCommutable = 1 in {
 
 defm V_MIN_LEGACY_F32 : VOP2_32 <0x0000000d, "V_MIN_LEGACY_F32",
-  [(set VReg_32:$dst, (AMDGPUfmin VSrc_32:$src0, VReg_32:$src1))]
+  [(set f32:$dst, (AMDGPUfmin f32:$src0, f32:$src1))]
 >;
 
 defm V_MAX_LEGACY_F32 : VOP2_32 <0x0000000e, "V_MAX_LEGACY_F32",
-  [(set VReg_32:$dst, (AMDGPUfmax VSrc_32:$src0, VReg_32:$src1))]
+  [(set f32:$dst, (AMDGPUfmax f32:$src0, f32:$src1))]
 >;
 
 defm V_MIN_F32 : VOP2_32 <0x0000000f, "V_MIN_F32", []>;
 defm V_MAX_F32 : VOP2_32 <0x00000010, "V_MAX_F32", []>;
-defm V_MIN_I32 : VOP2_32 <0x00000011, "V_MIN_I32", []>;
-defm V_MAX_I32 : VOP2_32 <0x00000012, "V_MAX_I32", []>;
-defm V_MIN_U32 : VOP2_32 <0x00000013, "V_MIN_U32", []>;
-defm V_MAX_U32 : VOP2_32 <0x00000014, "V_MAX_U32", []>;
+defm V_MIN_I32 : VOP2_32 <0x00000011, "V_MIN_I32",
+  [(set i32:$dst, (AMDGPUsmin i32:$src0, i32:$src1))]
+>;
+defm V_MAX_I32 : VOP2_32 <0x00000012, "V_MAX_I32",
+  [(set i32:$dst, (AMDGPUsmax i32:$src0, i32:$src1))]
+>;
+defm V_MIN_U32 : VOP2_32 <0x00000013, "V_MIN_U32",
+  [(set i32:$dst, (AMDGPUumin i32:$src0, i32:$src1))]
+>;
+defm V_MAX_U32 : VOP2_32 <0x00000014, "V_MAX_U32",
+  [(set i32:$dst, (AMDGPUumax i32:$src0, i32:$src1))]
+>;
 
 defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32",
-  [(set VReg_32:$dst, (srl VSrc_32:$src0, (i32 VReg_32:$src1)))]
+  [(set i32:$dst, (srl i32:$src0, i32:$src1))]
 >;
 defm V_LSHRREV_B32 : VOP2_32 <0x00000016, "V_LSHRREV_B32", [], "V_LSHR_B32">;
 
 defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32",
-  [(set VReg_32:$dst, (sra VSrc_32:$src0, (i32 VReg_32:$src1)))]
+  [(set i32:$dst, (sra i32:$src0, i32:$src1))]
 >;
 defm V_ASHRREV_I32 : VOP2_32 <0x00000018, "V_ASHRREV_I32", [], "V_ASHR_I32">;
 
 defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32",
-  [(set VReg_32:$dst, (shl VSrc_32:$src0, (i32 VReg_32:$src1)))]
+  [(set i32:$dst, (shl i32:$src0, i32:$src1))]
 >;
 defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", [], "V_LSHL_B32">;
 
 defm V_AND_B32 : VOP2_32 <0x0000001b, "V_AND_B32",
-  [(set VReg_32:$dst, (and VSrc_32:$src0, VReg_32:$src1))]
+  [(set i32:$dst, (and i32:$src0, i32:$src1))]
 >;
 defm V_OR_B32 : VOP2_32 <0x0000001c, "V_OR_B32",
-  [(set VReg_32:$dst, (or VSrc_32:$src0, VReg_32:$src1))]
+  [(set i32:$dst, (or i32:$src0, i32:$src1))]
 >;
 defm V_XOR_B32 : VOP2_32 <0x0000001d, "V_XOR_B32",
-  [(set VReg_32:$dst, (xor VSrc_32:$src0, VReg_32:$src1))]
+  [(set i32:$dst, (xor i32:$src0, i32:$src1))]
 >;
 
 } // End isCommutable = 1
@@ -885,11 +902,11 @@ defm V_MADAK_F32 : VOP2_32 <0x00000021, "V_MADAK_F32", []>;
 
 let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC
 defm V_ADD_I32 : VOP2b_32 <0x00000025, "V_ADD_I32",
-  [(set VReg_32:$dst, (add (i32 VSrc_32:$src0), (i32 VReg_32:$src1)))]
+  [(set i32:$dst, (add (i32 VSrc_32:$src0), (i32 VReg_32:$src1)))]
 >;
 
 defm V_SUB_I32 : VOP2b_32 <0x00000026, "V_SUB_I32",
-  [(set VReg_32:$dst, (sub (i32 VSrc_32:$src0), (i32 VReg_32:$src1)))]
+  [(set i32:$dst, (sub i32:$src0, i32:$src1))]
 >;
 defm V_SUBREV_I32 : VOP2b_32 <0x00000027, "V_SUBREV_I32", [], "V_SUB_I32">;
 
@@ -905,7 +922,7 @@ defm V_LDEXP_F32 : VOP2_32 <0x0000002b, "V_LDEXP_F32", []>;
 ////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "V_CVT_PKNORM_I16_F32", []>;
 ////def V_CVT_PKNORM_U16_F32 : VOP2_U16 <0x0000002e, "V_CVT_PKNORM_U16_F32", []>;
 defm V_CVT_PKRTZ_F16_F32 : VOP2_32 <0x0000002f, "V_CVT_PKRTZ_F16_F32",
- [(set VReg_32:$dst, (int_SI_packf16 VSrc_32:$src0, VReg_32:$src1))]
+ [(set i32:$dst, (int_SI_packf16 f32:$src0, f32:$src1))]
 >;
 ////def V_CVT_PK_U16_U32 : VOP2_U16 <0x00000030, "V_CVT_PK_U16_U32", []>;
 ////def V_CVT_PK_I16_I32 : VOP2_I16 <0x00000031, "V_CVT_PK_I16_I32", []>;
@@ -942,6 +959,7 @@ def V_CUBEMA_F32 : VOP3_32 <0x00000147, "V_CUBEMA_F32", []>;
 def V_BFE_U32 : VOP3_32 <0x00000148, "V_BFE_U32", []>;
 def V_BFE_I32 : VOP3_32 <0x00000149, "V_BFE_I32", []>;
 def V_BFI_B32 : VOP3_32 <0x0000014a, "V_BFI_B32", []>;
+defm : BFIPatterns <V_BFI_B32>;
 def V_FMA_F32 : VOP3_32 <0x0000014b, "V_FMA_F32", []>;
 def V_FMA_F64 : VOP3_64 <0x0000014c, "V_FMA_F64", []>;
 //def V_LERP_U8 : VOP3_U8 <0x0000014d, "V_LERP_U8", []>;
@@ -983,18 +1001,18 @@ def V_MUL_HI_I32 : VOP3_32 <0x0000016c, "V_MUL_HI_I32", []>;
 } // isCommutable = 1
 
 def : Pat <
-  (mul VSrc_32:$src0, VReg_32:$src1),
-  (V_MUL_LO_I32 VSrc_32:$src0, VReg_32:$src1, (i32 0), 0, 0, 0, 0)
+  (mul i32:$src0, i32:$src1),
+  (V_MUL_LO_I32 $src0, $src1, (i32 0))
 >;
 
 def : Pat <
-  (mulhu VSrc_32:$src0, VReg_32:$src1),
-  (V_MUL_HI_U32 VSrc_32:$src0, VReg_32:$src1, (i32 0), 0, 0, 0, 0)
+  (mulhu i32:$src0, i32:$src1),
+  (V_MUL_HI_U32 $src0, $src1, (i32 0))
 >;
 
 def : Pat <
-  (mulhs VSrc_32:$src0, VReg_32:$src1),
-  (V_MUL_HI_I32 VSrc_32:$src0, VReg_32:$src1, (i32 0), 0, 0, 0, 0)
+  (mulhs i32:$src0, i32:$src1),
+  (V_MUL_HI_I32 $src0, $src1, (i32 0))
 >;
 
 def V_DIV_SCALE_F32 : VOP3_32 <0x0000016d, "V_DIV_SCALE_F32", []>;
@@ -1019,34 +1037,27 @@ def S_MAX_U32 : SOP2_32 <0x00000009, "S_MAX_U32", []>;
 def S_CSELECT_B32 : SOP2 <
   0x0000000a, (outs SReg_32:$dst),
   (ins SReg_32:$src0, SReg_32:$src1, SCCReg:$scc), "S_CSELECT_B32",
-  [(set (i32 SReg_32:$dst), (select (i1 SCCReg:$scc),
-                                     SReg_32:$src0, SReg_32:$src1))]
+  []
 >;
 
 def S_CSELECT_B64 : SOP2_64 <0x0000000b, "S_CSELECT_B64", []>;
 
-// f32 pattern for S_CSELECT_B32
-def : Pat <
-  (f32 (select (i1 SCCReg:$scc), SReg_32:$src0, SReg_32:$src1)),
-  (S_CSELECT_B32 SReg_32:$src0, SReg_32:$src1, SCCReg:$scc)
->;
-
 def S_AND_B32 : SOP2_32 <0x0000000e, "S_AND_B32", []>;
 
 def S_AND_B64 : SOP2_64 <0x0000000f, "S_AND_B64",
-  [(set SReg_64:$dst, (i64 (and SSrc_64:$src0, SSrc_64:$src1)))]
+  [(set i64:$dst, (and i64:$src0, i64:$src1))]
 >;
 
 def : Pat <
-  (i1 (and SSrc_64:$src0, SSrc_64:$src1)),
-  (S_AND_B64 SSrc_64:$src0, SSrc_64:$src1)
+  (i1 (and i1:$src0, i1:$src1)),
+  (S_AND_B64 $src0, $src1)
 >;
 
 def S_OR_B32 : SOP2_32 <0x00000010, "S_OR_B32", []>;
 def S_OR_B64 : SOP2_64 <0x00000011, "S_OR_B64", []>;
 def : Pat <
-  (i1 (or SSrc_64:$src0, SSrc_64:$src1)),
-  (S_OR_B64 SSrc_64:$src0, SSrc_64:$src1)
+  (i1 (or i1:$src0, i1:$src1)),
+  (S_OR_B64 $src0, $src1)
 >;
 def S_XOR_B32 : SOP2_32 <0x00000012, "S_XOR_B32", []>;
 def S_XOR_B64 : SOP2_64 <0x00000013, "S_XOR_B64", []>;
@@ -1097,14 +1108,14 @@ def SI_IF : InstSI <
   (outs SReg_64:$dst),
   (ins SReg_64:$vcc, brtarget:$target),
   "SI_IF $dst, $vcc, $target",
-  [(set SReg_64:$dst, (int_SI_if SReg_64:$vcc, bb:$target))]
+  [(set i64:$dst, (int_SI_if i1:$vcc, bb:$target))]
 >;
 
 def SI_ELSE : InstSI <
   (outs SReg_64:$dst),
   (ins SReg_64:$src, brtarget:$target),
   "SI_ELSE $dst, $src, $target",
-  [(set SReg_64:$dst, (int_SI_else SReg_64:$src, bb:$target))]> {
+  [(set i64:$dst, (int_SI_else i64:$src, bb:$target))]> {
 
   let Constraints = "$src = $dst";
 }
@@ -1113,7 +1124,7 @@ def SI_LOOP : InstSI <
   (outs),
   (ins SReg_64:$saved, brtarget:$target),
   "SI_LOOP $saved, $target",
-  [(int_SI_loop SReg_64:$saved, bb:$target)]
+  [(int_SI_loop i64:$saved, bb:$target)]
 >;
 
 } // end isBranch = 1, isTerminator = 1
@@ -1122,35 +1133,35 @@ def SI_BREAK : InstSI <
   (outs SReg_64:$dst),
   (ins SReg_64:$src),
   "SI_ELSE $dst, $src",
-  [(set SReg_64:$dst, (int_SI_break SReg_64:$src))]
+  [(set i64:$dst, (int_SI_break i64:$src))]
 >;
 
 def SI_IF_BREAK : InstSI <
   (outs SReg_64:$dst),
   (ins SReg_64:$vcc, SReg_64:$src),
   "SI_IF_BREAK $dst, $vcc, $src",
-  [(set SReg_64:$dst, (int_SI_if_break SReg_64:$vcc, SReg_64:$src))]
+  [(set i64:$dst, (int_SI_if_break i1:$vcc, i64:$src))]
 >;
 
 def SI_ELSE_BREAK : InstSI <
   (outs SReg_64:$dst),
   (ins SReg_64:$src0, SReg_64:$src1),
   "SI_ELSE_BREAK $dst, $src0, $src1",
-  [(set SReg_64:$dst, (int_SI_else_break SReg_64:$src0, SReg_64:$src1))]
+  [(set i64:$dst, (int_SI_else_break i64:$src0, i64:$src1))]
 >;
 
 def SI_END_CF : InstSI <
   (outs),
   (ins SReg_64:$saved),
   "SI_END_CF $saved",
-  [(int_SI_end_cf SReg_64:$saved)]
+  [(int_SI_end_cf i64:$saved)]
 >;
 
 def SI_KILL : InstSI <
   (outs),
   (ins VReg_32:$src),
   "SI_KIL $src",
-  [(int_AMDGPU_kill VReg_32:$src)]
+  [(int_AMDGPU_kill f32:$src)]
 >;
 
 } // end mayLoad = 1, mayStore = 1, hasSideEffects = 1
@@ -1184,8 +1195,8 @@ def SI_INDIRECT_DST_V16 : SI_INDIRECT_DST<VReg_512>;
 } // end IsCodeGenOnly, isPseudo
 
 def : Pat<
-  (int_AMDGPU_cndlt VReg_32:$src0, VReg_32:$src1, VReg_32:$src2),
-  (V_CNDMASK_B32_e64 VReg_32:$src2, VReg_32:$src1, (V_CMP_GT_F32_e64 0, VReg_32:$src0))
+  (int_AMDGPU_cndlt f32:$src0, f32:$src1, f32:$src2),
+  (V_CNDMASK_B32_e64 $src2, $src1, (V_CMP_GT_F32_e64 0, $src0))
 >;
 
 def : Pat <
@@ -1195,93 +1206,110 @@ def : Pat <
 
 /* int_SI_vs_load_input */
 def : Pat<
-  (int_SI_vs_load_input SReg_128:$tlst, IMM12bit:$attr_offset,
-                        VReg_32:$buf_idx_vgpr),
+  (int_SI_vs_load_input v16i8:$tlst, IMM12bit:$attr_offset,
+                        i32:$buf_idx_vgpr),
   (BUFFER_LOAD_FORMAT_XYZW imm:$attr_offset, 0, 1, 0, 0, 0,
-                           VReg_32:$buf_idx_vgpr, SReg_128:$tlst,
-                           0, 0, 0)
+                           $buf_idx_vgpr, $tlst, 0, 0, 0)
 >;
 
 /* int_SI_export */
 def : Pat <
   (int_SI_export imm:$en, imm:$vm, imm:$done, imm:$tgt, imm:$compr,
-                 VReg_32:$src0,VReg_32:$src1, VReg_32:$src2, VReg_32:$src3),
+                 f32:$src0, f32:$src1, f32:$src2, f32:$src3),
   (EXP imm:$en, imm:$tgt, imm:$compr, imm:$done, imm:$vm,
-       VReg_32:$src0, VReg_32:$src1, VReg_32:$src2, VReg_32:$src3)
+       $src0, $src1, $src2, $src3)
 >;
 
+/********** ======================= **********/
+/********** Image sampling patterns **********/
+/********** ======================= **********/
 
 /* int_SI_sample for simple 1D texture lookup */
 def : Pat <
-  (int_SI_sample imm:$writemask, VReg_32:$addr,
-                 SReg_256:$rsrc, SReg_128:$sampler, imm),
-  (IMAGE_SAMPLE imm:$writemask, 0, 0, 0, 0, 0, 0, 0, VReg_32:$addr,
-                SReg_256:$rsrc, SReg_128:$sampler)
+  (int_SI_sample v1i32:$addr, v32i8:$rsrc, v16i8:$sampler, imm),
+  (IMAGE_SAMPLE 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
 >;
 
-class SamplePattern<Intrinsic name, MIMG opcode, RegisterClass addr_class,
-                    ValueType addr_type> : Pat <
-    (name imm:$writemask, (addr_type addr_class:$addr),
-          SReg_256:$rsrc, SReg_128:$sampler, imm),
-    (opcode imm:$writemask, 0, 0, 0, 0, 0, 0, 0, addr_class:$addr,
-          SReg_256:$rsrc, SReg_128:$sampler)
+class SamplePattern<Intrinsic name, MIMG opcode, ValueType vt> : Pat <
+    (name vt:$addr, v32i8:$rsrc, v16i8:$sampler, imm),
+    (opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
 >;
 
-class SampleRectPattern<Intrinsic name, MIMG opcode, RegisterClass addr_class,
-                        ValueType addr_type> : Pat <
-    (name imm:$writemask, (addr_type addr_class:$addr),
-          SReg_256:$rsrc, SReg_128:$sampler, TEX_RECT),
-    (opcode imm:$writemask, 1, 0, 0, 0, 0, 0, 0, addr_class:$addr,
-          SReg_256:$rsrc, SReg_128:$sampler)
+class SampleRectPattern<Intrinsic name, MIMG opcode, ValueType vt> : Pat <
+    (name vt:$addr, v32i8:$rsrc, v16i8:$sampler, TEX_RECT),
+    (opcode 0xf, 1, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
 >;
 
-class SampleArrayPattern<Intrinsic name, MIMG opcode, RegisterClass addr_class,
-                         ValueType addr_type> : Pat <
-    (name imm:$writemask, (addr_type addr_class:$addr),
-          SReg_256:$rsrc, SReg_128:$sampler, TEX_ARRAY),
-    (opcode imm:$writemask, 0, 0, 1, 0, 0, 0, 0, addr_class:$addr,
-          SReg_256:$rsrc, SReg_128:$sampler)
+class SampleArrayPattern<Intrinsic name, MIMG opcode, ValueType vt> : Pat <
+    (name vt:$addr, v32i8:$rsrc, v16i8:$sampler, TEX_ARRAY),
+    (opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc, $sampler)
 >;
 
 class SampleShadowPattern<Intrinsic name, MIMG opcode,
-                          RegisterClass addr_class, ValueType addr_type> : Pat <
-    (name imm:$writemask, (addr_type addr_class:$addr),
-          SReg_256:$rsrc, SReg_128:$sampler, TEX_SHADOW),
-    (opcode imm:$writemask, 0, 0, 0, 0, 0, 0, 0, addr_class:$addr,
-          SReg_256:$rsrc, SReg_128:$sampler)
+                          ValueType vt> : Pat <
+    (name vt:$addr, v32i8:$rsrc, v16i8:$sampler, TEX_SHADOW),
+    (opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
 >;
 
 class SampleShadowArrayPattern<Intrinsic name, MIMG opcode,
-                               RegisterClass addr_class, ValueType addr_type> : Pat <
-    (name imm:$writemask, (addr_type addr_class:$addr),
-          SReg_256:$rsrc, SReg_128:$sampler, TEX_SHADOW_ARRAY),
-    (opcode imm:$writemask, 0, 0, 1, 0, 0, 0, 0, addr_class:$addr,
-          SReg_256:$rsrc, SReg_128:$sampler)
+                               ValueType vt> : Pat <
+    (name vt:$addr, v32i8:$rsrc, v16i8:$sampler, TEX_SHADOW_ARRAY),
+    (opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc, $sampler)
 >;
 
 /* int_SI_sample* for texture lookups consuming more address parameters */
-multiclass SamplePatterns<RegisterClass addr_class, ValueType addr_type> {
-  def : SamplePattern <int_SI_sample, IMAGE_SAMPLE, addr_class, addr_type>;
-  def : SampleRectPattern <int_SI_sample, IMAGE_SAMPLE, addr_class, addr_type>;
-  def : SampleArrayPattern <int_SI_sample, IMAGE_SAMPLE, addr_class, addr_type>;
-  def : SampleShadowPattern <int_SI_sample, IMAGE_SAMPLE_C, addr_class, addr_type>;
-  def : SampleShadowArrayPattern <int_SI_sample, IMAGE_SAMPLE_C, addr_class, addr_type>;
-
-  def : SamplePattern <int_SI_samplel, IMAGE_SAMPLE_L, addr_class, addr_type>;
-  def : SampleArrayPattern <int_SI_samplel, IMAGE_SAMPLE_L, addr_class, addr_type>;
-  def : SampleShadowPattern <int_SI_samplel, IMAGE_SAMPLE_C_L, addr_class, addr_type>;
-  def : SampleShadowArrayPattern <int_SI_samplel, IMAGE_SAMPLE_C_L, addr_class, addr_type>;
-
-  def : SamplePattern <int_SI_sampleb, IMAGE_SAMPLE_B, addr_class, addr_type>;
-  def : SampleArrayPattern <int_SI_sampleb, IMAGE_SAMPLE_B, addr_class, addr_type>;
-  def : SampleShadowPattern <int_SI_sampleb, IMAGE_SAMPLE_C_B, addr_class, addr_type>;
-  def : SampleShadowArrayPattern <int_SI_sampleb, IMAGE_SAMPLE_C_B, addr_class, addr_type>;
+multiclass SamplePatterns<ValueType addr_type> {
+  def : SamplePattern <int_SI_sample, IMAGE_SAMPLE, addr_type>;
+  def : SampleRectPattern <int_SI_sample, IMAGE_SAMPLE, addr_type>;
+  def : SampleArrayPattern <int_SI_sample, IMAGE_SAMPLE, addr_type>;
+  def : SampleShadowPattern <int_SI_sample, IMAGE_SAMPLE_C, addr_type>;
+  def : SampleShadowArrayPattern <int_SI_sample, IMAGE_SAMPLE_C, addr_type>;
+
+  def : SamplePattern <int_SI_samplel, IMAGE_SAMPLE_L, addr_type>;
+  def : SampleArrayPattern <int_SI_samplel, IMAGE_SAMPLE_L, addr_type>;
+  def : SampleShadowPattern <int_SI_samplel, IMAGE_SAMPLE_C_L, addr_type>;
+  def : SampleShadowArrayPattern <int_SI_samplel, IMAGE_SAMPLE_C_L, addr_type>;
+
+  def : SamplePattern <int_SI_sampleb, IMAGE_SAMPLE_B, addr_type>;
+  def : SampleArrayPattern <int_SI_sampleb, IMAGE_SAMPLE_B, addr_type>;
+  def : SampleShadowPattern <int_SI_sampleb, IMAGE_SAMPLE_C_B, addr_type>;
+  def : SampleShadowArrayPattern <int_SI_sampleb, IMAGE_SAMPLE_C_B, addr_type>;
+}
+
+defm : SamplePatterns<v2i32>;
+defm : SamplePatterns<v4i32>;
+defm : SamplePatterns<v8i32>;
+defm : SamplePatterns<v16i32>;
+
+/* int_SI_imageload for texture fetches consuming varying address parameters */
+class ImageLoadPattern<Intrinsic name, MIMG opcode, ValueType addr_type> : Pat <
+    (name addr_type:$addr, v32i8:$rsrc, imm),
+    (opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc)
+>;
+
+class ImageLoadArrayPattern<Intrinsic name, MIMG opcode, ValueType addr_type> : Pat <
+    (name addr_type:$addr, v32i8:$rsrc, TEX_ARRAY),
+    (opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc)
+>;
+
+multiclass ImageLoadPatterns<ValueType addr_type> {
+  def : ImageLoadPattern <int_SI_imageload, IMAGE_LOAD_MIP, addr_type>;
+  def : ImageLoadArrayPattern <int_SI_imageload, IMAGE_LOAD_MIP, addr_type>;
 }
 
-defm : SamplePatterns<VReg_64, v2i32>;
-defm : SamplePatterns<VReg_128, v4i32>;
-defm : SamplePatterns<VReg_256, v8i32>;
-defm : SamplePatterns<VReg_512, v16i32>;
+defm : ImageLoadPatterns<v2i32>;
+defm : ImageLoadPatterns<v4i32>;
+
+/* Image resource information */
+def : Pat <
+  (int_SI_resinfo i32:$mipid, v32i8:$rsrc, imm),
+  (IMAGE_GET_RESINFO 0xf, 0, 0, 0, 0, 0, 0, 0, (V_MOV_B32_e32 $mipid), $rsrc)
+>;
+
+def : Pat <
+  (int_SI_resinfo i32:$mipid, v32i8:$rsrc, TEX_ARRAY),
+  (IMAGE_GET_RESINFO 0xf, 0, 0, 1, 0, 0, 0, 0, (V_MOV_B32_e32 $mipid), $rsrc)
+>;
 
 /********** ============================================ **********/
 /********** Extraction, Insertion, Building and Casting  **********/
@@ -1289,77 +1317,77 @@ defm : SamplePatterns<VReg_512, v16i32>;
 
 foreach Index = 0-2 in {
   def Extract_Element_v2i32_#Index : Extract_Element <
-    i32, v2i32, VReg_64, Index, !cast<SubRegIndex>(sub#Index)
+    i32, v2i32, Index, !cast<SubRegIndex>(sub#Index)
   >;
   def Insert_Element_v2i32_#Index : Insert_Element <
-    i32, v2i32, VReg_32, VReg_64, Index, !cast<SubRegIndex>(sub#Index)
+    i32, v2i32, Index, !cast<SubRegIndex>(sub#Index)
   >;
 
   def Extract_Element_v2f32_#Index : Extract_Element <
-    f32, v2f32, VReg_64, Index, !cast<SubRegIndex>(sub#Index)
+    f32, v2f32, Index, !cast<SubRegIndex>(sub#Index)
   >;
   def Insert_Element_v2f32_#Index : Insert_Element <
-    f32, v2f32, VReg_32, VReg_64, Index, !cast<SubRegIndex>(sub#Index)
+    f32, v2f32, Index, !cast<SubRegIndex>(sub#Index)
   >;
 }
 
 foreach Index = 0-3 in {
   def Extract_Element_v4i32_#Index : Extract_Element <
-    i32, v4i32, VReg_128, Index, !cast<SubRegIndex>(sub#Index)
+    i32, v4i32, Index, !cast<SubRegIndex>(sub#Index)
   >;
   def Insert_Element_v4i32_#Index : Insert_Element <
-    i32, v4i32, VReg_32, VReg_128, Index, !cast<SubRegIndex>(sub#Index)
+    i32, v4i32, Index, !cast<SubRegIndex>(sub#Index)
   >;
 
   def Extract_Element_v4f32_#Index : Extract_Element <
-    f32, v4f32, VReg_128, Index, !cast<SubRegIndex>(sub#Index)
+    f32, v4f32, Index, !cast<SubRegIndex>(sub#Index)
   >;
   def Insert_Element_v4f32_#Index : Insert_Element <
-    f32, v4f32, VReg_32, VReg_128, Index, !cast<SubRegIndex>(sub#Index)
+    f32, v4f32, Index, !cast<SubRegIndex>(sub#Index)
   >;
 }
 
 foreach Index = 0-7 in {
   def Extract_Element_v8i32_#Index : Extract_Element <
-    i32, v8i32, VReg_256, Index, !cast<SubRegIndex>(sub#Index)
+    i32, v8i32, Index, !cast<SubRegIndex>(sub#Index)
   >;
   def Insert_Element_v8i32_#Index : Insert_Element <
-    i32, v8i32, VReg_32, VReg_256, Index, !cast<SubRegIndex>(sub#Index)
+    i32, v8i32, Index, !cast<SubRegIndex>(sub#Index)
   >;
 
   def Extract_Element_v8f32_#Index : Extract_Element <
-    f32, v8f32, VReg_256, Index, !cast<SubRegIndex>(sub#Index)
+    f32, v8f32, Index, !cast<SubRegIndex>(sub#Index)
   >;
   def Insert_Element_v8f32_#Index : Insert_Element <
-    f32, v8f32, VReg_32, VReg_256, Index, !cast<SubRegIndex>(sub#Index)
+    f32, v8f32, Index, !cast<SubRegIndex>(sub#Index)
   >;
 }
 
 foreach Index = 0-15 in {
   def Extract_Element_v16i32_#Index : Extract_Element <
-    i32, v16i32, VReg_512, Index, !cast<SubRegIndex>(sub#Index)
+    i32, v16i32, Index, !cast<SubRegIndex>(sub#Index)
   >;
   def Insert_Element_v16i32_#Index : Insert_Element <
-    i32, v16i32, VReg_32, VReg_512, Index, !cast<SubRegIndex>(sub#Index)
+    i32, v16i32, Index, !cast<SubRegIndex>(sub#Index)
   >;
 
   def Extract_Element_v16f32_#Index : Extract_Element <
-    f32, v16f32, VReg_512, Index, !cast<SubRegIndex>(sub#Index)
+    f32, v16f32, Index, !cast<SubRegIndex>(sub#Index)
   >;
   def Insert_Element_v16f32_#Index : Insert_Element <
-    f32, v16f32, VReg_32, VReg_512, Index, !cast<SubRegIndex>(sub#Index)
+    f32, v16f32, Index, !cast<SubRegIndex>(sub#Index)
   >;
 }
 
-def : Vector1_Build <v1i32, VReg_32, i32, VReg_32>;
-def : Vector2_Build <v2i32, VReg_64, i32, VReg_32>;
-def : Vector2_Build <v2f32, VReg_64, f32, VReg_32>;
-def : Vector4_Build <v4i32, VReg_128, i32, VReg_32>;
-def : Vector4_Build <v4f32, VReg_128, f32, VReg_32>;
-def : Vector8_Build <v8i32, VReg_256, i32, VReg_32>;
-def : Vector8_Build <v8f32, VReg_256, f32, VReg_32>;
-def : Vector16_Build <v16i32, VReg_512, i32, VReg_32>;
-def : Vector16_Build <v16f32, VReg_512, f32, VReg_32>;
+def : Vector1_Build <v1i32, i32, VReg_32>;
+def : Vector2_Build <v2i32, i32>;
+def : Vector2_Build <v2f32, f32>;
+def : Vector4_Build <v4i32, i32>;
+def : Vector4_Build <v4f32, f32>;
+def : Vector8_Build <v8i32, i32>;
+def : Vector8_Build <v8f32, f32>;
+def : Vector16_Build <v16i32, i32>;
+def : Vector16_Build <v16f32, f32>;
 
 def : BitConvert <i32, f32, SReg_32>;
 def : BitConvert <i32, f32, VReg_32>;
@@ -1372,20 +1400,20 @@ def : BitConvert <f32, i32, VReg_32>;
 /********** =================== **********/
 
 def : Pat <
-  (int_AMDIL_clamp VReg_32:$src, (f32 FP_ZERO), (f32 FP_ONE)),
-  (V_ADD_F32_e64 VReg_32:$src, (i32 0 /* SRC1 */),
+  (int_AMDIL_clamp f32:$src, (f32 FP_ZERO), (f32 FP_ONE)),
+  (V_ADD_F32_e64 $src, (i32 0 /* SRC1 */),
    0 /* ABS */, 1 /* CLAMP */, 0 /* OMOD */, 0 /* NEG */)
 >;
 
 def : Pat <
-  (fabs VReg_32:$src),
-  (V_ADD_F32_e64 VReg_32:$src, (i32 0 /* SRC1 */),
+  (fabs f32:$src),
+  (V_ADD_F32_e64 $src, (i32 0 /* SRC1 */),
    1 /* ABS */, 0 /* CLAMP */, 0 /* OMOD */, 0 /* NEG */)
 >;
 
 def : Pat <
-  (fneg VReg_32:$src),
-  (V_ADD_F32_e64 VReg_32:$src, (i32 0 /* SRC1 */),
+  (fneg f32:$src),
+  (V_ADD_F32_e64 $src, (i32 0 /* SRC1 */),
    0 /* ABS */, 0 /* CLAMP */, 0 /* OMOD */, 1 /* NEG */)
 >;
 
@@ -1426,16 +1454,16 @@ def : Pat <
 /********** ===================== **********/
 
 def : Pat <
-  (int_SI_fs_constant imm:$attr_chan, imm:$attr, M0Reg:$params),
-  (V_INTERP_MOV_F32 INTERP.P0, imm:$attr_chan, imm:$attr, M0Reg:$params)
+  (int_SI_fs_constant imm:$attr_chan, imm:$attr, i32:$params),
+  (V_INTERP_MOV_F32 INTERP.P0, imm:$attr_chan, imm:$attr, $params)
 >;
 
 def : Pat <
-  (int_SI_fs_interp imm:$attr_chan, imm:$attr, M0Reg:$params, VReg_64:$ij),
-  (V_INTERP_P2_F32 (V_INTERP_P1_F32 (EXTRACT_SUBREG VReg_64:$ij, sub0),
-                                    imm:$attr_chan, imm:$attr, M0Reg:$params),
-                   (EXTRACT_SUBREG VReg_64:$ij, sub1),
-                   imm:$attr_chan, imm:$attr, M0Reg:$params)
+  (int_SI_fs_interp imm:$attr_chan, imm:$attr, M0Reg:$params, v2i32:$ij),
+  (V_INTERP_P2_F32 (V_INTERP_P1_F32 (EXTRACT_SUBREG v2i32:$ij, sub0),
+                                    imm:$attr_chan, imm:$attr, i32:$params),
+                   (EXTRACT_SUBREG $ij, sub1),
+                   imm:$attr_chan, imm:$attr, $params)
 >;
 
 /********** ================== **********/
@@ -1443,101 +1471,111 @@ def : Pat <
 /********** ================== **********/
 
 /* llvm.AMDGPU.pow */
-def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_LEGACY_F32_e32, VReg_32>;
+def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_LEGACY_F32_e32>;
 
 def : Pat <
-  (int_AMDGPU_div VSrc_32:$src0, VSrc_32:$src1),
-  (V_MUL_LEGACY_F32_e32 VSrc_32:$src0, (V_RCP_LEGACY_F32_e32 VSrc_32:$src1))
+  (int_AMDGPU_div f32:$src0, f32:$src1),
+  (V_MUL_LEGACY_F32_e32 $src0, (V_RCP_LEGACY_F32_e32 $src1))
 >;
 
 def : Pat<
-  (fdiv VSrc_32:$src0, VSrc_32:$src1),
-  (V_MUL_F32_e32 VSrc_32:$src0, (V_RCP_F32_e32 VSrc_32:$src1))
+  (fdiv f32:$src0, f32:$src1),
+  (V_MUL_F32_e32 $src0, (V_RCP_F32_e32 $src1))
 >;
 
 def : Pat <
-  (fcos VSrc_32:$src0),
-  (V_COS_F32_e32 (V_MUL_F32_e32 VSrc_32:$src0, (V_MOV_B32_e32 CONST.TWO_PI_INV)))
+  (fcos f32:$src0),
+  (V_COS_F32_e32 (V_MUL_F32_e32 $src0, (V_MOV_B32_e32 CONST.TWO_PI_INV)))
 >;
 
 def : Pat <
-  (fsin VSrc_32:$src0),
-  (V_SIN_F32_e32 (V_MUL_F32_e32 VSrc_32:$src0, (V_MOV_B32_e32 CONST.TWO_PI_INV)))
+  (fsin f32:$src0),
+  (V_SIN_F32_e32 (V_MUL_F32_e32 $src0, (V_MOV_B32_e32 CONST.TWO_PI_INV)))
 >;
 
 def : Pat <
-  (int_AMDGPU_cube VReg_128:$src),
+  (int_AMDGPU_cube v4f32:$src),
   (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
-    (V_CUBETC_F32 (EXTRACT_SUBREG VReg_128:$src, sub0),
-                  (EXTRACT_SUBREG VReg_128:$src, sub1),
-                  (EXTRACT_SUBREG VReg_128:$src, sub2),
-                  0, 0, 0, 0), sub0),
-    (V_CUBESC_F32 (EXTRACT_SUBREG VReg_128:$src, sub0),
-                  (EXTRACT_SUBREG VReg_128:$src, sub1),
-                  (EXTRACT_SUBREG VReg_128:$src, sub2),
-                  0, 0, 0, 0), sub1),
-    (V_CUBEMA_F32 (EXTRACT_SUBREG VReg_128:$src, sub0),
-                  (EXTRACT_SUBREG VReg_128:$src, sub1),
-                  (EXTRACT_SUBREG VReg_128:$src, sub2),
-                  0, 0, 0, 0), sub2),
-    (V_CUBEID_F32 (EXTRACT_SUBREG VReg_128:$src, sub0),
-                  (EXTRACT_SUBREG VReg_128:$src, sub1),
-                  (EXTRACT_SUBREG VReg_128:$src, sub2),
-                  0, 0, 0, 0), sub3)
+    (V_CUBETC_F32 (EXTRACT_SUBREG $src, sub0),
+                  (EXTRACT_SUBREG $src, sub1),
+                  (EXTRACT_SUBREG $src, sub2)),
+                   sub0),
+    (V_CUBESC_F32 (EXTRACT_SUBREG $src, sub0),
+                  (EXTRACT_SUBREG $src, sub1),
+                  (EXTRACT_SUBREG $src, sub2)),
+                   sub1),
+    (V_CUBEMA_F32 (EXTRACT_SUBREG $src, sub0),
+                  (EXTRACT_SUBREG $src, sub1),
+                  (EXTRACT_SUBREG $src, sub2)),
+                   sub2),
+    (V_CUBEID_F32 (EXTRACT_SUBREG $src, sub0),
+                  (EXTRACT_SUBREG $src, sub1),
+                  (EXTRACT_SUBREG $src, sub2)),
+                   sub3)
 >;
 
 def : Pat <
-  (i32 (sext (i1 SReg_64:$src0))),
-  (V_CNDMASK_B32_e64 (i32 0), (i32 -1), SReg_64:$src0)
+  (i32 (sext i1:$src0)),
+  (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src0)
 >;
 
 // 1. Offset as 8bit DWORD immediate
 def : Pat <
-  (int_SI_load_const SReg_128:$sbase, IMM8bitDWORD:$offset),
-  (S_BUFFER_LOAD_DWORD_IMM SReg_128:$sbase, IMM8bitDWORD:$offset)
+  (int_SI_load_const v16i8:$sbase, IMM8bitDWORD:$offset),
+  (S_BUFFER_LOAD_DWORD_IMM $sbase, IMM8bitDWORD:$offset)
 >;
 
 // 2. Offset loaded in an 32bit SGPR
 def : Pat <
-  (int_SI_load_const SReg_128:$sbase, imm:$offset),
-  (S_BUFFER_LOAD_DWORD_SGPR SReg_128:$sbase, (S_MOV_B32 imm:$offset))
+  (int_SI_load_const v16i8:$sbase, imm:$offset),
+  (S_BUFFER_LOAD_DWORD_SGPR $sbase, (S_MOV_B32 imm:$offset))
 >;
 
 // 3. Offset in an 32Bit VGPR
 def : Pat <
-  (int_SI_load_const SReg_128:$sbase, VReg_32:$voff),
-  (BUFFER_LOAD_DWORD 0, 1, 0, 0, 0, 0, VReg_32:$voff, SReg_128:$sbase, 0, 0, 0)
+  (int_SI_load_const v16i8:$sbase, i32:$voff),
+  (BUFFER_LOAD_DWORD 0, 1, 0, 0, 0, 0, $voff, $sbase, 0, 0, 0)
+>;
+
+// The multiplication scales from [0,1] to the unsigned integer range
+def : Pat <
+  (AMDGPUurecip i32:$src0),
+  (V_CVT_U32_F32_e32
+    (V_MUL_F32_e32 CONST.FP_UINT_MAX_PLUS_1,
+                   (V_RCP_IFLAG_F32_e32 (V_CVT_F32_U32_e32 $src0))))
 >;
 
 /********** ================== **********/
 /**********   VOP3 Patterns    **********/
 /********** ================== **********/
 
-def : Pat <(f32 (fadd (fmul VSrc_32:$src0, VSrc_32:$src1), VSrc_32:$src2)),
-           (V_MAD_F32 VSrc_32:$src0, VSrc_32:$src1, VSrc_32:$src2,
-            0, 0, 0, 0)>;
+def : Pat <
+  (f32 (fadd (fmul f32:$src0, f32:$src1), f32:$src2)),
+  (V_MAD_F32 $src0, $src1, $src2)
+>;
 
 /********** ================== **********/
 /**********   SMRD Patterns    **********/
 /********** ================== **********/
 
 multiclass SMRD_Pattern <SMRD Instr_IMM, SMRD Instr_SGPR, ValueType vt> {
+
   // 1. Offset as 8bit DWORD immediate
   def : Pat <
-    (constant_load (SIadd64bit32bit SReg_64:$sbase, IMM8bitDWORD:$offset)),
-    (vt (Instr_IMM SReg_64:$sbase, IMM8bitDWORD:$offset))
+    (constant_load (SIadd64bit32bit i64:$sbase, IMM8bitDWORD:$offset)),
+    (vt (Instr_IMM $sbase, IMM8bitDWORD:$offset))
   >;
 
   // 2. Offset loaded in an 32bit SGPR
   def : Pat <
-    (constant_load (SIadd64bit32bit SReg_64:$sbase, imm:$offset)),
-    (vt (Instr_SGPR SReg_64:$sbase, (S_MOV_B32 imm:$offset)))
+    (constant_load (SIadd64bit32bit i64:$sbase, imm:$offset)),
+    (vt (Instr_SGPR $sbase, (S_MOV_B32 imm:$offset)))
   >;
 
   // 3. No offset at all
   def : Pat <
-    (constant_load SReg_64:$sbase),
-    (vt (Instr_IMM SReg_64:$sbase, 0))
+    (constant_load i64:$sbase),
+    (vt (Instr_IMM $sbase, 0))
   >;
 }
 
@@ -1550,45 +1588,37 @@ defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v32i8>;
 /**********   Indirect adressing   **********/
 /********** ====================== **********/
 
-multiclass SI_INDIRECT_Pattern <RegisterClass rc, ValueType vt,
-                                SI_INDIRECT_DST IndDst> {
+multiclass SI_INDIRECT_Pattern <ValueType vt, SI_INDIRECT_DST IndDst> {
+
   // 1. Extract with offset
   def : Pat<
-    (vector_extract (vt rc:$vec),
-      (i64 (zext (i32 (add VReg_32:$idx, imm:$off))))
-    ),
-    (f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, imm:$off))
+    (vector_extract vt:$vec, (i64 (zext (add i32:$idx, imm:$off)))),
+    (f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), $vec, $idx, imm:$off))
   >;
 
   // 2. Extract without offset
   def : Pat<
-    (vector_extract (vt rc:$vec),
-      (i64 (zext (i32 VReg_32:$idx)))
-    ),
-    (f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, 0))
+    (vector_extract vt:$vec, (i64 (zext i32:$idx))),
+    (f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), $vec, $idx, 0))
   >;
 
   // 3. Insert with offset
   def : Pat<
-    (vector_insert (vt rc:$vec), (f32 VReg_32:$val),
-      (i64 (zext (i32 (add VReg_32:$idx, imm:$off))))
-    ),
-    (vt (IndDst (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, imm:$off, VReg_32:$val))
+    (vector_insert vt:$vec, f32:$val, (i64 (zext (add i32:$idx, imm:$off)))),
+    (IndDst (IMPLICIT_DEF), $vec, $idx, imm:$off, $val)
   >;
 
   // 4. Insert without offset
   def : Pat<
-    (vector_insert (vt rc:$vec), (f32 VReg_32:$val),
-      (i64 (zext (i32 VReg_32:$idx)))
-    ),
-    (vt (IndDst (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, 0, VReg_32:$val))
+    (vector_insert vt:$vec, f32:$val, (i64 (zext i32:$idx))),
+    (IndDst (IMPLICIT_DEF), $vec, $idx, 0, $val)
   >;
 }
 
-defm : SI_INDIRECT_Pattern <VReg_64, v2f32, SI_INDIRECT_DST_V2>;
-defm : SI_INDIRECT_Pattern <VReg_128, v4f32, SI_INDIRECT_DST_V4>;
-defm : SI_INDIRECT_Pattern <VReg_256, v8f32, SI_INDIRECT_DST_V8>;
-defm : SI_INDIRECT_Pattern <VReg_512, v16f32, SI_INDIRECT_DST_V16>;
+defm : SI_INDIRECT_Pattern <v2f32, SI_INDIRECT_DST_V2>;
+defm : SI_INDIRECT_Pattern <v4f32, SI_INDIRECT_DST_V4>;
+defm : SI_INDIRECT_Pattern <v8f32, SI_INDIRECT_DST_V8>;
+defm : SI_INDIRECT_Pattern <v16f32, SI_INDIRECT_DST_V16>;
 
 /********** =============== **********/
 /**********   Conditions    **********/
@@ -1596,12 +1626,18 @@ defm : SI_INDIRECT_Pattern <VReg_512, v16f32, SI_INDIRECT_DST_V16>;
 
 def : Pat<
   (i1 (setcc f32:$src0, f32:$src1, SETO)),
-  (V_CMP_O_F32_e64 f32:$src0, f32:$src1)
+  (V_CMP_O_F32_e64 $src0, $src1)
 >;
 
 def : Pat<
   (i1 (setcc f32:$src0, f32:$src1, SETUO)),
-  (V_CMP_U_F32_e64 f32:$src0, f32:$src1)
+  (V_CMP_U_F32_e64 $src0, $src1)
 >;
 
+//============================================================================//
+// Miscellaneous Optimization Patterns
+//============================================================================//
+
+def : SHA256MaPattern <V_BFI_B32, V_XOR_B32_e32>;
+
 } // End isSI predicate
diff --git a/lib/Target/R600/SIIntrinsics.td b/lib/Target/R600/SIIntrinsics.td
index 0af378e..224cd2f 100644
--- a/lib/Target/R600/SIIntrinsics.td
+++ b/lib/Target/R600/SIIntrinsics.td
@@ -19,12 +19,16 @@ let TargetPrefix = "SI", isTarget = 1 in {
   def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
   def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_v16i8_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]> ;
 
-  class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_anyvector_ty, llvm_v32i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+  class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
 
   def int_SI_sample : Sample;
   def int_SI_sampleb : Sample;
   def int_SI_samplel : Sample;
 
+  def int_SI_imageload : Intrinsic <[llvm_v4i32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+
+  def int_SI_resinfo : Intrinsic <[llvm_v4i32_ty], [llvm_i32_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+
   /* Interpolation Intrinsics */
 
   def int_SI_fs_constant : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td
index 4f14931..244d4c00 100644
--- a/lib/Target/R600/SIRegisterInfo.td
+++ b/lib/Target/R600/SIRegisterInfo.td
@@ -94,6 +94,12 @@ def VGPR_64 : RegisterTuples<[sub0, sub1],
                              [(add (trunc VGPR_32, 255)),
                               (add (shl VGPR_32, 1))]>;
 
+// VGPR 96-bit registers
+def VGPR_96 : RegisterTuples<[sub0, sub1, sub2],
+                             [(add (trunc VGPR_32, 254)),
+                              (add (shl VGPR_32, 1)),
+                              (add (shl VGPR_32, 2))]>;
+
 // VGPR 128-bit registers
 def VGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3],
                               [(add (trunc VGPR_32, 253)),
@@ -151,7 +157,7 @@ def SReg_64 : RegisterClass<"AMDGPU", [i64, i1], 64,
   (add SGPR_64, VCCReg, EXECReg)
 >;
 
-def SReg_128 : RegisterClass<"AMDGPU", [v16i8], 128, (add SGPR_128)>;
+def SReg_128 : RegisterClass<"AMDGPU", [v16i8, i128], 128, (add SGPR_128)>;
 
 def SReg_256 : RegisterClass<"AMDGPU", [v32i8], 256, (add SGPR_256)>;
 
@@ -162,6 +168,10 @@ def VReg_32 : RegisterClass<"AMDGPU", [i32, f32, v1i32], 32, (add VGPR_32)>;
 
 def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32], 64, (add VGPR_64)>;
 
+def VReg_96 : RegisterClass<"AMDGPU", [untyped], 96, (add VGPR_96)> {
+  let Size = 96;
+}
+
 def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32], 128, (add VGPR_128)>;
 
 def VReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 256, (add VGPR_256)>;
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcBaseInfo.h b/lib/Target/Sparc/MCTargetDesc/SparcBaseInfo.h
new file mode 100644
index 0000000..aac0e8d
--- /dev/null
+++ b/lib/Target/Sparc/MCTargetDesc/SparcBaseInfo.h
@@ -0,0 +1,62 @@
+//===-- SparcBaseInfo.h - Top level definitions for Sparc ---- --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains small standalone helper functions and enum definitions
+// for the Sparc target useful for the compiler back-end and the MC libraries.
+// As such, it deliberately does not include references to LLVM core code gen
+// types, passes, etc.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARCBASEINFO_H
+#define SPARCBASEINFO_H
+
+namespace llvm {
+
+/// SPII - This namespace holds target specific flags for instruction info.
+namespace SPII {
+
+/// Target Operand Flags. Sparc specific TargetFlags for MachineOperands and
+/// SDNodes.
+enum TOF {
+  MO_NO_FLAG,
+
+  // Extract the low 10 bits of an address.
+  // Assembler: %lo(addr)
+  MO_LO,
+
+  // Extract bits 31-10 of an address. Only for sethi.
+  // Assembler: %hi(addr) or %lm(addr)
+  MO_HI,
+
+  // Extract bits 43-22 of an address. Only for sethi.
+  // Assembler: %h44(addr)
+  MO_H44,
+
+  // Extract bits 21-12 of an address.
+  // Assembler: %m44(addr)
+  MO_M44,
+
+  // Extract bits 11-0 of an address.
+  // Assembler: %l44(addr)
+  MO_L44,
+
+  // Extract bits 63-42 of an address. Only for sethi.
+  // Assembler: %hh(addr)
+  MO_HH,
+
+  // Extract bits 41-32 of an address.
+  // Assembler: %hm(addr)
+  MO_HM
+};
+
+} // end namespace SPII
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
index 7fdb0c3..1c64e1b 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
@@ -50,14 +50,42 @@ static MCSubtargetInfo *createSparcMCSubtargetInfo(StringRef TT, StringRef CPU,
   return X;
 }
 
+// Code models. Some only make sense for 64-bit code.
+//
+// SunCC  Reloc   CodeModel  Constraints
+// abs32  Static  Small      text+data+bss linked below 2^32 bytes
+// abs44  Static  Medium     text+data+bss linked below 2^44 bytes
+// abs64  Static  Large      text smaller than 2^31 bytes
+// pic13  PIC_    Small      GOT < 2^13 bytes
+// pic32  PIC_    Medium     GOT < 2^32 bytes
+//
+// All code models require that the text segment is smaller than 2GB.
+
 static MCCodeGenInfo *createSparcMCCodeGenInfo(StringRef TT, Reloc::Model RM,
                                                CodeModel::Model CM,
                                                CodeGenOpt::Level OL) {
   MCCodeGenInfo *X = new MCCodeGenInfo();
+
+  // The default 32-bit code model is abs32/pic32.
+  if (CM == CodeModel::Default)
+    CM = RM == Reloc::PIC_ ? CodeModel::Medium : CodeModel::Small;
+
   X->InitMCCodeGenInfo(RM, CM, OL);
   return X;
 }
 
+static MCCodeGenInfo *createSparcV9MCCodeGenInfo(StringRef TT, Reloc::Model RM,
+                                                 CodeModel::Model CM,
+                                                 CodeGenOpt::Level OL) {
+  MCCodeGenInfo *X = new MCCodeGenInfo();
+
+  // The default 64-bit code model is abs44/pic32.
+  if (CM == CodeModel::Default)
+    CM = CodeModel::Medium;
+
+  X->InitMCCodeGenInfo(RM, CM, OL);
+  return X;
+}
 extern "C" void LLVMInitializeSparcTargetMC() {
   // Register the MC asm info.
   RegisterMCAsmInfo<SparcELFMCAsmInfo> X(TheSparcTarget);
@@ -67,7 +95,7 @@ extern "C" void LLVMInitializeSparcTargetMC() {
   TargetRegistry::RegisterMCCodeGenInfo(TheSparcTarget,
                                        createSparcMCCodeGenInfo);
   TargetRegistry::RegisterMCCodeGenInfo(TheSparcV9Target,
-                                       createSparcMCCodeGenInfo);
+                                       createSparcV9MCCodeGenInfo);
 
   // Register the MC instruction info.
   TargetRegistry::RegisterMCInstrInfo(TheSparcTarget, createSparcMCInstrInfo);
diff --git a/lib/Target/Sparc/SparcAsmPrinter.cpp b/lib/Target/Sparc/SparcAsmPrinter.cpp
index e14b3cb..108eb90 100644
--- a/lib/Target/Sparc/SparcAsmPrinter.cpp
+++ b/lib/Target/Sparc/SparcAsmPrinter.cpp
@@ -16,6 +16,7 @@
 #include "Sparc.h"
 #include "SparcInstrInfo.h"
 #include "SparcTargetMachine.h"
+#include "MCTargetDesc/SparcBaseInfo.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/MachineInstr.h"
@@ -72,15 +73,39 @@ namespace {
 void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
                                    raw_ostream &O) {
   const MachineOperand &MO = MI->getOperand (opNum);
-  bool CloseParen = false;
-  if (MI->getOpcode() == SP::SETHIi && !MO.isReg() && !MO.isImm()) {
-    O << "%hi(";
-    CloseParen = true;
-  } else if ((MI->getOpcode() == SP::ORri || MI->getOpcode() == SP::ADDri) &&
-             !MO.isReg() && !MO.isImm()) {
-    O << "%lo(";
-    CloseParen = true;
+  unsigned TF = MO.getTargetFlags();
+#ifndef NDEBUG
+  // Verify the target flags.
+  if (MO.isGlobal() || MO.isSymbol() || MO.isCPI()) {
+    if (MI->getOpcode() == SP::CALL)
+      assert(TF == SPII::MO_NO_FLAG &&
+             "Cannot handle target flags on call address");
+    else if (MI->getOpcode() == SP::SETHIi)
+      assert((TF == SPII::MO_HI || TF == SPII::MO_H44 || TF == SPII::MO_HH) &&
+             "Invalid target flags for address operand on sethi");
+    else
+      assert((TF == SPII::MO_LO || TF == SPII::MO_M44 || TF == SPII::MO_L44 ||
+              TF == SPII::MO_HM) &&
+             "Invalid target flags for small address operand");
   }
+#endif
+
+  bool CloseParen = true;
+  switch (TF) {
+  default:
+      llvm_unreachable("Unknown target flags on operand");
+  case SPII::MO_NO_FLAG:
+    CloseParen = false;
+    break;
+  case SPII::MO_LO:  O << "%lo(";  break;
+  case SPII::MO_HI:  O << "%hi(";  break;
+  case SPII::MO_H44: O << "%h44("; break;
+  case SPII::MO_M44: O << "%m44("; break;
+  case SPII::MO_L44: O << "%l44("; break;
+  case SPII::MO_HH:  O << "%hh(";  break;
+  case SPII::MO_HM:  O << "%hm(";  break;
+  }
+
   switch (MO.getType()) {
   case MachineOperand::MO_Register:
     O << "%" << StringRef(getRegisterName(MO.getReg())).lower();
@@ -127,14 +152,7 @@ void SparcAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
     return;   // don't print "+0"
 
   O << "+";
-  if (MI->getOperand(opNum+1).isGlobal() ||
-      MI->getOperand(opNum+1).isCPI()) {
-    O << "%lo(";
-    printOperand(MI, opNum+1, O);
-    O << ")";
-  } else {
-    printOperand(MI, opNum+1, O);
-  }
+  printOperand(MI, opNum+1, O);
 }
 
 bool SparcAsmPrinter::printGetPCX(const MachineInstr *MI, unsigned opNum,
diff --git a/lib/Target/Sparc/SparcCallingConv.td b/lib/Target/Sparc/SparcCallingConv.td
index b38ac61..54784e0 100644
--- a/lib/Target/Sparc/SparcCallingConv.td
+++ b/lib/Target/Sparc/SparcCallingConv.td
@@ -12,25 +12,9 @@
 //===----------------------------------------------------------------------===//
 
 //===----------------------------------------------------------------------===//
-// Return Value Calling Conventions
+// SPARC v8 32-bit.
 //===----------------------------------------------------------------------===//
 
-// Sparc 32-bit C return-value convention.
-def RetCC_Sparc32 : CallingConv<[
-  CCIfType<[i32], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>,
-  CCIfType<[f32], CCAssignToReg<[F0, F1, F2, F3]>>,
-  CCIfType<[f64], CCAssignToReg<[D0, D1]>>
-]>;
-
-// Sparc 64-bit C return-value convention.
-def RetCC_Sparc64 : CallingConv<[
-  CCIfType<[i32], CCPromoteToType<i64>>,
-  CCIfType<[i64], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>,
-  CCIfType<[f32], CCAssignToReg<[F0, F1, F2, F3]>>,
-  CCIfType<[f64], CCAssignToReg<[D0, D1]>>
-]>;
-
-// Sparc 32-bit C Calling convention.
 def CC_Sparc32 : CallingConv<[
   //Custom assign SRet to [sp+64].
   CCIfSRet<CCCustom<"CC_Sparc_Assign_SRet">>,
@@ -43,14 +27,93 @@ def CC_Sparc32 : CallingConv<[
   CCAssignToStack<4, 4>
 ]>;
 
-// Sparc 64-bit C Calling convention.
+def RetCC_Sparc32 : CallingConv<[
+  CCIfType<[i32], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>,
+  CCIfType<[f32], CCAssignToReg<[F0, F1, F2, F3]>>,
+  CCIfType<[f64], CCAssignToReg<[D0, D1]>>
+]>;
+
+
+//===----------------------------------------------------------------------===//
+// SPARC v9 64-bit.
+//===----------------------------------------------------------------------===//
+//
+// The 64-bit ABI conceptually assigns all function arguments to a parameter
+// array starting at [%fp+BIAS+128] in the callee's stack frame. All arguments
+// occupy a multiple of 8 bytes in the array. Integer arguments are extended to
+// 64 bits by the caller. Floats are right-aligned in their 8-byte slot, the
+// first 4 bytes in the slot are undefined.
+//
+// The integer registers %i0 to %i5 shadow the first 48 bytes of the parameter
+// array at fixed offsets. Integer arguments are promoted to registers when
+// possible.
+//
+// The floating point registers %f0 to %f31 shadow the first 128 bytes of the
+// parameter array at fixed offsets. Float and double parameters are promoted
+// to these registers when possible.
+//
+// Structs up to 16 bytes in size are passed by value. They are right-aligned
+// in one or two 8-byte slots in the parameter array. Struct members are
+// promoted to both floating point and integer registers when possible. A
+// struct containing two floats would thus be passed in %f0 and %f1, while two
+// float function arguments would occupy 8 bytes each, and be passed in %f1 and
+// %f3.
+//
+// When a struct { int, float } is passed by value, the int goes in the high
+// bits of an integer register while the float goes in a floating point
+// register.
+//
+// The difference is encoded in LLVM IR using the inreg attribute on function
+// arguments:
+//
+//   C:   void f(float, float);
+//   IR:  declare void f(float %f1, float %f3)
+//
+//   C:   void f(struct { float f0, f1; });
+//   IR:  declare void f(float inreg %f0, float inreg %f1)
+//
+//   C:   void f(int, float);
+//   IR:  declare void f(i32 signext %i0, float %f3)
+//
+//   C:   void f(struct { int i0high; float f1; });
+//   IR:  declare void f(i32 inreg %i0high, float inreg %f1)
+//
+// Two ints in a struct are simply coerced to i64:
+//
+//   C:   void f(struct { int i0high, i0low; });
+//   IR:  declare void f(i64 %i0.coerced)
+//
+// The frontend and backend divide the task of producing ABI compliant code for
+// C functions. The C frontend will:
+//
+//  - Annotate integer arguments with zeroext or signext attributes.
+//
+//  - Split structs into one or two 64-bit sized chunks, or 32-bit chunks with
+//    inreg attributes.
+//
+//  - Pass structs larger than 16 bytes indirectly with an explicit pointer
+//    argument. The byval attribute is not used.
+//
+// The backend will:
+//
+//  - Assign all arguments to 64-bit aligned stack slots, 32 bits for inreg.
+//
+//  - Promote to integer or floating point registers depending on type.
+//
+// Function return values are passed exactly like function arguments, except a
+// struct up to 32 bytes in size can be returned in registers.
+
+// Function arguments AND return values.
 def CC_Sparc64 : CallingConv<[
+  // The frontend uses the inreg flag to indicate i32 and float arguments from
+  // structs. These arguments are not promoted to 64 bits, but they can still
+  // be assigned to integer and float registers.
+  CCIfInReg<CCIfType<[i32, f32], CCCustom<"CC_Sparc64_Half">>>,
+
   // All integers are promoted to i64 by the caller.
   CCIfType<[i32], CCPromoteToType<i64>>,
-  // Integer arguments get passed in integer registers if there is space.
-  CCIfType<[i64], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>,
-  // FIXME: Floating point arguments.
 
-  // Alternatively, they are assigned to the stack in 8-byte aligned units.
-  CCAssignToStack<8, 8>
+  // Custom assignment is required because stack space is reserved for all
+  // arguments whether they are passed in registers or not.
+  CCCustom<"CC_Sparc64_Full">
 ]>;
diff --git a/lib/Target/Sparc/SparcFrameLowering.cpp b/lib/Target/Sparc/SparcFrameLowering.cpp
index a0dae6e..7874240 100644
--- a/lib/Target/Sparc/SparcFrameLowering.cpp
+++ b/lib/Target/Sparc/SparcFrameLowering.cpp
@@ -37,18 +37,27 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF) const {
   // Get the number of bytes to allocate from the FrameInfo
   int NumBytes = (int) MFI->getStackSize();
 
-  // Emit the correct save instruction based on the number of bytes in
-  // the frame. Minimum stack frame size according to V8 ABI is:
-  //   16 words for register window spill
-  //    1 word for address of returned aggregate-value
-  // +  6 words for passing parameters on the stack
-  // ----------
-  //   23 words * 4 bytes per word = 92 bytes
-  NumBytes += 92;
+  if (SubTarget.is64Bit()) {
+    // All 64-bit stack frames must be 16-byte aligned, and must reserve space
+    // for spilling the 16 window registers at %sp+BIAS..%sp+BIAS+128.
+    NumBytes += 128;
+    // Frames with calls must also reserve space for 6 outgoing arguments
+    // whether they are used or not. LowerCall_64 takes care of that.
+    assert(NumBytes % 16 == 0 && "Stack size not 16-byte aligned");
+  } else {
+    // Emit the correct save instruction based on the number of bytes in
+    // the frame. Minimum stack frame size according to V8 ABI is:
+    //   16 words for register window spill
+    //    1 word for address of returned aggregate-value
+    // +  6 words for passing parameters on the stack
+    // ----------
+    //   23 words * 4 bytes per word = 92 bytes
+    NumBytes += 92;
 
-  // Round up to next doubleword boundary -- a double-word boundary
-  // is required by the ABI.
-  NumBytes = (NumBytes + 7) & ~7;
+    // Round up to next doubleword boundary -- a double-word boundary
+    // is required by the ABI.
+    NumBytes = RoundUpToAlignment(NumBytes, 8);
+  }
   NumBytes = -NumBytes;
 
   if (NumBytes >= -4096) {
@@ -70,15 +79,18 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF) const {
 void SparcFrameLowering::
 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator I) const {
-  MachineInstr &MI = *I;
-  DebugLoc dl = MI.getDebugLoc();
-  int Size = MI.getOperand(0).getImm();
-  if (MI.getOpcode() == SP::ADJCALLSTACKDOWN)
-    Size = -Size;
-  const SparcInstrInfo &TII =
-    *static_cast<const SparcInstrInfo*>(MF.getTarget().getInstrInfo());
-  if (Size)
-    BuildMI(MBB, I, dl, TII.get(SP::ADDri), SP::O6).addReg(SP::O6).addImm(Size);
+  if (!hasReservedCallFrame(MF)) {
+    MachineInstr &MI = *I;
+    DebugLoc DL = MI.getDebugLoc();
+    int Size = MI.getOperand(0).getImm();
+    if (MI.getOpcode() == SP::ADJCALLSTACKDOWN)
+      Size = -Size;
+    const SparcInstrInfo &TII =
+      *static_cast<const SparcInstrInfo*>(MF.getTarget().getInstrInfo());
+    if (Size)
+      BuildMI(MBB, I, DL, TII.get(SP::ADDri), SP::O6).addReg(SP::O6)
+        .addImm(Size);
+  }
   MBB.erase(I);
 }
 
diff --git a/lib/Target/Sparc/SparcFrameLowering.h b/lib/Target/Sparc/SparcFrameLowering.h
index 464233e..c375662 100644
--- a/lib/Target/Sparc/SparcFrameLowering.h
+++ b/lib/Target/Sparc/SparcFrameLowering.h
@@ -22,10 +22,12 @@ namespace llvm {
   class SparcSubtarget;
 
 class SparcFrameLowering : public TargetFrameLowering {
+  const SparcSubtarget &SubTarget;
 public:
-  explicit SparcFrameLowering(const SparcSubtarget &/*sti*/)
-    : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8, 0) {
-  }
+  explicit SparcFrameLowering(const SparcSubtarget &ST)
+    : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
+                          ST.is64Bit() ? 16 : 8, 0, ST.is64Bit() ? 16 : 8),
+      SubTarget(ST) {}
 
   /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
   /// the function.
diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
index 5fa545d..a709685 100644
--- a/lib/Target/Sparc/SparcISelDAGToDAG.cpp
+++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
@@ -73,7 +73,7 @@ SDNode* SparcDAGToDAGISel::getGlobalBaseReg() {
 bool SparcDAGToDAGISel::SelectADDRri(SDValue Addr,
                                      SDValue &Base, SDValue &Offset) {
   if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
-    Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+    Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), TLI.getPointerTy());
     Offset = CurDAG->getTargetConstant(0, MVT::i32);
     return true;
   }
@@ -87,7 +87,8 @@ bool SparcDAGToDAGISel::SelectADDRri(SDValue Addr,
         if (FrameIndexSDNode *FIN =
                 dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) {
           // Constant offset from frame ref.
-          Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+          Base = CurDAG->getTargetFrameIndex(FIN->getIndex(),
+                                             TLI.getPointerTy());
         } else {
           Base = Addr.getOperand(0);
         }
@@ -130,7 +131,7 @@ bool SparcDAGToDAGISel::SelectADDRrr(SDValue Addr, SDValue &R1, SDValue &R2) {
   }
 
   R1 = Addr;
-  R2 = CurDAG->getRegister(SP::G0, MVT::i32);
+  R2 = CurDAG->getRegister(SP::G0, TLI.getPointerTy());
   return true;
 }
 
@@ -146,6 +147,9 @@ SDNode *SparcDAGToDAGISel::Select(SDNode *N) {
 
   case ISD::SDIV:
   case ISD::UDIV: {
+    // sdivx / udivx handle 64-bit divides.
+    if (N->getValueType(0) == MVT::i64)
+      break;
     // FIXME: should use a custom expander to expose the SRA to the dag.
     SDValue DivLHS = N->getOperand(0);
     SDValue DivRHS = N->getOperand(1);
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
index 325f134..3863e2c 100644
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -15,6 +15,7 @@
 #include "SparcISelLowering.h"
 #include "SparcMachineFunctionInfo.h"
 #include "SparcTargetMachine.h"
+#include "MCTargetDesc/SparcBaseInfo.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -74,27 +75,118 @@ static bool CC_Sparc_Assign_f64(unsigned &ValNo, MVT &ValVT,
   return true;
 }
 
+// Allocate a full-sized argument for the 64-bit ABI.
+static bool CC_Sparc64_Full(unsigned &ValNo, MVT &ValVT,
+                            MVT &LocVT, CCValAssign::LocInfo &LocInfo,
+                            ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+  assert((LocVT == MVT::f32 || LocVT.getSizeInBits() == 64) &&
+         "Can't handle non-64 bits locations");
+
+  // Stack space is allocated for all arguments starting from [%fp+BIAS+128].
+  unsigned Offset = State.AllocateStack(8, 8);
+  unsigned Reg = 0;
+
+  if (LocVT == MVT::i64 && Offset < 6*8)
+    // Promote integers to %i0-%i5.
+    Reg = SP::I0 + Offset/8;
+  else if (LocVT == MVT::f64 && Offset < 16*8)
+    // Promote doubles to %d0-%d30. (Which LLVM calls D0-D15).
+    Reg = SP::D0 + Offset/8;
+  else if (LocVT == MVT::f32 && Offset < 16*8)
+    // Promote floats to %f1, %f3, ...
+    Reg = SP::F1 + Offset/4;
+
+  // Promote to register when possible, otherwise use the stack slot.
+  if (Reg) {
+    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+    return true;
+  }
+
+  // This argument goes on the stack in an 8-byte slot.
+  // When passing floats, LocVT is smaller than 8 bytes. Adjust the offset to
+  // the right-aligned float. The first 4 bytes of the stack slot are undefined.
+  if (LocVT == MVT::f32)
+    Offset += 4;
+
+  State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+  return true;
+}
+
+// Allocate a half-sized argument for the 64-bit ABI.
+//
+// This is used when passing { float, int } structs by value in registers.
+static bool CC_Sparc64_Half(unsigned &ValNo, MVT &ValVT,
+                            MVT &LocVT, CCValAssign::LocInfo &LocInfo,
+                            ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+  assert(LocVT.getSizeInBits() == 32 && "Can't handle non-32 bits locations");
+  unsigned Offset = State.AllocateStack(4, 4);
+
+  if (LocVT == MVT::f32 && Offset < 16*8) {
+    // Promote floats to %f0-%f31.
+    State.addLoc(CCValAssign::getReg(ValNo, ValVT, SP::F0 + Offset/4,
+                                     LocVT, LocInfo));
+    return true;
+  }
+
+  if (LocVT == MVT::i32 && Offset < 6*8) {
+    // Promote integers to %i0-%i5, using half the register.
+    unsigned Reg = SP::I0 + Offset/8;
+    LocVT = MVT::i64;
+    LocInfo = CCValAssign::AExt;
+
+    // Set the Custom bit if this i32 goes in the high bits of a register.
+    if (Offset % 8 == 0)
+      State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg,
+                                             LocVT, LocInfo));
+    else
+      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+    return true;
+  }
+
+  State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+  return true;
+}
+
 #include "SparcGenCallingConv.inc"
 
+// The calling conventions in SparcCallingConv.td are described in terms of the
+// callee's register window. This function translates registers to the
+// corresponding caller window %o register.
+static unsigned toCallerWindow(unsigned Reg) {
+  assert(SP::I0 + 7 == SP::I7 && SP::O0 + 7 == SP::O7 && "Unexpected enum");
+  if (Reg >= SP::I0 && Reg <= SP::I7)
+    return Reg - SP::I0 + SP::O0;
+  return Reg;
+}
+
 SDValue
 SparcTargetLowering::LowerReturn(SDValue Chain,
-                                 CallingConv::ID CallConv, bool isVarArg,
+                                 CallingConv::ID CallConv, bool IsVarArg,
                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
                                  const SmallVectorImpl<SDValue> &OutVals,
-                                 DebugLoc dl, SelectionDAG &DAG) const {
+                                 DebugLoc DL, SelectionDAG &DAG) const {
+  if (Subtarget->is64Bit())
+    return LowerReturn_64(Chain, CallConv, IsVarArg, Outs, OutVals, DL, DAG);
+  return LowerReturn_32(Chain, CallConv, IsVarArg, Outs, OutVals, DL, DAG);
+}
 
+SDValue
+SparcTargetLowering::LowerReturn_32(SDValue Chain,
+                                    CallingConv::ID CallConv, bool IsVarArg,
+                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
+                                    const SmallVectorImpl<SDValue> &OutVals,
+                                    DebugLoc DL, SelectionDAG &DAG) const {
   MachineFunction &MF = DAG.getMachineFunction();
 
   // CCValAssign - represent the assignment of the return value to locations.
   SmallVector<CCValAssign, 16> RVLocs;
 
   // CCState - Info about the registers and stack slot.
-  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
                  DAG.getTarget(), RVLocs, *DAG.getContext());
 
-  // Analize return values.
-  CCInfo.AnalyzeReturn(Outs, Subtarget->is64Bit() ?
-                             RetCC_Sparc64 : RetCC_Sparc32);
+  // Analyze return values.
+  CCInfo.AnalyzeReturn(Outs, RetCC_Sparc32);
 
   SDValue Flag;
   SmallVector<SDValue, 4> RetOps(1, Chain);
@@ -106,7 +198,7 @@ SparcTargetLowering::LowerReturn(SDValue Chain,
     CCValAssign &VA = RVLocs[i];
     assert(VA.isRegLoc() && "Can only return in registers!");
 
-    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+    Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(),
                              OutVals[i], Flag);
 
     // Guarantee that all emitted copies are stuck together with flags.
@@ -121,8 +213,8 @@ SparcTargetLowering::LowerReturn(SDValue Chain,
     unsigned Reg = SFI->getSRetReturnReg();
     if (!Reg)
       llvm_unreachable("sret virtual register not created in the entry block");
-    SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
-    Chain = DAG.getCopyToReg(Chain, dl, SP::I0, Val, Flag);
+    SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy());
+    Chain = DAG.getCopyToReg(Chain, DL, SP::I0, Val, Flag);
     Flag = Chain.getValue(1);
     RetOps.push_back(DAG.getRegister(SP::I0, getPointerTy()));
     RetAddrOffset = 12; // CallInst + Delay Slot + Unimp
@@ -135,7 +227,85 @@ SparcTargetLowering::LowerReturn(SDValue Chain,
   if (Flag.getNode())
     RetOps.push_back(Flag);
 
-  return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other,
+  return DAG.getNode(SPISD::RET_FLAG, DL, MVT::Other,
+                     &RetOps[0], RetOps.size());
+}
+
+// Lower return values for the 64-bit ABI.
+// Return values are passed exactly the same way as function arguments.
+SDValue
+SparcTargetLowering::LowerReturn_64(SDValue Chain,
+                                    CallingConv::ID CallConv, bool IsVarArg,
+                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
+                                    const SmallVectorImpl<SDValue> &OutVals,
+                                    DebugLoc DL, SelectionDAG &DAG) const {
+  // CCValAssign - represent the assignment of the return value to locations.
+  SmallVector<CCValAssign, 16> RVLocs;
+
+  // CCState - Info about the registers and stack slot.
+  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
+                 DAG.getTarget(), RVLocs, *DAG.getContext());
+
+  // Analyze return values.
+  CCInfo.AnalyzeReturn(Outs, CC_Sparc64);
+
+  SDValue Flag;
+  SmallVector<SDValue, 4> RetOps(1, Chain);
+
+  // The second operand on the return instruction is the return address offset.
+  // The return address is always %i7+8 with the 64-bit ABI.
+  RetOps.push_back(DAG.getConstant(8, MVT::i32));
+
+  // Copy the result values into the output registers.
+  for (unsigned i = 0; i != RVLocs.size(); ++i) {
+    CCValAssign &VA = RVLocs[i];
+    assert(VA.isRegLoc() && "Can only return in registers!");
+    SDValue OutVal = OutVals[i];
+
+    // Integer return values must be sign or zero extended by the callee.
+    switch (VA.getLocInfo()) {
+    case CCValAssign::SExt:
+      OutVal = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), OutVal);
+      break;
+    case CCValAssign::ZExt:
+      OutVal = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), OutVal);
+      break;
+    case CCValAssign::AExt:
+      OutVal = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), OutVal);
+    default:
+      break;
+    }
+
+    // The custom bit on an i32 return value indicates that it should be passed
+    // in the high bits of the register.
+    if (VA.getValVT() == MVT::i32 && VA.needsCustom()) {
+      OutVal = DAG.getNode(ISD::SHL, DL, MVT::i64, OutVal,
+                           DAG.getConstant(32, MVT::i32));
+
+      // The next value may go in the low bits of the same register.
+      // Handle both at once.
+      if (i+1 < RVLocs.size() && RVLocs[i+1].getLocReg() == VA.getLocReg()) {
+        SDValue NV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, OutVals[i+1]);
+        OutVal = DAG.getNode(ISD::OR, DL, MVT::i64, OutVal, NV);
+        // Skip the next value, it's already done.
+        ++i;
+      }
+    }
+
+    Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVal, Flag);
+
+    // Guarantee that all emitted copies are stuck together with flags.
+    Flag = Chain.getValue(1);
+    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
+  }
+
+  RetOps[0] = Chain;  // Update chain.
+
+  // Add the flag if we have it.
+  if (Flag.getNode())
+    RetOps.push_back(Flag);
+
+  return DAG.getNode(SPISD::RET_FLAG, DL, MVT::Other,
                      &RetOps[0], RetOps.size());
 }
 
@@ -373,6 +543,9 @@ LowerFormalArguments_64(SDValue Chain,
                  getTargetMachine(), ArgLocs, *DAG.getContext());
   CCInfo.AnalyzeFormalArguments(Ins, CC_Sparc64);
 
+  // The argument array begins at %fp+BIAS+128, after the register save area.
+  const unsigned ArgArea = 128;
+
   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
     CCValAssign &VA = ArgLocs[i];
     if (VA.isRegLoc()) {
@@ -384,6 +557,11 @@ LowerFormalArguments_64(SDValue Chain,
                                    getRegClassFor(VA.getLocVT()));
       SDValue Arg = DAG.getCopyFromReg(Chain, DL, VReg, VA.getLocVT());
 
+      // Get the high bits for i32 struct elements.
+      if (VA.getValVT() == MVT::i32 && VA.needsCustom())
+        Arg = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), Arg,
+                          DAG.getConstant(32, MVT::i32));
+
       // The caller promoted the argument, so insert an Assert?ext SDNode so we
       // won't promote the value again in this function.
       switch (VA.getLocInfo()) {
@@ -409,13 +587,71 @@ LowerFormalArguments_64(SDValue Chain,
 
     // The registers are exhausted. This argument was passed on the stack.
     assert(VA.isMemLoc());
+    // The CC_Sparc64_Full/Half functions compute stack offsets relative to the
+    // beginning of the arguments area at %fp+BIAS+128.
+    unsigned Offset = VA.getLocMemOffset() + ArgArea;
+    unsigned ValSize = VA.getValVT().getSizeInBits() / 8;
+    // Adjust offset for extended arguments, SPARC is big-endian.
+    // The caller will have written the full slot with extended bytes, but we
+    // prefer our own extending loads.
+    if (VA.isExtInLoc())
+      Offset += 8 - ValSize;
+    int FI = MF.getFrameInfo()->CreateFixedObject(ValSize, Offset, true);
+    InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain,
+                                 DAG.getFrameIndex(FI, getPointerTy()),
+                                 MachinePointerInfo::getFixedStack(FI),
+                                 false, false, false, 0));
   }
+
+  if (!IsVarArg)
+    return Chain;
+
+  // This function takes variable arguments, some of which may have been passed
+  // in registers %i0-%i5. Variable floating point arguments are never passed
+  // in floating point registers. They go on %i0-%i5 or on the stack like
+  // integer arguments.
+  //
+  // The va_start intrinsic needs to know the offset to the first variable
+  // argument.
+  unsigned ArgOffset = CCInfo.getNextStackOffset();
+  SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>();
+  // Skip the 128 bytes of register save area.
+  FuncInfo->setVarArgsFrameOffset(ArgOffset + ArgArea +
+                                  Subtarget->getStackPointerBias());
+
+  // Save the variable arguments that were passed in registers.
+  // The caller is required to reserve stack space for 6 arguments regardless
+  // of how many arguments were actually passed.
+  SmallVector<SDValue, 8> OutChains;
+  for (; ArgOffset < 6*8; ArgOffset += 8) {
+    unsigned VReg = MF.addLiveIn(SP::I0 + ArgOffset/8, &SP::I64RegsRegClass);
+    SDValue VArg = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
+    int FI = MF.getFrameInfo()->CreateFixedObject(8, ArgOffset + ArgArea, true);
+    OutChains.push_back(DAG.getStore(Chain, DL, VArg,
+                                     DAG.getFrameIndex(FI, getPointerTy()),
+                                     MachinePointerInfo::getFixedStack(FI),
+                                     false, false, 0));
+  }
+
+  if (!OutChains.empty())
+    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
+                        &OutChains[0], OutChains.size());
+
   return Chain;
 }
 
 SDValue
 SparcTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                                SmallVectorImpl<SDValue> &InVals) const {
+  if (Subtarget->is64Bit())
+    return LowerCall_64(CLI, InVals);
+  return LowerCall_32(CLI, InVals);
+}
+
+// Lower a call for the 32-bit ABI.
+SDValue
+SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
+                                  SmallVectorImpl<SDValue> &InVals) const {
   SelectionDAG &DAG                     = CLI.DAG;
   DebugLoc &dl                          = CLI.DL;
   SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
@@ -618,11 +854,7 @@ SparcTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
   // stuck together.
   SDValue InFlag;
   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
-    unsigned Reg = RegsToPass[i].first;
-    // Remap I0->I7 -> O0->O7.
-    if (Reg >= SP::I0 && Reg <= SP::I7)
-      Reg = Reg-SP::I0+SP::O0;
-
+    unsigned Reg = toCallerWindow(RegsToPass[i].first);
     Chain = DAG.getCopyToReg(Chain, dl, Reg, RegsToPass[i].second, InFlag);
     InFlag = Chain.getValue(1);
   }
@@ -644,13 +876,9 @@ SparcTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
   Ops.push_back(Callee);
   if (hasStructRetAttr)
     Ops.push_back(DAG.getTargetConstant(SRetArgSize, MVT::i32));
-  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
-    unsigned Reg = RegsToPass[i].first;
-    if (Reg >= SP::I0 && Reg <= SP::I7)
-      Reg = Reg-SP::I0+SP::O0;
-
-    Ops.push_back(DAG.getRegister(Reg, RegsToPass[i].second.getValueType()));
-  }
+  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+    Ops.push_back(DAG.getRegister(toCallerWindow(RegsToPass[i].first),
+                                  RegsToPass[i].second.getValueType()));
   if (InFlag.getNode())
     Ops.push_back(InFlag);
 
@@ -670,13 +898,7 @@ SparcTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
 
   // Copy all of the result registers out of their specified physreg.
   for (unsigned i = 0; i != RVLocs.size(); ++i) {
-    unsigned Reg = RVLocs[i].getLocReg();
-
-    // Remap I0->I7 -> O0->O7.
-    if (Reg >= SP::I0 && Reg <= SP::I7)
-      Reg = Reg-SP::I0+SP::O0;
-
-    Chain = DAG.getCopyFromReg(Chain, dl, Reg,
+    Chain = DAG.getCopyFromReg(Chain, dl, toCallerWindow(RVLocs[i].getLocReg()),
                                RVLocs[i].getValVT(), InFlag).getValue(1);
     InFlag = Chain.getValue(2);
     InVals.push_back(Chain.getValue(0));
@@ -709,6 +931,259 @@ SparcTargetLowering::getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const
   return getDataLayout()->getTypeAllocSize(ElementTy);
 }
 
+
+// Fixup floating point arguments in the ... part of a varargs call.
+//
+// The SPARC v9 ABI requires that floating point arguments are treated the same
+// as integers when calling a varargs function. This does not apply to the
+// fixed arguments that are part of the function's prototype.
+//
+// This function post-processes a CCValAssign array created by
+// AnalyzeCallOperands().
+static void fixupVariableFloatArgs(SmallVectorImpl<CCValAssign> &ArgLocs,
+                                   ArrayRef<ISD::OutputArg> Outs) {
+  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+    const CCValAssign &VA = ArgLocs[i];
+    // FIXME: What about f32 arguments? C promotes them to f64 when calling
+    // varargs functions.
+    if (!VA.isRegLoc() || VA.getLocVT() != MVT::f64)
+      continue;
+    // The fixed arguments to a varargs function still go in FP registers.
+    if (Outs[VA.getValNo()].IsFixed)
+      continue;
+
+    // This floating point argument should be reassigned.
+    CCValAssign NewVA;
+
+    // Determine the offset into the argument array.
+    unsigned Offset = 8 * (VA.getLocReg() - SP::D0);
+    assert(Offset < 16*8 && "Offset out of range, bad register enum?");
+
+    if (Offset < 6*8) {
+      // This argument should go in %i0-%i5.
+      unsigned IReg = SP::I0 + Offset/8;
+      // Full register, just bitconvert into i64.
+      NewVA = CCValAssign::getReg(VA.getValNo(), VA.getValVT(),
+                                  IReg, MVT::i64, CCValAssign::BCvt);
+    } else {
+      // This needs to go to memory, we're out of integer registers.
+      NewVA = CCValAssign::getMem(VA.getValNo(), VA.getValVT(),
+                                  Offset, VA.getLocVT(), VA.getLocInfo());
+    }
+    ArgLocs[i] = NewVA;
+  }
+}
+
+// Lower a call for the 64-bit ABI.
+SDValue
+SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
+                                  SmallVectorImpl<SDValue> &InVals) const {
+  SelectionDAG &DAG = CLI.DAG;
+  DebugLoc DL = CLI.DL;
+  SDValue Chain = CLI.Chain;
+
+  // Analyze operands of the call, assigning locations to each operand.
+  SmallVector<CCValAssign, 16> ArgLocs;
+  CCState CCInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(),
+                 DAG.getTarget(), ArgLocs, *DAG.getContext());
+  CCInfo.AnalyzeCallOperands(CLI.Outs, CC_Sparc64);
+
+  // Get the size of the outgoing arguments stack space requirement.
+  // The stack offset computed by CC_Sparc64 includes all arguments.
+  // Called functions expect 6 argument words to exist in the stack frame, used
+  // or not.
+  unsigned ArgsSize = std::max(6*8u, CCInfo.getNextStackOffset());
+
+  // Keep stack frames 16-byte aligned.
+  ArgsSize = RoundUpToAlignment(ArgsSize, 16);
+
+  // Varargs calls require special treatment.
+  if (CLI.IsVarArg)
+    fixupVariableFloatArgs(ArgLocs, CLI.Outs);
+
+  // Adjust the stack pointer to make room for the arguments.
+  // FIXME: Use hasReservedCallFrame to avoid %sp adjustments around all calls
+  // with more than 6 arguments.
+  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(ArgsSize, true));
+
+  // Collect the set of registers to pass to the function and their values.
+  // This will be emitted as a sequence of CopyToReg nodes glued to the call
+  // instruction.
+  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+
+  // Collect chains from all the memory operations that copy arguments to the
+  // stack. They must follow the stack pointer adjustment above and precede the
+  // call instruction itself.
+  SmallVector<SDValue, 8> MemOpChains;
+
+  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+    const CCValAssign &VA = ArgLocs[i];
+    SDValue Arg = CLI.OutVals[i];
+
+    // Promote the value if needed.
+    switch (VA.getLocInfo()) {
+    default:
+      llvm_unreachable("Unknown location info!");
+    case CCValAssign::Full:
+      break;
+    case CCValAssign::SExt:
+      Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
+      break;
+    case CCValAssign::ZExt:
+      Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
+      break;
+    case CCValAssign::AExt:
+      Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
+      break;
+    case CCValAssign::BCvt:
+      Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
+      break;
+    }
+
+    if (VA.isRegLoc()) {
+      // The custom bit on an i32 argument indicates that it should be
+      // passed in the high bits of the register.
+      if (VA.getValVT() == MVT::i32 && VA.needsCustom()) {
+        Arg = DAG.getNode(ISD::SHL, DL, MVT::i64, Arg,
+                          DAG.getConstant(32, MVT::i32));
+
+        // The next value may go in the low bits of the same register.
+        // Handle both at once.
+        if (i+1 < ArgLocs.size() && ArgLocs[i+1].isRegLoc() &&
+            ArgLocs[i+1].getLocReg() == VA.getLocReg()) {
+          SDValue NV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64,
+                                   CLI.OutVals[i+1]);
+          Arg = DAG.getNode(ISD::OR, DL, MVT::i64, Arg, NV);
+          // Skip the next value, it's already done.
+          ++i;
+        }
+      }
+      RegsToPass.push_back(std::make_pair(toCallerWindow(VA.getLocReg()), Arg));
+      continue;
+    }
+
+    assert(VA.isMemLoc());
+
+    // Create a store off the stack pointer for this argument.
+    SDValue StackPtr = DAG.getRegister(SP::O6, getPointerTy());
+    // The argument area starts at %fp+BIAS+128 in the callee frame,
+    // %sp+BIAS+128 in ours.
+    SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset() +
+                                           Subtarget->getStackPointerBias() +
+                                           128);
+    PtrOff = DAG.getNode(ISD::ADD, DL, getPointerTy(), StackPtr, PtrOff);
+    MemOpChains.push_back(DAG.getStore(Chain, DL, Arg, PtrOff,
+                                       MachinePointerInfo(),
+                                       false, false, 0));
+  }
+
+  // Emit all stores, make sure they occur before the call.
+  if (!MemOpChains.empty())
+    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
+                        &MemOpChains[0], MemOpChains.size());
+
+  // Build a sequence of CopyToReg nodes glued together with token chain and
+  // glue operands which copy the outgoing args into registers. The InGlue is
+  // necessary since all emitted instructions must be stuck together in order
+  // to pass the live physical registers.
+  SDValue InGlue;
+  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+    Chain = DAG.getCopyToReg(Chain, DL,
+                             RegsToPass[i].first, RegsToPass[i].second, InGlue);
+    InGlue = Chain.getValue(1);
+  }
+
+  // If the callee is a GlobalAddress node (quite common, every direct call is)
+  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
+  // Likewise ExternalSymbol -> TargetExternalSymbol.
+  SDValue Callee = CLI.Callee;
+  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, getPointerTy());
+  else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
+    Callee = DAG.getTargetExternalSymbol(E->getSymbol(), getPointerTy());
+
+  // Build the operands for the call instruction itself.
+  SmallVector<SDValue, 8> Ops;
+  Ops.push_back(Chain);
+  Ops.push_back(Callee);
+  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+                                  RegsToPass[i].second.getValueType()));
+
+  // Make sure the CopyToReg nodes are glued to the call instruction which
+  // consumes the registers.
+  if (InGlue.getNode())
+    Ops.push_back(InGlue);
+
+  // Now the call itself.
+  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+  Chain = DAG.getNode(SPISD::CALL, DL, NodeTys, &Ops[0], Ops.size());
+  InGlue = Chain.getValue(1);
+
+  // Revert the stack pointer immediately after the call.
+  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, true),
+                             DAG.getIntPtrConstant(0, true), InGlue);
+  InGlue = Chain.getValue(1);
+
+  // Now extract the return values. This is more or less the same as
+  // LowerFormalArguments_64.
+
+  // Assign locations to each value returned by this call.
+  SmallVector<CCValAssign, 16> RVLocs;
+  CCState RVInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(),
+                 DAG.getTarget(), RVLocs, *DAG.getContext());
+  RVInfo.AnalyzeCallResult(CLI.Ins, CC_Sparc64);
+
+  // Copy all of the result registers out of their specified physreg.
+  for (unsigned i = 0; i != RVLocs.size(); ++i) {
+    CCValAssign &VA = RVLocs[i];
+    unsigned Reg = toCallerWindow(VA.getLocReg());
+
+    // When returning 'inreg {i32, i32 }', two consecutive i32 values can
+    // reside in the same register in the high and low bits. Reuse the
+    // previous CopyFromReg node to avoid duplicate copies.
+    SDValue RV;
+    if (RegisterSDNode *SrcReg = dyn_cast<RegisterSDNode>(Chain.getOperand(1)))
+      if (SrcReg->getReg() == Reg && Chain->getOpcode() == ISD::CopyFromReg)
+        RV = Chain.getValue(0);
+
+    // But usually we'll create a new CopyFromReg for a different register.
+    if (!RV.getNode()) {
+      RV = DAG.getCopyFromReg(Chain, DL, Reg, RVLocs[i].getLocVT(), InGlue);
+      Chain = RV.getValue(1);
+      InGlue = Chain.getValue(2);
+    }
+
+    // Get the high bits for i32 struct elements.
+    if (VA.getValVT() == MVT::i32 && VA.needsCustom())
+      RV = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), RV,
+                       DAG.getConstant(32, MVT::i32));
+
+    // The callee promoted the return value, so insert an AssertSext/AssertZext
+    // SDNode so we won't promote the value again in this function.
+    switch (VA.getLocInfo()) {
+    case CCValAssign::SExt:
+      RV = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), RV,
+                       DAG.getValueType(VA.getValVT()));
+      break;
+    case CCValAssign::ZExt:
+      RV = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), RV,
+                       DAG.getValueType(VA.getValVT()));
+      break;
+    default:
+      break;
+    }
+
+    // Truncate the register down to the return value type.
+    if (VA.isExtInLoc())
+      RV = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), RV);
+
+    InVals.push_back(RV);
+  }
+
+  return Chain;
+}
+
 //===----------------------------------------------------------------------===//
 // TargetLowering Implementation
 //===----------------------------------------------------------------------===//
@@ -778,9 +1253,9 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
 
   // Custom legalize GlobalAddress nodes into LO/HI parts.
-  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
-  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
-  setOperationAction(ISD::ConstantPool , MVT::i32, Custom);
+  setOperationAction(ISD::GlobalAddress, getPointerTy(), Custom);
+  setOperationAction(ISD::GlobalTLSAddress, getPointerTy(), Custom);
+  setOperationAction(ISD::ConstantPool, getPointerTy(), Custom);
 
   // Sparc doesn't have sext_inreg, replace them with shl/sra
   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
@@ -831,7 +1306,6 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
 
   // FIXME: There are instructions available for ATOMIC_FENCE
   // on SparcV8 and later.
-  setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
   setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
 
   setOperationAction(ISD::FSIN , MVT::f64, Expand);
@@ -965,46 +1439,89 @@ static void LookThroughSetCC(SDValue &LHS, SDValue &RHS,
   }
 }
 
+// Convert to a target node and set target flags.
+SDValue SparcTargetLowering::withTargetFlags(SDValue Op, unsigned TF,
+                                             SelectionDAG &DAG) const {
+  if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op))
+    return DAG.getTargetGlobalAddress(GA->getGlobal(),
+                                      GA->getDebugLoc(),
+                                      GA->getValueType(0),
+                                      GA->getOffset(), TF);
+
+  if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op))
+    return DAG.getTargetConstantPool(CP->getConstVal(),
+                                     CP->getValueType(0),
+                                     CP->getAlignment(),
+                                     CP->getOffset(), TF);
+
+  if (const ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op))
+    return DAG.getTargetExternalSymbol(ES->getSymbol(),
+                                       ES->getValueType(0), TF);
+
+  llvm_unreachable("Unhandled address SDNode");
+}
+
+// Split Op into high and low parts according to HiTF and LoTF.
+// Return an ADD node combining the parts.
+SDValue SparcTargetLowering::makeHiLoPair(SDValue Op,
+                                          unsigned HiTF, unsigned LoTF,
+                                          SelectionDAG &DAG) const {
+  DebugLoc DL = Op.getDebugLoc();
+  EVT VT = Op.getValueType();
+  SDValue Hi = DAG.getNode(SPISD::Hi, DL, VT, withTargetFlags(Op, HiTF, DAG));
+  SDValue Lo = DAG.getNode(SPISD::Lo, DL, VT, withTargetFlags(Op, LoTF, DAG));
+  return DAG.getNode(ISD::ADD, DL, VT, Hi, Lo);
+}
+
+// Build SDNodes for producing an address from a GlobalAddress, ConstantPool,
+// or ExternalSymbol SDNode.
+SDValue SparcTargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const {
+  DebugLoc DL = Op.getDebugLoc();
+  EVT VT = getPointerTy();
+
+  // Handle PIC mode first.
+  if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
+    // This is the pic32 code model, the GOT is known to be smaller than 4GB.
+    SDValue HiLo = makeHiLoPair(Op, SPII::MO_HI, SPII::MO_LO, DAG);
+    SDValue GlobalBase = DAG.getNode(SPISD::GLOBAL_BASE_REG, DL, VT);
+    SDValue AbsAddr = DAG.getNode(ISD::ADD, DL, VT, GlobalBase, HiLo);
+    return DAG.getLoad(VT, DL, DAG.getEntryNode(), AbsAddr,
+                       MachinePointerInfo::getGOT(), false, false, false, 0);
+  }
+
+  // This is one of the absolute code models.
+  switch(getTargetMachine().getCodeModel()) {
+  default:
+    llvm_unreachable("Unsupported absolute code model");
+  case CodeModel::Small:
+    // abs32.
+    return makeHiLoPair(Op, SPII::MO_HI, SPII::MO_LO, DAG);
+  case CodeModel::Medium: {
+    // abs44.
+    SDValue H44 = makeHiLoPair(Op, SPII::MO_H44, SPII::MO_M44, DAG);
+    H44 = DAG.getNode(ISD::SHL, DL, VT, H44, DAG.getConstant(12, MVT::i32));
+    SDValue L44 = withTargetFlags(Op, SPII::MO_L44, DAG);
+    L44 = DAG.getNode(SPISD::Lo, DL, VT, L44);
+    return DAG.getNode(ISD::ADD, DL, VT, H44, L44);
+  }
+  case CodeModel::Large: {
+    // abs64.
+    SDValue Hi = makeHiLoPair(Op, SPII::MO_HH, SPII::MO_HM, DAG);
+    Hi = DAG.getNode(ISD::SHL, DL, VT, Hi, DAG.getConstant(32, MVT::i32));
+    SDValue Lo = makeHiLoPair(Op, SPII::MO_HI, SPII::MO_LO, DAG);
+    return DAG.getNode(ISD::ADD, DL, VT, Hi, Lo);
+  }
+  }
+}
+
 SDValue SparcTargetLowering::LowerGlobalAddress(SDValue Op,
                                                 SelectionDAG &DAG) const {
-  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
-  // FIXME there isn't really any debug info here
-  DebugLoc dl = Op.getDebugLoc();
-  SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32);
-  SDValue Hi = DAG.getNode(SPISD::Hi, dl, MVT::i32, GA);
-  SDValue Lo = DAG.getNode(SPISD::Lo, dl, MVT::i32, GA);
-
-  if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
-    return DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi);
-
-  SDValue GlobalBase = DAG.getNode(SPISD::GLOBAL_BASE_REG, dl,
-                                   getPointerTy());
-  SDValue RelAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi);
-  SDValue AbsAddr = DAG.getNode(ISD::ADD, dl, MVT::i32,
-                                GlobalBase, RelAddr);
-  return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
-                     AbsAddr, MachinePointerInfo(), false, false, false, 0);
+  return makeAddress(Op, DAG);
 }
 
 SDValue SparcTargetLowering::LowerConstantPool(SDValue Op,
                                                SelectionDAG &DAG) const {
-  ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
-  // FIXME there isn't really any debug info here
-  DebugLoc dl = Op.getDebugLoc();
-  const Constant *C = N->getConstVal();
-  SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment());
-  SDValue Hi = DAG.getNode(SPISD::Hi, dl, MVT::i32, CP);
-  SDValue Lo = DAG.getNode(SPISD::Lo, dl, MVT::i32, CP);
-  if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
-    return DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi);
-
-  SDValue GlobalBase = DAG.getNode(SPISD::GLOBAL_BASE_REG, dl,
-                                   getPointerTy());
-  SDValue RelAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi);
-  SDValue AbsAddr = DAG.getNode(ISD::ADD, dl, MVT::i32,
-                                GlobalBase, RelAddr);
-  return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
-                     AbsAddr, MachinePointerInfo(), false, false, false, 0);
+  return makeAddress(Op, DAG);
 }
 
 static SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) {
@@ -1092,14 +1609,13 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG,
 
   // vastart just stores the address of the VarArgsFrameIndex slot into the
   // memory location argument.
-  DebugLoc dl = Op.getDebugLoc();
+  DebugLoc DL = Op.getDebugLoc();
   SDValue Offset =
-    DAG.getNode(ISD::ADD, dl, MVT::i32,
-                DAG.getRegister(SP::I6, MVT::i32),
-                DAG.getConstant(FuncInfo->getVarArgsFrameOffset(),
-                                MVT::i32));
+    DAG.getNode(ISD::ADD, DL, TLI.getPointerTy(),
+                DAG.getRegister(SP::I6, TLI.getPointerTy()),
+                DAG.getIntPtrConstant(FuncInfo->getVarArgsFrameOffset()));
   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
-  return DAG.getStore(Op.getOperand(0), dl, Offset, Op.getOperand(1),
+  return DAG.getStore(Op.getOperand(0), DL, Offset, Op.getOperand(1),
                       MachinePointerInfo(SV), false, false, 0);
 }
 
@@ -1108,33 +1624,22 @@ static SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) {
   EVT VT = Node->getValueType(0);
   SDValue InChain = Node->getOperand(0);
   SDValue VAListPtr = Node->getOperand(1);
+  EVT PtrVT = VAListPtr.getValueType();
   const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
-  DebugLoc dl = Node->getDebugLoc();
-  SDValue VAList = DAG.getLoad(MVT::i32, dl, InChain, VAListPtr,
+  DebugLoc DL = Node->getDebugLoc();
+  SDValue VAList = DAG.getLoad(PtrVT, DL, InChain, VAListPtr,
                                MachinePointerInfo(SV), false, false, false, 0);
-  // Increment the pointer, VAList, to the next vaarg
-  SDValue NextPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, VAList,
-                                  DAG.getConstant(VT.getSizeInBits()/8,
-                                                  MVT::i32));
-  // Store the incremented VAList to the legalized pointer
-  InChain = DAG.getStore(VAList.getValue(1), dl, NextPtr,
+  // Increment the pointer, VAList, to the next vaarg.
+  SDValue NextPtr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
+                                DAG.getIntPtrConstant(VT.getSizeInBits()/8));
+  // Store the incremented VAList to the legalized pointer.
+  InChain = DAG.getStore(VAList.getValue(1), DL, NextPtr,
                          VAListPtr, MachinePointerInfo(SV), false, false, 0);
-  // Load the actual argument out of the pointer VAList, unless this is an
-  // f64 load.
-  if (VT != MVT::f64)
-    return DAG.getLoad(VT, dl, InChain, VAList, MachinePointerInfo(),
-                       false, false, false, 0);
-
-  // Otherwise, load it as i64, then do a bitconvert.
-  SDValue V = DAG.getLoad(MVT::i64, dl, InChain, VAList, MachinePointerInfo(),
-                          false, false, false, 0);
-
-  // Bit-Convert the value to f64.
-  SDValue Ops[2] = {
-    DAG.getNode(ISD::BITCAST, dl, MVT::f64, V),
-    V.getValue(1)
-  };
-  return DAG.getMergeValues(Ops, 2, dl);
+  // Load the actual argument out of the pointer VAList.
+  // We can't count on greater alignment than the word size.
+  return DAG.getLoad(VT, DL, InChain, VAList, MachinePointerInfo(),
+                     false, false, false,
+                     std::min(PtrVT.getSizeInBits(), VT.getSizeInBits())/8);
 }
 
 static SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) {
diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h
index aa2ef71..fd706be 100644
--- a/lib/Target/Sparc/SparcISelLowering.h
+++ b/lib/Target/Sparc/SparcISelLowering.h
@@ -71,6 +71,7 @@ namespace llvm {
     getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const;
 
     virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+    virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
 
     virtual SDValue
       LowerFormalArguments(SDValue Chain,
@@ -95,6 +96,10 @@ namespace llvm {
     virtual SDValue
       LowerCall(TargetLowering::CallLoweringInfo &CLI,
                 SmallVectorImpl<SDValue> &InVals) const;
+    SDValue LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
+                         SmallVectorImpl<SDValue> &InVals) const;
+    SDValue LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
+                         SmallVectorImpl<SDValue> &InVals) const;
 
     virtual SDValue
       LowerReturn(SDValue Chain,
@@ -102,11 +107,25 @@ namespace llvm {
                   const SmallVectorImpl<ISD::OutputArg> &Outs,
                   const SmallVectorImpl<SDValue> &OutVals,
                   DebugLoc dl, SelectionDAG &DAG) const;
+    SDValue LowerReturn_32(SDValue Chain,
+                           CallingConv::ID CallConv, bool IsVarArg,
+                           const SmallVectorImpl<ISD::OutputArg> &Outs,
+                           const SmallVectorImpl<SDValue> &OutVals,
+                           DebugLoc DL, SelectionDAG &DAG) const;
+    SDValue LowerReturn_64(SDValue Chain,
+                           CallingConv::ID CallConv, bool IsVarArg,
+                           const SmallVectorImpl<ISD::OutputArg> &Outs,
+                           const SmallVectorImpl<SDValue> &OutVals,
+                           DebugLoc DL, SelectionDAG &DAG) const;
 
     SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
 
     unsigned getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const;
+    SDValue withTargetFlags(SDValue Op, unsigned TF, SelectionDAG &DAG) const;
+    SDValue makeHiLoPair(SDValue Op, unsigned HiTF, unsigned LoTF,
+                         SelectionDAG &DAG) const;
+    SDValue makeAddress(SDValue Op, SelectionDAG &DAG) const;
   };
 } // end namespace llvm
 
diff --git a/lib/Target/Sparc/SparcInstr64Bit.td b/lib/Target/Sparc/SparcInstr64Bit.td
index ca1153b..91805f9 100644
--- a/lib/Target/Sparc/SparcInstr64Bit.td
+++ b/lib/Target/Sparc/SparcInstr64Bit.td
@@ -40,6 +40,9 @@ let Predicates = [Is64Bit] in {
 def : Pat<(i64 (zext i32:$val)), (SRLri $val, 0)>;
 def : Pat<(i64 (sext i32:$val)), (SRAri $val, 0)>;
 
+def : Pat<(i64 (and i64:$val, 0xffffffff)), (SRLri $val, 0)>;
+def : Pat<(i64 (sext_inreg i64:$val, i32)), (SRAri $val, 0)>;
+
 defm SLLX : F3_S<"sllx", 0b100101, 1, shl, i64, I64Regs>;
 defm SRLX : F3_S<"srlx", 0b100110, 1, srl, i64, I64Regs>;
 defm SRAX : F3_S<"srax", 0b100111, 1, sra, i64, I64Regs>;
@@ -130,7 +133,7 @@ def HM10 : SDNodeXForm<imm, [{
   return CurDAG->getTargetConstant(Val, MVT::i32);
 }]>;
 def : Pat<(i64 imm:$val),
-          (ORrr (SLLXri (ORri (SETHIi (HH22 $val)), (HM10 $val)), (i64 32)),
+          (ORrr (SLLXri (ORri (SETHIi (HH22 $val)), (HM10 $val)), (i32 32)),
                 (ORri (SETHIi (HI22 $val)), (LO10 $val)))>,
       Requires<[Is64Bit]>;
 
@@ -178,6 +181,45 @@ def : Pat<(SPcmpicc i64:$a, (i64 simm13:$b)), (SUBCCri $a, (as_i32imm $b))>;
 
 
 //===----------------------------------------------------------------------===//
+// 64-bit Integer Multiply and Divide.
+//===----------------------------------------------------------------------===//
+
+let Predicates = [Is64Bit] in {
+
+def MULXrr : F3_1<2, 0b001001,
+                  (outs I64Regs:$rd), (ins I64Regs:$rs1, I64Regs:$rs2),
+                  "mulx $rs1, $rs2, $rd",
+                  [(set i64:$rd, (mul i64:$rs1, i64:$rs2))]>;
+def MULXri : F3_2<2, 0b001001,
+                  (outs IntRegs:$rd), (ins IntRegs:$rs1, i64imm:$i),
+                  "mulx $rs1, $i, $rd",
+                  [(set i64:$rd, (mul i64:$rs1, (i64 simm13:$i)))]>;
+
+// Division can trap.
+let hasSideEffects = 1 in {
+def SDIVXrr : F3_1<2, 0b101101,
+                   (outs I64Regs:$rd), (ins I64Regs:$rs1, I64Regs:$rs2),
+                   "sdivx $rs1, $rs2, $rd",
+                   [(set i64:$rd, (sdiv i64:$rs1, i64:$rs2))]>;
+def SDIVXri : F3_2<2, 0b101101,
+                   (outs IntRegs:$rd), (ins IntRegs:$rs1, i64imm:$i),
+                   "sdivx $rs1, $i, $rd",
+                   [(set i64:$rd, (sdiv i64:$rs1, (i64 simm13:$i)))]>;
+
+def UDIVXrr : F3_1<2, 0b001101,
+                   (outs I64Regs:$rd), (ins I64Regs:$rs1, I64Regs:$rs2),
+                   "udivx $rs1, $rs2, $rd",
+                   [(set i64:$rd, (udiv i64:$rs1, i64:$rs2))]>;
+def UDIVXri : F3_2<2, 0b001101,
+                   (outs IntRegs:$rd), (ins IntRegs:$rs1, i64imm:$i),
+                   "udivx $rs1, $i, $rd",
+                   [(set i64:$rd, (udiv i64:$rs1, (i64 simm13:$i)))]>;
+} // hasSideEffects = 1
+
+} // Predicates = [Is64Bit]
+
+
+//===----------------------------------------------------------------------===//
 // 64-bit Loads and Stores.
 //===----------------------------------------------------------------------===//
 //
@@ -203,16 +245,22 @@ def LDXri  : F3_2<3, 0b001011,
 // Extending loads to i64.
 def : Pat<(i64 (zextloadi8 ADDRrr:$addr)), (LDUBrr ADDRrr:$addr)>;
 def : Pat<(i64 (zextloadi8 ADDRri:$addr)), (LDUBri ADDRri:$addr)>;
+def : Pat<(i64 (extloadi8 ADDRrr:$addr)),  (LDUBrr ADDRrr:$addr)>;
+def : Pat<(i64 (extloadi8 ADDRri:$addr)),  (LDUBri ADDRri:$addr)>;
 def : Pat<(i64 (sextloadi8 ADDRrr:$addr)), (LDSBrr ADDRrr:$addr)>;
 def : Pat<(i64 (sextloadi8 ADDRri:$addr)), (LDSBri ADDRri:$addr)>;
 
 def : Pat<(i64 (zextloadi16 ADDRrr:$addr)), (LDUHrr ADDRrr:$addr)>;
 def : Pat<(i64 (zextloadi16 ADDRri:$addr)), (LDUHri ADDRri:$addr)>;
+def : Pat<(i64 (extloadi16 ADDRrr:$addr)),  (LDUHrr ADDRrr:$addr)>;
+def : Pat<(i64 (extloadi16 ADDRri:$addr)),  (LDUHri ADDRri:$addr)>;
 def : Pat<(i64 (sextloadi16 ADDRrr:$addr)), (LDSHrr ADDRrr:$addr)>;
 def : Pat<(i64 (sextloadi16 ADDRri:$addr)), (LDSHri ADDRri:$addr)>;
 
 def : Pat<(i64 (zextloadi32 ADDRrr:$addr)), (LDrr ADDRrr:$addr)>;
 def : Pat<(i64 (zextloadi32 ADDRri:$addr)), (LDri ADDRri:$addr)>;
+def : Pat<(i64 (extloadi32 ADDRrr:$addr)),  (LDrr ADDRrr:$addr)>;
+def : Pat<(i64 (extloadi32 ADDRri:$addr)),  (LDri ADDRri:$addr)>;
 
 // Sign-extending load of i32 into i64 is a new SPARC v9 instruction.
 def LDSWrr : F3_1<3, 0b001011,
diff --git a/lib/Target/Sparc/SparcInstrFormats.td b/lib/Target/Sparc/SparcInstrFormats.td
index f101856..e7fde08 100644
--- a/lib/Target/Sparc/SparcInstrFormats.td
+++ b/lib/Target/Sparc/SparcInstrFormats.td
@@ -142,10 +142,10 @@ class F3_Si<bits<2> opVal, bits<6> op3val, bit xVal, dag outs, dag ins,
 // Define rr and ri shift instructions with patterns.
 multiclass F3_S<string OpcStr, bits<6> Op3Val, bit XVal, SDNode OpNode,
                 ValueType VT, RegisterClass RC> {
-  def rr : F3_Sr<2, Op3Val, XVal, (outs RC:$rd), (ins RC:$rs, RC:$rs2),
+  def rr : F3_Sr<2, Op3Val, XVal, (outs RC:$rd), (ins RC:$rs, IntRegs:$rs2),
                  !strconcat(OpcStr, " $rs, $rs2, $rd"),
-                 [(set VT:$rd, (OpNode VT:$rs, VT:$rs2))]>;
-  def ri : F3_Si<2, Op3Val, XVal, (outs RC:$rd), (ins RC:$rs, unknown:$shcnt),
+                 [(set VT:$rd, (OpNode VT:$rs, i32:$rs2))]>;
+  def ri : F3_Si<2, Op3Val, XVal, (outs RC:$rd), (ins RC:$rs, i32imm:$shcnt),
                  !strconcat(OpcStr, " $rs, $shcnt, $rd"),
-                 [(set VT:$rd, (OpNode VT:$rs, (VT imm:$shcnt)))]>;
+                 [(set VT:$rd, (OpNode VT:$rs, (i32 imm:$shcnt)))]>;
 }
diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td
index 5ff4395..baefb06 100644
--- a/lib/Target/Sparc/SparcInstrInfo.td
+++ b/lib/Target/Sparc/SparcInstrInfo.td
@@ -64,8 +64,7 @@ def HI22 : SDNodeXForm<imm, [{
 }]>;
 
 def SETHIimm : PatLeaf<(imm), [{
-  return (((unsigned)N->getZExtValue() >> 10) << 10) ==
-         (unsigned)N->getZExtValue();
+  return isShiftedUInt<22, 10>(N->getZExtValue());
 }], HI22>;
 
 // Addressing modes.
@@ -796,10 +795,8 @@ def : Pat<(SPhi tconstpool:$in), (SETHIi tconstpool:$in)>;
 def : Pat<(SPlo tconstpool:$in), (ORri (i32 G0), tconstpool:$in)>;
 
 // Add reg, lo.  This is used when taking the addr of a global/constpool entry.
-def : Pat<(add i32:$r, (SPlo tglobaladdr:$in)),
-          (ADDri $r, tglobaladdr:$in)>;
-def : Pat<(add i32:$r, (SPlo tconstpool:$in)),
-          (ADDri $r, tconstpool:$in)>;
+def : Pat<(add iPTR:$r, (SPlo tglobaladdr:$in)), (ADDri $r, tglobaladdr:$in)>;
+def : Pat<(add iPTR:$r, (SPlo tconstpool:$in)),  (ADDri $r, tconstpool:$in)>;
 
 // Calls: 
 def : Pat<(call tglobaladdr:$dst),
diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp
index db9b30e..3af4c61 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.cpp
+++ b/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -74,8 +74,9 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
 
   // Addressable stack objects are accessed using neg. offsets from %fp
   MachineFunction &MF = *MI.getParent()->getParent();
-  int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
-               MI.getOperand(FIOperandNum + 1).getImm();
+  int64_t Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
+                   MI.getOperand(FIOperandNum + 1).getImm() +
+                   Subtarget.getStackPointerBias();
 
   // Replace frame index with a frame pointer reference.
   if (Offset >= -4096 && Offset <= 4095) {
diff --git a/lib/Target/Sparc/SparcSubtarget.h b/lib/Target/Sparc/SparcSubtarget.h
index a81931b..b94dd11 100644
--- a/lib/Target/Sparc/SparcSubtarget.h
+++ b/lib/Target/Sparc/SparcSubtarget.h
@@ -52,6 +52,12 @@ public:
     }
     return std::string(p);
   }
+
+  /// The 64-bit ABI uses biased stack and frame pointers, so the stack frame
+  /// of the current function is the area from [%sp+BIAS] to [%fp+BIAS].
+  int64_t getStackPointerBias() const {
+    return is64Bit() ? 2047 : 0;
+  }
 };
 
 } // end namespace llvm
diff --git a/lib/Target/SystemZ/AsmParser/CMakeLists.txt b/lib/Target/SystemZ/AsmParser/CMakeLists.txt
new file mode 100644
index 0000000..78a5714
--- /dev/null
+++ b/lib/Target/SystemZ/AsmParser/CMakeLists.txt
@@ -0,0 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMSystemZAsmParser
+  SystemZAsmParser.cpp
+  )
+
+add_dependencies(LLVMSystemZAsmParser SystemZCommonTableGen)
diff --git a/lib/Target/SystemZ/AsmParser/LLVMBuild.txt b/lib/Target/SystemZ/AsmParser/LLVMBuild.txt
new file mode 100644
index 0000000..0b97e71
--- /dev/null
+++ b/lib/Target/SystemZ/AsmParser/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/SystemZ/AsmParser/LLVMBuild.txt -------------*- Conf -*--===;
+;
+;                     The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+;   http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = SystemZAsmParser
+parent = SystemZ
+required_libraries = SystemZDesc SystemZInfo MC MCParser Support
+add_to_library_groups = SystemZ
diff --git a/lib/Target/SystemZ/AsmParser/Makefile b/lib/Target/SystemZ/AsmParser/Makefile
new file mode 100644
index 0000000..623ae2c
--- /dev/null
+++ b/lib/Target/SystemZ/AsmParser/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/SystemZ/AsmParser/Makefile ---------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMSystemZAsmParser
+
+# Hack: we need to include 'main' SystemZ target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
new file mode 100644
index 0000000..c7725a1
--- /dev/null
+++ b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
@@ -0,0 +1,689 @@
+//===-- SystemZAsmParser.cpp - Parse SystemZ assembly instructions --------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/SystemZMCTargetDesc.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCTargetAsmParser.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+// Return true if Expr is a constant in the range [MinValue, MaxValue].
+static bool inRange(const MCExpr *Expr, int64_t MinValue, int64_t MaxValue) {
+  const MCConstantExpr *Constant = dyn_cast<MCConstantExpr>(Expr);
+  if (!Constant)
+    return false;
+  int64_t Value = Constant->getValue();
+  return MinValue <= Value && Value <= MaxValue;
+}
+
+namespace {
+class SystemZOperand : public MCParsedAsmOperand {
+public:
+  enum RegisterKind {
+    GR32Reg,
+    GR64Reg,
+    GR128Reg,
+    ADDR32Reg,
+    ADDR64Reg,
+    FP32Reg,
+    FP64Reg,
+    FP128Reg
+  };
+
+private:
+  enum OperandKind {
+    KindToken,
+    KindReg,
+    KindAccessReg,
+    KindImm,
+    KindMem
+  };
+
+  OperandKind Kind;
+  SMLoc StartLoc, EndLoc;
+
+  // A string of length Length, starting at Data.
+  struct TokenOp {
+    const char *Data;
+    unsigned Length;
+  };
+
+  // LLVM register Num, which has kind Kind.
+  struct RegOp {
+    RegisterKind Kind;
+    unsigned Num;
+  };
+
+  // Base + Disp + Index, where Base and Index are LLVM registers or 0.
+  // RegKind says what type the registers have (ADDR32Reg or ADDR64Reg).
+  struct MemOp {
+    unsigned Base : 8;
+    unsigned Index : 8;
+    unsigned RegKind : 8;
+    unsigned Unused : 8;
+    const MCExpr *Disp;
+  };
+
+  union {
+    TokenOp Token;
+    RegOp Reg;
+    unsigned AccessReg;
+    const MCExpr *Imm;
+    MemOp Mem;
+  };
+
+  SystemZOperand(OperandKind kind, SMLoc startLoc, SMLoc endLoc)
+    : Kind(kind), StartLoc(startLoc), EndLoc(endLoc)
+  {}
+
+  void addExpr(MCInst &Inst, const MCExpr *Expr) const {
+    // A null expression means zero and a constant expression is folded to
+    // its value; anything else is kept as a symbolic MCExpr operand.
+    const MCConstantExpr *CE = Expr ? dyn_cast<MCConstantExpr>(Expr) : 0;
+    if (!Expr || CE)
+      Inst.addOperand(MCOperand::CreateImm(CE ? CE->getValue() : 0));
+    else
+      Inst.addOperand(MCOperand::CreateExpr(Expr));
+  }
+
+public:
+  // Create particular kinds of operand.
+  static SystemZOperand *createToken(StringRef Str, SMLoc Loc) {
+    SystemZOperand *Op = new SystemZOperand(KindToken, Loc, Loc);
+    Op->Token.Data = Str.data();
+    Op->Token.Length = Str.size();
+    return Op;
+  }
+  static SystemZOperand *createReg(RegisterKind Kind, unsigned Num,
+                                   SMLoc StartLoc, SMLoc EndLoc) {
+    SystemZOperand *Op = new SystemZOperand(KindReg, StartLoc, EndLoc);
+    Op->Reg.Kind = Kind;
+    Op->Reg.Num = Num;
+    return Op;
+  }
+  static SystemZOperand *createAccessReg(unsigned Num, SMLoc StartLoc,
+                                         SMLoc EndLoc) {
+    SystemZOperand *Op = new SystemZOperand(KindAccessReg, StartLoc, EndLoc);
+    Op->AccessReg = Num;
+    return Op;
+  }
+  static SystemZOperand *createImm(const MCExpr *Expr, SMLoc StartLoc,
+                                   SMLoc EndLoc) {
+    SystemZOperand *Op = new SystemZOperand(KindImm, StartLoc, EndLoc);
+    Op->Imm = Expr;
+    return Op;
+  }
+  static SystemZOperand *createMem(RegisterKind RegKind, unsigned Base,
+                                   const MCExpr *Disp, unsigned Index,
+                                   SMLoc StartLoc, SMLoc EndLoc) {
+    SystemZOperand *Op = new SystemZOperand(KindMem, StartLoc, EndLoc);
+    Op->Mem.RegKind = RegKind;
+    Op->Mem.Base = Base;
+    Op->Mem.Index = Index;
+    Op->Mem.Disp = Disp;
+    return Op;
+  }
+
+  // Token operands
+  virtual bool isToken() const LLVM_OVERRIDE {
+    return Kind == KindToken;
+  }
+  StringRef getToken() const {
+    assert(Kind == KindToken && "Not a token");
+    return StringRef(Token.Data, Token.Length);
+  }
+
+  // Register operands.
+  virtual bool isReg() const LLVM_OVERRIDE {
+    return Kind == KindReg;
+  }
+  bool isReg(RegisterKind RegKind) const {
+    return Kind == KindReg && Reg.Kind == RegKind;
+  }
+  virtual unsigned getReg() const LLVM_OVERRIDE {
+    assert(Kind == KindReg && "Not a register");
+    return Reg.Num;
+  }
+
+  // Access register operands.  Access registers aren't exposed to LLVM
+  // as registers.
+  bool isAccessReg() const {
+    return Kind == KindAccessReg;
+  }
+
+  // Immediate operands.
+  virtual bool isImm() const LLVM_OVERRIDE {
+    return Kind == KindImm;
+  }
+  bool isImm(int64_t MinValue, int64_t MaxValue) const {
+    return Kind == KindImm && inRange(Imm, MinValue, MaxValue);
+  }
+  const MCExpr *getImm() const {
+    assert(Kind == KindImm && "Not an immediate");
+    return Imm;
+  }
+
+  // Memory operands.
+  virtual bool isMem() const LLVM_OVERRIDE {
+    return Kind == KindMem;
+  }
+  bool isMem(RegisterKind RegKind, bool HasIndex) const {
+    return (Kind == KindMem &&
+            Mem.RegKind == RegKind &&
+            (HasIndex || !Mem.Index));
+  }
+  bool isMemDisp12(RegisterKind RegKind, bool HasIndex) const {
+    return isMem(RegKind, HasIndex) && inRange(Mem.Disp, 0, 0xfff);
+  }
+  bool isMemDisp20(RegisterKind RegKind, bool HasIndex) const {
+    return isMem(RegKind, HasIndex) && inRange(Mem.Disp, -524288, 524287);
+  }
+
+  // Override MCParsedAsmOperand.
+  virtual SMLoc getStartLoc() const LLVM_OVERRIDE { return StartLoc; }
+  virtual SMLoc getEndLoc() const LLVM_OVERRIDE { return EndLoc; }
+  virtual void print(raw_ostream &OS) const LLVM_OVERRIDE;
+
+  // Used by the TableGen code to add particular types of operand
+  // to an instruction.
+  void addRegOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands");
+    Inst.addOperand(MCOperand::CreateReg(getReg()));
+  }
+  void addAccessRegOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands");
+    assert(Kind == KindAccessReg && "Invalid operand type");
+    Inst.addOperand(MCOperand::CreateImm(AccessReg));
+  }
+  void addImmOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands");
+    addExpr(Inst, getImm());
+  }
+  void addBDAddrOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 2 && "Invalid number of operands");
+    assert(Kind == KindMem && Mem.Index == 0 && "Invalid operand type");
+    Inst.addOperand(MCOperand::CreateReg(Mem.Base));
+    addExpr(Inst, Mem.Disp);
+  }
+  void addBDXAddrOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 3 && "Invalid number of operands");
+    assert(Kind == KindMem && "Invalid operand type");
+    Inst.addOperand(MCOperand::CreateReg(Mem.Base));
+    addExpr(Inst, Mem.Disp);
+    Inst.addOperand(MCOperand::CreateReg(Mem.Index));
+  }
+
+  // Used by the TableGen code to check for particular operand types.
+  bool isGR32() const { return isReg(GR32Reg); }
+  bool isGR64() const { return isReg(GR64Reg); }
+  bool isGR128() const { return isReg(GR128Reg); }
+  bool isADDR32() const { return isReg(ADDR32Reg); }
+  bool isADDR64() const { return isReg(ADDR64Reg); }
+  bool isADDR128() const { return false; }
+  bool isFP32() const { return isReg(FP32Reg); }
+  bool isFP64() const { return isReg(FP64Reg); }
+  bool isFP128() const { return isReg(FP128Reg); }
+  bool isBDAddr32Disp12() const { return isMemDisp12(ADDR32Reg, false); }
+  bool isBDAddr32Disp20() const { return isMemDisp20(ADDR32Reg, false); }
+  bool isBDAddr64Disp12() const { return isMemDisp12(ADDR64Reg, false); }
+  bool isBDAddr64Disp20() const { return isMemDisp20(ADDR64Reg, false); }
+  bool isBDXAddr64Disp12() const { return isMemDisp12(ADDR64Reg, true); }
+  bool isBDXAddr64Disp20() const { return isMemDisp20(ADDR64Reg, true); }
+  bool isU4Imm() const { return isImm(0, 15); }
+  bool isU6Imm() const { return isImm(0, 63); }
+  bool isU8Imm() const { return isImm(0, 255); }
+  bool isS8Imm() const { return isImm(-128, 127); }
+  bool isU16Imm() const { return isImm(0, 65535); }
+  bool isS16Imm() const { return isImm(-32768, 32767); }
+  bool isU32Imm() const { return isImm(0, (1LL << 32) - 1); }
+  bool isS32Imm() const { return isImm(-(1LL << 31), (1LL << 31) - 1); }
+};
+
+// Maps of asm register numbers to LLVM register numbers, with 0 indicating
+// an invalid register.  We don't use the register classes directly because
+// they specify the allocation order.
+static const unsigned GR32Regs[] = {
+  SystemZ::R0W, SystemZ::R1W, SystemZ::R2W, SystemZ::R3W,
+  SystemZ::R4W, SystemZ::R5W, SystemZ::R6W, SystemZ::R7W,
+  SystemZ::R8W, SystemZ::R9W, SystemZ::R10W, SystemZ::R11W,
+  SystemZ::R12W, SystemZ::R13W, SystemZ::R14W, SystemZ::R15W
+};
+static const unsigned GR64Regs[] = {
+  SystemZ::R0D, SystemZ::R1D, SystemZ::R2D, SystemZ::R3D,
+  SystemZ::R4D, SystemZ::R5D, SystemZ::R6D, SystemZ::R7D,
+  SystemZ::R8D, SystemZ::R9D, SystemZ::R10D, SystemZ::R11D,
+  SystemZ::R12D, SystemZ::R13D, SystemZ::R14D, SystemZ::R15D
+};
+static const unsigned GR128Regs[] = {
+  SystemZ::R0Q, 0, SystemZ::R2Q, 0,
+  SystemZ::R4Q, 0, SystemZ::R6Q, 0,
+  SystemZ::R8Q, 0, SystemZ::R10Q, 0,
+  SystemZ::R12Q, 0, SystemZ::R14Q, 0
+};
+static const unsigned FP32Regs[] = {
+  SystemZ::F0S, SystemZ::F1S, SystemZ::F2S, SystemZ::F3S,
+  SystemZ::F4S, SystemZ::F5S, SystemZ::F6S, SystemZ::F7S,
+  SystemZ::F8S, SystemZ::F9S, SystemZ::F10S, SystemZ::F11S,
+  SystemZ::F12S, SystemZ::F13S, SystemZ::F14S, SystemZ::F15S
+};
+static const unsigned FP64Regs[] = {
+  SystemZ::F0D, SystemZ::F1D, SystemZ::F2D, SystemZ::F3D,
+  SystemZ::F4D, SystemZ::F5D, SystemZ::F6D, SystemZ::F7D,
+  SystemZ::F8D, SystemZ::F9D, SystemZ::F10D, SystemZ::F11D,
+  SystemZ::F12D, SystemZ::F13D, SystemZ::F14D, SystemZ::F15D
+};
+static const unsigned FP128Regs[] = {
+  SystemZ::F0Q, SystemZ::F1Q, 0, 0,
+  SystemZ::F4Q, SystemZ::F5Q, 0, 0,
+  SystemZ::F8Q, SystemZ::F9Q, 0, 0,
+  SystemZ::F12Q, SystemZ::F13Q, 0, 0
+};
+
+class SystemZAsmParser : public MCTargetAsmParser {
+#define GET_ASSEMBLER_HEADER
+#include "SystemZGenAsmMatcher.inc"
+
+private:
+  MCSubtargetInfo &STI;
+  MCAsmParser &Parser;
+  struct Register {
+    char Prefix;
+    unsigned Number;
+    SMLoc StartLoc, EndLoc;
+  };
+
+  bool parseRegister(Register &Reg);
+
+  OperandMatchResultTy
+  parseRegister(Register &Reg, char Prefix, const unsigned *Regs,
+                bool IsAddress = false);
+
+  OperandMatchResultTy
+  parseRegister(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+                char Prefix, const unsigned *Regs,
+                SystemZOperand::RegisterKind Kind,
+                bool IsAddress = false);
+
+  OperandMatchResultTy
+  parseAddress(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+               const unsigned *Regs, SystemZOperand::RegisterKind RegKind,
+               bool HasIndex);
+
+  bool parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+                    StringRef Mnemonic);
+
+public:
+  SystemZAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser)
+    : MCTargetAsmParser(), STI(sti), Parser(parser) {
+    MCAsmParserExtension::Initialize(Parser);
+
+    // Initialize the set of available features.
+    setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+  }
+
+  // Override MCTargetAsmParser.
+  virtual bool ParseDirective(AsmToken DirectiveID) LLVM_OVERRIDE;
+  virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+                             SMLoc &EndLoc) LLVM_OVERRIDE;
+  virtual bool ParseInstruction(ParseInstructionInfo &Info,
+                                StringRef Name, SMLoc NameLoc,
+                                SmallVectorImpl<MCParsedAsmOperand*> &Operands)
+    LLVM_OVERRIDE;
+  virtual bool
+    MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+                            SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+                            MCStreamer &Out, unsigned &ErrorInfo,
+                            bool MatchingInlineAsm) LLVM_OVERRIDE;
+
+  // Used by the TableGen code to parse particular operand types.
+  OperandMatchResultTy
+  parseGR32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+    return parseRegister(Operands, 'r', GR32Regs, SystemZOperand::GR32Reg);
+  }
+  OperandMatchResultTy
+  parseGR64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+    return parseRegister(Operands, 'r', GR64Regs, SystemZOperand::GR64Reg);
+  }
+  OperandMatchResultTy
+  parseGR128(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+    return parseRegister(Operands, 'r', GR128Regs, SystemZOperand::GR128Reg);
+  }
+  OperandMatchResultTy
+  parseADDR32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+    return parseRegister(Operands, 'r', GR32Regs, SystemZOperand::ADDR32Reg,
+                         true);
+  }
+  OperandMatchResultTy
+  parseADDR64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+    return parseRegister(Operands, 'r', GR64Regs, SystemZOperand::ADDR64Reg,
+                         true);
+  }
+  OperandMatchResultTy
+  parseADDR128(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+    llvm_unreachable("Shouldn't be used as an operand");
+  }
+  OperandMatchResultTy
+  parseFP32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+    return parseRegister(Operands, 'f', FP32Regs, SystemZOperand::FP32Reg);
+  }
+  OperandMatchResultTy
+  parseFP64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+    return parseRegister(Operands, 'f', FP64Regs, SystemZOperand::FP64Reg);
+  }
+  OperandMatchResultTy
+  parseFP128(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+    return parseRegister(Operands, 'f', FP128Regs, SystemZOperand::FP128Reg);
+  }
+  OperandMatchResultTy
+  parseBDAddr32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+    return parseAddress(Operands, GR32Regs, SystemZOperand::ADDR32Reg, false);
+  }
+  OperandMatchResultTy
+  parseBDAddr64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+    return parseAddress(Operands, GR64Regs, SystemZOperand::ADDR64Reg, false);
+  }
+  OperandMatchResultTy
+  parseBDXAddr64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+    return parseAddress(Operands, GR64Regs, SystemZOperand::ADDR64Reg, true);
+  }
+  OperandMatchResultTy
+  parseAccessReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+};
+}
+
+#define GET_REGISTER_MATCHER
+#define GET_SUBTARGET_FEATURE_NAME
+#define GET_MATCHER_IMPLEMENTATION
+#include "SystemZGenAsmMatcher.inc"
+
+void SystemZOperand::print(raw_ostream &OS) const {
+  llvm_unreachable("Not implemented");
+}
+
+// Parse one register of the form %<prefix><number>.  Return true on failure.
+bool SystemZAsmParser::parseRegister(Register &Reg) {
+  Reg.StartLoc = Parser.getTok().getLoc();
+
+  // Eat the % prefix.  Note that it stays consumed if a later check fails.
+  if (Parser.getTok().isNot(AsmToken::Percent))
+    return true;
+  Parser.Lex();
+
+  // Expect an identifier token holding the register name.
+  if (Parser.getTok().isNot(AsmToken::Identifier))
+    return true;
+
+  // The name must hold a prefix character and at least one more character.
+  StringRef Name = Parser.getTok().getString();
+  if (Name.size() < 2)
+    return true;
+  Reg.Prefix = Name[0];
+
+  // Treat the rest of the register name as a decimal register number.
+  if (Name.substr(1).getAsInteger(10, Reg.Number))
+    return true;
+
+  Reg.EndLoc = Parser.getTok().getLoc();
+  Parser.Lex();  // Consume the register name.
+  return false;
+}
+
+// Parse a register whose name starts with Prefix and translate it to LLVM
+// numbering.  Regs is indexed by asm register number and holds the matching
+// LLVM register, or zero if that number is invalid.  IsAddress is true
+// when the register is being parsed as part of an address.
+SystemZAsmParser::OperandMatchResultTy
+SystemZAsmParser::parseRegister(Register &Reg, char Prefix,
+                                const unsigned *Regs, bool IsAddress) {
+  if (parseRegister(Reg))
+    return MatchOperand_NoMatch;
+  if (!(Reg.Prefix == Prefix && Reg.Number < 16 && Regs[Reg.Number] != 0)) {
+    Error(Reg.StartLoc, "invalid register");
+    return MatchOperand_ParseFail;
+  }
+  if (IsAddress && Reg.Number == 0) {
+    Error(Reg.StartLoc, "%r0 used in an address");
+    return MatchOperand_ParseFail;
+  }
+  Reg.Number = Regs[Reg.Number];
+  return MatchOperand_Success;
+}
+
+// Parse a register and, on success, append it to Operands.  Prefix is the
+// leading letter of the register name ('r' for GPRs, 'f' for FPRs, etc.),
+// Regs maps asm register numbers to LLVM ones (zero = invalid), Kind is the
+// kind recorded in the new operand and IsAddress marks an address context.
+SystemZAsmParser::OperandMatchResultTy
+SystemZAsmParser::parseRegister(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+                                char Prefix, const unsigned *Regs,
+                                SystemZOperand::RegisterKind Kind,
+                                bool IsAddress) {
+  Register Reg;
+  OperandMatchResultTy Result = parseRegister(Reg, Prefix, Regs, IsAddress);
+  if (Result != MatchOperand_Success)
+    return Result;
+  Operands.push_back(SystemZOperand::createReg(Kind, Reg.Number,
+                                               Reg.StartLoc, Reg.EndLoc));
+  return MatchOperand_Success;
+}
+
+// Parse a memory operand of the form Disp, Disp(Base) or Disp(Index,Base)
+// and add it to Operands.  Regs maps asm register numbers to LLVM address
+// registers of kind RegKind (ADDR32Reg or ADDR64Reg) and is used for both
+// Base and Index.  HasIndex says whether the address allows an index.
+SystemZAsmParser::OperandMatchResultTy
+SystemZAsmParser::parseAddress(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+                               const unsigned *Regs,
+                               SystemZOperand::RegisterKind RegKind,
+                               bool HasIndex) {
+  SMLoc StartLoc = Parser.getTok().getLoc();
+
+  // Parse the displacement, which must always be present.
+  const MCExpr *Disp;
+  if (getParser().parseExpression(Disp))
+    return MatchOperand_NoMatch;
+
+  // Parse the optional base and index.
+  unsigned Index = 0;
+  unsigned Base = 0;
+  if (getLexer().is(AsmToken::LParen)) {
+    Parser.Lex();
+
+    // Parse the first register, using Regs so that it matches RegKind.
+    Register Reg;
+    OperandMatchResultTy Result = parseRegister(Reg, 'r', Regs, true);
+    if (Result != MatchOperand_Success)
+      return Result;
+
+    // Check whether there's a second register.  If so, the one that we
+    // just parsed was the index.
+    if (getLexer().is(AsmToken::Comma)) {
+      Parser.Lex();
+
+      if (!HasIndex) {
+        Error(Reg.StartLoc, "invalid use of indexed addressing");
+        return MatchOperand_ParseFail;
+      }
+
+      Index = Reg.Number;
+      Result = parseRegister(Reg, 'r', Regs, true);
+      if (Result != MatchOperand_Success)
+        return Result;
+    }
+    Base = Reg.Number;
+
+    // Consume the closing bracket.
+    if (getLexer().isNot(AsmToken::RParen))
+      return MatchOperand_NoMatch;
+    Parser.Lex();
+  }
+
+  SMLoc EndLoc =
+    SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+  Operands.push_back(SystemZOperand::createMem(RegKind, Base, Disp, Index,
+                                               StartLoc, EndLoc));
+  return MatchOperand_Success;
+}
+
+bool SystemZAsmParser::ParseDirective(AsmToken DirectiveID) {
+  return true;
+}
+
+bool SystemZAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+                                     SMLoc &EndLoc) {
+  Register Parsed;
+  if (parseRegister(Parsed))
+    return Error(Parsed.StartLoc, "register expected");
+  // Only %r0-%r15 (as 64-bit GPRs) and %f0-%f15 (as 64-bit FPRs) are valid.
+  const unsigned *Table = Parsed.Prefix == 'r' ? GR64Regs :
+                          Parsed.Prefix == 'f' ? FP64Regs : 0;
+  if (!Table || Parsed.Number >= 16)
+    return Error(Parsed.StartLoc, "invalid register");
+  RegNo = Table[Parsed.Number];
+  StartLoc = Parsed.StartLoc;
+  EndLoc = Parsed.EndLoc;
+  return false;
+}
+
+bool SystemZAsmParser::
+ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
+                 SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+  Operands.push_back(SystemZOperand::createToken(Name, NameLoc));
+  // An instruction without operands just needs its EndOfStatement consumed.
+  if (getLexer().is(AsmToken::EndOfStatement)) {
+    Parser.Lex();
+    return false;
+  }
+
+  // Read a comma-separated list of one or more operands.
+  for (;;) {
+    if (parseOperand(Operands, Name)) {
+      Parser.eatToEndOfStatement();
+      return true;
+    }
+    if (getLexer().isNot(AsmToken::Comma))
+      break;
+    Parser.Lex();
+  }
+
+  // Nothing but the end of the statement may follow the last operand.
+  if (getLexer().isNot(AsmToken::EndOfStatement)) {
+    SMLoc Loc = getLexer().getLoc();
+    Parser.eatToEndOfStatement();
+    return Error(Loc, "unexpected token in argument list");
+  }
+
+  // Consume the EndOfStatement.
+  Parser.Lex();
+  return false;
+}
+
+bool SystemZAsmParser::
+parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+             StringRef Mnemonic) {
+  // Try any custom parser associated with the current operand first.
+  switch (MatchOperandParserImpl(Operands, Mnemonic)) {
+  case MatchOperand_Success:
+    return false;
+  case MatchOperand_ParseFail:
+    // There was a match, but an error occurred while parsing the operand.
+    return true;
+  default:
+    // There was no custom match, so use the generic handling below.
+    break;
+  }
+
+  // The only other type of operand is an immediate.
+  SMLoc StartLoc = Parser.getTok().getLoc();
+  const MCExpr *Expr;
+  if (getParser().parseExpression(Expr))
+    return true;
+
+  const char *NextTok = Parser.getTok().getLoc().getPointer();
+  SMLoc EndLoc = SMLoc::getFromPointer(NextTok - 1);
+  Operands.push_back(SystemZOperand::createImm(Expr, StartLoc, EndLoc));
+  return false;
+}
+
+bool SystemZAsmParser::
+MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+                        SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+                        MCStreamer &Out, unsigned &ErrorInfo,
+                        bool MatchingInlineAsm) {
+  MCInst Inst;
+  unsigned MatchResult;
+
+  MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo,
+                                     MatchingInlineAsm);
+  switch (MatchResult) {
+  default: break;  // Any other result reaches the llvm_unreachable below.
+  case Match_Success:
+    Inst.setLoc(IDLoc);
+    Out.EmitInstruction(Inst);
+    return false;
+
+  case Match_MissingFeature: {
+    assert(ErrorInfo && "Unknown missing feature!");
+    // ErrorInfo is a mask of the missing subtarget features.  Name each set
+    // bit in the message; note that the loop stops short of the top bit.
+    std::string Msg = "instruction requires:";
+    unsigned Mask = 1;
+    for (unsigned I = 0; I < sizeof(ErrorInfo) * 8 - 1; ++I) {
+      if (ErrorInfo & Mask) {
+        Msg += " ";
+        Msg += getSubtargetFeatureName(ErrorInfo & Mask);
+      }
+      Mask <<= 1;
+    }
+    return Error(IDLoc, Msg);
+  }
+
+  case Match_InvalidOperand: {
+    SMLoc ErrorLoc = IDLoc;
+    if (ErrorInfo != ~0U) {
+      if (ErrorInfo >= Operands.size())
+        return Error(IDLoc, "too few operands for instruction");
+
+      ErrorLoc = ((SystemZOperand*)Operands[ErrorInfo])->getStartLoc();
+      if (ErrorLoc == SMLoc())
+        ErrorLoc = IDLoc;
+    }
+    return Error(ErrorLoc, "invalid operand for instruction");
+  }
+
+  case Match_MnemonicFail:
+    return Error(IDLoc, "invalid instruction");
+  }
+
+  llvm_unreachable("Unexpected match type");
+}
+
+SystemZAsmParser::OperandMatchResultTy SystemZAsmParser::
+parseAccessReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+  Register AR;
+  if (parseRegister(AR))
+    return MatchOperand_NoMatch;
+  if (!(AR.Prefix == 'a' && AR.Number < 16)) {
+    Error(AR.StartLoc, "invalid register");
+    return MatchOperand_ParseFail;
+  }
+  Operands.push_back(
+    SystemZOperand::createAccessReg(AR.Number, AR.StartLoc, AR.EndLoc));
+  return MatchOperand_Success;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeSystemZAsmParser() {
+  RegisterMCAsmParser<SystemZAsmParser> X(TheSystemZTarget);
+}
diff --git a/lib/Target/SystemZ/CMakeLists.txt b/lib/Target/SystemZ/CMakeLists.txt
new file mode 100644
index 0000000..67b17fc
--- /dev/null
+++ b/lib/Target/SystemZ/CMakeLists.txt
@@ -0,0 +1,32 @@
+set(LLVM_TARGET_DEFINITIONS SystemZ.td)
+
+tablegen(LLVM SystemZGenAsmMatcher.inc -gen-asm-matcher)
+tablegen(LLVM SystemZGenAsmWriter.inc -gen-asm-writer)
+tablegen(LLVM SystemZGenCallingConv.inc -gen-callingconv)
+tablegen(LLVM SystemZGenDAGISel.inc -gen-dag-isel)
+tablegen(LLVM SystemZGenMCCodeEmitter.inc -gen-emitter -mc-emitter)
+tablegen(LLVM SystemZGenInstrInfo.inc -gen-instr-info)
+tablegen(LLVM SystemZGenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM SystemZGenSubtargetInfo.inc -gen-subtarget)
+add_public_tablegen_target(SystemZCommonTableGen)
+
+add_llvm_target(SystemZCodeGen
+  SystemZAsmPrinter.cpp
+  SystemZCallingConv.cpp
+  SystemZConstantPoolValue.cpp
+  SystemZFrameLowering.cpp
+  SystemZISelDAGToDAG.cpp
+  SystemZISelLowering.cpp
+  SystemZInstrInfo.cpp
+  SystemZMCInstLower.cpp
+  SystemZRegisterInfo.cpp
+  SystemZSubtarget.cpp
+  SystemZTargetMachine.cpp
+  )
+
+add_dependencies(LLVMSystemZCodeGen intrinsics_gen)
+
+add_subdirectory(AsmParser)
+add_subdirectory(InstPrinter)
+add_subdirectory(TargetInfo)
+add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/SystemZ/InstPrinter/CMakeLists.txt b/lib/Target/SystemZ/InstPrinter/CMakeLists.txt
new file mode 100644
index 0000000..ddbf82f
--- /dev/null
+++ b/lib/Target/SystemZ/InstPrinter/CMakeLists.txt
@@ -0,0 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMSystemZAsmPrinter
+  SystemZInstPrinter.cpp
+  )
+
+add_dependencies(LLVMSystemZAsmPrinter SystemZCommonTableGen)
diff --git a/lib/Target/SystemZ/InstPrinter/LLVMBuild.txt b/lib/Target/SystemZ/InstPrinter/LLVMBuild.txt
new file mode 100644
index 0000000..fdfd738
--- /dev/null
+++ b/lib/Target/SystemZ/InstPrinter/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/SystemZ/InstPrinter/LLVMBuild.txt -----------*- Conf -*--===;
+;
+;                     The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+;   http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = SystemZAsmPrinter
+parent = SystemZ
+required_libraries = MC Support
+add_to_library_groups = SystemZ
diff --git a/lib/Target/SystemZ/InstPrinter/Makefile b/lib/Target/SystemZ/InstPrinter/Makefile
new file mode 100644
index 0000000..3ba8126
--- /dev/null
+++ b/lib/Target/SystemZ/InstPrinter/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/SystemZ/InstPrinter/Makefile -------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMSystemZAsmPrinter
+
+# Hack: we need to include 'main' SystemZ target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
new file mode 100644
index 0000000..d73cf49
--- /dev/null
+++ b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
@@ -0,0 +1,150 @@
+//===-- SystemZInstPrinter.cpp - Convert SystemZ MCInst to assembly syntax ===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+
+#include "SystemZInstPrinter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#include "SystemZGenAsmWriter.inc"
+
+void SystemZInstPrinter::printAddress(unsigned Base, int64_t Disp,
+                                      unsigned Index, raw_ostream &O) {
+  O << Disp;
+  if (Base) {
+    O << '(';
+    if (Index)
+      O << '%' << getRegisterName(Index) << ',';
+    O << '%' << getRegisterName(Base) << ')';
+  } else
+    assert(!Index && "Shouldn't have an index without a base");
+}
+
+void SystemZInstPrinter::printOperand(const MCOperand &MO, raw_ostream &O) {
+  if (MO.isReg())
+    O << '%' << getRegisterName(MO.getReg());
+  else if (MO.isImm())
+    O << MO.getImm();
+  else if (MO.isExpr())
+    O << *MO.getExpr();
+  else
+    llvm_unreachable("Invalid operand");
+}
+
+void SystemZInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+                                   StringRef Annot) {
+  printInstruction(MI, O);
+  printAnnotation(O, Annot);
+}
+
+void SystemZInstPrinter::printRegName(raw_ostream &O, unsigned RegNo) const {
+  O << '%' << getRegisterName(RegNo);
+}
+
+void SystemZInstPrinter::printU4ImmOperand(const MCInst *MI, int OpNum,
+                                           raw_ostream &O) {
+  int64_t Value = MI->getOperand(OpNum).getImm();
+  assert(isUInt<4>(Value) && "Invalid u4imm argument");
+  O << Value;
+}
+
+void SystemZInstPrinter::printU6ImmOperand(const MCInst *MI, int OpNum,
+                                           raw_ostream &O) {
+  int64_t Value = MI->getOperand(OpNum).getImm();
+  assert(isUInt<6>(Value) && "Invalid u6imm argument");
+  O << Value;
+}
+
+void SystemZInstPrinter::printS8ImmOperand(const MCInst *MI, int OpNum,
+                                           raw_ostream &O) {
+  int64_t Value = MI->getOperand(OpNum).getImm();
+  assert(isInt<8>(Value) && "Invalid s8imm argument");
+  O << Value;
+}
+
+void SystemZInstPrinter::printU8ImmOperand(const MCInst *MI, int OpNum,
+                                           raw_ostream &O) {
+  int64_t Value = MI->getOperand(OpNum).getImm();
+  assert(isUInt<8>(Value) && "Invalid u8imm argument");
+  O << Value;
+}
+
+void SystemZInstPrinter::printS16ImmOperand(const MCInst *MI, int OpNum,
+                                            raw_ostream &O) {
+  int64_t Value = MI->getOperand(OpNum).getImm();
+  assert(isInt<16>(Value) && "Invalid s16imm argument");
+  O << Value;
+}
+
+void SystemZInstPrinter::printU16ImmOperand(const MCInst *MI, int OpNum,
+                                            raw_ostream &O) {
+  int64_t Value = MI->getOperand(OpNum).getImm();
+  assert(isUInt<16>(Value) && "Invalid u16imm argument");
+  O << Value;
+}
+
+void SystemZInstPrinter::printS32ImmOperand(const MCInst *MI, int OpNum,
+                                            raw_ostream &O) {
+  int64_t Value = MI->getOperand(OpNum).getImm();
+  assert(isInt<32>(Value) && "Invalid s32imm argument");
+  O << Value;
+}
+
+void SystemZInstPrinter::printU32ImmOperand(const MCInst *MI, int OpNum,
+                                            raw_ostream &O) {
+  int64_t Value = MI->getOperand(OpNum).getImm();
+  assert(isUInt<32>(Value) && "Invalid u32imm argument");
+  O << Value;
+}
+
+void SystemZInstPrinter::printAccessRegOperand(const MCInst *MI, int OpNum,
+                                               raw_ostream &O) {
+  uint64_t Value = MI->getOperand(OpNum).getImm();
+  assert(Value < 16 && "Invalid access register number");
+  O << "%a" << (unsigned int)Value;
+}
+
+void SystemZInstPrinter::printCallOperand(const MCInst *MI, int OpNum,
+                                          raw_ostream &O) {
+  printOperand(MI, OpNum, O);
+  O << "@PLT";
+}
+
+void SystemZInstPrinter::printOperand(const MCInst *MI, int OpNum,
+                                      raw_ostream &O) {
+  printOperand(MI->getOperand(OpNum), O);
+}
+
+void SystemZInstPrinter::printBDAddrOperand(const MCInst *MI, int OpNum,
+                                            raw_ostream &O) {
+  printAddress(MI->getOperand(OpNum).getReg(),
+               MI->getOperand(OpNum + 1).getImm(), 0, O);
+}
+
+void SystemZInstPrinter::printBDXAddrOperand(const MCInst *MI, int OpNum,
+                                             raw_ostream &O) {
+  printAddress(MI->getOperand(OpNum).getReg(),
+               MI->getOperand(OpNum + 1).getImm(),
+               MI->getOperand(OpNum + 2).getReg(), O);
+}
+
+void SystemZInstPrinter::printCond4Operand(const MCInst *MI, int OpNum,
+                                           raw_ostream &O) {
+  static const char *const CondNames[] = {
+    "o", "h", "nle", "l", "nhe", "lh", "ne",
+    "e", "nlh", "he", "nl", "le", "nh", "no"
+  };
+  uint64_t Imm = MI->getOperand(OpNum).getImm();
+  assert(Imm > 0 && Imm < 15 && "Invalid condition");
+  O << CondNames[Imm - 1];
+}
diff --git a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
new file mode 100644
index 0000000..b82e79d
--- /dev/null
+++ b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
@@ -0,0 +1,68 @@
+//==- SystemZInstPrinter.h - Convert SystemZ MCInst to assembly --*- C++ -*-==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a SystemZ MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SYSTEMZINSTPRINTER_H
+#define LLVM_SYSTEMZINSTPRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+class MCOperand;
+
+class SystemZInstPrinter : public MCInstPrinter {
+public:
+  SystemZInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+                     const MCRegisterInfo &MRI)
+    : MCInstPrinter(MAI, MII, MRI) {}
+
+  // Automatically generated by tblgen.
+  void printInstruction(const MCInst *MI, raw_ostream &O);
+  static const char *getRegisterName(unsigned RegNo);
+
+  // Print an address with the given base, displacement and index.
+  static void printAddress(unsigned Base, int64_t Disp, unsigned Index,
+                           raw_ostream &O);
+
+  // Print the given operand.
+  static void printOperand(const MCOperand &MO, raw_ostream &O);
+
+  // Override MCInstPrinter.
+  virtual void printRegName(raw_ostream &O, unsigned RegNo) const
+    LLVM_OVERRIDE;
+  virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot)
+    LLVM_OVERRIDE;
+
+private:
+  // Print various types of operand.
+  void printOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+  void printBDAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+  void printBDXAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+  void printU4ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+  void printU6ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+  void printS8ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+  void printU8ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+  void printS16ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+  void printU16ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+  void printS32ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+  void printU32ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+  void printCallOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+  void printAccessRegOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+
+  // Print the mnemonic for a condition-code mask ("ne", "lh", etc.)
+  // This forms part of the instruction name rather than the operand list.
+  void printCond4Operand(const MCInst *MI, int OpNum, raw_ostream &O);
+};
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/SystemZ/LLVMBuild.txt b/lib/Target/SystemZ/LLVMBuild.txt
new file mode 100644
index 0000000..aba0de2
--- /dev/null
+++ b/lib/Target/SystemZ/LLVMBuild.txt
@@ -0,0 +1,34 @@
+;===- ./lib/Target/SystemZ/LLVMBuild.txt -----------------------*- Conf -*--===;
+;
+;                     The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+;   http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = AsmParser InstPrinter MCTargetDesc TargetInfo
+
+[component_0]
+type = TargetGroup
+name = SystemZ
+parent = Target
+has_asmparser = 1
+has_asmprinter = 1
+has_jit = 1
+
+[component_1]
+type = Library
+name = SystemZCodeGen
+parent = SystemZ
+required_libraries = AsmPrinter CodeGen Core MC SelectionDAG SystemZDesc SystemZInfo Support Target
+add_to_library_groups = SystemZ
diff --git a/lib/Target/SystemZ/MCTargetDesc/CMakeLists.txt b/lib/Target/SystemZ/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 0000000..3d13128
--- /dev/null
+++ b/lib/Target/SystemZ/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,9 @@
+add_llvm_library(LLVMSystemZDesc
+  SystemZMCAsmBackend.cpp
+  SystemZMCAsmInfo.cpp
+  SystemZMCCodeEmitter.cpp
+  SystemZMCObjectWriter.cpp
+  SystemZMCTargetDesc.cpp
+  )
+
+add_dependencies(LLVMSystemZDesc SystemZCommonTableGen)
diff --git a/lib/Target/SystemZ/MCTargetDesc/LLVMBuild.txt b/lib/Target/SystemZ/MCTargetDesc/LLVMBuild.txt
new file mode 100644
index 0000000..cbdb59c
--- /dev/null
+++ b/lib/Target/SystemZ/MCTargetDesc/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/SystemZ/MCTargetDesc/LLVMBuild.txt ----------*- Conf -*--===;
+;
+;                     The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+;   http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = SystemZDesc
+parent = SystemZ
+required_libraries = MC SystemZAsmPrinter SystemZInfo Support
+add_to_library_groups = SystemZ
diff --git a/lib/Target/SystemZ/MCTargetDesc/Makefile b/lib/Target/SystemZ/MCTargetDesc/Makefile
new file mode 100644
index 0000000..08f1a9d
--- /dev/null
+++ b/lib/Target/SystemZ/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/SystemZ/MCTargetDesc/Makefile ------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMSystemZDesc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
new file mode 100644
index 0000000..e901c6c
--- /dev/null
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
@@ -0,0 +1,151 @@
+//===-- SystemZMCAsmBackend.cpp - SystemZ assembler backend ---------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/SystemZMCTargetDesc.h"
+#include "MCTargetDesc/SystemZMCFixups.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCObjectWriter.h"
+
+using namespace llvm;
+
+// Value is a fully-resolved relocation value: Symbol + Addend [- Pivot].
+// Return the bits that should be installed in a relocation field for
+// fixup kind Kind.
+static uint64_t extractBitsForFixup(MCFixupKind Kind, uint64_t Value) {
+  if (Kind < FirstTargetFixupKind)
+    return Value;
+
+  switch (unsigned(Kind)) {
+  case SystemZ::FK_390_PC16DBL:
+  case SystemZ::FK_390_PC32DBL:
+  case SystemZ::FK_390_PLT16DBL:
+  case SystemZ::FK_390_PLT32DBL:
+    return (int64_t)Value / 2;
+  }
+
+  llvm_unreachable("Unknown fixup kind!");
+}
+
+// If Opcode can be relaxed, return the relaxed form, otherwise return 0.
+static unsigned getRelaxedOpcode(unsigned Opcode) {
+  switch (Opcode) {
+  case SystemZ::BRC:  return SystemZ::BRCL;
+  case SystemZ::J:    return SystemZ::JG;
+  case SystemZ::BRAS: return SystemZ::BRASL;
+  }
+  return 0;
+}
+
+namespace {
+class SystemZMCAsmBackend : public MCAsmBackend {
+  uint8_t OSABI;
+public:
+  SystemZMCAsmBackend(uint8_t osABI)
+    : OSABI(osABI) {}
+
+  // Override MCAsmBackend
+  virtual unsigned getNumFixupKinds() const LLVM_OVERRIDE {
+    return SystemZ::NumTargetFixupKinds;
+  }
+  virtual const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const
+    LLVM_OVERRIDE;
+  virtual void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+                          uint64_t Value) const LLVM_OVERRIDE;
+  virtual bool mayNeedRelaxation(const MCInst &Inst) const LLVM_OVERRIDE;
+  virtual bool fixupNeedsRelaxation(const MCFixup &Fixup,
+                                    uint64_t Value,
+                                    const MCRelaxableFragment *Fragment,
+                                    const MCAsmLayout &Layout) const
+    LLVM_OVERRIDE;
+  virtual void relaxInstruction(const MCInst &Inst,
+                                MCInst &Res) const LLVM_OVERRIDE;
+  virtual bool writeNopData(uint64_t Count,
+                            MCObjectWriter *OW) const LLVM_OVERRIDE;
+  virtual MCObjectWriter *createObjectWriter(raw_ostream &OS) const
+    LLVM_OVERRIDE {
+    return createSystemZObjectWriter(OS, OSABI);
+  }
+  virtual bool doesSectionRequireSymbols(const MCSection &Section) const
+    LLVM_OVERRIDE {
+    return false;
+  }
+};
+} // end anonymous namespace
+
+const MCFixupKindInfo &
+SystemZMCAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
+  const static MCFixupKindInfo Infos[SystemZ::NumTargetFixupKinds] = {
+    { "FK_390_PC16DBL",  0, 16, MCFixupKindInfo::FKF_IsPCRel },
+    { "FK_390_PC32DBL",  0, 32, MCFixupKindInfo::FKF_IsPCRel },
+    { "FK_390_PLT16DBL", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
+    { "FK_390_PLT32DBL", 0, 32, MCFixupKindInfo::FKF_IsPCRel }
+  };
+
+  if (Kind < FirstTargetFixupKind)
+    return MCAsmBackend::getFixupKindInfo(Kind);
+
+  assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
+         "Invalid kind!");
+  return Infos[Kind - FirstTargetFixupKind];
+}
+
+void SystemZMCAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
+                                     unsigned DataSize, uint64_t Value) const {
+  MCFixupKind Kind = Fixup.getKind();
+  // Round the field width (TargetSize, in bits) up to whole bytes.
+  unsigned Size = (getFixupKindInfo(Kind).TargetSize + 7) / 8;
+  unsigned Offset = Fixup.getOffset();
+  assert(Offset + Size <= DataSize && "Invalid fixup offset!");
+
+  // OR the adjusted value into Data at Offset, most-significant byte
+  // first (big-endian); bits already present in Data are preserved.
+  uint64_t Bits = extractBitsForFixup(Kind, Value);
+  for (unsigned I = 0; I != Size; ++I) {
+    unsigned Shift = (Size - 1 - I) * 8;
+    Data[Offset + I] |= uint8_t(Bits >> Shift);
+  }
+}
+
+bool SystemZMCAsmBackend::mayNeedRelaxation(const MCInst &Inst) const {
+  return getRelaxedOpcode(Inst.getOpcode()) != 0;
+}
+
+bool
+SystemZMCAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
+                                          uint64_t Value,
+                                          const MCRelaxableFragment *Fragment,
+                                          const MCAsmLayout &Layout) const {
+  // At the moment we just need to relax 16-bit fields to wider fields.
+  Value = extractBitsForFixup(Fixup.getKind(), Value);
+  return (int16_t)Value != (int64_t)Value;
+}
+
+void SystemZMCAsmBackend::relaxInstruction(const MCInst &Inst,
+                                           MCInst &Res) const {
+  unsigned Opcode = getRelaxedOpcode(Inst.getOpcode());
+  assert(Opcode && "Unexpected insn to relax");
+  Res = Inst;
+  Res.setOpcode(Opcode);
+}
+
+bool SystemZMCAsmBackend::writeNopData(uint64_t Count,
+                                       MCObjectWriter *OW) const {
+  for (uint64_t I = 0; I != Count; ++I)
+    OW->Write8(7);
+  return true;
+}
+
+MCAsmBackend *llvm::createSystemZMCAsmBackend(const Target &T, StringRef TT,
+                                              StringRef CPU) {
+  uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(Triple(TT).getOS());
+  return new SystemZMCAsmBackend(OSABI);
+}
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
new file mode 100644
index 0000000..c96a0d4
--- /dev/null
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
@@ -0,0 +1,38 @@
+//===-- SystemZMCAsmInfo.cpp - SystemZ asm properties ---------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZMCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+
+using namespace llvm;
+
+SystemZMCAsmInfo::SystemZMCAsmInfo(const Target &T, StringRef TT) {
+  PointerSize = 8;
+  CalleeSaveStackSlotSize = 8;
+  IsLittleEndian = false;
+
+  CommentString = "#";
+  PCSymbol = ".";
+  GlobalPrefix = "";
+  PrivateGlobalPrefix = ".L";
+  WeakRefDirective = "\t.weak\t";
+  ZeroDirective = "\t.space\t";
+  Data64bitsDirective = "\t.quad\t";
+  UsesELFSectionDirectiveForBSS = true;
+  SupportsDebugInformation = true;
+  HasLEB128 = true;
+  ExceptionsType = ExceptionHandling::DwarfCFI;
+}
+
+const MCSection *
+SystemZMCAsmInfo::getNonexecutableStackSection(MCContext &Ctx) const {
+  return Ctx.getELFSection(".note.GNU-stack", ELF::SHT_PROGBITS,
+                           0, SectionKind::getMetadata());
+}
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h
new file mode 100644
index 0000000..bac1bca
--- /dev/null
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h
@@ -0,0 +1,31 @@
+//====-- SystemZMCAsmInfo.h - SystemZ asm properties -----------*- C++ -*--===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SystemZTARGETASMINFO_H
+#define SystemZTARGETASMINFO_H
+
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+class Target;
+class StringRef;
+
+class SystemZMCAsmInfo : public MCAsmInfo {
+public:
+  explicit SystemZMCAsmInfo(const Target &T, StringRef TT);
+
+  // Override MCAsmInfo.
+  virtual const MCSection *getNonexecutableStackSection(MCContext &Ctx) const
+    LLVM_OVERRIDE;
+};
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
new file mode 100644
index 0000000..ea2250f
--- /dev/null
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
@@ -0,0 +1,131 @@
+//===-- SystemZMCCodeEmitter.cpp - Convert SystemZ code to machine code ---===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SystemZMCCodeEmitter class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mccodeemitter"
+#include "MCTargetDesc/SystemZMCTargetDesc.h"
+#include "MCTargetDesc/SystemZMCFixups.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInstrInfo.h"
+
+using namespace llvm;
+
+namespace {
+class SystemZMCCodeEmitter : public MCCodeEmitter {
+  const MCInstrInfo &MCII;
+  MCContext &Ctx;
+
+public:
+  SystemZMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx)
+    : MCII(mcii), Ctx(ctx) {
+  }
+
+  ~SystemZMCCodeEmitter() {}
+
+  // Override MCCodeEmitter.
+  virtual void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+                                 SmallVectorImpl<MCFixup> &Fixups) const
+    LLVM_OVERRIDE;
+
+private:
+  // Automatically generated by TableGen.
+  uint64_t getBinaryCodeForInstr(const MCInst &MI,
+                                 SmallVectorImpl<MCFixup> &Fixups) const;
+
+  // Called by the TableGen code to get the binary encoding of operand
+  // MO in MI.  Fixups is the list of fixups against MI.
+  unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+                             SmallVectorImpl<MCFixup> &Fixups) const;
+
+  // Operand OpNum of MI needs a PC-relative fixup of kind Kind at
+  // Offset bytes from the start of MI.  Add the fixup to Fixups
+  // and return the in-place addend, which since we're a RELA target
+  // is always 0.
+  unsigned getPCRelEncoding(const MCInst &MI, unsigned int OpNum,
+                            SmallVectorImpl<MCFixup> &Fixups,
+                            unsigned Kind, int64_t Offset) const;
+
+  unsigned getPC16DBLEncoding(const MCInst &MI, unsigned int OpNum,
+                              SmallVectorImpl<MCFixup> &Fixups) const {
+    return getPCRelEncoding(MI, OpNum, Fixups, SystemZ::FK_390_PC16DBL, 2);
+  }
+  unsigned getPC32DBLEncoding(const MCInst &MI, unsigned int OpNum,
+                              SmallVectorImpl<MCFixup> &Fixups) const {
+    return getPCRelEncoding(MI, OpNum, Fixups, SystemZ::FK_390_PC32DBL, 2);
+  }
+  unsigned getPLT16DBLEncoding(const MCInst &MI, unsigned int OpNum,
+                               SmallVectorImpl<MCFixup> &Fixups) const {
+    return getPCRelEncoding(MI, OpNum, Fixups, SystemZ::FK_390_PLT16DBL, 2);
+  }
+  unsigned getPLT32DBLEncoding(const MCInst &MI, unsigned int OpNum,
+                               SmallVectorImpl<MCFixup> &Fixups) const {
+    return getPCRelEncoding(MI, OpNum, Fixups, SystemZ::FK_390_PLT32DBL, 2);
+  }
+};
+}
+
+MCCodeEmitter *llvm::createSystemZMCCodeEmitter(const MCInstrInfo &MCII,
+                                                const MCRegisterInfo &MRI,
+                                                const MCSubtargetInfo &MCSTI,
+                                                MCContext &Ctx) {
+  return new SystemZMCCodeEmitter(MCII, Ctx);
+}
+
+void SystemZMCCodeEmitter::
+EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+                  SmallVectorImpl<MCFixup> &Fixups) const {
+  // Ask the TableGen-generated encoder for the instruction bits; fixups
+  // needed by the operands are appended to Fixups along the way.
+  uint64_t Encoding = getBinaryCodeForInstr(MI, Fixups);
+  unsigned NumBytes = MCII.get(MI.getOpcode()).getSize();
+
+  // Emit the NumBytes-byte encoding most-significant byte first (big-endian).
+  for (unsigned Left = NumBytes; Left != 0; --Left)
+    OS << uint8_t(Encoding >> ((Left - 1) * 8));
+}
+
+unsigned SystemZMCCodeEmitter::
+getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+                  SmallVectorImpl<MCFixup> &Fixups) const {
+  if (MO.isReg())
+    return Ctx.getRegisterInfo().getEncodingValue(MO.getReg());
+  if (MO.isImm())
+    return static_cast<unsigned>(MO.getImm());
+  llvm_unreachable("Unexpected operand type!");
+}
+
+unsigned
+SystemZMCCodeEmitter::getPCRelEncoding(const MCInst &MI, unsigned int OpNum,
+                                       SmallVectorImpl<MCFixup> &Fixups,
+                                       unsigned Kind, int64_t Offset) const {
+  const MCOperand &MO = MI.getOperand(OpNum);
+  // For compatibility with the GNU assembler, treat constant operands as
+  // unadjusted PC-relative offsets.
+  if (MO.isImm())
+    return MO.getImm() / 2;
+
+  const MCExpr *Expr = MO.getExpr();
+  if (Offset) {
+    // The operand value is relative to the start of MI, but the fixup
+    // is relative to the operand field itself, which is Offset bytes
+    // into MI.  Add Offset to the relocation value to cancel out
+    // this difference.
+    const MCExpr *OffsetExpr = MCConstantExpr::Create(Offset, Ctx);
+    Expr = MCBinaryExpr::CreateAdd(Expr, OffsetExpr, Ctx);
+  }
+  Fixups.push_back(MCFixup::Create(Offset, Expr, (MCFixupKind)Kind));
+  return 0;
+}
+
+#include "SystemZGenMCCodeEmitter.inc"
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h b/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h
new file mode 100644
index 0000000..9c94ebb
--- /dev/null
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h
@@ -0,0 +1,31 @@
+//===-- SystemZMCFixups.h - SystemZ-specific fixup entries ------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SYSTEMZMCFIXUPS_H
+#define LLVM_SYSTEMZMCFIXUPS_H
+
+#include "llvm/MC/MCFixup.h"
+
+namespace llvm {
+namespace SystemZ {
+  enum FixupKind {
+    // These correspond directly to R_390_* relocations.
+    FK_390_PC16DBL = FirstTargetFixupKind,
+    FK_390_PC32DBL,
+    FK_390_PLT16DBL,
+    FK_390_PLT32DBL,
+
+    // Marker
+    LastTargetFixupKind,
+    NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
+  };
+}
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
new file mode 100644
index 0000000..36e3d83
--- /dev/null
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
@@ -0,0 +1,140 @@
+//===-- SystemZMCObjectWriter.cpp - SystemZ ELF writer --------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/SystemZMCTargetDesc.h"
+#include "MCTargetDesc/SystemZMCFixups.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCValue.h"
+
+using namespace llvm;
+
+namespace {
+class SystemZObjectWriter : public MCELFObjectTargetWriter {
+public:
+  SystemZObjectWriter(uint8_t OSABI);
+
+  virtual ~SystemZObjectWriter();
+
+protected:
+  // Override MCELFObjectTargetWriter.
+  virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
+                                bool IsPCRel, bool IsRelocWithSymbol,
+                                int64_t Addend) const LLVM_OVERRIDE;
+  virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm,
+                                         const MCValue &Target,
+                                         const MCFragment &F,
+                                         const MCFixup &Fixup,
+                                         bool IsPCRel) const LLVM_OVERRIDE;
+};
+} // end anonymous namespace
+
+SystemZObjectWriter::SystemZObjectWriter(uint8_t OSABI)
+  : MCELFObjectTargetWriter(/*Is64Bit=*/true, OSABI, ELF::EM_S390,
+                            /*HasRelocationAddend=*/ true) {}
+
+SystemZObjectWriter::~SystemZObjectWriter() {
+}
+
+// Return the relocation type for an absolute value of MCFixupKind Kind.
+static unsigned getAbsoluteReloc(unsigned Kind) {
+  switch (Kind) {
+  case FK_Data_1: return ELF::R_390_8;
+  case FK_Data_2: return ELF::R_390_16;
+  case FK_Data_4: return ELF::R_390_32;
+  case FK_Data_8: return ELF::R_390_64;
+  }
+  llvm_unreachable("Unsupported absolute address");
+}
+
+// Return the relocation type for a PC-relative value of MCFixupKind Kind.
+static unsigned getPCRelReloc(unsigned Kind) {
+  switch (Kind) {
+  case FK_Data_2:                return ELF::R_390_PC16;
+  case FK_Data_4:                return ELF::R_390_PC32;
+  case FK_Data_8:                return ELF::R_390_PC64;
+  case SystemZ::FK_390_PC16DBL:  return ELF::R_390_PC16DBL;
+  case SystemZ::FK_390_PC32DBL:  return ELF::R_390_PC32DBL;
+  case SystemZ::FK_390_PLT16DBL: return ELF::R_390_PLT16DBL;
+  case SystemZ::FK_390_PLT32DBL: return ELF::R_390_PLT32DBL;
+  }
+  llvm_unreachable("Unsupported PC-relative address");
+}
+
+// Return the R_390_TLS_LE* relocation for a 4- or 8-byte MCFixupKind Kind.
+static unsigned getTLSLEReloc(unsigned Kind) {
+  switch (Kind) {
+  case FK_Data_4: return ELF::R_390_TLS_LE32;
+  case FK_Data_8: return ELF::R_390_TLS_LE64;
+  }
+  llvm_unreachable("Unsupported TLS LE address");
+}
+
+// Return the PLT relocation counterpart of PC-relative MCFixupKind Kind.
+static unsigned getPLTReloc(unsigned Kind) {
+  switch (Kind) {
+  case SystemZ::FK_390_PC16DBL: return ELF::R_390_PLT16DBL;
+  case SystemZ::FK_390_PC32DBL: return ELF::R_390_PLT32DBL;
+  }
+  llvm_unreachable("Unsupported PLT address");
+}
+
+unsigned SystemZObjectWriter::GetRelocType(const MCValue &Target,
+                                           const MCFixup &Fixup,
+                                           bool IsPCRel,
+                                           bool IsRelocWithSymbol,
+                                           int64_t Addend) const {
+  MCSymbolRefExpr::VariantKind Modifier = (Target.isAbsolute() ?
+                                           MCSymbolRefExpr::VK_None :
+                                           Target.getSymA()->getKind());
+  unsigned Kind = Fixup.getKind();
+  switch (Modifier) {
+  case MCSymbolRefExpr::VK_None:    // plain symbol or absolute value
+    if (IsPCRel)
+      return getPCRelReloc(Kind);
+    return getAbsoluteReloc(Kind);
+
+  case MCSymbolRefExpr::VK_NTPOFF:  // maps to R_390_TLS_LE*
+    assert(!IsPCRel && "NTPOFF shouldn't be PC-relative");
+    return getTLSLEReloc(Kind);
+
+  case MCSymbolRefExpr::VK_GOT:     // only R_390_GOTENT is handled
+    if (IsPCRel && Kind == SystemZ::FK_390_PC32DBL)
+      return ELF::R_390_GOTENT;
+    llvm_unreachable("Only PC-relative GOT accesses are supported for now");
+
+  case MCSymbolRefExpr::VK_PLT:     // maps to R_390_PLT*DBL
+    assert(IsPCRel && "@PLT should be PC-relative");
+    return getPLTReloc(Kind);
+
+  default:
+    llvm_unreachable("Modifier not supported");
+  }
+}
+
+const MCSymbol *SystemZObjectWriter::ExplicitRelSym(const MCAssembler &Asm,
+                                                    const MCValue &Target,
+                                                    const MCFragment &F,
+                                                    const MCFixup &Fixup,
+                                                    bool IsPCRel) const {
+  // The addend in a PC-relative R_390_* relocation is always applied to
+  // the PC-relative part of the address.  If some kind of indirection
+  // is applied to the symbol first, we can't use an addend there too.
+  if (!Target.isAbsolute() &&
+      Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None &&
+      IsPCRel)
+    return &Target.getSymA()->getSymbol().AliasedSymbol();
+  return NULL;
+}
+
+MCObjectWriter *llvm::createSystemZObjectWriter(raw_ostream &OS,
+                                                uint8_t OSABI) {
+  MCELFObjectTargetWriter *MOTW = new SystemZObjectWriter(OSABI);
+  return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/false);
+}
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
new file mode 100644
index 0000000..49a7f47
--- /dev/null
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
@@ -0,0 +1,160 @@
+//===-- SystemZMCTargetDesc.cpp - SystemZ target descriptions -------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZMCTargetDesc.h"
+#include "InstPrinter/SystemZInstPrinter.h"
+#include "SystemZMCAsmInfo.h"
+#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "SystemZGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "SystemZGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "SystemZGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCAsmInfo *createSystemZMCAsmInfo(const Target &T, StringRef TT) {
+  MCAsmInfo *Info = new SystemZMCAsmInfo(T, TT);
+  MachineLocation Dst(MachineLocation::VirtualFP);
+  MachineLocation Src(SystemZ::R15D, -SystemZMC::CFAOffsetFromInitialSP);
+  Info->addInitialFrameState(0, Dst, Src);
+  return Info;
+}
+
+static MCInstrInfo *createSystemZMCInstrInfo() {
+  MCInstrInfo *Info = new MCInstrInfo();
+  InitSystemZMCInstrInfo(Info);
+  return Info;
+}
+
+static MCRegisterInfo *createSystemZMCRegisterInfo(StringRef TT) {
+  MCRegisterInfo *Info = new MCRegisterInfo();
+  InitSystemZMCRegisterInfo(Info, SystemZ::R14D);
+  return Info;
+}
+
+static MCSubtargetInfo *createSystemZMCSubtargetInfo(StringRef TT,
+                                                     StringRef CPU,
+                                                     StringRef FS) {
+  MCSubtargetInfo *Info = new MCSubtargetInfo();
+  InitSystemZMCSubtargetInfo(Info, TT, CPU, FS);
+  return Info;
+}
+
+static MCCodeGenInfo *createSystemZMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+                                                 CodeModel::Model CM,
+                                                 CodeGenOpt::Level OL) {
+  // There is no separate DynamicNoPIC model: static code is already
+  // suitable for use in a dynamic executable.
+  if (RM == Reloc::DynamicNoPIC || RM == Reloc::Default)
+    RM = Reloc::Static;
+
+  // For SystemZ we define the models as follows:
+  //
+  // Small:  BRASL can call any function and will use a stub if necessary.
+  //         Locally-binding symbols will always be in range of LARL.
+  //
+  // Medium: BRASL can call any function and will use a stub if necessary.
+  //         GOT slots and locally-defined text will always be in range
+  //         of LARL, but other symbols might not be.
+  //
+  // Large:  Equivalent to Medium for now.
+  //
+  // Kernel: Equivalent to Medium for now.
+  //
+  // This means that any PIC module smaller than 4GB meets the
+  // requirements of Small, so Small seems like the best default there.
+  //
+  // All symbols bind locally in a non-PIC module, so the choice is less
+  // obvious.  There are two cases:
+  //
+  // - When creating an executable, PLTs and copy relocations allow
+  //   us to treat external symbols as part of the executable.
+  //   Any executable smaller than 4GB meets the requirements of Small,
+  //   so that seems like the best default.
+  //
+  // - When creating JIT code, stubs will be in range of BRASL if the
+  //   image is less than 4GB in size.  GOT entries will likewise be
+  //   in range of LARL.  However, the JIT environment has no equivalent
+  //   of copy relocs, so locally-binding data symbols might not be in
+  //   the range of LARL.  We need the Medium model in that case.
+  if (CM == CodeModel::JITDefault)
+    CM = (RM == Reloc::PIC_) ? CodeModel::Small : CodeModel::Medium;
+  else if (CM == CodeModel::Default)
+    CM = CodeModel::Small;
+
+  MCCodeGenInfo *Info = new MCCodeGenInfo();
+  Info->InitMCCodeGenInfo(RM, CM, OL);
+  return Info;
+}
+
+static MCInstPrinter *createSystemZMCInstPrinter(const Target &T,
+                                                 unsigned SyntaxVariant,
+                                                 const MCAsmInfo &MAI,
+                                                 const MCInstrInfo &MII,
+                                                 const MCRegisterInfo &MRI,
+                                                 const MCSubtargetInfo &STI) {
+  return new SystemZInstPrinter(MAI, MII, MRI);
+}
+
+static MCStreamer *createSystemZMCObjectStreamer(const Target &T, StringRef TT,
+                                                 MCContext &Ctx,
+                                                 MCAsmBackend &MAB,
+                                                 raw_ostream &OS,
+                                                 MCCodeEmitter *Emitter,
+                                                 bool RelaxAll,
+                                                 bool NoExecStack) {
+  return createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll, NoExecStack);
+}
+
+extern "C" void LLVMInitializeSystemZTargetMC() {
+  // Register the MCAsmInfo.
+  TargetRegistry::RegisterMCAsmInfo(TheSystemZTarget,
+                                    createSystemZMCAsmInfo);
+
+  // Register the MCCodeGenInfo.
+  TargetRegistry::RegisterMCCodeGenInfo(TheSystemZTarget,
+                                        createSystemZMCCodeGenInfo);
+
+  // Register the MCCodeEmitter.
+  TargetRegistry::RegisterMCCodeEmitter(TheSystemZTarget,
+                                        createSystemZMCCodeEmitter);
+
+  // Register the MCInstrInfo.
+  TargetRegistry::RegisterMCInstrInfo(TheSystemZTarget,
+                                      createSystemZMCInstrInfo);
+
+  // Register the MCRegisterInfo.
+  TargetRegistry::RegisterMCRegInfo(TheSystemZTarget,
+                                    createSystemZMCRegisterInfo);
+
+  // Register the MCSubtargetInfo.
+  TargetRegistry::RegisterMCSubtargetInfo(TheSystemZTarget,
+                                          createSystemZMCSubtargetInfo);
+
+  // Register the MCAsmBackend.
+  TargetRegistry::RegisterMCAsmBackend(TheSystemZTarget,
+                                       createSystemZMCAsmBackend);
+
+  // Register the MCInstPrinter.
+  TargetRegistry::RegisterMCInstPrinter(TheSystemZTarget,
+                                        createSystemZMCInstPrinter);
+
+  // Register the MCObjectStreamer.
+  TargetRegistry::RegisterMCObjectStreamer(TheSystemZTarget,
+                                           createSystemZMCObjectStreamer);
+}
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
new file mode 100644
index 0000000..229912f
--- /dev/null
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
@@ -0,0 +1,62 @@
+//===-- SystemZMCTargetDesc.h - SystemZ target descriptions -----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SYSTEMZMCTARGETDESC_H
+#define SYSTEMZMCTARGETDESC_H
+
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+
+class MCAsmBackend;
+class MCCodeEmitter;
+class MCContext;
+class MCInstrInfo;
+class MCObjectWriter;
+class MCRegisterInfo;
+class MCSubtargetInfo;
+class StringRef;
+class Target;
+class raw_ostream;
+
+extern Target TheSystemZTarget;
+
+namespace SystemZMC {
+  // How many bytes are in the ABI-defined, caller-allocated part of
+  // a stack frame.
+  const int64_t CallFrameSize = 160;
+
+  // The offset of the DWARF CFA from the incoming stack pointer.
+  const int64_t CFAOffsetFromInitialSP = CallFrameSize;
+}
+
+MCCodeEmitter *createSystemZMCCodeEmitter(const MCInstrInfo &MCII,
+                                          const MCRegisterInfo &MRI,
+                                          const MCSubtargetInfo &STI,
+                                          MCContext &Ctx);
+
+MCAsmBackend *createSystemZMCAsmBackend(const Target &T, StringRef TT,
+                                        StringRef CPU);
+
+MCObjectWriter *createSystemZObjectWriter(raw_ostream &OS, uint8_t OSABI);
+} // end namespace llvm
+
+// Defines symbolic names for SystemZ registers.
+// This defines a mapping from register name to register number.
+#define GET_REGINFO_ENUM
+#include "SystemZGenRegisterInfo.inc"
+
+// Defines symbolic names for the SystemZ instructions.
+#define GET_INSTRINFO_ENUM
+#include "SystemZGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "SystemZGenSubtargetInfo.inc"
+
+#endif
diff --git a/lib/Target/SystemZ/Makefile b/lib/Target/SystemZ/Makefile
new file mode 100644
index 0000000..c992584
--- /dev/null
+++ b/lib/Target/SystemZ/Makefile
@@ -0,0 +1,28 @@
+##===- lib/Target/SystemZ/Makefile -------------------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMSystemZCodeGen
+TARGET = SystemZ
+
+# Make sure that tblgen is run, first thing.
+BUILT_SOURCES = SystemZGenRegisterInfo.inc \
+		SystemZGenAsmWriter.inc \
+		SystemZGenAsmMatcher.inc \
+		SystemZGenCodeEmitter.inc \
+		SystemZGenInstrInfo.inc \
+		SystemZGenDAGISel.inc \
+		SystemZGenSubtargetInfo.inc \
+		SystemZGenCallingConv.inc \
+		SystemZGenMCCodeEmitter.inc
+
+DIRS = InstPrinter AsmParser TargetInfo MCTargetDesc
+
+include $(LEVEL)/Makefile.common
+
diff --git a/lib/Target/SystemZ/README.txt b/lib/Target/SystemZ/README.txt
new file mode 100644
index 0000000..d1f56a4
--- /dev/null
+++ b/lib/Target/SystemZ/README.txt
@@ -0,0 +1,146 @@
+//===---------------------------------------------------------------------===//
+// Random notes about and ideas for the SystemZ backend.
+//===---------------------------------------------------------------------===//
+
+The initial backend is deliberately restricted to z10.  We should add support
+for later architectures at some point.
+
+--
+
+SystemZDAGToDAGISel::SelectInlineAsmMemoryOperand() is passed "m" for all
+inline asm memory constraints; it doesn't get to see the original constraint.
+This means that it must conservatively treat all inline asm constraints
+as the most restricted type, "R".
+
+--
+
+If an inline asm ties an i32 "r" result to an i64 input, the input
+will be treated as an i32, leaving the upper bits uninitialised.
+For example:
+
+define void @f4(i32 *%dst) {
+  %val = call i32 asm "blah $0", "=r,0" (i64 103)
+  store i32 %val, i32 *%dst
+  ret void
+}
+
+from CodeGen/SystemZ/asm-09.ll will use LHI rather than LGHI
+to load 103.  This seems to be a general target-independent problem.
+
+--
+
+The tuning of the choice between Load Address (LA) and addition in
+SystemZISelDAGToDAG.cpp is suspect.  It should be tweaked based on
+performance measurements.
+
+--
+
+There is no scheduling support.
+
+--
+
+We don't use the Branch on Count or Branch on Index families of instructions.
+
+--
+
+We don't use the condition code results of anything except comparisons.
+
+Implementing this may need something more finely grained than the z_cmp
+and z_ucmp that we have now.  It might (or might not) also be useful to
+have a mask of "don't care" values in conditional branches.  For example,
+integer comparisons never set CC to 3, so the bottom bit of the CC mask
+isn't particularly relevant.  JNLH and JE are equally good for testing
+equality after an integer comparison, etc.
+
+--
+
+We don't optimize string and block memory operations.
+
+--
+
+We don't take full advantage of builtins like fabsl because the calling
+conventions require f128s to be returned by invisible reference.
+
+--
+
+DAGCombiner can detect integer absolute, but there's not yet an associated
+ISD opcode.  We could add one and implement it using Load Positive.
+Negated absolutes could use Load Negative.
+
+--
+
+DAGCombiner doesn't yet fold truncations of extended loads.  Functions like:
+
+    unsigned long f (unsigned long x, unsigned short *y)
+    {
+      return (x << 32) | *y;
+    }
+
+therefore end up as:
+
+        sllg    %r2, %r2, 32
+        llgh    %r0, 0(%r3)
+        lr      %r2, %r0
+        br      %r14
+
+but truncating the load would give:
+
+        sllg    %r2, %r2, 32
+        lh      %r2, 0(%r3)
+        br      %r14
+
+--
+
+Functions like:
+
+define i64 @f1(i64 %a) {
+  %and = and i64 %a, 1
+  ret i64 %and
+}
+
+ought to be implemented as:
+
+        lhi     %r0, 1
+        ngr     %r2, %r0
+        br      %r14
+
+but two-address optimisations reverse the order of the AND and force:
+
+        lhi     %r0, 1
+        ngr     %r0, %r2
+        lgr     %r2, %r0
+        br      %r14
+
+CodeGen/SystemZ/and-04.ll has several examples of this.
+
+--
+
+Out-of-range displacements are usually handled by loading the full
+address into a register.  In many cases it would be better to create
+an anchor point instead.  E.g. for:
+
+define void @f4a(i128 *%aptr, i64 %base) {
+  %addr = add i64 %base, 524288
+  %bptr = inttoptr i64 %addr to i128 *
+  %a = load volatile i128 *%aptr
+  %b = load i128 *%bptr
+  %add = add i128 %a, %b
+  store i128 %add, i128 *%aptr
+  ret void
+}
+
+(from CodeGen/SystemZ/int-add-08.ll) we load %base+524288 and %base+524296
+into separate registers, rather than using %base+524288 as a base for both.
+
+--
+
+Dynamic stack allocations round the size to 8 bytes and then allocate
+that rounded amount.  It would be simpler to subtract the unrounded
+size from the copy of the stack pointer and then align the result.
+See CodeGen/SystemZ/alloca-01.ll for an example.
+
+--
+
+Atomic loads and stores use the default compare-and-swap based implementation.
+This is probably much too conservative in practice, and the overhead is
+especially bad for 8- and 16-bit accesses.
diff --git a/lib/Target/SystemZ/SystemZ.h b/lib/Target/SystemZ/SystemZ.h
new file mode 100644
index 0000000..b811cbe
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZ.h
@@ -0,0 +1,77 @@
+//==- SystemZ.h - Top-Level Interface for SystemZ representation -*- C++ -*-==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in
+// the LLVM SystemZ backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SYSTEMZ_H
+#define SYSTEMZ_H
+
+#include "MCTargetDesc/SystemZMCTargetDesc.h"
+#include "llvm/Support/CodeGen.h"
+
+namespace llvm {
+  class SystemZTargetMachine;
+  class FunctionPass;
+
+  namespace SystemZ {
+    // Condition-code mask values: CC value N is selected by bit (3 - N).
+    const unsigned CCMASK_0 = 1 << 3;
+    const unsigned CCMASK_1 = 1 << 2;
+    const unsigned CCMASK_2 = 1 << 1;
+    const unsigned CCMASK_3 = 1 << 0;
+    const unsigned CCMASK_ANY = CCMASK_0 | CCMASK_1 | CCMASK_2 | CCMASK_3;
+
+    // Condition-code mask assignments for floating-point comparisons.
+    const unsigned CCMASK_CMP_EQ = CCMASK_0;
+    const unsigned CCMASK_CMP_LT = CCMASK_1;
+    const unsigned CCMASK_CMP_GT = CCMASK_2;
+    const unsigned CCMASK_CMP_UO = CCMASK_3;
+    const unsigned CCMASK_CMP_NE = CCMASK_CMP_LT | CCMASK_CMP_GT;
+    const unsigned CCMASK_CMP_LE = CCMASK_CMP_EQ | CCMASK_CMP_LT;
+    const unsigned CCMASK_CMP_GE = CCMASK_CMP_EQ | CCMASK_CMP_GT;
+    const unsigned CCMASK_CMP_O  = CCMASK_ANY ^ CCMASK_CMP_UO;
+
+    // Return true if Val fits an LLILL operand.
+    static inline bool isImmLL(uint64_t Val) {
+      return (Val & ~0x000000000000ffffULL) == 0;
+    }
+
+    // Return true if Val fits an LLILH operand.
+    static inline bool isImmLH(uint64_t Val) {
+      return (Val & ~0x00000000ffff0000ULL) == 0;
+    }
+
+    // Return true if Val fits an LLIHL operand.
+    static inline bool isImmHL(uint64_t Val) {
+      return (Val & ~0x0000ffff00000000ULL) == 0;
+    }
+
+    // Return true if Val fits an LLIHH operand.
+    static inline bool isImmHH(uint64_t Val) {
+      return (Val & ~0xffff000000000000ULL) == 0;
+    }
+
+    // Return true if Val fits an LLILF operand.
+    static inline bool isImmLF(uint64_t Val) {
+      return (Val & ~0x00000000ffffffffULL) == 0;
+    }
+
+    // Return true if Val fits an LLIHF operand.
+    static inline bool isImmHF(uint64_t Val) {
+      return (Val & ~0xffffffff00000000ULL) == 0;
+    }
+  }
+
+  FunctionPass *createSystemZISelDag(SystemZTargetMachine &TM,
+                                     CodeGenOpt::Level OptLevel);
+} // end namespace llvm;
+#endif
diff --git a/lib/Target/SystemZ/SystemZ.td b/lib/Target/SystemZ/SystemZ.td
new file mode 100644
index 0000000..e03c32f
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZ.td
@@ -0,0 +1,75 @@
+//===-- SystemZ.td - Describe the SystemZ target machine -----*- tblgen -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces which we are implementing
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// SystemZ supported processors
+//===----------------------------------------------------------------------===//
+
+class Proc<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, NoItineraries, Features>;
+
+def : Proc<"z10", []>;
+
+//===----------------------------------------------------------------------===//
+// Register file description
+//===----------------------------------------------------------------------===//
+
+include "SystemZRegisterInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Calling convention description
+//===----------------------------------------------------------------------===//
+
+include "SystemZCallingConv.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction descriptions
+//===----------------------------------------------------------------------===//
+
+include "SystemZOperators.td"
+include "SystemZOperands.td"
+include "SystemZPatterns.td"
+include "SystemZInstrFormats.td"
+include "SystemZInstrInfo.td"
+include "SystemZInstrFP.td"
+
+def SystemZInstrInfo : InstrInfo {}
+
+//===----------------------------------------------------------------------===//
+// Assembly parser
+//===----------------------------------------------------------------------===//
+
+def SystemZAsmParser : AsmParser {
+  let ShouldEmitMatchRegisterName = 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Assembly writer
+//===----------------------------------------------------------------------===//
+
+def SystemZAsmWriter : AsmWriter {
+  string AsmWriterClassName = "InstPrinter";
+  bit isMCAsmWriter = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Top-level target declaration
+//===----------------------------------------------------------------------===//
+
+def SystemZ : Target {
+  let InstructionSet = SystemZInstrInfo;
+  let AssemblyParsers = [SystemZAsmParser];
+  let AssemblyWriters = [SystemZAsmWriter];
+}
diff --git a/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/lib/Target/SystemZ/SystemZAsmPrinter.cpp
new file mode 100644
index 0000000..1e15ab1
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZAsmPrinter.cpp
@@ -0,0 +1,113 @@
+//===-- SystemZAsmPrinter.cpp - SystemZ LLVM assembly printer -------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Streams SystemZ assembly language and associated data, in the form of
+// MCInsts and MCExprs respectively.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZAsmPrinter.h"
+#include "InstPrinter/SystemZInstPrinter.h"
+#include "SystemZConstantPoolValue.h"
+#include "SystemZMCInstLower.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/Mangler.h"
+
+using namespace llvm;
+
+void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+  MCInst Inst;
+  SystemZMCInstLower Lowering(Mang, MF->getContext(), *this);
+  Lowering.lower(MI, Inst);
+  OutStreamer.EmitInstruction(Inst);
+}
+
+// Convert a SystemZ-specific constant pool modifier into the associated
+// MCSymbolRefExpr variant kind.
+static MCSymbolRefExpr::VariantKind
+getModifierVariantKind(SystemZCP::SystemZCPModifier Modifier) {
+  switch (Modifier) {
+  case SystemZCP::NTPOFF: return MCSymbolRefExpr::VK_NTPOFF;
+  }
+  llvm_unreachable("Invalid SystemZCPModifier!");
+}
+
+void SystemZAsmPrinter::
+EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
+  // Emit a reference to the entry's global value, using the variant kind
+  // that matches its modifier and the allocation size of the entry's type.
+  SystemZConstantPoolValue *Entry =
+    static_cast<SystemZConstantPoolValue*>(MCPV);
+  MCSymbol *Sym = Mang->getSymbol(Entry->getGlobalValue());
+  MCSymbolRefExpr::VariantKind Kind =
+    getModifierVariantKind(Entry->getModifier());
+  uint64_t NumBytes = TM.getDataLayout()->getTypeAllocSize(Entry->getType());
+  OutStreamer.EmitValue(MCSymbolRefExpr::Create(Sym, Kind, OutContext),
+                        NumBytes);
+}
+
+bool SystemZAsmPrinter::PrintAsmOperand(const MachineInstr *MI,
+                                        unsigned OpNo,
+                                        unsigned AsmVariant,
+                                        const char *ExtraCode,
+                                        raw_ostream &OS) {
+  const MachineOperand &MO = MI->getOperand(OpNo);
+  if (!ExtraCode || *ExtraCode != 'n') {
+    SystemZMCInstLower Lower(Mang, MF->getContext(), *this);
+    SystemZInstPrinter::printOperand(Lower.lowerOperand(MO), OS);
+  } else if (MO.isImm()) {
+    OS << -int64_t(MO.getImm());   // 'n': print the negated immediate.
+  } else {
+    return true;                   // 'n' is only valid for immediates.
+  }
+  return false;
+}
+
+bool SystemZAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+                                              unsigned OpNo,
+                                              unsigned AsmVariant,
+                                              const char *ExtraCode,
+                                              raw_ostream &OS) {
+  SystemZInstPrinter::printAddress(MI->getOperand(OpNo).getReg(),
+                                   MI->getOperand(OpNo + 1).getImm(),
+                                   MI->getOperand(OpNo + 2).getReg(), OS);
+  return false;
+}
+
+void SystemZAsmPrinter::EmitEndOfAsmFile(Module &M) {
+  if (!Subtarget->isTargetELF())
+    return;
+  const TargetLoweringObjectFileELF &TLOFELF =
+    static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering());
+  MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>();
+
+  // Output stubs for external and common global variables.
+  MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
+  if (Stubs.empty())
+    return;
+
+  // Each stub is a label followed by a pointer-sized value referring to
+  // the symbol recorded for that stub, emitted into the data-rel section.
+  OutStreamer.SwitchSection(TLOFELF.getDataRelSection());
+  const unsigned PtrSize = TM.getDataLayout()->getPointerSize(0);
+  for (unsigned I = 0, E = Stubs.size(); I != E; ++I) {
+    OutStreamer.EmitLabel(Stubs[I].first);
+    OutStreamer.EmitSymbolValue(Stubs[I].second.getPointer(), PtrSize, 0);
+  }
+  Stubs.clear();
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeSystemZAsmPrinter() {
+  RegisterAsmPrinter<SystemZAsmPrinter> X(TheSystemZTarget);
+}
diff --git a/lib/Target/SystemZ/SystemZAsmPrinter.h b/lib/Target/SystemZ/SystemZAsmPrinter.h
new file mode 100644
index 0000000..4b6c51b
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZAsmPrinter.h
@@ -0,0 +1,52 @@
+//===-- SystemZAsmPrinter.h - SystemZ LLVM assembly printer ----*- C++ -*--===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SYSTEMZASMPRINTER_H
+#define SYSTEMZASMPRINTER_H
+
+#include "SystemZTargetMachine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+class MCStreamer;
+class MachineBasicBlock;
+class MachineInstr;
+class Module;
+class raw_ostream;
+
+class LLVM_LIBRARY_VISIBILITY SystemZAsmPrinter : public AsmPrinter {
+private:
+  const SystemZSubtarget *Subtarget;
+
+public:
+  SystemZAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+    : AsmPrinter(TM, Streamer) {
+    Subtarget = &TM.getSubtarget<SystemZSubtarget>();
+  }
+
+  // Override AsmPrinter.
+  virtual const char *getPassName() const LLVM_OVERRIDE {
+    return "SystemZ Assembly Printer";
+  }
+  virtual void EmitInstruction(const MachineInstr *MI) LLVM_OVERRIDE;
+  virtual void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV)
+    LLVM_OVERRIDE;
+  virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+                               unsigned AsmVariant, const char *ExtraCode,
+                               raw_ostream &OS) LLVM_OVERRIDE;
+  virtual bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+                                     unsigned AsmVariant,
+                                     const char *ExtraCode,
+                                     raw_ostream &OS) LLVM_OVERRIDE;
+  virtual void EmitEndOfAsmFile(Module &M) LLVM_OVERRIDE;
+};
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/SystemZ/SystemZCallingConv.cpp b/lib/Target/SystemZ/SystemZCallingConv.cpp
new file mode 100644
index 0000000..cc9c84b
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZCallingConv.cpp
@@ -0,0 +1,21 @@
+//===-- SystemZCallingConv.cpp - Calling conventions for SystemZ ----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZCallingConv.h"
+#include "SystemZRegisterInfo.h"
+
+using namespace llvm;
+
+const unsigned SystemZ::ArgGPRs[SystemZ::NumArgGPRs] = {
+  SystemZ::R2D, SystemZ::R3D, SystemZ::R4D, SystemZ::R5D, SystemZ::R6D
+};
+
+const unsigned SystemZ::ArgFPRs[SystemZ::NumArgFPRs] = {
+  SystemZ::F0D, SystemZ::F2D, SystemZ::F4D, SystemZ::F6D
+};
diff --git a/lib/Target/SystemZ/SystemZCallingConv.h b/lib/Target/SystemZ/SystemZCallingConv.h
new file mode 100644
index 0000000..298985e
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZCallingConv.h
@@ -0,0 +1,23 @@
+//===-- SystemZCallingConv.h - Calling conventions for SystemZ --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SYSTEMZCALLINGCONV_H
+#define SYSTEMZCALLINGCONV_H
+
+namespace llvm {
+  namespace SystemZ {
+    const unsigned NumArgGPRs = 5;
+    extern const unsigned ArgGPRs[NumArgGPRs];
+
+    const unsigned NumArgFPRs = 4;
+    extern const unsigned ArgFPRs[NumArgFPRs];
+  }
+}
+
+#endif
diff --git a/lib/Target/SystemZ/SystemZCallingConv.td b/lib/Target/SystemZ/SystemZCallingConv.td
new file mode 100644
index 0000000..c2d727f
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZCallingConv.td
@@ -0,0 +1,65 @@
+//=- SystemZCallingConv.td - Calling conventions for SystemZ -*- tablegen -*-=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This describes the calling conventions for the SystemZ ABI.
+//===----------------------------------------------------------------------===//
+
+class CCIfExtend<CCAction A>
+  : CCIf<"ArgFlags.isSExt() || ArgFlags.isZExt()", A>;
+
+//===----------------------------------------------------------------------===//
+// SVR4 return value calling convention
+//===----------------------------------------------------------------------===//
+def RetCC_SystemZ : CallingConv<[
+  // Promote i32 to i64 if it has an explicit extension type.
+  CCIfType<[i32], CCIfExtend<CCPromoteToType<i64>>>,
+
+  // ABI-compliant code returns 64-bit integers in R2.  Make the other
+  // call-clobbered argument registers available for code that doesn't
+  // care about the ABI.  (R6 is an argument register too, but is
+  // call-saved and therefore not suitable for return values.)
+  CCIfType<[i32], CCAssignToReg<[R2W, R3W, R4W, R5W]>>,
+  CCIfType<[i64], CCAssignToReg<[R2D, R3D, R4D, R5D]>>,
+
+  // ABI-compliant code returns float and double in F0.  Make the
+  // other floating-point argument registers available for code that
+  // doesn't care about the ABI.  All floating-point argument registers
+  // are call-clobbered, so we can use all of them here.
+  CCIfType<[f32], CCAssignToReg<[F0S, F2S, F4S, F6S]>>,
+  CCIfType<[f64], CCAssignToReg<[F0D, F2D, F4D, F6D]>>
+
+  // ABI-compliant code returns long double by reference, but that conversion
+  // is left to higher-level code.  Perhaps we could add an f128 definition
+  // here for code that doesn't care about the ABI?
+]>;
+
+//===----------------------------------------------------------------------===//
+// SVR4 argument calling conventions
+//===----------------------------------------------------------------------===//
+def CC_SystemZ : CallingConv<[
+  // Promote i32 to i64 if it has an explicit extension type.
+  // The convention is that true integer arguments that are smaller
+  // than 64 bits should be marked as extended, but structures that
+  // are smaller than 64 bits shouldn't.
+  CCIfType<[i32], CCIfExtend<CCPromoteToType<i64>>>,
+
+  // Force long double values to the stack and pass i64 pointers to them.
+  CCIfType<[f128], CCPassIndirect<i64>>,
+
+  // The first 5 integer arguments are passed in R2-R6.  Note that R6
+  // is call-saved.
+  CCIfType<[i32], CCAssignToReg<[R2W, R3W, R4W, R5W, R6W]>>,
+  CCIfType<[i64], CCAssignToReg<[R2D, R3D, R4D, R5D, R6D]>>,
+
+  // The first 4 float and double arguments are passed in even registers F0-F6.
+  CCIfType<[f32], CCAssignToReg<[F0S, F2S, F4S, F6S]>>,
+  CCIfType<[f64], CCAssignToReg<[F0D, F2D, F4D, F6D]>>,
+
+  // Other arguments are passed in 8-byte-aligned 8-byte stack slots.
+  CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>
+]>;
diff --git a/lib/Target/SystemZ/SystemZConstantPoolValue.cpp b/lib/Target/SystemZ/SystemZConstantPoolValue.cpp
new file mode 100644
index 0000000..e9c4f6d
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZConstantPoolValue.cpp
@@ -0,0 +1,62 @@
+//===-- SystemZConstantPoolValue.cpp - SystemZ constant-pool value --------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZConstantPoolValue.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+// Build a constant-pool value for gv, typed as gv->getType().
+SystemZConstantPoolValue::SystemZConstantPoolValue(
+    const GlobalValue *gv, SystemZCP::SystemZCPModifier modifier)
+  : MachineConstantPoolValue(gv->getType()), GV(gv), Modifier(modifier) {}
+
+// Factory: heap-allocate a constant-pool value for GV with Modifier.
+SystemZConstantPoolValue *SystemZConstantPoolValue::Create(
+    const GlobalValue *GV, SystemZCP::SystemZCPModifier Modifier) {
+  return new SystemZConstantPoolValue(GV, Modifier);
+}
+
+unsigned SystemZConstantPoolValue::getRelocationInfo() const {
+  switch (Modifier) {
+  case SystemZCP::NTPOFF:
+    // May require a relocation, but the relocations are always resolved
+    // by the static linker.
+    return 1;
+  }
+  llvm_unreachable("Unknown modifier");  // Modifier is not an enumerator
+}
+
+// Return the index of a matching, suitably aligned entry in CP, or -1.
+int SystemZConstantPoolValue::
+getExistingMachineCPValue(MachineConstantPool *CP, unsigned Alignment) {
+  unsigned AlignMask = Alignment - 1;
+  const std::vector<MachineConstantPoolEntry> &Constants = CP->getConstants();
+  for (unsigned I = 0, E = Constants.size(); I != E; ++I)
+    if (Constants[I].isMachineConstantPoolEntry() &&
+        (Constants[I].getAlignment() & AlignMask) == 0) {
+      SystemZConstantPoolValue *ZCPV =
+        static_cast<SystemZConstantPoolValue *>(Constants[I].Val.MachineCPVal);
+      if (ZCPV->GV == GV && ZCPV->Modifier == Modifier)
+        return I;
+    }
+  return -1;
+}
+
+void SystemZConstantPoolValue::addSelectionDAGCSEId(FoldingSetNodeID &ID) {
+  ID.AddPointer(GV);        // which global is referenced
+  ID.AddInteger(Modifier);  // and how it is referenced
+}
+
+void SystemZConstantPoolValue::print(raw_ostream &O) const {
+  O << GV << "@" << int(Modifier);  // modifier shown as its integer value
+}
diff --git a/lib/Target/SystemZ/SystemZConstantPoolValue.h b/lib/Target/SystemZ/SystemZConstantPoolValue.h
new file mode 100644
index 0000000..9927bdb
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZConstantPoolValue.h
@@ -0,0 +1,55 @@
+//===- SystemZConstantPoolValue.h - SystemZ constant-pool value -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SYSTEMZCONSTANTPOOLVALUE_H
+#define SYSTEMZCONSTANTPOOLVALUE_H
+
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/Support/ErrorHandling.h"
+
+namespace llvm {
+
+class GlobalValue;
+
+namespace SystemZCP {
+  enum SystemZCPModifier {
+    NTPOFF  // offset of a thread-local variable, written x@NTPOFF
+  };
+}
+
+/// A SystemZ-specific constant pool value.  At present, the only
+/// defined constant pool values are offsets of thread-local variables
+/// (written x@NTPOFF).
+class SystemZConstantPoolValue : public MachineConstantPoolValue {
+  const GlobalValue *GV;                  // the global being referenced
+  SystemZCP::SystemZCPModifier Modifier;  // how GV is referenced
+
+protected:
+  SystemZConstantPoolValue(const GlobalValue *GV,
+                           SystemZCP::SystemZCPModifier Modifier);
+
+public:
+  static SystemZConstantPoolValue *
+    Create(const GlobalValue *GV, SystemZCP::SystemZCPModifier Modifier);
+
+  // Overrides of the MachineConstantPoolValue interface.
+  virtual unsigned getRelocationInfo() const LLVM_OVERRIDE;
+  virtual int getExistingMachineCPValue(MachineConstantPool *CP,
+                                        unsigned Alignment) LLVM_OVERRIDE;
+  virtual void addSelectionDAGCSEId(FoldingSetNodeID &ID) LLVM_OVERRIDE;
+  virtual void print(raw_ostream &O) const LLVM_OVERRIDE;
+
+  // Read-only access to the SystemZ-specific fields.
+  const GlobalValue *getGlobalValue() const { return GV; }
+  SystemZCP::SystemZCPModifier getModifier() const { return Modifier; }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/SystemZ/SystemZFrameLowering.cpp b/lib/Target/SystemZ/SystemZFrameLowering.cpp
new file mode 100644
index 0000000..fda33de
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -0,0 +1,535 @@
+//===-- SystemZFrameLowering.cpp - Frame lowering for SystemZ -------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZFrameLowering.h"
+#include "SystemZCallingConv.h"
+#include "SystemZInstrBuilder.h"
+#include "SystemZMachineFunctionInfo.h"
+#include "SystemZTargetMachine.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Function.h"
+
+using namespace llvm;
+
+SystemZFrameLowering::SystemZFrameLowering(const SystemZTargetMachine &tm,
+                                           const SystemZSubtarget &sti)
+  : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8,
+                        -SystemZMC::CallFrameSize),
+    TM(tm),
+    STI(sti) {
+  // The ABI-defined register save slots, relative to the incoming stack
+  // pointer.
+  static const unsigned SpillOffsetTable[][2] = {
+    { SystemZ::R2D,  0x10 },
+    { SystemZ::R3D,  0x18 },
+    { SystemZ::R4D,  0x20 },
+    { SystemZ::R5D,  0x28 },
+    { SystemZ::R6D,  0x30 },
+    { SystemZ::R7D,  0x38 },
+    { SystemZ::R8D,  0x40 },
+    { SystemZ::R9D,  0x48 },
+    { SystemZ::R10D, 0x50 },
+    { SystemZ::R11D, 0x58 },
+    { SystemZ::R12D, 0x60 },
+    { SystemZ::R13D, 0x68 },
+    { SystemZ::R14D, 0x70 },
+    { SystemZ::R15D, 0x78 },
+    { SystemZ::F0D,  0x80 },
+    { SystemZ::F2D,  0x88 },
+    { SystemZ::F4D,  0x90 },
+    { SystemZ::F6D,  0x98 }
+  };
+
+  // Map each listed register to its save slot offset; others are untouched.
+  RegSpillOffsets.grow(SystemZ::NUM_TARGET_REGS);
+  for (unsigned I = 0, E = array_lengthof(SpillOffsetTable); I != E; ++I)
+    RegSpillOffsets[SpillOffsetTable[I][0]] = SpillOffsetTable[I][1];
+}
+
+// Record, ahead of the callee-saved register scan, the physical registers
+// that frame lowering itself is going to use or clobber.
+void SystemZFrameLowering::
+processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+                                     RegScavenger *RS) const {
+  MachineRegisterInfo &RegInfo = MF.getRegInfo();
+  const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+  SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
+
+  // va_start stores incoming FPR varargs in the normal way but leaves the
+  // incoming GPR varargs to spillCalleeSavedRegisters().  Note those
+  // pending uses; they typically include the call-saved register R6D.
+  if (MF.getFunction()->isVarArg()) {
+    unsigned FirstGPR = ZFI->getVarArgsFirstGPR();
+    for (unsigned Idx = FirstGPR; Idx < SystemZ::NumArgGPRs; ++Idx)
+      RegInfo.setPhysRegUsed(SystemZ::ArgGPRs[Idx]);
+  }
+
+  // A function that requires a frame pointer keeps it in the hard frame
+  // pointer register, which is therefore clobbered.
+  if (hasFP(MF))
+    RegInfo.setPhysRegUsed(SystemZ::R11D);
+
+  // A function that calls other functions clobbers the return address
+  // register.
+  if (MF.getFrameInfo()->hasCalls())
+    RegInfo.setPhysRegUsed(SystemZ::R14D);
+
+  // Once any call-saved GPR has to be saved, save and restore the stack
+  // pointer along with it via STMG and LMG.  The LMG then performs the
+  // deallocation, so no separate %r15 addition is needed.
+  const uint16_t *SavedRegs = TRI->getCalleeSavedRegs(&MF);
+  for (const uint16_t *RegPtr = SavedRegs; *RegPtr; ++RegPtr) {
+    unsigned Reg = *RegPtr;
+    bool IsGPR = SystemZ::GR64BitRegClass.contains(Reg);
+    if (IsGPR && RegInfo.isPhysRegUsed(Reg)) {
+      RegInfo.setPhysRegUsed(SystemZ::R15D);
+      break;
+    }
+  }
+}
+
+// Add GPR64 to the save instruction being built by MIB, which is in basic
+// block MBB.  IsImplicit is false for the explicit start and end register
+// operands and true for an implicit operand that lies between them; an
+// implicit operand is skipped if GPR64 or its subreg_32bit is live-in.
+static void addSavedGPR(MachineBasicBlock &MBB, MachineInstrBuilder &MIB,
+                        const SystemZTargetMachine &TM,
+                        unsigned GPR64, bool IsImplicit) {
+  const SystemZRegisterInfo *RI = TM.getRegisterInfo();
+  unsigned GPR32 = RI->getSubReg(GPR64, SystemZ::subreg_32bit);
+  bool IsLive = MBB.isLiveIn(GPR64) || MBB.isLiveIn(GPR32);
+  if (!IsLive || !IsImplicit) {
+    MIB.addReg(GPR64, getImplRegState(IsImplicit) | getKillRegState(!IsLive));
+    if (!IsLive)
+      MBB.addLiveIn(GPR64);
+  }
+}
+
+bool SystemZFrameLowering::
+spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+                          MachineBasicBlock::iterator MBBI,
+                          const std::vector<CalleeSavedInfo> &CSI,
+                          const TargetRegisterInfo *TRI) const {
+  if (CSI.empty())
+    return false;
+
+  MachineFunction &MF = *MBB.getParent();
+  const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
+  SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
+  bool IsVarArg = MF.getFunction()->isVarArg();
+  DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+  // Scan the call-saved GPRs and find the bounds of the register spill area.
+  unsigned SavedGPRFrameSize = 0;
+  unsigned LowGPR = 0;
+  unsigned HighGPR = SystemZ::R15D;  // the saved range always ends at %r15
+  unsigned StartOffset = -1U;        // no save slot chosen yet
+  for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
+    unsigned Reg = CSI[I].getReg();
+    if (SystemZ::GR64BitRegClass.contains(Reg)) {
+      SavedGPRFrameSize += 8;
+      unsigned Offset = RegSpillOffsets[Reg];
+      assert(Offset && "Unexpected GPR save");
+      if (StartOffset > Offset) {
+        LowGPR = Reg;
+        StartOffset = Offset;
+      }
+    }
+  }
+
+  // Record the call-saved GPR range for the epilogue inserter.  This is
+  // done before any vararg GPRs are added below, so they are not restored.
+  ZFI->setSavedGPRFrameSize(SavedGPRFrameSize);
+  ZFI->setLowSavedGPR(LowGPR);
+  ZFI->setHighSavedGPR(HighGPR);
+
+  // Include the GPR varargs, if any.  R6D is call-saved, so would
+  // be included by the loop above, but we also need to handle the
+  // call-clobbered argument registers.
+  if (IsVarArg) {
+    unsigned FirstGPR = ZFI->getVarArgsFirstGPR();
+    if (FirstGPR < SystemZ::NumArgGPRs) {
+      unsigned Reg = SystemZ::ArgGPRs[FirstGPR];
+      unsigned Offset = RegSpillOffsets[Reg];
+      if (StartOffset > Offset) {
+        LowGPR = Reg; StartOffset = Offset;
+      }
+    }
+  }
+
+  // Save GPRs LowGPR-HighGPR with one STMG, if there is anything to save.
+  if (LowGPR) {
+    assert(LowGPR != HighGPR && "Should be saving %r15 and something else");
+
+    // Build an STMG instruction.
+    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(SystemZ::STMG));
+
+    // Add the explicit register operands.
+    addSavedGPR(MBB, MIB, TM, LowGPR, false);
+    addSavedGPR(MBB, MIB, TM, HighGPR, false);
+
+    // Add the address.
+    MIB.addReg(SystemZ::R15D).addImm(StartOffset);
+
+    // Make sure all call-saved GPRs are included as operands and are
+    // marked as live on entry.
+    for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
+      unsigned Reg = CSI[I].getReg();
+      if (SystemZ::GR64BitRegClass.contains(Reg))
+        addSavedGPR(MBB, MIB, TM, Reg, true);
+    }
+
+    // ...likewise GPR varargs.
+    if (IsVarArg)
+      for (unsigned I = ZFI->getVarArgsFirstGPR(); I < SystemZ::NumArgGPRs; ++I)
+        addSavedGPR(MBB, MIB, TM, SystemZ::ArgGPRs[I], true);
+  }
+
+  // Save FPRs in the normal TargetInstrInfo way.
+  for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
+    unsigned Reg = CSI[I].getReg();
+    if (SystemZ::FP64BitRegClass.contains(Reg)) {
+      MBB.addLiveIn(Reg);
+      TII->storeRegToStackSlot(MBB, MBBI, Reg, true, CSI[I].getFrameIdx(),
+                               &SystemZ::FP64BitRegClass, TRI);
+    }
+  }
+
+  return true;
+}
+
+// Restore the call-saved registers in CSI before MBBI: FPRs with individual
+// loads and GPRs with a single LMG.  Returns false if CSI is empty.
+bool SystemZFrameLowering::
+restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+                            MachineBasicBlock::iterator MBBI,
+                            const std::vector<CalleeSavedInfo> &CSI,
+                            const TargetRegisterInfo *TRI) const {
+  if (CSI.empty())
+    return false;
+
+  MachineFunction &MF = *MBB.getParent();
+  const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
+  SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
+  bool HasFP = hasFP(MF);
+  DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+  // Restore FPRs in the normal TargetInstrInfo way.
+  for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
+    unsigned Reg = CSI[I].getReg();
+    if (SystemZ::FP64BitRegClass.contains(Reg))
+      TII->loadRegFromStackSlot(MBB, MBBI, Reg, CSI[I].getFrameIdx(),
+                                &SystemZ::FP64BitRegClass, TRI);
+  }
+
+  // Restore call-saved GPRs (but not call-clobbered varargs, which at
+  // this point might hold return values).
+  unsigned LowGPR = ZFI->getLowSavedGPR();
+  unsigned HighGPR = ZFI->getHighSavedGPR();
+  unsigned StartOffset = RegSpillOffsets[LowGPR];
+  if (LowGPR) {
+    // If we saved any of %r2-%r5 as varargs, we should also be saving
+    // and restoring %r6.  If we're saving %r6 or above, we should be
+    // restoring it too.
+    assert(LowGPR != HighGPR && "Should be loading %r15 and something else");
+
+    // Build an LMG instruction.
+    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(SystemZ::LMG));
+
+    // Add the explicit register operands, followed by the address.
+    MIB.addReg(LowGPR, RegState::Define).addReg(HighGPR, RegState::Define);
+    MIB.addReg(HasFP ? SystemZ::R11D : SystemZ::R15D).addImm(StartOffset);
+
+    // Add the remaining restored GPRs as implicit definitions.  FPRs in CSI
+    // are skipped: they were reloaded above and LMG does not define them.
+    for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
+      unsigned Reg = CSI[I].getReg();
+      if (Reg != LowGPR && Reg != HighGPR &&
+          SystemZ::GR64BitRegClass.contains(Reg))
+        MIB.addReg(Reg, RegState::ImplicitDefine);
+    }
+  }
+
+  return true;
+}
+
+// Emit instructions before MBBI (in MBB) to add NumBytes to Reg.
+static void emitIncrement(MachineBasicBlock &MBB,
+                          MachineBasicBlock::iterator &MBBI,
+                          const DebugLoc &DL,
+                          unsigned Reg, int64_t NumBytes,
+                          const TargetInstrInfo *TII) {
+  while (NumBytes) {
+    unsigned Opcode;
+    int64_t ThisVal = NumBytes;
+    if (isInt<16>(NumBytes))
+      Opcode = SystemZ::AGHI;
+    else {
+      Opcode = SystemZ::AGFI;
+      // Clamp to a 32-bit immediate that keeps the stack 8-byte aligned.
+      int64_t MinVal = -(int64_t(1) << 31);  // negate after shifting
+      int64_t MaxVal = (int64_t(1) << 31) - 8;
+      if (ThisVal < MinVal)
+        ThisVal = MinVal;
+      else if (ThisVal > MaxVal)
+        ThisVal = MaxVal;
+    }
+    MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII->get(Opcode), Reg)
+      .addReg(Reg).addImm(ThisVal);
+    // The PSW implicit def is dead.
+    MI->getOperand(3).setIsDead();
+    NumBytes -= ThisVal;  // anything left over is added by the next iteration
+  }
+}
+
+// Insert the prologue into MF's entry block, with CFI for each step.
+void SystemZFrameLowering::emitPrologue(MachineFunction &MF) const {
+  MachineBasicBlock &MBB = MF.front();
+  MachineFrameInfo *MFFrame = MF.getFrameInfo();
+  const SystemZInstrInfo *ZII =
+    static_cast<const SystemZInstrInfo*>(MF.getTarget().getInstrInfo());
+  SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
+  MachineBasicBlock::iterator MBBI = MBB.begin();
+  MachineModuleInfo &MMI = MF.getMMI();
+  std::vector<MachineMove> &Moves = MMI.getFrameMoves();
+  const std::vector<CalleeSavedInfo> &CSI = MFFrame->getCalleeSavedInfo();
+  bool HasFP = hasFP(MF);
+  DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+  // The current offset of the stack pointer from the CFA.
+  int64_t SPOffsetFromCFA = -SystemZMC::CFAOffsetFromInitialSP;
+
+  if (ZFI->getLowSavedGPR()) {
+    // Skip over the GPR saves.
+    if (MBBI != MBB.end() && MBBI->getOpcode() == SystemZ::STMG)
+      ++MBBI;
+    else
+      llvm_unreachable("Couldn't skip over GPR saves");
+
+    // Add CFI for the GPR saves.
+    MCSymbol *GPRSaveLabel = MMI.getContext().CreateTempSymbol();
+    BuildMI(MBB, MBBI, DL,
+            ZII->get(TargetOpcode::PROLOG_LABEL)).addSym(GPRSaveLabel);
+    for (std::vector<CalleeSavedInfo>::const_iterator
+           I = CSI.begin(), E = CSI.end(); I != E; ++I) {
+      unsigned Reg = I->getReg();
+      if (SystemZ::GR64BitRegClass.contains(Reg)) {
+        int64_t Offset = SPOffsetFromCFA + RegSpillOffsets[Reg];
+        MachineLocation StackSlot(MachineLocation::VirtualFP, Offset);
+        MachineLocation RegValue(Reg);
+        Moves.push_back(MachineMove(GPRSaveLabel, StackSlot, RegValue));
+      }
+    }
+  }
+
+  uint64_t StackSize = getAllocatedStackSize(MF);
+  if (StackSize) {
+    // Allocate StackSize bytes.
+    int64_t Delta = -int64_t(StackSize);
+    emitIncrement(MBB, MBBI, DL, SystemZ::R15D, Delta, ZII);
+
+    // Add CFI for the allocation.
+    MCSymbol *AdjustSPLabel = MMI.getContext().CreateTempSymbol();
+    BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::PROLOG_LABEL))
+      .addSym(AdjustSPLabel);
+    MachineLocation FPDest(MachineLocation::VirtualFP);
+    MachineLocation FPSrc(MachineLocation::VirtualFP, SPOffsetFromCFA + Delta);
+    Moves.push_back(MachineMove(AdjustSPLabel, FPDest, FPSrc));
+    SPOffsetFromCFA += Delta;
+  }
+
+  if (HasFP) {
+    // Copy the base of the frame to R11.
+    BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::LGR), SystemZ::R11D)
+      .addReg(SystemZ::R15D);
+
+    // Add CFI for the new frame location.
+    MCSymbol *SetFPLabel = MMI.getContext().CreateTempSymbol();
+    BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::PROLOG_LABEL))
+      .addSym(SetFPLabel);
+    MachineLocation HardFP(SystemZ::R11D);
+    MachineLocation VirtualFP(MachineLocation::VirtualFP);
+    Moves.push_back(MachineMove(SetFPLabel, HardFP, VirtualFP));
+
+    // Mark the FramePtr as live at the beginning of every block except
+    // the entry block.  (We'll have marked R11 as live on entry when
+    // saving the GPRs.)
+    for (MachineFunction::iterator
+           I = llvm::next(MF.begin()), E = MF.end(); I != E; ++I)
+      I->addLiveIn(SystemZ::R11D);
+  }
+
+  // Skip over the FPR saves.
+  MCSymbol *FPRSaveLabel = 0;
+  for (std::vector<CalleeSavedInfo>::const_iterator
+         I = CSI.begin(), E = CSI.end(); I != E; ++I) {
+    unsigned Reg = I->getReg();
+    if (SystemZ::FP64BitRegClass.contains(Reg)) {
+      if (MBBI != MBB.end() &&
+          (MBBI->getOpcode() == SystemZ::STD ||
+           MBBI->getOpcode() == SystemZ::STDY))
+        ++MBBI;
+      else
+        llvm_unreachable("Couldn't skip over FPR save");
+
+      // Add CFI for this save.
+      if (!FPRSaveLabel)
+        FPRSaveLabel = MMI.getContext().CreateTempSymbol();
+      int64_t Offset = getFrameIndexOffset(MF, I->getFrameIdx());
+      MachineLocation Slot(MachineLocation::VirtualFP,
+                           SPOffsetFromCFA + Offset);
+      MachineLocation RegValue(Reg);
+      Moves.push_back(MachineMove(FPRSaveLabel, Slot, RegValue));
+    }
+  }
+  // Complete the CFI for the FPR saves, modelling them as taking effect
+  // after the last save.
+  if (FPRSaveLabel)
+    BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::PROLOG_LABEL))
+      .addSym(FPRSaveLabel);
+}
+
+void SystemZFrameLowering::emitEpilogue(MachineFunction &MF,
+                                        MachineBasicBlock &MBB) const {
+  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+  const SystemZInstrInfo *ZII =
+    static_cast<const SystemZInstrInfo*>(MF.getTarget().getInstrInfo());
+  SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
+
+  // MBBI must be the return instruction; the epilogue goes before it.
+  assert(MBBI->getOpcode() == SystemZ::RET &&
+         "Can only insert epilogue into returning blocks");
+
+  uint64_t StackSize = getAllocatedStackSize(MF);
+  if (ZFI->getLowSavedGPR()) {
+    --MBBI;
+    unsigned Opcode = MBBI->getOpcode();
+    if (Opcode != SystemZ::LMG)
+      llvm_unreachable("Expected to see callee-save register restore code");
+
+    unsigned AddrOpNo = 2;  // base register; the displacement follows it
+    DebugLoc DL = MBBI->getDebugLoc();
+    uint64_t Offset = StackSize + MBBI->getOperand(AddrOpNo + 1).getImm();
+    unsigned NewOpcode = ZII->getOpcodeForOffset(Opcode, Offset);
+
+    // If the offset is too large, use the largest stack-aligned offset
+    // and add the rest to the base register (the stack or frame pointer).
+    if (!NewOpcode) {
+      uint64_t NumBytes = Offset - 0x7fff8;
+      emitIncrement(MBB, MBBI, DL, MBBI->getOperand(AddrOpNo).getReg(),
+                    NumBytes, ZII);
+      Offset -= NumBytes;
+      NewOpcode = ZII->getOpcodeForOffset(Opcode, Offset);
+      assert(NewOpcode && "No restore instruction available");
+    }
+
+    MBBI->setDesc(ZII->get(NewOpcode));
+    MBBI->getOperand(AddrOpNo + 1).ChangeToImmediate(Offset);
+  } else if (StackSize) {
+    DebugLoc DL = MBBI->getDebugLoc();
+    emitIncrement(MBB, MBBI, DL, SystemZ::R15D, StackSize, ZII);
+  }
+}
+
+bool SystemZFrameLowering::hasFP(const MachineFunction &MF) const {
+  if (MF.getTarget().Options.DisableFramePointerElim(MF)) return true;
+  if (MF.getFrameInfo()->hasVarSizedObjects()) return true;
+  return MF.getInfo<SystemZMachineFunctionInfo>()->getManipulatesSP();
+}
+
+int SystemZFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
+                                              int FI) const {
+  const MachineFrameInfo *FrameInfo = MF.getFrameInfo();
+
+  // The frame info gives FI's position relative to the top of the
+  // caller-allocated frame (the top of the caller's 160 bytes), so this
+  // starting value is negative.
+  int64_t Pos = FrameInfo->getObjectOffset(FI);
+  Pos += FrameInfo->getOffsetAdjustment();
+
+  // Non-fixed objects (FI >= 0) are allocated below the incoming stack
+  // pointer; skip the top-of-frame bytes that we choose not to allocate.
+  if (FI >= 0)
+    Pos += getUnallocatedTopBytes(MF);
+
+  // Rebase on the incoming stack pointer...
+  Pos -= getOffsetOfLocalArea();
+
+  // ...and then on the bottom of the frame.
+  Pos += getAllocatedStackSize(MF);
+
+  return Pos;
+}
+
+uint64_t SystemZFrameLowering::
+getUnallocatedTopBytes(const MachineFunction &MF) const {
+  return MF.getInfo<SystemZMachineFunctionInfo>()->getSavedGPRFrameSize();
+}
+
+uint64_t SystemZFrameLowering::
+getAllocatedStackSize(const MachineFunction &MF) const {
+  const MachineFrameInfo *FrameInfo = MF.getFrameInfo();
+
+  // Take the size of the local variables and spill slots, drop the bytes
+  // at the top that we choose not to allocate, and add room for an
+  // emergency spill slot if one might be needed.
+  uint64_t Bytes = FrameInfo->getStackSize();
+  Bytes -= getUnallocatedTopBytes(MF);
+  Bytes += getEmergencySpillSlotSize(MF);
+
+  // The ABI-defined 160-byte base area is needed as soon as we allocate
+  // any stack of our own, and also whenever we call another function.
+  bool NeedsBaseArea = Bytes != 0 ||
+                       FrameInfo->hasVarSizedObjects() ||
+                       FrameInfo->hasCalls();
+  if (NeedsBaseArea)
+    Bytes += SystemZMC::CallFrameSize;
+
+  return Bytes;
+}
+
+unsigned SystemZFrameLowering::
+getEmergencySpillSlotSize(const MachineFunction &MF) const {
+  const MachineFrameInfo *MFFrame = MF.getFrameInfo();
+  uint64_t MaxReach = MFFrame->getStackSize() + SystemZMC::CallFrameSize * 2;
+  return isUInt<12>(MaxReach) ? 0 : 8;  // 0 while MaxReach fits in 12 bits
+}
+
+unsigned SystemZFrameLowering::
+getEmergencySpillSlotOffset(const MachineFunction &MF) const {
+  assert(getEmergencySpillSlotSize(MF) && "No emergency spill slot");
+  return SystemZMC::CallFrameSize;
+}
+
+bool
+SystemZFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
+  // The ABI requires us to allocate 160 bytes of stack space for the callee,
+  // with any outgoing stack arguments being placed above that.  It seems
+  // better to make that area a permanent feature of the frame even if
+  // we're using a frame pointer.
+  return true;
+}
+
+// Remove a call-frame setup or teardown pseudo.  Nothing needs to be
+// emitted in its place as long as the call frame is reserved, which
+// the assertion below checks.
+void SystemZFrameLowering::
+eliminateCallFramePseudoInstr(MachineFunction &MF,
+                              MachineBasicBlock &MBB,
+                              MachineBasicBlock::iterator MI) const {
+  unsigned Opcode = MI->getOpcode();
+  if (Opcode != SystemZ::ADJCALLSTACKDOWN &&
+      Opcode != SystemZ::ADJCALLSTACKUP)
+    llvm_unreachable("Unexpected call frame instruction");
+
+  assert(hasReservedCallFrame(MF) &&
+         "ADJSTACKDOWN and ADJSTACKUP should be no-ops");
+  MBB.erase(MI);
+}
diff --git a/lib/Target/SystemZ/SystemZFrameLowering.h b/lib/Target/SystemZ/SystemZFrameLowering.h
new file mode 100644
index 0000000..5ca049c
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZFrameLowering.h
@@ -0,0 +1,93 @@
+//===-- SystemZFrameLowering.h - Frame lowering for SystemZ -----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SYSTEMZFRAMELOWERING_H
+#define SYSTEMZFRAMELOWERING_H
+
+#include "SystemZSubtarget.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+class SystemZTargetMachine;
+class SystemZSubtarget;
+
+class SystemZFrameLowering : public TargetFrameLowering {
+  IndexedMap<unsigned> RegSpillOffsets;
+
+protected:
+  const SystemZTargetMachine &TM;
+  const SystemZSubtarget &STI;
+
+public:
+  SystemZFrameLowering(const SystemZTargetMachine &tm,
+                       const SystemZSubtarget &sti);
+
+  // Override TargetFrameLowering.
+  virtual void
+    processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+                                         RegScavenger *RS) const LLVM_OVERRIDE;
+  virtual bool
+    spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+                              MachineBasicBlock::iterator MBBI,
+                              const std::vector<CalleeSavedInfo> &CSI,
+                              const TargetRegisterInfo *TRI) const
+    LLVM_OVERRIDE;
+  virtual bool
+    restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                MachineBasicBlock::iterator MBBI,
+                                const std::vector<CalleeSavedInfo> &CSI,
+                                const TargetRegisterInfo *TRI) const
+    LLVM_OVERRIDE;
+  virtual void emitPrologue(MachineFunction &MF) const LLVM_OVERRIDE;
+  virtual void emitEpilogue(MachineFunction &MF,
+                            MachineBasicBlock &MBB) const LLVM_OVERRIDE;
+  virtual bool hasFP(const MachineFunction &MF) const LLVM_OVERRIDE;
+  virtual int getFrameIndexOffset(const MachineFunction &MF,
+                                  int FI) const LLVM_OVERRIDE;
+  virtual bool hasReservedCallFrame(const MachineFunction &MF) const
+    LLVM_OVERRIDE;
+  virtual void
+  eliminateCallFramePseudoInstr(MachineFunction &MF,
+                                MachineBasicBlock &MBB,
+                                MachineBasicBlock::iterator MI) const
+    LLVM_OVERRIDE;
+
+  // The target-independent code automatically allocates save slots for
+  // call-saved GPRs.  However, we don't need those slots for SystemZ,
+  // because the ABI sets aside GPR save slots in the caller-allocated part
+  // of the frame.  Since the target-independent code puts this unneeded
+  // area at the top of the callee-allocated part of frame, we choose not
+  // to allocate it and adjust the offsets accordingly.  Return the
+  // size of this unallocated area.
+  // FIXME: seems a bit hackish.
+  uint64_t getUnallocatedTopBytes(const MachineFunction &MF) const;
+
+  // Return the number of bytes in the callee-allocated part of the frame.
+  uint64_t getAllocatedStackSize(const MachineFunction &MF) const;
+
+  // Return the number of frame bytes that should be reserved for
+  // an emergency spill slot, for use by the register scavenger.
+  // Return 0 if register scavenging won't be needed.
+  unsigned getEmergencySpillSlotSize(const MachineFunction &MF) const;
+
+  // Return the offset from the frame pointer of the emergency spill slot,
+  // which always fits within a 12-bit unsigned displacement field.
+  // Only valid if getEmergencySpillSlotSize(MF) returns nonzero.
+  unsigned getEmergencySpillSlotOffset(const MachineFunction &MF) const;
+
+  // Return the byte offset from the incoming stack pointer of Reg's
+  // ABI-defined save slot.  Return 0 if no slot is defined for Reg.
+  unsigned getRegSpillOffset(unsigned Reg) const {
+    return RegSpillOffsets[Reg];
+  }
+};
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
new file mode 100644
index 0000000..d436ba9
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -0,0 +1,616 @@
+//===-- SystemZISelDAGToDAG.cpp - A dag to dag inst selector for SystemZ --===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the SystemZ target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZTargetMachine.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+// Describes an address that is being matched and collects its components.
+struct SystemZAddressingMode {
+  // The shape of the address.
+  enum AddrForm {
+    // base+displacement
+    FormBD,
+
+    // base+displacement+index for load and store operands
+    FormBDXNormal,
+
+    // base+displacement+index for load address operands
+    FormBDXLA,
+
+    // base+displacement+index+ADJDYNALLOC
+    FormBDXDynAlloc
+  };
+  AddrForm Form;
+
+  // The type of displacement.  The enum names here correspond directly
+  // to the definitions in SystemZOperands.td.  We could split them into
+  // flags -- single/pair, 128-bit, etc. -- but it hardly seems worth it.
+  enum DispRange {
+    Disp12Only,
+    Disp12Pair,
+    Disp20Only,
+    Disp20Only128,
+    Disp20Pair
+  };
+  DispRange DR;
+
+  // The parts of the address.  The address is equivalent to:
+  //
+  //     Base + Disp + Index + (IncludesDynAlloc ? ADJDYNALLOC : 0)
+  SDValue Base;
+  int64_t Disp;
+  SDValue Index;
+  bool IncludesDynAlloc;
+
+  SystemZAddressingMode(AddrForm form, DispRange dr)
+    : Form(form), DR(dr), Base(), Disp(0), Index(),
+      IncludesDynAlloc(false) {}
+
+  // True if the address can have an index register.
+  bool hasIndexField() const { return Form != FormBD; }
+
+  // True if the address can (and must) include ADJDYNALLOC.
+  bool isDynAlloc() const { return Form == FormBDXDynAlloc; }
+
+  void dump() const {
+    errs() << "SystemZAddressingMode " << this << '\n';
+
+    errs() << " Base ";
+    if (Base.getNode() != 0)
+      Base.getNode()->dump();
+    else
+      errs() << "null\n";
+
+    if (hasIndexField()) {
+      errs() << " Index ";
+      if (Index.getNode() != 0)
+        Index.getNode()->dump();
+      else
+        errs() << "null\n";
+    }
+
+    errs() << " Disp " << Disp;
+    if (IncludesDynAlloc)
+      errs() << " + ADJDYNALLOC";
+    errs() << '\n';
+  }
+};
+
+class SystemZDAGToDAGISel : public SelectionDAGISel {
+  const SystemZTargetLowering &Lowering; // Taken from TM.getTargetLowering().
+  const SystemZSubtarget &Subtarget;     // Taken from TM.getSubtargetImpl().
+
+  // Used by SystemZOperands.td to create integer constants of Node's type.
+  inline SDValue getImm(const SDNode *Node, uint64_t Imm) {
+    return CurDAG->getTargetConstant(Imm, Node->getValueType(0));
+  }
+
+  // Try to fold more of the base or index of AM into AM, where IsBase
+  // selects between the base and index.  Return true if AM was changed.
+  bool expandAddress(SystemZAddressingMode &AM, bool IsBase);
+
+  // Try to describe N in AM, returning true on success.
+  bool selectAddress(SDValue N, SystemZAddressingMode &AM);
+
+  // Extract individual target operands of type VT from matched address AM.
+  void getAddressOperands(const SystemZAddressingMode &AM, EVT VT,
+                          SDValue &Base, SDValue &Disp);
+  void getAddressOperands(const SystemZAddressingMode &AM, EVT VT,
+                          SDValue &Base, SDValue &Disp, SDValue &Index);
+
+  // Try to match Addr as a FormBD address with displacement type DR.
+  // Return true on success, storing the base and displacement in
+  // Base and Disp respectively.
+  bool selectBDAddr(SystemZAddressingMode::DispRange DR, SDValue Addr,
+                    SDValue &Base, SDValue &Disp);
+
+  // Try to match Addr as a FormBDX* address of form Form with
+  // displacement type DR.  Return true on success, storing the base,
+  // displacement and index in Base, Disp and Index respectively.
+  bool selectBDXAddr(SystemZAddressingMode::AddrForm Form,
+                     SystemZAddressingMode::DispRange DR, SDValue Addr,
+                     SDValue &Base, SDValue &Disp, SDValue &Index);
+
+  // PC-relative matcher used by SystemZOperands.td; unwraps a PCREL_WRAPPER.
+  bool selectPCRelAddress(SDValue Addr, SDValue &Target) {
+    if (Addr.getOpcode() == SystemZISD::PCREL_WRAPPER) {
+      Target = Addr.getOperand(0);
+      return true;
+    }
+    return false;
+  }
+
+  // BD matching routines used by SystemZOperands.td, one per DispRange used.
+  bool selectBDAddr12Only(SDValue Addr, SDValue &Base, SDValue &Disp) {
+    return selectBDAddr(SystemZAddressingMode::Disp12Only, Addr, Base, Disp);
+  }
+  bool selectBDAddr12Pair(SDValue Addr, SDValue &Base, SDValue &Disp) {
+    return selectBDAddr(SystemZAddressingMode::Disp12Pair, Addr, Base, Disp);
+  }
+  bool selectBDAddr20Only(SDValue Addr, SDValue &Base, SDValue &Disp) {
+    return selectBDAddr(SystemZAddressingMode::Disp20Only, Addr, Base, Disp);
+  }
+  bool selectBDAddr20Pair(SDValue Addr, SDValue &Base, SDValue &Disp) {
+    return selectBDAddr(SystemZAddressingMode::Disp20Pair, Addr, Base, Disp);
+  }
+
+  // BDX, ADJDYNALLOC and LA matching routines used by SystemZOperands.td.
+  bool selectBDXAddr12Only(SDValue Addr, SDValue &Base, SDValue &Disp,
+                           SDValue &Index) {
+    return selectBDXAddr(SystemZAddressingMode::FormBDXNormal,
+                         SystemZAddressingMode::Disp12Only,
+                         Addr, Base, Disp, Index);
+  }
+  bool selectBDXAddr12Pair(SDValue Addr, SDValue &Base, SDValue &Disp,
+                           SDValue &Index) {
+    return selectBDXAddr(SystemZAddressingMode::FormBDXNormal,
+                         SystemZAddressingMode::Disp12Pair,
+                         Addr, Base, Disp, Index);
+  }
+  bool selectDynAlloc12Only(SDValue Addr, SDValue &Base, SDValue &Disp,
+                            SDValue &Index) {
+    return selectBDXAddr(SystemZAddressingMode::FormBDXDynAlloc,
+                         SystemZAddressingMode::Disp12Only,
+                         Addr, Base, Disp, Index);
+  }
+  bool selectBDXAddr20Only(SDValue Addr, SDValue &Base, SDValue &Disp,
+                           SDValue &Index) {
+    return selectBDXAddr(SystemZAddressingMode::FormBDXNormal,
+                         SystemZAddressingMode::Disp20Only,
+                         Addr, Base, Disp, Index);
+  }
+  bool selectBDXAddr20Only128(SDValue Addr, SDValue &Base, SDValue &Disp,
+                              SDValue &Index) {
+    return selectBDXAddr(SystemZAddressingMode::FormBDXNormal,
+                         SystemZAddressingMode::Disp20Only128,
+                         Addr, Base, Disp, Index);
+  }
+  bool selectBDXAddr20Pair(SDValue Addr, SDValue &Base, SDValue &Disp,
+                           SDValue &Index) {
+    return selectBDXAddr(SystemZAddressingMode::FormBDXNormal,
+                         SystemZAddressingMode::Disp20Pair,
+                         Addr, Base, Disp, Index);
+  }
+  bool selectLAAddr12Pair(SDValue Addr, SDValue &Base, SDValue &Disp,
+                          SDValue &Index) {
+    return selectBDXAddr(SystemZAddressingMode::FormBDXLA,
+                         SystemZAddressingMode::Disp12Pair,
+                         Addr, Base, Disp, Index);
+  }
+  bool selectLAAddr20Pair(SDValue Addr, SDValue &Base, SDValue &Disp,
+                          SDValue &Index) {
+    return selectBDXAddr(SystemZAddressingMode::FormBDXLA,
+                         SystemZAddressingMode::Disp20Pair,
+                         Addr, Base, Disp, Index);
+  }
+
+  // If Op0 is null, then Node is a constant that can be loaded using:
+  //
+  //   (Opcode UpperVal LowerVal)
+  //
+  // If Op0 is nonnull, then Node can be implemented using:
+  //
+  //   (Opcode (Opcode Op0 UpperVal) LowerVal)
+  SDNode *splitLargeImmediate(unsigned Opcode, SDNode *Node, SDValue Op0,
+                              uint64_t UpperVal, uint64_t LowerVal);
+
+public:
+  SystemZDAGToDAGISel(SystemZTargetMachine &TM, CodeGenOpt::Level OptLevel)
+    : SelectionDAGISel(TM, OptLevel),
+      Lowering(*TM.getTargetLowering()),
+      Subtarget(*TM.getSubtargetImpl()) { }
+
+  // Override MachineFunctionPass.
+  virtual const char *getPassName() const LLVM_OVERRIDE {
+    return "SystemZ DAG->DAG Pattern Instruction Selection";
+  }
+
+  // Override SelectionDAGISel.
+  virtual SDNode *Select(SDNode *Node) LLVM_OVERRIDE;
+  virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+                                            char ConstraintCode,
+                                            std::vector<SDValue> &OutOps)
+    LLVM_OVERRIDE;
+
+  // Include the pieces autogenerated from the target description.
+  #include "SystemZGenDAGISel.inc"
+};
+} // end anonymous namespace
+
+FunctionPass *llvm::createSystemZISelDag(SystemZTargetMachine &TM,
+                                         CodeGenOpt::Level OptLevel) {
+  return new SystemZDAGToDAGISel(TM, OptLevel); // Allocated; not freed here.
+}
+
+// Decide whether Val is acceptable as the displacement of an address whose
+// displacement type is DR.  The accepted range covers both the instruction
+// described by DR and any pairing instruction.
+static bool selectDisp(SystemZAddressingMode::DispRange DR, int64_t Val) {
+  typedef SystemZAddressingMode AMode;
+
+  // This type is limited to an unsigned 12-bit value.
+  if (DR == AMode::Disp12Only)
+    return isUInt<12>(Val);
+  // Here Val and Val + 8 must both be signed 20-bit values.
+  if (DR == AMode::Disp20Only128)
+    return isInt<20>(Val) && isInt<20>(Val + 8);
+  // The remaining types accept any signed 20-bit value.
+  if (DR == AMode::Disp12Pair || DR == AMode::Disp20Only ||
+      DR == AMode::Disp20Pair)
+    return isInt<20>(Val);
+  llvm_unreachable("Unhandled displacement range");
+}
+
+// Replace one component of AM with Value: the base if IsBase is true and
+// the index otherwise.  The other component is left alone.
+static void changeComponent(SystemZAddressingMode &AM, bool IsBase,
+                            SDValue Value) {
+  // Bind the selected field to a reference, then assign through it, so
+  // that the choice between base and index is made in one place.
+  SDValue &Component = IsBase ? AM.Base : AM.Index;
+  Component = Value;
+}
+
+// Called when the base or index of AM (as selected by IsBase) is known to
+// equal Value + ADJDYNALLOC.  Try to absorb the ADJDYNALLOC term into AM,
+// leaving Value as the component.  Return true if that was possible.
+static bool expandAdjDynAlloc(SystemZAddressingMode &AM, bool IsBase,
+                              SDValue Value) {
+  // The form must call for ADJDYNALLOC and must not have one already.
+  if (!AM.isDynAlloc() || AM.IncludesDynAlloc)
+    return false;
+  changeComponent(AM, IsBase, Value);
+  AM.IncludesDynAlloc = true;
+  return true;
+}
+
+// Called when the base of AM is known to equal Base + Index.  If AM has an
+// index field that is still empty, split the sum across the two fields.
+static bool expandIndex(SystemZAddressingMode &AM, SDValue Base,
+                        SDValue Index) {
+  // Give up if there is no index field or if it is already occupied.
+  if (!AM.hasIndexField() || AM.Index.getNode())
+    return false;
+  AM.Index = Index;
+  AM.Base = Base;
+  return true;
+}
+
+// The base or index of AM (selected by IsBase) is equivalent to Op0 + Op1.
+// Try to fold Op1 into AM's displacement, leaving Op0 as the component.
+static bool expandDisp(SystemZAddressingMode &AM, bool IsBase,
+                       SDValue Op0, ConstantSDNode *Op1) {
+  // First try adjusting the displacement.  The sum is formed in uint64_t so
+  // that an extreme constant wraps around instead of overflowing int64_t.
+  int64_t TestDisp = uint64_t(AM.Disp) + uint64_t(Op1->getSExtValue());
+  if (selectDisp(AM.DR, TestDisp)) {
+    changeComponent(AM, IsBase, Op0);
+    AM.Disp = TestDisp;
+    return true;
+  }
+  // We could consider forcing the displacement into a register and
+  // using it as an index, but it would need to be carefully tuned.
+  return false;
+}
+
+bool SystemZDAGToDAGISel::expandAddress(SystemZAddressingMode &AM,
+                                        bool IsBase) {
+  // Look through one truncation of the selected component.
+  SDValue N = IsBase ? AM.Base : AM.Index;
+  if (N.getOpcode() == ISD::TRUNCATE)
+    N = N.getOperand(0);
+
+  // Nothing can be folded unless N is an addition or is otherwise accepted
+  // by isBaseWithConstantOffset.
+  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
+    return false;
+
+  SDValue LHS = N.getOperand(0);
+  SDValue RHS = N.getOperand(1);
+
+  // An ADJDYNALLOC operand is handled before anything else.
+  if (LHS->getOpcode() == SystemZISD::ADJDYNALLOC)
+    return expandAdjDynAlloc(AM, IsBase, RHS);
+  if (RHS->getOpcode() == SystemZISD::ADJDYNALLOC)
+    return expandAdjDynAlloc(AM, IsBase, LHS);
+
+  // Next, try to fold a constant operand into the displacement.
+  if (LHS->getOpcode() == ISD::Constant)
+    return expandDisp(AM, IsBase, RHS, cast<ConstantSDNode>(LHS));
+  if (RHS->getOpcode() == ISD::Constant)
+    return expandDisp(AM, IsBase, LHS, cast<ConstantSDNode>(RHS));
+
+  // Otherwise a base made of two values can hand one over as the index.
+  return IsBase && expandIndex(AM, LHS, RHS);
+}
+
+// Given that selectDisp(DR, Val) holds, decide whether the instruction with
+// displacement type DR is the one to use for Val, rather than its partner.
+static bool isValidDisp(SystemZAddressingMode::DispRange DR, int64_t Val) {
+  assert(selectDisp(DR, Val) && "Invalid displacement");
+  typedef SystemZAddressingMode AMode;
+  const bool FitsUnsigned12 = isUInt<12>(Val);
+
+  // The 12-bit member of a pair is used whenever the displacement fits;
+  // use the other instruction if the displacement is too large.
+  if (DR == AMode::Disp12Pair)
+    return FitsUnsigned12;
+  // Use the other instruction if the displacement is small enough.
+  if (DR == AMode::Disp20Pair)
+    return !FitsUnsigned12;
+  // The unpaired types have no alternative instruction to defer to.
+  if (DR == AMode::Disp12Only || DR == AMode::Disp20Only ||
+      DR == AMode::Disp20Only128)
+    return true;
+  llvm_unreachable("Unhandled displacement range");
+}
+
+// Return true if Base + Disp + Index should be performed by LA(Y).
+static bool shouldUseLA(SDNode *Base, int64_t Disp, SDNode *Index) {
+  // No base means that the address is a constant; don't use LA(Y) for those.
+  if (!Base)
+    return false;
+
+  // Frame addresses always use LA(Y), since we know that the destination
+  // register is almost always (perhaps always) going to be different from
+  // the frame register.
+  if (Base->getOpcode() == ISD::FrameIndex)
+    return true;
+
+  if (Disp != 0) {
+    // A base, displacement and index together always use LA(Y).
+    const bool HasAllThree = Index != 0;
+
+    // So does a displacement that is small enough for LA, which should
+    // always be no worse than AGHI (and better if it avoids a move).
+    const bool FitsLA = isUInt<12>(Disp);
+
+    // For similar reasons, so does a constant that is too big for AGHI.
+    // LAY should be no worse than AGFI.
+    const bool TooBigForAGHI = !isInt<16>(Disp);
+
+    if (HasAllThree || FitsLA || TooBigForAGHI)
+      return true;
+  } else {
+    // Don't use LA for plain registers.  Don't use it for a plain
+    // addition either if the index operand is only used once; that
+    // should be a natural two-operand addition.
+    if (!Index || Index->hasOneUse())
+      return false;
+
+    // Prefer addition if the second operand is sign-extended, in the
+    // hope of using AGF.
+    switch (Index->getOpcode()) {
+    case ISD::SIGN_EXTEND:
+    case ISD::SIGN_EXTEND_INREG:
+      return false;
+    default:
+      break;
+    }
+  }
+
+  // Everything that reaches this point is decided by the base.  Don't
+  // use LA if the base is only used once; the addition instructions are
+  // better in that case.
+  const bool BaseHasOtherUses = !Base->hasOneUse();
+
+  return BaseHasOtherUses;
+}
+
+// Return true if Addr is suitable for AM.  AM may be modified either way.
+bool SystemZDAGToDAGISel::selectAddress(SDValue Addr,
+                                        SystemZAddressingMode &AM) {
+  // Begin with the whole address as the base, as if it had to be loaded
+  // separately, and then fold as much of it as possible into AM.
+  AM.Base = Addr;
+
+  // A constant address may fit entirely in the displacement.
+  const bool FoldedConstant =
+    Addr.getOpcode() == ISD::Constant &&
+    expandDisp(AM, true, SDValue(), cast<ConstantSDNode>(Addr));
+
+  // Otherwise keep expanding the base, and the index once there is one,
+  // until neither of them can be expanded any further.
+  if (!FoldedConstant)
+    while (expandAddress(AM, true) ||
+           (AM.Index.getNode() && expandAddress(AM, false)))
+      continue;
+
+  // Reject cases where it isn't profitable to use LA(Y).
+  const bool IsLA = AM.Form == SystemZAddressingMode::FormBDXLA;
+  if (IsLA && !shouldUseLA(AM.Base.getNode(), AM.Disp, AM.Index.getNode()))
+    return false;
+
+  // Reject cases where the other instruction in a pair should be used,
+  // and cases where a required ADJDYNALLOC was not found.
+  if (!isValidDisp(AM.DR, AM.Disp) ||
+      (AM.isDynAlloc() && !AM.IncludesDynAlloc))
+    return false;
+
+  DEBUG(AM.dump());
+  return true;
+}
+
+// Ensure that N is placed in the DAG at least before Pos: if N has no node
+// ID, or one greater than Pos's, reposition it and give it Pos's ID.  Note
+// that this does *not* preserve the uniqueness of node IDs!  The selection
+// DAG must no longer depend on their uniqueness when this function is used.
+static void insertDAGNode(SelectionDAG *DAG, SDNode *Pos, SDValue N) {
+  SDNode *Node = N.getNode();
+  const int Id = Node->getNodeId();
+  if (Id != -1 && Id <= Pos->getNodeId())
+    return;
+  DAG->RepositionNode(Pos, Node);
+  Node->setNodeId(Pos->getNodeId());
+}
+
+void SystemZDAGToDAGISel::getAddressOperands(const SystemZAddressingMode &AM,
+                                             EVT VT, SDValue &Base,
+                                             SDValue &Disp) {
+  SDValue NewBase = AM.Base;
+  if (NewBase.getNode() == 0) {
+    // Register 0 means "no base".  This is mostly useful for shifts.
+    NewBase = CurDAG->getRegister(0, VT);
+  } else if (NewBase.getOpcode() == ISD::FrameIndex) {
+    // Lower a FrameIndex to a TargetFrameIndex.
+    int64_t FI = cast<FrameIndexSDNode>(NewBase)->getIndex();
+    NewBase = CurDAG->getTargetFrameIndex(FI, VT);
+  } else if (NewBase.getValueType() != VT) {
+    // Truncate values from i64 to i32, for shifts.
+    assert(VT == MVT::i32 && NewBase.getValueType() == MVT::i64 &&
+           "Unexpected truncation");
+    SDValue Wide = NewBase;
+    NewBase = CurDAG->getNode(ISD::TRUNCATE, Wide.getDebugLoc(), VT, Wide);
+    insertDAGNode(CurDAG, Wide.getNode(), NewBase);
+  }
+  Base = NewBase;
+
+  // Lower the displacement to a TargetConstant.
+  Disp = CurDAG->getTargetConstant(AM.Disp, VT);
+}
+
+void SystemZDAGToDAGISel::getAddressOperands(const SystemZAddressingMode &AM,
+                                             EVT VT, SDValue &Base,
+                                             SDValue &Disp, SDValue &Index) {
+  // The base and displacement are produced by the two-operand overload.
+  getAddressOperands(AM, VT, Base, Disp);
+
+  // Register 0 means "no index".
+  const bool HasIndex = AM.Index.getNode() != 0;
+  Index = HasIndex ? AM.Index : CurDAG->getRegister(0, VT);
+}
+
+bool SystemZDAGToDAGISel::selectBDAddr(SystemZAddressingMode::DispRange DR,
+                                       SDValue Addr, SDValue &Base,
+                                       SDValue &Disp) {
+  // Base and Disp are only written when the match succeeds.
+  SystemZAddressingMode AM(SystemZAddressingMode::FormBD, DR);
+  const bool Matched = selectAddress(Addr, AM);
+  if (Matched)
+    getAddressOperands(AM, Addr.getValueType(), Base, Disp);
+  return Matched;
+}
+
+bool SystemZDAGToDAGISel::selectBDXAddr(SystemZAddressingMode::AddrForm Form,
+                                        SystemZAddressingMode::DispRange DR,
+                                        SDValue Addr, SDValue &Base,
+                                        SDValue &Disp, SDValue &Index) {
+  // Base, Disp and Index are only written when the match succeeds.
+  SystemZAddressingMode AM(Form, DR);
+  const bool Matched = selectAddress(Addr, AM);
+  if (Matched)
+    getAddressOperands(AM, Addr.getValueType(), Base, Disp, Index);
+  return Matched;
+}
+
+SDNode *SystemZDAGToDAGISel::splitLargeImmediate(unsigned Opcode, SDNode *Node,
+                                                 SDValue Op0, uint64_t UpperVal,
+                                                 uint64_t LowerVal) {
+  // Build the upper part (applied to Op0 if there is one) and select it now.
+  EVT VT = Node->getValueType(0);
+  DebugLoc DL = Node->getDebugLoc();
+  SDValue High = CurDAG->getConstant(UpperVal, VT);
+  if (Op0.getNode())
+    High = CurDAG->getNode(Opcode, DL, VT, Op0, High);
+  High = SDValue(Select(High.getNode()), 0);
+  // The node that combines it with the lower part is returned unselected.
+  SDValue Low = CurDAG->getConstant(LowerVal, VT);
+  return CurDAG->getNode(Opcode, DL, VT, High, Low).getNode();
+}
+
+SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
+  // Dump information about the Node being selected.
+  DEBUG(errs() << "Selecting: "; Node->dump(CurDAG); errs() << "\n");
+
+  // A node that already has a machine opcode has already been selected.
+  if (Node->isMachineOpcode()) {
+    DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
+    return 0;
+  }
+
+  unsigned Opcode = Node->getOpcode();
+  switch (Opcode) {
+  case ISD::OR:
+  case ISD::XOR:
+    // If this is a 64-bit operation in which both 32-bit halves are nonzero,
+    // split the operation into two.
+    if (Node->getValueType(0) == MVT::i64)
+      if (ConstantSDNode *Op1 = dyn_cast<ConstantSDNode>(Node->getOperand(1))) {
+        uint64_t Val = Op1->getZExtValue();
+        if (!SystemZ::isImmLF(Val) && !SystemZ::isImmHF(Val))
+          Node = splitLargeImmediate(Opcode, Node, Node->getOperand(0),
+                                     Val - uint32_t(Val), uint32_t(Val));
+      }
+    break;
+
+  case ISD::Constant:
+    // If this is a 64-bit constant that is out of the range of LLILF,
+    // LLIHF and LGFI, split it into two 32-bit pieces.
+    if (Node->getValueType(0) == MVT::i64) {
+      uint64_t Val = cast<ConstantSDNode>(Node)->getZExtValue();
+      if (!SystemZ::isImmLF(Val) && !SystemZ::isImmHF(Val) && !isInt<32>(Val))
+        Node = splitLargeImmediate(ISD::OR, Node, SDValue(),
+                                   Val - uint32_t(Val), uint32_t(Val));
+    }
+    break;
+
+  case ISD::ATOMIC_LOAD_SUB:
+    // Try to convert subtractions of constants to additions of the negation.
+    if (ConstantSDNode *Op2 = dyn_cast<ConstantSDNode>(Node->getOperand(2))) {
+      uint64_t Value = -Op2->getZExtValue();
+      EVT VT = Node->getValueType(0);
+      if (VT == MVT::i32 || isInt<32>(Value)) {
+        SDValue Ops[] = { Node->getOperand(0), Node->getOperand(1),
+                          CurDAG->getConstant(int32_t(Value), VT) };
+        Node = CurDAG->MorphNodeTo(Node, ISD::ATOMIC_LOAD_ADD,
+                                   Node->getVTList(), Ops, array_lengthof(Ops));
+      }
+    }
+    break;
+  }
+
+  // Select the default instruction for Node, which may have been replaced.
+  SDNode *ResNode = SelectCode(Node);
+
+  DEBUG(errs() << "=> ";
+        if (ResNode == NULL || ResNode == Node)
+          Node->dump(CurDAG);
+        else
+          ResNode->dump(CurDAG);
+        errs() << "\n";
+        );
+  return ResNode;
+}
+
+bool SystemZDAGToDAGISel::
+SelectInlineAsmMemoryOperand(const SDValue &Op,
+                             char ConstraintCode,
+                             std::vector<SDValue> &OutOps) {
+  assert(ConstraintCode == 'm' && "Unexpected constraint code");
+  // Accept addresses with short displacements, which are compatible
+  // with Q, R, S and T.  But keep the index operand for future expansion.
+  // Ops holds the base, displacement and index, in that order.
+  SDValue Ops[3];
+  if (!selectBDXAddr(SystemZAddressingMode::FormBD,
+                     SystemZAddressingMode::Disp12Only,
+                     Op, Ops[0], Ops[1], Ops[2]))
+    return true;
+  // A false return value is what tells the caller that OutOps was extended.
+  OutOps.insert(OutOps.end(), Ops, Ops + 3);
+  return false;
+}
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
new file mode 100644
index 0000000..eb21b31
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -0,0 +1,2233 @@
+//===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SystemZTargetLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "systemz-lower"
+
+#include "SystemZISelLowering.h"
+#include "SystemZCallingConv.h"
+#include "SystemZConstantPoolValue.h"
+#include "SystemZMachineFunctionInfo.h"
+#include "SystemZTargetMachine.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+
+using namespace llvm;
+
+// Return true if VT is i32 and false if it is i64.  No other type is
+// supported.
+static bool is32Bit(EVT VT) {
+  const MVT::SimpleValueType Ty = VT.getSimpleVT().SimpleTy;
+  if (Ty == MVT::i32)
+    return true;
+  if (Ty == MVT::i64)
+    return false;
+  // Any other type is rejected outright.
+  llvm_unreachable("Unsupported type");
+}
+
+// Return a copy of Op with the kill flag cleared if Op is a register.
+static MachineOperand earlyUseOperand(MachineOperand Op) {
+  MachineOperand Early = Op;
+  if (Early.isReg())
+    Early.setIsKill(false);
+  return Early;
+}
+
+SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm)
+  : TargetLowering(tm, new TargetLoweringObjectFileELF()),
+    Subtarget(*tm.getSubtargetImpl()), TM(tm) {
+  MVT PtrVT = getPointerTy();
+
+  // Set up the register classes.
+  addRegisterClass(MVT::i32,  &SystemZ::GR32BitRegClass);
+  addRegisterClass(MVT::i64,  &SystemZ::GR64BitRegClass);
+  addRegisterClass(MVT::f32,  &SystemZ::FP32BitRegClass);
+  addRegisterClass(MVT::f64,  &SystemZ::FP64BitRegClass);
+  addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
+
+  // Compute derived properties from the register classes.
+  computeRegisterProperties();
+
+  // Set up special registers.
+  setExceptionPointerRegister(SystemZ::R6D);
+  setExceptionSelectorRegister(SystemZ::R7D);
+  setStackPointerRegisterToSaveRestore(SystemZ::R15D);
+
+  // TODO: It may be better to default to latency-oriented scheduling, however
+  // LLVM's current latency-oriented scheduler can't handle physreg definitions
+  // such as SystemZ has with PSW, so set this to the register-pressure
+  // scheduler, because it can.
+  setSchedulingPreference(Sched::RegPressure);
+
+  setBooleanContents(ZeroOrOneBooleanContent);
+  setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
+
+  // Instructions are strings of 2-byte aligned 2-byte values.
+  setMinFunctionAlignment(2);
+
+  // Handle operations that are handled in a similar way for all types.
+  for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
+       I <= MVT::LAST_FP_VALUETYPE;
+       ++I) {
+    MVT VT = MVT::SimpleValueType(I);
+    if (isTypeLegal(VT)) {
+      // Expand SETCC(X, Y, COND) into SELECT_CC(X, Y, 1, 0, COND).
+      setOperationAction(ISD::SETCC, VT, Expand);
+
+      // Expand SELECT(C, A, B) into SELECT_CC(C, 0, A, B, NE).
+      setOperationAction(ISD::SELECT, VT, Expand);
+
+      // Lower SELECT_CC and BR_CC into separate comparisons and branches.
+      setOperationAction(ISD::SELECT_CC, VT, Custom);
+      setOperationAction(ISD::BR_CC,     VT, Custom);
+    }
+  }
+
+  // Expand jump table branches as address arithmetic followed by an
+  // indirect jump.
+  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+
+  // Expand BRCOND into a BR_CC (see above).
+  setOperationAction(ISD::BRCOND, MVT::Other, Expand);
+
+  // Handle integer types.
+  for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
+       I <= MVT::LAST_INTEGER_VALUETYPE;
+       ++I) {
+    MVT VT = MVT::SimpleValueType(I);
+    if (isTypeLegal(VT)) {
+      // Expand individual DIV and REMs into DIVREMs.
+      setOperationAction(ISD::SDIV, VT, Expand);
+      setOperationAction(ISD::UDIV, VT, Expand);
+      setOperationAction(ISD::SREM, VT, Expand);
+      setOperationAction(ISD::UREM, VT, Expand);
+      setOperationAction(ISD::SDIVREM, VT, Custom);
+      setOperationAction(ISD::UDIVREM, VT, Custom);
+
+      // Expand ATOMIC_LOAD and ATOMIC_STORE using ATOMIC_CMP_SWAP.
+      // FIXME: probably much too conservative.
+      setOperationAction(ISD::ATOMIC_LOAD,  VT, Expand);
+      setOperationAction(ISD::ATOMIC_STORE, VT, Expand);
+
+      // No special instructions for these.
+      setOperationAction(ISD::CTPOP,           VT, Expand);
+      setOperationAction(ISD::CTTZ,            VT, Expand);
+      setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
+      setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
+      setOperationAction(ISD::ROTR,            VT, Expand);
+
+      // Use *MUL_LOHI where possible and a wider multiplication otherwise.
+      setOperationAction(ISD::MULHS, VT, Expand);
+      setOperationAction(ISD::MULHU, VT, Expand);
+
+      // We have instructions for signed but not unsigned FP conversion.
+      setOperationAction(ISD::FP_TO_UINT, VT, Expand);
+    }
+  }
+
+  // Type legalization will convert 8- and 16-bit atomic operations into
+  // forms that operate on i32s (but still keeping the original memory VT).
+  // Lower them into full i32 operations.
+  setOperationAction(ISD::ATOMIC_SWAP,      MVT::i32, Custom);
+  setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i32, Custom);
+  setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i32, Custom);
+  setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i32, Custom);
+  setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i32, Custom);
+  setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i32, Custom);
+  setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Custom);
+  setOperationAction(ISD::ATOMIC_LOAD_MIN,  MVT::i32, Custom);
+  setOperationAction(ISD::ATOMIC_LOAD_MAX,  MVT::i32, Custom);
+  setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Custom);
+  setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Custom);
+  setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i32, Custom);
+
+  // We have instructions for signed but not unsigned FP conversion.
+  // Handle unsigned 32-bit types as signed 64-bit types.
+  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote);
+  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
+
+  // We have native support for a 64-bit CTLZ, via FLOGR.
+  setOperationAction(ISD::CTLZ, MVT::i32, Promote);
+  setOperationAction(ISD::CTLZ, MVT::i64, Legal);
+
+  // Give LowerOperation the chance to replace 64-bit ORs with subregs.
+  setOperationAction(ISD::OR, MVT::i64, Custom);
+
+  // The architecture has 32-bit SMUL_LOHI and UMUL_LOHI (MR and MLR),
+  // but they aren't really worth using.  There is no 64-bit SMUL_LOHI,
+  // but there is a 64-bit UMUL_LOHI: MLGR.
+  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
+  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Custom);
+
+  // FIXME: Can we support these natively?
+  setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand);
+  setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
+  setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);
+
+  // We have native instructions for i8, i16 and i32 extensions, but not i1.
+  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+  setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+  setLoadExtAction(ISD::EXTLOAD,  MVT::i1, Promote);
+  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+  // Handle the various types of symbolic address.
+  setOperationAction(ISD::ConstantPool,     PtrVT, Custom);
+  setOperationAction(ISD::GlobalAddress,    PtrVT, Custom);
+  setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);
+  setOperationAction(ISD::BlockAddress,     PtrVT, Custom);
+  setOperationAction(ISD::JumpTable,        PtrVT, Custom);
+
+  // We need to handle dynamic allocations specially because of the
+  // 160-byte area at the bottom of the stack.
+  setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
+
+  // Use custom expanders so that we can force the function to use
+  // a frame pointer.
+  setOperationAction(ISD::STACKSAVE,    MVT::Other, Custom);
+  setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom);
+
+  // Expand these using getExceptionSelectorRegister() and
+  // getExceptionPointerRegister().
+  setOperationAction(ISD::EXCEPTIONADDR, PtrVT, Expand);
+  setOperationAction(ISD::EHSELECTION,   PtrVT, Expand);
+
+  // Handle floating-point types.
+  for (unsigned I = MVT::FIRST_FP_VALUETYPE;
+       I <= MVT::LAST_FP_VALUETYPE;
+       ++I) {
+    MVT VT = MVT::SimpleValueType(I);
+    if (isTypeLegal(VT)) {
+      // We can use FI for FRINT.
+      setOperationAction(ISD::FRINT, VT, Legal);
+
+      // No special instructions for these.
+      setOperationAction(ISD::FSIN, VT, Expand);
+      setOperationAction(ISD::FCOS, VT, Expand);
+      setOperationAction(ISD::FREM, VT, Expand);
+    }
+  }
+
+  // We have fused multiply-addition for f32 and f64 but not f128.
+  setOperationAction(ISD::FMA, MVT::f32,  Legal);
+  setOperationAction(ISD::FMA, MVT::f64,  Legal);
+  setOperationAction(ISD::FMA, MVT::f128, Expand);
+
+  // Needed so that we don't try to implement f128 constant loads using
+  // a load-and-extend of an f80 constant (in cases where the constant
+  // would fit in an f80).
+  setLoadExtAction(ISD::EXTLOAD, MVT::f80, Expand);
+
+  // Floating-point truncation and stores need to be done separately.
+  setTruncStoreAction(MVT::f64,  MVT::f32, Expand);
+  setTruncStoreAction(MVT::f128, MVT::f32, Expand);
+  setTruncStoreAction(MVT::f128, MVT::f64, Expand);
+
+  // We have 64-bit FPR<->GPR moves, but need special handling for
+  // 32-bit forms.
+  setOperationAction(ISD::BITCAST, MVT::i32, Custom);
+  setOperationAction(ISD::BITCAST, MVT::f32, Custom);
+
+  // VASTART and VACOPY need to deal with the SystemZ-specific varargs
+  // structure, but VAEND is a no-op.
+  setOperationAction(ISD::VASTART, MVT::Other, Custom);
+  setOperationAction(ISD::VACOPY,  MVT::Other, Custom);
+  setOperationAction(ISD::VAEND,   MVT::Other, Expand);
+}
+
+bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+  // Only zeros are legal: +0 is loaded with LZ?R and -0 with LZ?R;LC?BR.
+  return Imm.isNegZero() || Imm.isZero();
+}
+
+//===----------------------------------------------------------------------===//
+// Inline asm support
+//===----------------------------------------------------------------------===//
+
+TargetLowering::ConstraintType
+SystemZTargetLowering::getConstraintType(const std::string &Constraint) const {
+  // Only single-letter constraints are classified here.
+  if (Constraint.size() != 1)
+    return TargetLowering::getConstraintType(Constraint);
+  switch (Constraint[0]) {
+  case 'a': // Address register
+  case 'd': // Data register (equivalent to 'r')
+  case 'f': // Floating-point register
+  case 'r': // General-purpose register
+    return C_RegisterClass;
+
+  case 'Q': // Memory with base and unsigned 12-bit displacement
+  case 'R': // Likewise, plus an index
+  case 'S': // Memory with base and signed 20-bit displacement
+  case 'T': // Likewise, plus an index
+  case 'm': // Equivalent to 'T'.
+    return C_Memory;
+
+  case 'I': // Unsigned 8-bit constant
+  case 'J': // Unsigned 12-bit constant
+  case 'K': // Signed 16-bit constant
+  case 'L': // Signed 20-bit displacement (on all targets we support)
+  case 'M': // 0x7fffffff
+    return C_Other;
+
+  default: // Unknown letter: defer to the generic classification.
+    return TargetLowering::getConstraintType(Constraint);
+  }
+}
+
+TargetLowering::ConstraintWeight SystemZTargetLowering::
+getSingleConstraintMatchWeight(AsmOperandInfo &info,
+                               const char *constraint) const {
+  // Without an operand value nothing can be checked, so accept the
+  // constraint at the default (lowest) weight.
+  Value *Operand = info.CallOperandVal;
+  if (!Operand)
+    return CW_Default;
+
+  // Weigh the operand against the constraint letter.
+  Type *OperandTy = Operand->getType();
+  switch (*constraint) {
+  default:
+    // Not a SystemZ-specific letter: use the generic weighting.
+    return TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+
+  case 'a': // Address register
+  case 'd': // Data register (equivalent to 'r')
+  case 'r': // General-purpose register
+    if (OperandTy->isIntegerTy())
+      return CW_Register;
+    break;
+
+  case 'f': // Floating-point register
+    if (OperandTy->isFloatingPointTy())
+      return CW_Register;
+    break;
+
+  case 'I': // Unsigned 8-bit constant
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(Operand))
+      if (isUInt<8>(CI->getZExtValue()))
+        return CW_Constant;
+    break;
+
+  case 'J': // Unsigned 12-bit constant
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(Operand))
+      if (isUInt<12>(CI->getZExtValue()))
+        return CW_Constant;
+    break;
+
+  case 'K': // Signed 16-bit constant
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(Operand))
+      if (isInt<16>(CI->getSExtValue()))
+        return CW_Constant;
+    break;
+
+  case 'L': // Signed 20-bit displacement (on all targets we support)
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(Operand))
+      if (isInt<20>(CI->getSExtValue()))
+        return CW_Constant;
+    break;
+
+  case 'M': // 0x7fffffff
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(Operand))
+      if (CI->getZExtValue() == 0x7fffffff)
+        return CW_Constant;
+    break;
+  }
+  return CW_Invalid; // A known letter whose operand did not qualify.
+}
+
+std::pair<unsigned, const TargetRegisterClass *> SystemZTargetLowering::
+getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const {
+  // Anything but a single GCC constraint letter goes to the generic code.
+  if (Constraint.size() != 1)
+    return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+  switch (Constraint[0]) {
+  case 'd': // Data register (equivalent to 'r')
+  case 'r': // General-purpose register
+    if (VT == MVT::i128)
+      return std::make_pair(0U, &SystemZ::GR128BitRegClass);
+    if (VT == MVT::i64)
+      return std::make_pair(0U, &SystemZ::GR64BitRegClass);
+    return std::make_pair(0U, &SystemZ::GR32BitRegClass);
+
+  case 'a': // Address register
+    if (VT == MVT::i128)
+      return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
+    if (VT == MVT::i64)
+      return std::make_pair(0U, &SystemZ::ADDR64BitRegClass);
+    return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);
+
+  case 'f': // Floating-point register
+    if (VT == MVT::f128)
+      return std::make_pair(0U, &SystemZ::FP128BitRegClass);
+    if (VT == MVT::f64)
+      return std::make_pair(0U, &SystemZ::FP64BitRegClass);
+    return std::make_pair(0U, &SystemZ::FP32BitRegClass);
+  default: // Unknown letter: defer to the generic lookup.
+    return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+  }
+}
+
+void SystemZTargetLowering::
+LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
+                             std::vector<SDValue> &Ops,
+                             SelectionDAG &DAG) const {
+  // Only length-1 constraints are handled here; all other constraints fall through to the TargetLowering call at the end.
+  if (Constraint.length() == 1) {
+    switch (Constraint[0]) {
+    case 'I': // Unsigned 8-bit constant
+      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
+        if (isUInt<8>(C->getZExtValue()))
+          Ops.push_back(DAG.getTargetConstant(C->getZExtValue(),
+                                              Op.getValueType()));
+      return; // Ops is left unchanged if Op is not a constant in range (same for the cases below).
+
+    case 'J': // Unsigned 12-bit constant
+      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
+        if (isUInt<12>(C->getZExtValue()))
+          Ops.push_back(DAG.getTargetConstant(C->getZExtValue(),
+                                              Op.getValueType()));
+      return;
+
+    case 'K': // Signed 16-bit constant
+      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
+        if (isInt<16>(C->getSExtValue()))
+          Ops.push_back(DAG.getTargetConstant(C->getSExtValue(),
+                                              Op.getValueType()));
+      return;
+
+    case 'L': // Signed 20-bit displacement (on all targets we support)
+      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
+        if (isInt<20>(C->getSExtValue()))
+          Ops.push_back(DAG.getTargetConstant(C->getSExtValue(),
+                                              Op.getValueType()));
+      return;
+
+    case 'M': // 0x7fffffff
+      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
+        if (C->getZExtValue() == 0x7fffffff)
+          Ops.push_back(DAG.getTargetConstant(C->getZExtValue(),
+                                              Op.getValueType()));
+      return;
+    }
+  }
+  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); // Letters not listed above use the generic lowering.
+}
+
+//===----------------------------------------------------------------------===//
+// Calling conventions
+//===----------------------------------------------------------------------===//
+
+#include "SystemZGenCallingConv.inc"
+
+// Value is a value that has been passed to us in the location described by VA
+// (and so has type VA.getLocVT()).  Return Value converted to VA.getValVT():
+// extended values are truncated and indirect values are loaded using Chain.
+static SDValue convertLocVTToValVT(SelectionDAG &DAG, DebugLoc DL,
+                                   CCValAssign &VA, SDValue Chain,
+                                   SDValue Value) {
+  // If the argument has been promoted from a smaller type, insert an
+  // assertion to capture this.
+  if (VA.getLocInfo() == CCValAssign::SExt)
+    Value = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Value,
+                        DAG.getValueType(VA.getValVT()));
+  else if (VA.getLocInfo() == CCValAssign::ZExt)
+    Value = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Value,
+                        DAG.getValueType(VA.getValVT()));
+
+  if (VA.isExtInLoc()) // Extended in its location: narrow back to the value type.
+    Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
+  else if (VA.getLocInfo() == CCValAssign::Indirect) // Value is an address: load the real value from it.
+    Value = DAG.getLoad(VA.getValVT(), DL, Chain, Value,
+                        MachinePointerInfo(), false, false, false, 0);
+  else // The only remaining supported kind is Full, which needs no conversion.
+    assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
+  return Value;
+}
+
+// Value is a value of type VA.getValVT() that we need to copy into
+// the location described by VA.  Return a copy of Value converted to
+// VA.getLocVT().  The caller is responsible for handling indirect values.
+static SDValue convertValVTToLocVT(SelectionDAG &DAG, DebugLoc DL,
+                                   CCValAssign &VA, SDValue Value) {
+  switch (VA.getLocInfo()) {
+  case CCValAssign::SExt:
+    return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value);
+  case CCValAssign::ZExt:
+    return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
+  case CCValAssign::AExt:
+    return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
+  case CCValAssign::Full: // Already of the location type: nothing to do.
+    return Value;
+  default: // Every other kind, including Indirect, is not handled here.
+    llvm_unreachable("Unhandled getLocInfo()");
+  }
+}
+
+SDValue SystemZTargetLowering::
+LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
+                     const SmallVectorImpl<ISD::InputArg> &Ins,
+                     DebugLoc DL, SelectionDAG &DAG,
+                     SmallVectorImpl<SDValue> &InVals) const { // Appends one value per assigned argument location to InVals and returns the resulting chain.
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  SystemZMachineFunctionInfo *FuncInfo =
+    MF.getInfo<SystemZMachineFunctionInfo>();
+  const SystemZFrameLowering *TFL =
+    static_cast<const SystemZFrameLowering *>(TM.getFrameLowering());
+
+  // Assign locations to all of the incoming arguments.
+  SmallVector<CCValAssign, 16> ArgLocs;
+  CCState CCInfo(CallConv, IsVarArg, MF, TM, ArgLocs, *DAG.getContext());
+  CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
+
+  unsigned NumFixedGPRs = 0; // Count of register arguments with an integer location type.
+  unsigned NumFixedFPRs = 0; // Count of register arguments with a floating-point location type.
+  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
+    SDValue ArgValue;
+    CCValAssign &VA = ArgLocs[I];
+    EVT LocVT = VA.getLocVT();
+    if (VA.isRegLoc()) {
+      // Arguments passed in registers
+      const TargetRegisterClass *RC;
+      switch (LocVT.getSimpleVT().SimpleTy) {
+      default:
+        // Integers smaller than i64 should be promoted to i64.
+        llvm_unreachable("Unexpected argument type");
+      case MVT::i32:
+        NumFixedGPRs += 1;
+        RC = &SystemZ::GR32BitRegClass;
+        break;
+      case MVT::i64:
+        NumFixedGPRs += 1;
+        RC = &SystemZ::GR64BitRegClass;
+        break;
+      case MVT::f32:
+        NumFixedFPRs += 1;
+        RC = &SystemZ::FP32BitRegClass;
+        break;
+      case MVT::f64:
+        NumFixedFPRs += 1;
+        RC = &SystemZ::FP64BitRegClass;
+        break;
+      }
+
+      unsigned VReg = MRI.createVirtualRegister(RC); // The physical argument register becomes a live-in mapped to this virtual register.
+      MRI.addLiveIn(VA.getLocReg(), VReg);
+      ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
+    } else {
+      assert(VA.isMemLoc() && "Argument not register or memory");
+
+      // Create the frame index object for this incoming parameter.
+      int FI = MFI->CreateFixedObject(LocVT.getSizeInBits() / 8,
+                                      VA.getLocMemOffset(), true);
+
+      // Create the SelectionDAG nodes corresponding to a load
+      // from this parameter.  Unpromoted ints and floats are
+      // passed as right-justified 8-byte values, hence the +4 for i32 and f32.
+      EVT PtrVT = getPointerTy();
+      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+      if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
+        FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getIntPtrConstant(4));
+      ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
+                             MachinePointerInfo::getFixedStack(FI),
+                             false, false, false, 0);
+    }
+
+    // Convert the value of the argument register into the value that's
+    // being passed.
+    InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
+  }
+
+  if (IsVarArg) {
+    // Save the number of non-varargs registers for later use by va_start, etc.
+    FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
+    FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
+
+    // Likewise the address (in the form of a frame index) of where the
+    // first stack vararg would be.  The 1-byte size here is arbitrary.
+    int64_t StackSize = CCInfo.getNextStackOffset();
+    FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize, true));
+
+    // ...and a similar frame index for the caller-allocated save area
+    // that will be used to store the incoming registers.
+    int64_t RegSaveOffset = TFL->getOffsetOfLocalArea();
+    unsigned RegSaveIndex = MFI->CreateFixedObject(1, RegSaveOffset, true);
+    FuncInfo->setRegSaveFrameIndex(RegSaveIndex);
+
+    // Store the FPR varargs in the reserved frame slots.  (We store the
+    // GPRs as part of the prologue.)
+    if (NumFixedFPRs < SystemZ::NumArgFPRs) {
+      SDValue MemOps[SystemZ::NumArgFPRs];
+      for (unsigned I = NumFixedFPRs; I < SystemZ::NumArgFPRs; ++I) {
+        unsigned Offset = TFL->getRegSpillOffset(SystemZ::ArgFPRs[I]);
+        int FI = MFI->CreateFixedObject(8, RegSaveOffset + Offset, true);
+        SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+        unsigned VReg = MF.addLiveIn(SystemZ::ArgFPRs[I],
+                                     &SystemZ::FP64BitRegClass);
+        SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
+        MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN, // Each store is chained on its own register copy.
+                                 MachinePointerInfo::getFixedStack(FI),
+                                 false, false, 0);
+
+      }
+      // Join the stores, which are independent of one another.
+      Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
+                          &MemOps[NumFixedFPRs], // Only entries from NumFixedFPRs onwards were filled in above.
+                          SystemZ::NumArgFPRs - NumFixedFPRs);
+    }
+  }
+
+  return Chain;
+}
+
+SDValue
+SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
+                                 SmallVectorImpl<SDValue> &InVals) const { // Lowers an outgoing call: appends the call's results to InVals and returns the final chain.
+  SelectionDAG &DAG = CLI.DAG;
+  DebugLoc &DL = CLI.DL;
+  SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
+  SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
+  SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
+  SDValue Chain = CLI.Chain;
+  SDValue Callee = CLI.Callee;
+  bool &isTailCall = CLI.IsTailCall;
+  CallingConv::ID CallConv = CLI.CallConv;
+  bool IsVarArg = CLI.IsVarArg;
+  MachineFunction &MF = DAG.getMachineFunction();
+  EVT PtrVT = getPointerTy();
+
+  // SystemZ target does not yet support tail call optimization.
+  isTailCall = false; // Written back through the reference into CLI.IsTailCall.
+
+  // Analyze the operands of the call, assigning locations to each operand.
+  SmallVector<CCValAssign, 16> ArgLocs;
+  CCState ArgCCInfo(CallConv, IsVarArg, MF, TM, ArgLocs, *DAG.getContext());
+  ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
+
+  // Get a count of how many bytes are to be pushed on the stack.
+  unsigned NumBytes = ArgCCInfo.getNextStackOffset();
+
+  // Mark the start of the call.
+  Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes, PtrVT, true));
+
+  // Copy argument values to their designated locations.
+  SmallVector<std::pair<unsigned, SDValue>, 9> RegsToPass;
+  SmallVector<SDValue, 8> MemOpChains;
+  SDValue StackPtr; // Created lazily, the first time a stack argument is seen.
+  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
+    CCValAssign &VA = ArgLocs[I];
+    SDValue ArgValue = OutVals[I];
+
+    if (VA.getLocInfo() == CCValAssign::Indirect) {
+      // Store the argument in a stack slot and pass its address.
+      SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
+      int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
+      MemOpChains.push_back(DAG.getStore(Chain, DL, ArgValue, SpillSlot,
+                                         MachinePointerInfo::getFixedStack(FI),
+                                         false, false, 0));
+      ArgValue = SpillSlot;
+    } else
+      ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);
+
+    if (VA.isRegLoc())
+      // Queue up the argument copies and emit them at the end.
+      RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
+    else {
+      assert(VA.isMemLoc() && "Argument not register or memory");
+
+      // Work out the address of the stack slot.  Unpromoted ints and
+      // floats are passed as right-justified 8-byte values (hence the +4 below).
+      if (!StackPtr.getNode())
+        StackPtr = DAG.getCopyFromReg(Chain, DL, SystemZ::R15D, PtrVT);
+      unsigned Offset = SystemZMC::CallFrameSize + VA.getLocMemOffset();
+      if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
+        Offset += 4;
+      SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
+                                    DAG.getIntPtrConstant(Offset));
+
+      // Emit the store.
+      MemOpChains.push_back(DAG.getStore(Chain, DL, ArgValue, Address,
+                                         MachinePointerInfo(),
+                                         false, false, 0));
+    }
+  }
+
+  // Join the stores, which are independent of one another.
+  if (!MemOpChains.empty())
+    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
+                        &MemOpChains[0], MemOpChains.size());
+
+  // Build a sequence of copy-to-reg nodes, chained and glued together.
+  SDValue Glue;
+  for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
+    Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first,
+                             RegsToPass[I].second, Glue);
+    Glue = Chain.getValue(1);
+  }
+
+  // Accept direct calls by converting symbolic call addresses to the
+  // associated Target* opcodes.  Any other callee is passed through unchanged.
+  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
+    Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
+  } else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+    Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
+    Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
+  }
+
+  // The first call operand is the chain and the second is the target address.
+  SmallVector<SDValue, 8> Ops;
+  Ops.push_back(Chain);
+  Ops.push_back(Callee);
+
+  // Add argument registers to the end of the list so that they are
+  // known live into the call.
+  for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I)
+    Ops.push_back(DAG.getRegister(RegsToPass[I].first,
+                                  RegsToPass[I].second.getValueType()));
+
+  // Glue the call to the argument copies, if any.
+  if (Glue.getNode())
+    Ops.push_back(Glue);
+
+  // Emit the call.
+  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+  Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, &Ops[0], Ops.size());
+  Glue = Chain.getValue(1);
+
+  // Mark the end of the call, which is glued to the call itself.
+  Chain = DAG.getCALLSEQ_END(Chain,
+                             DAG.getConstant(NumBytes, PtrVT, true),
+                             DAG.getConstant(0, PtrVT, true),
+                             Glue);
+  Glue = Chain.getValue(1);
+
+  // Assign locations to each value returned by this call.
+  SmallVector<CCValAssign, 16> RetLocs;
+  CCState RetCCInfo(CallConv, IsVarArg, MF, TM, RetLocs, *DAG.getContext());
+  RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);
+
+  // Copy all of the result registers out of their specified physreg.
+  for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
+    CCValAssign &VA = RetLocs[I];
+
+    // Copy the value out, gluing the copy to the end of the call sequence.
+    SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(),
+                                          VA.getLocVT(), Glue);
+    Chain = RetValue.getValue(1);
+    Glue = RetValue.getValue(2);
+
+    // Convert the value of the return register into the value that's
+    // being returned.
+    InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue));
+  }
+
+  return Chain;
+}
+
+SDValue
+SystemZTargetLowering::LowerReturn(SDValue Chain,
+                                   CallingConv::ID CallConv, bool IsVarArg,
+                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
+                                   const SmallVectorImpl<SDValue> &OutVals,
+                                   DebugLoc DL, SelectionDAG &DAG) const { // Builds the RET_FLAG node that returns OutVals in their assigned registers.
+  MachineFunction &MF = DAG.getMachineFunction();
+
+  // Assign locations to each returned value.
+  SmallVector<CCValAssign, 16> RetLocs;
+  CCState RetCCInfo(CallConv, IsVarArg, MF, TM, RetLocs, *DAG.getContext());
+  RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);
+
+  // Quick exit for void returns
+  if (RetLocs.empty())
+    return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, Chain);
+
+  // Copy the result values into the output registers.
+  SDValue Glue;
+  SmallVector<SDValue, 4> RetOps;
+  RetOps.push_back(Chain); // Placeholder; replaced by the final chain after the loop.
+  for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
+    CCValAssign &VA = RetLocs[I];
+    SDValue RetValue = OutVals[I];
+
+    // Return values must all have been assigned to registers.
+    assert(VA.isRegLoc() && "Can only return in registers!");
+
+    // Promote the value as required.
+    RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue);
+
+    // Chain and glue the copies together.
+    unsigned Reg = VA.getLocReg();
+    Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue);
+    Glue = Chain.getValue(1);
+    RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT())); // Record the register as an operand of the return node.
+  }
+
+  // Update chain and glue.
+  RetOps[0] = Chain;
+  if (Glue.getNode())
+    RetOps.push_back(Glue);
+
+  return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other,
+                     RetOps.data(), RetOps.size());
+}
+
+// CC is a comparison that will be implemented using an integer or
+// floating-point comparison.  Return the condition code mask for
+// a branch on true.  In the integer case, CCMASK_CMP_UO is set for
+// unsigned comparisons and clear for signed ones.  In the floating-point
+// case, CCMASK_CMP_UO has its normal mask meaning (unordered).  SETxx and SETOxx map to CCMASK_CMP_xx; SETUxx adds CCMASK_CMP_UO.
+static unsigned CCMaskForCondCode(ISD::CondCode CC) {
+#define CONV(X) \
+  case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \
+  case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \
+  case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X
+
+  switch (CC) {
+  default: // Any condition code not listed below, integer or floating-point.
+    llvm_unreachable("Invalid integer condition!");
+
+  CONV(EQ);
+  CONV(NE);
+  CONV(GT);
+  CONV(GE);
+  CONV(LT);
+  CONV(LE);
+
+  case ISD::SETO:  return SystemZ::CCMASK_CMP_O;
+  case ISD::SETUO: return SystemZ::CCMASK_CMP_UO;
+  }
+#undef CONV
+}
+
+// If a comparison described by IsUnsigned, CCMask, CmpOp0 and CmpOp1
+// is suitable for CLI(Y), CHHSI or CLHHSI, adjust the operands as necessary.  All four are left untouched otherwise.
+static void adjustSubwordCmp(SelectionDAG &DAG, bool &IsUnsigned,
+                             SDValue &CmpOp0, SDValue &CmpOp1,
+                             unsigned &CCMask) {
+  // For us to make any changes, it must be a comparison between a single-use
+  // load and a constant.
+  if (!CmpOp0.hasOneUse() ||
+      CmpOp0.getOpcode() != ISD::LOAD ||
+      CmpOp1.getOpcode() != ISD::Constant)
+    return;
+
+  // We must have an 8- or 16-bit load.
+  LoadSDNode *Load = cast<LoadSDNode>(CmpOp0);
+  unsigned NumBits = Load->getMemoryVT().getStoreSizeInBits();
+  if (NumBits != 8 && NumBits != 16)
+    return;
+
+  // The load must be an extending one and the constant must be within the
+  // range of the unextended value.
+  ConstantSDNode *Constant = cast<ConstantSDNode>(CmpOp1);
+  uint64_t Value = Constant->getZExtValue();
+  uint64_t Mask = (1 << NumBits) - 1; // NumBits is 8 or 16 here, so the int shift cannot overflow.
+  if (Load->getExtensionType() == ISD::SEXTLOAD) {
+    int64_t SignedValue = Constant->getSExtValue();
+    if (uint64_t(SignedValue) + (1 << (NumBits - 1)) > Mask) // Outside the signed NumBits-bit range.
+      return;
+    // Unsigned comparison between two sign-extended values is equivalent
+    // to unsigned comparison between two zero-extended values.
+    if (IsUnsigned)
+      Value &= Mask;
+    else if (CCMask == SystemZ::CCMASK_CMP_EQ ||
+             CCMask == SystemZ::CCMASK_CMP_NE)
+      // Any choice of IsUnsigned is OK for equality comparisons.
+      // We could use either CHHSI or CLHHSI for 16-bit comparisons,
+      // but since we use CLHHSI for zero extensions, it seems better
+      // to be consistent and do the same here.
+      Value &= Mask, IsUnsigned = true;
+    else if (NumBits == 8) {
+      // Try to treat the comparison as unsigned, so that we can use CLI.
+      // Adjust CCMask and Value as necessary.
+      if (Value == 0 && CCMask == SystemZ::CCMASK_CMP_LT)
+        // Test whether the high bit of the byte is set.
+        Value = 127, CCMask = SystemZ::CCMASK_CMP_GT, IsUnsigned = true;
+      else if (SignedValue == -1 && CCMask == SystemZ::CCMASK_CMP_GT)
+        // Test whether the high bit of the byte is clear.
+        Value = 128, CCMask = SystemZ::CCMASK_CMP_LT, IsUnsigned = true;
+      else
+        // No instruction exists for this combination.
+        return;
+    } // Signed 16-bit ordering comparisons fall through with IsUnsigned still false.
+  } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
+    if (Value > Mask)
+      return;
+    // Signed comparison between two zero-extended values is equivalent
+    // to unsigned comparison.
+    IsUnsigned = true;
+  } else
+    return;
+
+  // Make sure that the first operand is an i32 of the right extension type.
+  ISD::LoadExtType ExtType = IsUnsigned ? ISD::ZEXTLOAD : ISD::SEXTLOAD;
+  if (CmpOp0.getValueType() != MVT::i32 ||
+      Load->getExtensionType() != ExtType)
+    CmpOp0 = DAG.getExtLoad(ExtType, Load->getDebugLoc(), MVT::i32,
+                            Load->getChain(), Load->getBasePtr(),
+                            Load->getPointerInfo(), Load->getMemoryVT(),
+                            Load->isVolatile(), Load->isNonTemporal(),
+                            Load->getAlignment());
+
+  // Make sure that the second operand is an i32 with the right value.
+  if (CmpOp1.getValueType() != MVT::i32 ||
+      Value != Constant->getZExtValue())
+    CmpOp1 = DAG.getConstant(Value, MVT::i32);
+}
+
+// Return true if a comparison described by CCMask, CmpOp0 and CmpOp1
+// is an equality comparison that is better implemented using unsigned
+// rather than signed comparison instructions.  DAG is currently unused.
+static bool preferUnsignedComparison(SelectionDAG &DAG, SDValue CmpOp0,
+                                     SDValue CmpOp1, unsigned CCMask) {
+  // The test must be for equality or inequality.
+  if (CCMask != SystemZ::CCMASK_CMP_EQ && CCMask != SystemZ::CCMASK_CMP_NE)
+    return false;
+
+  if (CmpOp1.getOpcode() == ISD::Constant) {
+    uint64_t Value = cast<ConstantSDNode>(CmpOp1)->getSExtValue(); // Sign-extended constant, reinterpreted as unsigned.
+
+    // If we're comparing with memory, prefer unsigned comparisons for
+    // values that are in the unsigned 16-bit range but not the signed
+    // 16-bit range.  We want to use CLFHSI and CLGHSI.
+    if (CmpOp0.hasOneUse() &&
+        ISD::isNormalLoad(CmpOp0.getNode()) &&
+        (Value >= 32768 && Value < 65536))
+      return true;
+
+    // Use unsigned comparisons for values that are in the CLGFI range
+    // but not in the CGFI range, i.e. Value in [0x80000000, 0xffffffff].
+    if (CmpOp0.getValueType() == MVT::i64 && (Value >> 31) == 1)
+      return true;
+
+    return false;
+  }
+
+  // Prefer CL for zero-extended loads.
+  if (CmpOp1.getOpcode() == ISD::ZERO_EXTEND ||
+      ISD::isZEXTLoad(CmpOp1.getNode()))
+    return true;
+
+  // ...and for "in-register" zero extensions (an i64 AND with 0xffffffff).
+  if (CmpOp1.getOpcode() == ISD::AND && CmpOp1.getValueType() == MVT::i64) {
+    SDValue Mask = CmpOp1.getOperand(1);
+    if (Mask.getOpcode() == ISD::Constant &&
+        cast<ConstantSDNode>(Mask)->getZExtValue() == 0xffffffff)
+      return true;
+  }
+
+  return false;
+}
+
+// Build the glue-producing target node that compares CmpOp0 with CmpOp1,
+// and store the 4-bit condition-code mask corresponding to CC in CCMask.
+static SDValue emitCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
+                       ISD::CondCode CC, unsigned &CCMask) {
+  CCMask = CCMaskForCondCode(CC);
+  unsigned Opcode = SystemZISD::CMP;
+  if (!CmpOp0.getValueType().isFloatingPoint()) {
+    // For integers CCMASK_CMP_UO just marks unsignedness; strip it.
+    bool IsUnsigned = (CCMask & SystemZ::CCMASK_CMP_UO) != 0;
+    CCMask &= ~SystemZ::CCMASK_CMP_UO;
+    adjustSubwordCmp(DAG, IsUnsigned, CmpOp0, CmpOp1, CCMask);
+    if (preferUnsignedComparison(DAG, CmpOp0, CmpOp1, CCMask) || IsUnsigned)
+      Opcode = SystemZISD::UCMP;
+  }
+
+  DebugLoc DL = CmpOp0.getDebugLoc();
+  return DAG.getNode(Opcode, DL, MVT::Glue, CmpOp0, CmpOp1);
+}
+
+// Lower a binary operation whose two VT results occupy the two halves of
+// a GR128 pair.  Op0 and Op1 are the VT operands.  Extend is the machine
+// opcode that widens Op0 to a GR128 and Opcode is the operation applied to
+// the widened Op0 and the (unextended) Op1.  On return, Even holds the
+// even-register result and Odd the odd-register result.
+static void lowerGR128Binary(SelectionDAG &DAG, DebugLoc DL, EVT VT,
+                             unsigned Extend, unsigned Opcode,
+                             SDValue Op0, SDValue Op1,
+                             SDValue &Even, SDValue &Odd) {
+  // Widen Op0, then apply the operation to the register pair.
+  SDValue Wide(DAG.getMachineNode(Extend, DL, MVT::Untyped, Op0), 0);
+  SDValue Pair = DAG.getNode(Opcode, DL, MVT::Untyped, Wide, Op1);
+
+  // Extract each half through the subregister index matching VT's width.
+  const bool Narrow = is32Bit(VT);
+  SDValue EvenIdx = DAG.getTargetConstant(SystemZ::even128(Narrow), VT);
+  SDValue OddIdx = DAG.getTargetConstant(SystemZ::odd128(Narrow), VT);
+
+  const unsigned Extract = TargetOpcode::EXTRACT_SUBREG;
+  Even = SDValue(DAG.getMachineNode(Extract, DL, VT, Pair, EvenIdx), 0);
+  Odd = SDValue(DAG.getMachineNode(Extract, DL, VT, Pair, OddIdx), 0);
+}
+
+SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
+  // BR_CC operands: chain, condition code, LHS, RHS, branch destination.
+  SDValue Chain = Op.getOperand(0);
+  ISD::CondCode Cond = cast<CondCodeSDNode>(Op.getOperand(1))->get();
+  SDValue LHS = Op.getOperand(2);
+  SDValue RHS = Op.getOperand(3);
+  SDValue Target = Op.getOperand(4);
+  unsigned BranchMask;
+  SDValue Glue = emitCmp(DAG, LHS, RHS, Cond, BranchMask);
+  SDValue MaskOp = DAG.getConstant(BranchMask, MVT::i32);
+  return DAG.getNode(SystemZISD::BR_CCMASK, Op.getDebugLoc(),
+                     Op.getValueType(), Chain, MaskOp, Target, Glue);
+}
+
+SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
+                                              SelectionDAG &DAG) const {
+  // SELECT_CC operands: LHS, RHS, value if true, value if false, condition.
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+  ISD::CondCode Cond = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+
+  // Emit the comparison; emitCmp sets the condition-code mask for Cond.
+  unsigned SelectMask;
+  SDValue Glue = emitCmp(DAG, LHS, RHS, Cond, SelectMask);
+
+  // Operand order of the node: true value, false value, mask, compare glue.
+  SDValue Ops[] = {
+    Op.getOperand(2),
+    Op.getOperand(3),
+    DAG.getConstant(SelectMask, MVT::i32),
+    Glue
+  };
+  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
+  return DAG.getNode(SystemZISD::SELECT_CCMASK, Op.getDebugLoc(), VTs, Ops, 4);
+}
+
+SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
+                                                  SelectionDAG &DAG) const {
+  const GlobalValue *GV = Node->getGlobal();
+  DebugLoc DL = Node->getDebugLoc();
+  EVT PtrVT = getPointerTy();
+  Reloc::Model RelocModel = TM.getRelocationModel();
+  CodeModel::Model CodeModelKind = TM.getCodeModel();
+
+  // The part of the offset that has not been folded into the symbol
+  // reference and so still needs an explicit addition.
+  int64_t Residual = Node->getOffset();
+
+  SDValue Addr;
+  if (!Subtarget.isPC32DBLSymbol(GV, RelocModel, CodeModelKind)) {
+    // Load the address from the GOT.  None of the offset is folded here.
+    Addr = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
+    Addr = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Addr);
+    Addr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Addr,
+                       MachinePointerInfo::getGOT(), false, false, false, 0);
+  } else {
+    // The folded offset must be aligned to a halfword.  Even offsets are
+    // folded whole; odd offsets are split into an "anchor" with the low
+    // 12 bits cleared and a remainder that is added explicitly.
+    // FIXME check whether there is a better way of handling this.
+    bool IsOdd = (Residual & 1) != 0;
+    int64_t Folded = IsOdd ? int64_t(Residual & ~uint64_t(0xfff)) : Residual;
+    Residual = IsOdd ? (Residual & 0xfff) : 0;
+    Addr = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Folded);
+    Addr = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Addr);
+  }
+
+  // Nothing more to do if the whole offset was folded.
+  if (Residual == 0)
+    return Addr;
+
+  SDValue Rest = DAG.getConstant(Residual, PtrVT);
+  return DAG.getNode(ISD::ADD, DL, PtrVT, Addr, Rest);
+}
+
+// Lower the address of thread-local global Node.  Only the local-exec TLS
+// model is handled: the result is the thread pointer (assembled from access
+// registers 0 and 1) plus the variable's NTPOFF offset, which is loaded
+// from the constant pool.
+SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
+                                                     SelectionDAG &DAG) const {
+  DebugLoc DL = Node->getDebugLoc();
+  const GlobalValue *GV = Node->getGlobal();
+  EVT PtrVT = getPointerTy();
+  TLSModel::Model model = TM.getTLSModel(GV);
+
+  // Other TLS models can be requested by valid input, so report a proper
+  // error rather than using llvm_unreachable, which is undefined behavior
+  // in builds without assertions.
+  if (model != TLSModel::LocalExec)
+    report_fatal_error("only local-exec TLS mode supported");
+
+  // The high part of the thread pointer is in access register 0.
+  SDValue TPHi = DAG.getNode(SystemZISD::EXTRACT_ACCESS, DL, MVT::i32,
+                             DAG.getConstant(0, MVT::i32));
+  TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);
+
+  // The low part of the thread pointer is in access register 1.
+  SDValue TPLo = DAG.getNode(SystemZISD::EXTRACT_ACCESS, DL, MVT::i32,
+                             DAG.getConstant(1, MVT::i32));
+  TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);
+
+  // Merge them into a single 64-bit address.  The high part can be
+  // any-extended because the shift discards its upper bits; the low part
+  // must be zero-extended so that the OR does not disturb the high part.
+  SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
+                                    DAG.getConstant(32, PtrVT));
+  SDValue TP = DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
+
+  // Get the offset of GA from the thread pointer.
+  SystemZConstantPoolValue *CPV =
+    SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);
+
+  // Force the offset into the constant pool and load it from there.
+  SDValue CPAddr = DAG.getConstantPool(CPV, PtrVT, 8);
+  SDValue Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(),
+                               CPAddr, MachinePointerInfo::getConstantPool(),
+                               false, false, false, 0);
+
+  // Add the base and offset together.
+  return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset);
+}
+
+SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
+                                                 SelectionDAG &DAG) const {
+  EVT PtrVT = getPointerTy();
+
+  // Build the target-specific block address (offset included) and wrap it
+  // in a PC-relative address node.
+  SDValue Target = DAG.getTargetBlockAddress(Node->getBlockAddress(), PtrVT,
+                                             Node->getOffset());
+  return DAG.getNode(SystemZISD::PCREL_WRAPPER, Node->getDebugLoc(), PtrVT,
+                     Target);
+}
+
+SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
+                                              SelectionDAG &DAG) const {
+  EVT PtrVT = getPointerTy();
+  SDValue Table = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
+
+  // The PC-relative wrapper lets LARL load the address of the table.
+  return DAG.getNode(SystemZISD::PCREL_WRAPPER, JT->getDebugLoc(), PtrVT,
+                     Table);
+}
+
+SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
+                                                 SelectionDAG &DAG) const {
+  EVT PtrVT = getPointerTy();
+  unsigned Alignment = CP->getAlignment();
+
+  // Ordinary constants carry an offset; machine-specific entries are
+  // created from their machine constant pool value without one.
+  SDValue Entry;
+  if (!CP->isMachineConstantPoolEntry())
+    Entry = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, Alignment,
+                                      CP->getOffset());
+  else
+    Entry = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
+                                      Alignment);
+
+  // The PC-relative wrapper lets LARL load the address of the entry.
+  return DAG.getNode(SystemZISD::PCREL_WRAPPER, CP->getDebugLoc(), PtrVT,
+                     Entry);
+}
+
+SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
+                                            SelectionDAG &DAG) const {
+  DebugLoc DL = Op.getDebugLoc();
+  SDValue Src = Op.getOperand(0);
+  EVT FromVT = Src.getValueType();
+  EVT ToVT = Op.getValueType();
+
+  // Both directions go through a 64-bit value whose upper 32 bits hold
+  // the data and whose f64 form exposes it as the 32-bit subregister.
+  SDValue SubIdx = DAG.getTargetConstant(SystemZ::subreg_32bit, MVT::i64);
+  SDValue ShiftAmt = DAG.getConstant(32, MVT::i64);
+
+  if (FromVT == MVT::f32 && ToVT == MVT::i32) {
+    // Insert the f32 into an otherwise undefined f64, reinterpret that as
+    // an i64 and move the upper half down into an i32.
+    SDValue Undef(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL,
+                                     MVT::f64), 0);
+    SDValue Wide(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
+                                    MVT::f64, Undef, Src, SubIdx), 0);
+    SDValue AsInt = DAG.getNode(ISD::BITCAST, DL, MVT::i64, Wide);
+    SDValue Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, AsInt, ShiftAmt);
+    return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shifted);
+  }
+
+  if (FromVT == MVT::i32 && ToVT == MVT::f32) {
+    // Move the i32 into the upper half of an i64, reinterpret that as an
+    // f64 and extract the 32-bit subregister.
+    SDValue Wide = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Src);
+    SDValue Shifted = DAG.getNode(ISD::SHL, DL, MVT::i64, Wide, ShiftAmt);
+    SDValue AsFP = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Shifted);
+    return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
+                                      MVT::f32, AsFP, SubIdx), 0);
+  }
+
+  llvm_unreachable("Unexpected bitcast combination");
+}
+
+SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
+                                            SelectionDAG &DAG) const {
+  SystemZMachineFunctionInfo *FuncInfo =
+    DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>();
+  EVT PtrVT = getPointerTy();
+  DebugLoc DL = Op.getDebugLoc();
+
+  // Operands: 0 = chain, 1 = address of the va_list, 2 = its source value.
+  SDValue Chain = Op.getOperand(0);
+  SDValue Base = Op.getOperand(1);
+  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+
+  // The va_list is written as four consecutive 8-byte fields, in this
+  // order: first GPR, first FPR, varargs frame index, register save frame
+  // index.
+  const unsigned NumFields = 4;
+  const unsigned FieldSize = 8;
+  SDValue Fields[NumFields] = {
+    DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), PtrVT),
+    DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), PtrVT),
+    DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT),
+    DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT)
+  };
+
+  // Emit one store per field, all hanging off the incoming chain, and
+  // join them with a TokenFactor.
+  SDValue Stores[NumFields];
+  for (unsigned I = 0; I != NumFields; ++I) {
+    unsigned Offset = I * FieldSize;
+    SDValue Slot = Base;
+    if (I != 0) {
+      SDValue Delta = DAG.getIntPtrConstant(Offset);
+      Slot = DAG.getNode(ISD::ADD, DL, PtrVT, Base, Delta);
+    }
+    Stores[I] = DAG.getStore(Chain, DL, Fields[I], Slot,
+                             MachinePointerInfo(SV, Offset),
+                             false, false, 0);
+  }
+  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, NumFields);
+}
+
+SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
+                                           SelectionDAG &DAG) const {
+  // Operands: 0 = chain, 1 = destination, 2 = source, 3 and 4 = the
+  // source values of the destination and source.
+  const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
+  const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
+
+  // Copy the va_list as a 32-byte, 8-byte-aligned block.
+  SDValue Size = DAG.getIntPtrConstant(32);
+  return DAG.getMemcpy(Op.getOperand(0), Op.getDebugLoc(), Op.getOperand(1),
+                       Op.getOperand(2), Size, /*Align*/8,
+                       /*isVolatile*/false, /*AlwaysInline*/false,
+                       MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
+}
+
+SDValue SystemZTargetLowering::
+lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
+  DebugLoc DL = Op.getDebugLoc();
+  SDValue Chain = Op.getOperand(0);
+  SDValue Bytes = Op.getOperand(1);
+  unsigned SPReg = getStackPointerRegisterToSaveRestore();
+
+  // Read the stack pointer, lower it by the requested size and write the
+  // new value back.
+  SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);
+  SDValue NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, Bytes);
+  Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
+
+  // The allocated data lives above the 160 bytes allocated for the standard
+  // frame, plus any outgoing stack arguments.  That amount is not known
+  // yet, so an ADJDYNALLOC placeholder stands in for it.
+  SDValue Adjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
+  SDValue Allocated = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, Adjust);
+
+  // Return the address of the allocation together with the output chain.
+  SDValue Results[2] = { Allocated, Chain };
+  return DAG.getMergeValues(Results, 2, DL);
+}
+
+SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
+                                              SelectionDAG &DAG) const {
+  DebugLoc DL = Op.getDebugLoc();
+  EVT VT = Op.getValueType();
+  assert(!is32Bit(VT) && "Only support 64-bit UMUL_LOHI");
+
+  // UMUL_LOHI64 leaves the high half of the product in the even register
+  // and the low half in the odd register.
+  SDValue High, Low;
+  lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::UMUL_LOHI64,
+                   Op.getOperand(0), Op.getOperand(1), High, Low);
+
+  // UMUL_LOHI is defined to return the low half first.
+  SDValue Results[2] = { Low, High };
+  return DAG.getMergeValues(Results, 2, DL);
+}
+
+SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
+                                            SelectionDAG &DAG) const {
+  DebugLoc DL = Op.getDebugLoc();
+  EVT VT = Op.getValueType();
+  SDValue Dividend = Op.getOperand(0);
+  SDValue Divisor = Op.getOperand(1);
+
+  // 32-bit division uses DSGF, so sign-extend both operands to 64 bits.
+  if (is32Bit(VT)) {
+    Dividend = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Dividend);
+    Divisor = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Divisor);
+  }
+
+  // DSG(F) takes a 64-bit dividend, so the even register of the GR128
+  // input is "don't care".  On return the even register holds the
+  // remainder and the odd register holds the quotient.
+  SDValue Remainder, Quotient;
+  lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::SDIVREM64,
+                   Dividend, Divisor, Remainder, Quotient);
+
+  // The quotient is returned first, then the remainder.
+  SDValue Results[2] = { Quotient, Remainder };
+  return DAG.getMergeValues(Results, 2, DL);
+}
+
+SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
+                                            SelectionDAG &DAG) const {
+  DebugLoc DL = Op.getDebugLoc();
+  EVT VT = Op.getValueType();
+
+  // DL(G) uses a double-width dividend, so the even register of the GR128
+  // input must be cleared; the ZEXT128 pseudos do that.  Pick the variant
+  // that matches the width of VT.
+  bool Narrow = is32Bit(VT);
+  unsigned Extend = Narrow ? SystemZ::ZEXT128_32 : SystemZ::ZEXT128_64;
+  unsigned Opcode = Narrow ? SystemZISD::UDIVREM32 : SystemZISD::UDIVREM64;
+
+  // On return the even register holds the remainder and the odd register
+  // holds the quotient.
+  SDValue Remainder, Quotient;
+  lowerGR128Binary(DAG, DL, VT, Extend, Opcode,
+                   Op.getOperand(0), Op.getOperand(1), Remainder, Quotient);
+
+  // The quotient is returned first, then the remainder.
+  SDValue Results[2] = { Quotient, Remainder };
+  return DAG.getMergeValues(Results, 2, DL);
+}
+
+SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
+  assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");
+
+  // Record each operand together with the bits that are known to be zero
+  // in it.
+  SDValue Ops[2];
+  uint64_t Zeros[2];
+  for (unsigned I = 0; I != 2; ++I) {
+    APInt KnownZero, KnownOne;
+    Ops[I] = Op.getOperand(I);
+    DAG.ComputeMaskedBits(Ops[I], KnownZero, KnownOne);
+    Zeros[I] = KnownZero.getZExtValue();
+  }
+
+  // The transformation needs one operand whose upper 32 bits are known
+  // zero (the low operand) and another whose lower 32 bits are known zero
+  // (the high operand).  Leave the OR alone otherwise.
+  const bool UpperZero[2] = { (Zeros[0] >> 32) == 0xffffffff,
+                              (Zeros[1] >> 32) == 0xffffffff };
+  const bool LowerZero[2] = { uint32_t(Zeros[0]) == 0xffffffff,
+                              uint32_t(Zeros[1]) == 0xffffffff };
+  unsigned Low;
+  if (UpperZero[0] && LowerZero[1])
+    Low = 0;
+  else if (UpperZero[1] && LowerZero[0])
+    Low = 1;
+  else
+    return Op;
+  const unsigned High = 1 - Low;
+
+  SDValue LowOp = Ops[Low];
+  SDValue HighOp = Ops[High];
+
+  // A constant high part is better handled by IILH.
+  if (HighOp.getOpcode() == ISD::Constant)
+    return Op;
+
+  // A constant low part that is outside the range of LHI is better
+  // handled by IILF.
+  if (LowOp.getOpcode() == ISD::Constant) {
+    int64_t LowImm = int32_t(cast<ConstantSDNode>(LowOp)->getZExtValue());
+    if (!isInt<16>(LowImm))
+      return Op;
+  }
+
+  // If the high part is an AND with a constant that preserves the high
+  // 32 bits and only masks out low bits, the AND can be skipped.
+  if (HighOp.getOpcode() == ISD::AND &&
+      HighOp.getOperand(1).getOpcode() == ISD::Constant) {
+    uint64_t AndMask =
+      cast<ConstantSDNode>(HighOp.getOperand(1))->getZExtValue();
+    if (((AndMask | Zeros[High]) >> 32) == 0xffffffff)
+      HighOp = HighOp.getOperand(0);
+  }
+
+  // All GR32 operations only change the low 32 bits, so truncate the low
+  // operand to an i32 and insert it directly into the high operand using
+  // a subreg.  The interesting cases are those where the truncation can
+  // be folded.
+  DebugLoc DL = Op.getDebugLoc();
+  SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp);
+  SDValue SubIdx = DAG.getTargetConstant(SystemZ::subreg_32bit, MVT::i64);
+  return SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
+                                    MVT::i64, HighOp, Low32, SubIdx), 0);
+}
+
+// Lower ATOMIC_LOAD_* or ATOMIC_SWAP operation Op, whose memory type is
+// 8, 16 or 32 bits wide.  32-bit operations are returned unchanged.
+// Narrower ones are rewritten as the fullword SystemZISD operation given
+// by Opcode, acting on the aligned word that contains the field.
+SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
+                                                SelectionDAG &DAG,
+                                                unsigned Opcode) const {
+  AtomicSDNode *Node = cast<AtomicSDNode>(Op.getNode());
+
+  // Fullword operations need no code outside the main loop.
+  EVT WideVT = MVT::i32;
+  EVT NarrowVT = Node->getMemoryVT();
+  if (NarrowVT == WideVT)
+    return Op;
+
+  DebugLoc DL = Node->getDebugLoc();
+  MachineMemOperand *MMO = Node->getMemOperand();
+  SDValue ChainIn = Node->getChain();
+  SDValue Addr = Node->getBasePtr();
+  SDValue Src2 = Node->getVal();
+  EVT PtrVT = Addr.getValueType();
+  int64_t BitSize = NarrowVT.getSizeInBits();
+
+  // Subtracting a constant is the same as adding its negation.
+  if (Opcode == SystemZISD::ATOMIC_LOADW_SUB) {
+    ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Src2);
+    if (Const) {
+      Opcode = SystemZISD::ATOMIC_LOADW_ADD;
+      Src2 = DAG.getConstant(-Const->getSExtValue(), Src2.getValueType());
+    }
+  }
+
+  // Clear the low two address bits to get the address of the containing
+  // word.
+  SDValue WordMask = DAG.getConstant(-4, PtrVT);
+  SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr, WordMask);
+
+  // The address times 8, truncated to 32 bits, is the number of bits that
+  // the word must be rotated left in order to bring the field to the top
+  // bits of a GR32.
+  SDValue Three = DAG.getConstant(3, PtrVT);
+  SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr, Three);
+  BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
+
+  // Negating that gives the complementing amount, for rotating a field in
+  // the top bits back to its proper position.
+  SDValue Zero = DAG.getConstant(0, WideVT);
+  SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT, Zero, BitShift);
+
+  // Prepare the source operand for the inner loop.  ATOMIC_SWAPW uses
+  // RISBG to rotate the field left, so its source is passed as-is.  All
+  // other operations need the source shifted to the top bits in advance
+  // (a shift that can be folded if the source is constant).
+  if (Opcode != SystemZISD::ATOMIC_SWAPW) {
+    SDValue TopShift = DAG.getConstant(32 - BitSize, WideVT);
+    Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2, TopShift);
+  }
+
+  // AND and NAND additionally need the bits below the field to be set;
+  // for other opcodes they are left clear.
+  bool NeedsLowOnes = (Opcode == SystemZISD::ATOMIC_LOADW_AND ||
+                       Opcode == SystemZISD::ATOMIC_LOADW_NAND);
+  if (NeedsLowOnes) {
+    SDValue LowOnes = DAG.getConstant(uint32_t(-1) >> BitSize, WideVT);
+    Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2, LowOnes);
+  }
+
+  // Build the fullword node.  It yields the loaded word and a chain.
+  SDVTList ResultTys = DAG.getVTList(WideVT, MVT::Other);
+  SDValue WideOps[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift,
+                        DAG.getConstant(BitSize, WideVT) };
+  SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, ResultTys, WideOps,
+                                             array_lengthof(WideOps),
+                                             NarrowVT, MMO);
+
+  // Rotate the result of the final CS left by BitShift + BitSize so that
+  // the field ends up in the lower bits of a GR32.
+  SDValue FieldBits = DAG.getConstant(BitSize, WideVT);
+  SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift,
+                                    FieldBits);
+  SDValue Rotated = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift);
+
+  // Return the rotated value together with the chain of the atomic node.
+  SDValue Results[2] = { Rotated, AtomicOp.getValue(1) };
+  return DAG.getMergeValues(Results, 2, DL);
+}
+
+// Lower ATOMIC_CMP_SWAP operation Op.  32-bit operations are returned
+// unchanged.  Narrower ones are rewritten as a fullword ATOMIC_CMP_SWAPW
+// operation acting on the aligned word that contains the field.
+SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
+                                                    SelectionDAG &DAG) const {
+  AtomicSDNode *Node = cast<AtomicSDNode>(Op.getNode());
+
+  // 32-bit compare and swap is supported natively.
+  EVT WideVT = MVT::i32;
+  EVT NarrowVT = Node->getMemoryVT();
+  if (NarrowVT == WideVT)
+    return Op;
+
+  // Operands: 0 = chain, 1 = address, 2 = value to compare against,
+  // 3 = value to store.
+  DebugLoc DL = Node->getDebugLoc();
+  MachineMemOperand *MMO = Node->getMemOperand();
+  SDValue ChainIn = Node->getOperand(0);
+  SDValue Addr = Node->getOperand(1);
+  SDValue CmpVal = Node->getOperand(2);
+  SDValue SwapVal = Node->getOperand(3);
+  EVT PtrVT = Addr.getValueType();
+  int64_t BitSize = NarrowVT.getSizeInBits();
+
+  // Clear the low two address bits to get the address of the containing
+  // word.
+  SDValue WordMask = DAG.getConstant(-4, PtrVT);
+  SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr, WordMask);
+
+  // The address times 8, truncated to 32 bits, is the number of bits that
+  // the word must be rotated left in order to bring the field to the top
+  // bits of a GR32.
+  SDValue Three = DAG.getConstant(3, PtrVT);
+  SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr, Three);
+  BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
+
+  // Negating that gives the complementing amount, for rotating a field in
+  // the top bits back to its proper position.
+  SDValue Zero = DAG.getConstant(0, WideVT);
+  SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT, Zero, BitShift);
+
+  // Build the ATOMIC_CMP_SWAPW node, which yields a word and a chain.
+  SDVTList ResultTys = DAG.getVTList(WideVT, MVT::Other);
+  SDValue WideOps[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift,
+                        NegBitShift, DAG.getConstant(BitSize, WideVT) };
+  return DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL,
+                                 ResultTys, WideOps, array_lengthof(WideOps),
+                                 NarrowVT, MMO);
+}
+
+SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
+                                              SelectionDAG &DAG) const {
+  // Record that this function manipulates the stack pointer.
+  SystemZMachineFunctionInfo *FuncInfo =
+    DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>();
+  FuncInfo->setManipulatesSP(true);
+
+  // The saved value is simply a copy of R15D, taken on the incoming chain.
+  SDValue Chain = Op.getOperand(0);
+  return DAG.getCopyFromReg(Chain, Op.getDebugLoc(), SystemZ::R15D,
+                            Op.getValueType());
+}
+
+SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
+                                                 SelectionDAG &DAG) const {
+  // Record that this function manipulates the stack pointer.
+  SystemZMachineFunctionInfo *FuncInfo =
+    DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>();
+  FuncInfo->setManipulatesSP(true);
+
+  // Operand 0 is the chain and operand 1 the value to copy into R15D.
+  SDValue Chain = Op.getOperand(0);
+  SDValue SavedSP = Op.getOperand(1);
+  return DAG.getCopyToReg(Chain, Op.getDebugLoc(), SystemZ::R15D, SavedSP);
+}
+
+SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
+                                              SelectionDAG &DAG) const {
+  // Atomic swap and read-modify-write operations all go through
+  // lowerATOMIC_LOAD.  For those the switch only selects the fullword
+  // SystemZISD opcode; every other case returns directly.
+  unsigned RMW = 0;
+  switch (Op.getOpcode()) {
+  case ISD::ATOMIC_SWAP:      RMW = SystemZISD::ATOMIC_SWAPW;      break;
+  case ISD::ATOMIC_LOAD_ADD:  RMW = SystemZISD::ATOMIC_LOADW_ADD;  break;
+  case ISD::ATOMIC_LOAD_SUB:  RMW = SystemZISD::ATOMIC_LOADW_SUB;  break;
+  case ISD::ATOMIC_LOAD_AND:  RMW = SystemZISD::ATOMIC_LOADW_AND;  break;
+  case ISD::ATOMIC_LOAD_OR:   RMW = SystemZISD::ATOMIC_LOADW_OR;   break;
+  case ISD::ATOMIC_LOAD_XOR:  RMW = SystemZISD::ATOMIC_LOADW_XOR;  break;
+  case ISD::ATOMIC_LOAD_NAND: RMW = SystemZISD::ATOMIC_LOADW_NAND; break;
+  case ISD::ATOMIC_LOAD_MIN:  RMW = SystemZISD::ATOMIC_LOADW_MIN;  break;
+  case ISD::ATOMIC_LOAD_MAX:  RMW = SystemZISD::ATOMIC_LOADW_MAX;  break;
+  case ISD::ATOMIC_LOAD_UMIN: RMW = SystemZISD::ATOMIC_LOADW_UMIN; break;
+  case ISD::ATOMIC_LOAD_UMAX: RMW = SystemZISD::ATOMIC_LOADW_UMAX; break;
+
+  case ISD::ATOMIC_CMP_SWAP:
+    return lowerATOMIC_CMP_SWAP(Op, DAG);
+
+  // Control flow and selection.
+  case ISD::BR_CC:
+    return lowerBR_CC(Op, DAG);
+  case ISD::SELECT_CC:
+    return lowerSELECT_CC(Op, DAG);
+
+  // Addresses.
+  case ISD::GlobalAddress:
+    return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG);
+  case ISD::GlobalTLSAddress:
+    return lowerGlobalTLSAddress(cast<GlobalAddressSDNode>(Op), DAG);
+  case ISD::BlockAddress:
+    return lowerBlockAddress(cast<BlockAddressSDNode>(Op), DAG);
+  case ISD::JumpTable:
+    return lowerJumpTable(cast<JumpTableSDNode>(Op), DAG);
+  case ISD::ConstantPool:
+    return lowerConstantPool(cast<ConstantPoolSDNode>(Op), DAG);
+
+  // Arithmetic and conversions.
+  case ISD::BITCAST:
+    return lowerBITCAST(Op, DAG);
+  case ISD::UMUL_LOHI:
+    return lowerUMUL_LOHI(Op, DAG);
+  case ISD::SDIVREM:
+    return lowerSDIVREM(Op, DAG);
+  case ISD::UDIVREM:
+    return lowerUDIVREM(Op, DAG);
+  case ISD::OR:
+    return lowerOR(Op, DAG);
+
+  // Varargs and stack manipulation.
+  case ISD::VASTART:
+    return lowerVASTART(Op, DAG);
+  case ISD::VACOPY:
+    return lowerVACOPY(Op, DAG);
+  case ISD::DYNAMIC_STACKALLOC:
+    return lowerDYNAMIC_STACKALLOC(Op, DAG);
+  case ISD::STACKSAVE:
+    return lowerSTACKSAVE(Op, DAG);
+  case ISD::STACKRESTORE:
+    return lowerSTACKRESTORE(Op, DAG);
+
+  default:
+    llvm_unreachable("Unexpected node to lower");
+  }
+  return lowerATOMIC_LOAD(Op, DAG, RMW);
+}
+
+const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
+  // Each NODE_NAME line maps one SystemZISD opcode to its qualified name.
+#define NODE_NAME(ID) case SystemZISD::ID: return "SystemZISD::" #ID
+  switch (Opcode) {
+    NODE_NAME(RET_FLAG);
+    NODE_NAME(CALL);
+    NODE_NAME(PCREL_WRAPPER);
+    NODE_NAME(CMP);
+    NODE_NAME(UCMP);
+    NODE_NAME(BR_CCMASK);
+    NODE_NAME(SELECT_CCMASK);
+    NODE_NAME(ADJDYNALLOC);
+    NODE_NAME(EXTRACT_ACCESS);
+    NODE_NAME(UMUL_LOHI64);
+    NODE_NAME(SDIVREM64);
+    NODE_NAME(UDIVREM32);
+    NODE_NAME(UDIVREM64);
+    NODE_NAME(ATOMIC_SWAPW);
+    NODE_NAME(ATOMIC_LOADW_ADD);
+    NODE_NAME(ATOMIC_LOADW_SUB);
+    NODE_NAME(ATOMIC_LOADW_AND);
+    NODE_NAME(ATOMIC_LOADW_OR);
+    NODE_NAME(ATOMIC_LOADW_XOR);
+    NODE_NAME(ATOMIC_LOADW_NAND);
+    NODE_NAME(ATOMIC_LOADW_MIN);
+    NODE_NAME(ATOMIC_LOADW_MAX);
+    NODE_NAME(ATOMIC_LOADW_UMIN);
+    NODE_NAME(ATOMIC_LOADW_UMAX);
+    NODE_NAME(ATOMIC_CMP_SWAPW);
+  }
+#undef NODE_NAME
+
+  // Opcodes that are not listed above have no name.
+  return NULL;
+}
+
+//===----------------------------------------------------------------------===//
+// Custom insertion
+//===----------------------------------------------------------------------===//
+
+// Create an empty machine basic block for the same IR block as MBB, insert
+// it into the function immediately after MBB and return it.
+static MachineBasicBlock *emitBlockAfter(MachineBasicBlock *MBB) {
+  MachineFunction *MF = MBB->getParent();
+  MachineBasicBlock *Created =
+    MF->CreateMachineBasicBlock(MBB->getBasicBlock());
+
+  // Insert in front of whatever currently follows MBB.
+  MachineFunction::iterator InsertPos(MBB);
+  ++InsertPos;
+  MF->insert(InsertPos, Created);
+  return Created;
+}
+
+// Split MBB immediately after MI.  The instructions that follow MI move to
+// a new block placed after MBB, which also takes over MBB's successors.
+// Return the new block.
+static MachineBasicBlock *splitBlockAfter(MachineInstr *MI,
+                                          MachineBasicBlock *MBB) {
+  MachineBasicBlock *Tail = emitBlockAfter(MBB);
+
+  // Move everything in (MI, end) to the start of the new block.
+  MachineBasicBlock::iterator FirstMoved(MI);
+  ++FirstMoved;
+  Tail->splice(Tail->begin(), MBB, FirstMoved, MBB->end());
+
+  Tail->transferSuccessorsAndUpdatePHIs(MBB);
+  return Tail;
+}
+
+// Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI.
+// The select is expanded into a conditional branch around an empty block
+// and a PHI in the block that follows.  Return that final block.
+MachineBasicBlock *
+SystemZTargetLowering::emitSelect(MachineInstr *MI,
+                                  MachineBasicBlock *MBB) const {
+  const SystemZInstrInfo *TII = TM.getInstrInfo();
+  DebugLoc DL = MI->getDebugLoc();
+
+  // Operands: 0 = result, 1 = value if the condition holds, 2 = value if
+  // it does not, 3 = condition-code mask.
+  unsigned ResultReg = MI->getOperand(0).getReg();
+  unsigned IfTrueReg = MI->getOperand(1).getReg();
+  unsigned IfFalseReg = MI->getOperand(2).getReg();
+  unsigned Mask = MI->getOperand(3).getImm();
+
+  // Everything after MI moves to JoinMBB.  FalseMBB is then inserted
+  // between MBB and JoinMBB.
+  MachineBasicBlock *JoinMBB = splitBlockAfter(MI, MBB);
+  MachineBasicBlock *FalseMBB = emitBlockAfter(MBB);
+
+  //  MBB:
+  //   ...
+  //   TrueVal = ...
+  //   cmpTY ccX, r1, r2
+  //   jCC JoinMBB
+  //   # fallthrough to FalseMBB
+  BuildMI(MBB, DL, TII->get(SystemZ::BRCL)).addImm(Mask).addMBB(JoinMBB);
+  MBB->addSuccessor(JoinMBB);
+  MBB->addSuccessor(FalseMBB);
+
+  //  FalseMBB:
+  //   # fallthrough to JoinMBB
+  FalseMBB->addSuccessor(JoinMBB);
+
+  //  JoinMBB:
+  //   %Result = phi [ %TrueReg, MBB ], [ %FalseReg, FalseMBB ]
+  //  ...
+  BuildMI(*JoinMBB, JoinMBB->begin(), DL, TII->get(SystemZ::PHI), ResultReg)
+    .addReg(IfTrueReg).addMBB(MBB)
+    .addReg(IfFalseReg).addMBB(FalseMBB);
+
+  // The pseudo instruction has been fully replaced.
+  MI->eraseFromParent();
+  return JoinMBB;
+}
+
+// Implement EmitInstrWithCustomInserter for pseudo ATOMIC_LOAD{,W}_*
+// or ATOMIC_SWAP{,W} instruction MI.  BinOpcode is the instruction that
+// performs the binary operation elided by "*", or 0 for ATOMIC_SWAP{,W}.
+// BitSize is the width of the field in bits, or 0 if this is a partword
+// ATOMIC_LOADW_* or ATOMIC_SWAPW instruction, in which case the bitsize
+// is one of the operands.  Invert says whether the field should be
+// inverted after performing BinOpcode (e.g. for NAND).
+//
+// The operation is expanded into a load followed by a compare-and-swap
+// loop.  Return the block that follows the loop.
+MachineBasicBlock *
+SystemZTargetLowering::emitAtomicLoadBinary(MachineInstr *MI,
+                                            MachineBasicBlock *MBB,
+                                            unsigned BinOpcode,
+                                            unsigned BitSize,
+                                            bool Invert) const {
+  const SystemZInstrInfo *TII = TM.getInstrInfo();
+  MachineFunction &MF = *MBB->getParent();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  unsigned MaskNE = CCMaskForCondCode(ISD::SETNE);
+  // This is tested before BitSize is replaced by the real field width
+  // below, so it is also true for the BitSize == 0 partword case.
+  bool IsSubWord = (BitSize < 32);
+
+  // Extract the operands.  Base can be a register or a frame index.
+  // Src2 can be a register or immediate.
+  unsigned Dest        = MI->getOperand(0).getReg();
+  MachineOperand Base  = earlyUseOperand(MI->getOperand(1));
+  int64_t Disp         = MI->getOperand(2).getImm();
+  MachineOperand Src2  = earlyUseOperand(MI->getOperand(3));
+  unsigned BitShift    = (IsSubWord ? MI->getOperand(4).getReg() : 0);
+  unsigned NegBitShift = (IsSubWord ? MI->getOperand(5).getReg() : 0);
+  DebugLoc DL          = MI->getDebugLoc();
+  if (IsSubWord)
+    BitSize = MI->getOperand(6).getImm();
+
+  // Subword operations use 32-bit registers.
+  const TargetRegisterClass *RC = (BitSize <= 32 ?
+                                   &SystemZ::GR32BitRegClass :
+                                   &SystemZ::GR64BitRegClass);
+  unsigned LOpcode  = BitSize <= 32 ? SystemZ::L  : SystemZ::LG;
+  unsigned CSOpcode = BitSize <= 32 ? SystemZ::CS : SystemZ::CSG;
+
+  // Get the right opcodes for the displacement.
+  LOpcode  = TII->getOpcodeForOffset(LOpcode,  Disp);
+  CSOpcode = TII->getOpcodeForOffset(CSOpcode, Disp);
+  assert(LOpcode && CSOpcode && "Displacement out of range");
+
+  // Create virtual registers for temporary results.  Steps that are not
+  // needed reuse the register of the neighbouring step: a fullword swap
+  // stores Src2 directly, and fullword operations need no rotations.
+  unsigned OrigVal       = MRI.createVirtualRegister(RC);
+  unsigned OldVal        = MRI.createVirtualRegister(RC);
+  unsigned NewVal        = (BinOpcode || IsSubWord ?
+                            MRI.createVirtualRegister(RC) : Src2.getReg());
+  unsigned RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal);
+  unsigned RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal);
+
+  // Insert a basic block for the main loop.
+  MachineBasicBlock *StartMBB = MBB;
+  MachineBasicBlock *DoneMBB  = splitBlockAfter(MI, MBB);
+  MachineBasicBlock *LoopMBB  = emitBlockAfter(StartMBB);
+
+  //  StartMBB:
+  //   ...
+  //   %OrigVal = L Disp(%Base)
+  //   # fall through to LoopMBB
+  MBB = StartMBB;
+  BuildMI(MBB, DL, TII->get(LOpcode), OrigVal)
+    .addOperand(Base).addImm(Disp).addReg(0);
+  MBB->addSuccessor(LoopMBB);
+
+  //  LoopMBB:
+  //   %OldVal        = phi [ %OrigVal, StartMBB ], [ %Dest, LoopMBB ]
+  //   %RotatedOldVal = RLL %OldVal, 0(%BitShift)
+  //   %RotatedNewVal = OP %RotatedOldVal, %Src2
+  //   %NewVal        = RLL %RotatedNewVal, 0(%NegBitShift)
+  //   %Dest          = CS %OldVal, %NewVal, Disp(%Base)
+  //   JNE LoopMBB
+  //   # fall through to DoneMBB
+  MBB = LoopMBB;
+  BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
+    .addReg(OrigVal).addMBB(StartMBB)
+    .addReg(Dest).addMBB(LoopMBB);
+  if (IsSubWord)
+    BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
+      .addReg(OldVal).addReg(BitShift).addImm(0);
+  if (Invert) {
+    // Perform the operation normally and then invert every bit of the field.
+    unsigned Tmp = MRI.createVirtualRegister(RC);
+    BuildMI(MBB, DL, TII->get(BinOpcode), Tmp)
+      .addReg(RotatedOldVal).addOperand(Src2);
+    if (BitSize < 32)
+      // XILF with the upper BitSize bits set.  The mask is built from an
+      // unsigned value because left-shifting a negative signed value
+      // (such as ~0) is undefined behavior.
+      BuildMI(MBB, DL, TII->get(SystemZ::XILF32), RotatedNewVal)
+        .addReg(Tmp).addImm(~uint32_t(0) << (32 - BitSize));
+    else if (BitSize == 32)
+      // XILF with every bit set.
+      BuildMI(MBB, DL, TII->get(SystemZ::XILF32), RotatedNewVal)
+        .addReg(Tmp).addImm(~uint32_t(0));
+    else {
+      // Use LCGR and add -1 to the result, which is more compact than
+      // an XILF, XILH pair.
+      unsigned Tmp2 = MRI.createVirtualRegister(RC);
+      BuildMI(MBB, DL, TII->get(SystemZ::LCGR), Tmp2).addReg(Tmp);
+      BuildMI(MBB, DL, TII->get(SystemZ::AGHI), RotatedNewVal)
+        .addReg(Tmp2).addImm(-1);
+    }
+  } else if (BinOpcode)
+    // A simple binary operation.
+    BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal)
+      .addReg(RotatedOldVal).addOperand(Src2);
+  else if (IsSubWord)
+    // Use RISBG to rotate Src2 into position and use it to replace the
+    // field in RotatedOldVal.
+    BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedNewVal)
+      .addReg(RotatedOldVal).addReg(Src2.getReg())
+      .addImm(32).addImm(31 + BitSize).addImm(32 - BitSize);
+  if (IsSubWord)
+    BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
+      .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
+  BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
+    .addReg(OldVal).addReg(NewVal).addOperand(Base).addImm(Disp);
+  // Go round the loop again if the compare-and-swap left the CC in the
+  // "not equal" state.
+  BuildMI(MBB, DL, TII->get(SystemZ::BRCL)).addImm(MaskNE).addMBB(LoopMBB);
+  MBB->addSuccessor(LoopMBB);
+  MBB->addSuccessor(DoneMBB);
+
+  MI->eraseFromParent();
+  return DoneMBB;
+}
+
+// Implement EmitInstrWithCustomInserter for pseudo
+// ATOMIC_LOAD{,W}_{,U}{MIN,MAX} instruction MI.  CompareOpcode is the
+// instruction that should be used to compare the current field with the
+// minimum or maximum value.  KeepOldMask is the BRC condition-code mask
+// for when the current field should be kept.  BitSize is the width of
+// the field in bits, or 0 if this is a partword ATOMIC_LOADW_* instruction.
+MachineBasicBlock *
+SystemZTargetLowering::emitAtomicLoadMinMax(MachineInstr *MI,
+                                            MachineBasicBlock *MBB,
+                                            unsigned CompareOpcode,
+                                            unsigned KeepOldMask,
+                                            unsigned BitSize) const {
+  const SystemZInstrInfo *TII = TM.getInstrInfo();
+  MachineFunction &MF = *MBB->getParent();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  unsigned MaskNE = CCMaskForCondCode(ISD::SETNE);
+  bool IsSubWord = (BitSize < 32);
+
+  // Extract the operands.  Base can be a register or a frame index.
+  unsigned Dest        = MI->getOperand(0).getReg();
+  MachineOperand Base  = earlyUseOperand(MI->getOperand(1));
+  int64_t  Disp        = MI->getOperand(2).getImm();
+  unsigned Src2        = MI->getOperand(3).getReg();
+  unsigned BitShift    = (IsSubWord ? MI->getOperand(4).getReg() : 0);
+  unsigned NegBitShift = (IsSubWord ? MI->getOperand(5).getReg() : 0);
+  DebugLoc DL          = MI->getDebugLoc();
+  if (IsSubWord)
+    BitSize = MI->getOperand(6).getImm();
+
+  // Subword operations use 32-bit registers.
+  const TargetRegisterClass *RC = (BitSize <= 32 ?
+                                   &SystemZ::GR32BitRegClass :
+                                   &SystemZ::GR64BitRegClass);
+  unsigned LOpcode  = BitSize <= 32 ? SystemZ::L  : SystemZ::LG;
+  unsigned CSOpcode = BitSize <= 32 ? SystemZ::CS : SystemZ::CSG;
+
+  // Get the right opcodes for the displacement.
+  LOpcode  = TII->getOpcodeForOffset(LOpcode,  Disp);
+  CSOpcode = TII->getOpcodeForOffset(CSOpcode, Disp);
+  assert(LOpcode && CSOpcode && "Displacement out of range");
+
+  // Create virtual registers for temporary results.
+  unsigned OrigVal       = MRI.createVirtualRegister(RC);
+  unsigned OldVal        = MRI.createVirtualRegister(RC);
+  unsigned NewVal        = MRI.createVirtualRegister(RC);
+  unsigned RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal);
+  unsigned RotatedAltVal = (IsSubWord ? MRI.createVirtualRegister(RC) : Src2);
+  unsigned RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal);
+
+  // Insert 3 basic blocks for the loop: LoopMBB, UseAltMBB and UpdateMBB.
+  MachineBasicBlock *StartMBB  = MBB;
+  MachineBasicBlock *DoneMBB   = splitBlockAfter(MI, MBB);
+  MachineBasicBlock *LoopMBB   = emitBlockAfter(StartMBB);
+  MachineBasicBlock *UseAltMBB = emitBlockAfter(LoopMBB);
+  MachineBasicBlock *UpdateMBB = emitBlockAfter(UseAltMBB);
+
+  //  StartMBB:
+  //   ...
+  //   %OrigVal     = L Disp(%Base)
+  //   # fall through to LoopMBB
+  MBB = StartMBB;
+  BuildMI(MBB, DL, TII->get(LOpcode), OrigVal)
+    .addOperand(Base).addImm(Disp).addReg(0);
+  MBB->addSuccessor(LoopMBB);
+
+  //  LoopMBB:
+  //   %OldVal        = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ]
+  //   %RotatedOldVal = RLL %OldVal, 0(%BitShift)
+  //   CompareOpcode %RotatedOldVal, %Src2
+  //   BRCL KeepOldMask, UpdateMBB   # else fall through to UseAltMBB
+  MBB = LoopMBB;
+  BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
+    .addReg(OrigVal).addMBB(StartMBB)
+    .addReg(Dest).addMBB(UpdateMBB);
+  if (IsSubWord)
+    BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
+      .addReg(OldVal).addReg(BitShift).addImm(0);
+  BuildMI(MBB, DL, TII->get(CompareOpcode))
+    .addReg(RotatedOldVal).addReg(Src2);
+  BuildMI(MBB, DL, TII->get(SystemZ::BRCL))
+    .addImm(KeepOldMask).addMBB(UpdateMBB);
+  MBB->addSuccessor(UpdateMBB);
+  MBB->addSuccessor(UseAltMBB);
+
+  //  UseAltMBB:
+  //   %RotatedAltVal = RISBG %RotatedOldVal, %Src2, 32, 31 + BitSize, 0
+  //   # fall through to UpdateMBB
+  MBB = UseAltMBB;
+  if (IsSubWord)
+    BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedAltVal)
+      .addReg(RotatedOldVal).addReg(Src2)
+      .addImm(32).addImm(31 + BitSize).addImm(0);
+  MBB->addSuccessor(UpdateMBB);
+
+  //  UpdateMBB:
+  //   %RotatedNewVal = PHI [ %RotatedOldVal, LoopMBB ],
+  //                        [ %RotatedAltVal, UseAltMBB ]
+  //   %NewVal        = RLL %RotatedNewVal, 0(%NegBitShift)
+  //   %Dest          = CS %OldVal, %NewVal, Disp(%Base)
+  //   JNE LoopMBB
+  //   # fall through to DoneMBB
+  MBB = UpdateMBB;
+  BuildMI(MBB, DL, TII->get(SystemZ::PHI), RotatedNewVal)
+    .addReg(RotatedOldVal).addMBB(LoopMBB)
+    .addReg(RotatedAltVal).addMBB(UseAltMBB);
+  if (IsSubWord)
+    BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
+      .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
+  BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
+    .addReg(OldVal).addReg(NewVal).addOperand(Base).addImm(Disp);
+  BuildMI(MBB, DL, TII->get(SystemZ::BRCL)).addImm(MaskNE).addMBB(LoopMBB);
+  MBB->addSuccessor(LoopMBB);
+  MBB->addSuccessor(DoneMBB);
+
+  MI->eraseFromParent();
+  return DoneMBB;
+}
+
+// Implement EmitInstrWithCustomInserter for pseudo ATOMIC_CMP_SWAPW
+// instruction MI.
+MachineBasicBlock *
+SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr *MI,
+                                          MachineBasicBlock *MBB) const {
+  const SystemZInstrInfo *TII = TM.getInstrInfo();
+  MachineFunction &MF = *MBB->getParent();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  unsigned MaskNE = CCMaskForCondCode(ISD::SETNE);
+
+  // Extract the operands.  Base can be a register or a frame index.
+  unsigned Dest        = MI->getOperand(0).getReg();
+  MachineOperand Base  = earlyUseOperand(MI->getOperand(1));
+  int64_t  Disp        = MI->getOperand(2).getImm();
+  unsigned OrigCmpVal  = MI->getOperand(3).getReg();
+  unsigned OrigSwapVal = MI->getOperand(4).getReg();
+  unsigned BitShift    = MI->getOperand(5).getReg();
+  unsigned NegBitShift = MI->getOperand(6).getReg();
+  int64_t  BitSize     = MI->getOperand(7).getImm();
+  DebugLoc DL          = MI->getDebugLoc();
+
+  const TargetRegisterClass *RC = &SystemZ::GR32BitRegClass;
+
+  // Get the right opcodes for the displacement.
+  unsigned LOpcode  = TII->getOpcodeForOffset(SystemZ::L,  Disp);
+  unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
+  assert(LOpcode && CSOpcode && "Displacement out of range");
+
+  // Create virtual registers for temporary results.
+  unsigned OrigOldVal   = MRI.createVirtualRegister(RC);
+  unsigned OldVal       = MRI.createVirtualRegister(RC);
+  unsigned CmpVal       = MRI.createVirtualRegister(RC);
+  unsigned SwapVal      = MRI.createVirtualRegister(RC);
+  unsigned StoreVal     = MRI.createVirtualRegister(RC);
+  unsigned RetryOldVal  = MRI.createVirtualRegister(RC);
+  unsigned RetryCmpVal  = MRI.createVirtualRegister(RC);
+  unsigned RetrySwapVal = MRI.createVirtualRegister(RC);
+
+  // Insert 2 basic blocks for the loop: LoopMBB and SetMBB.
+  MachineBasicBlock *StartMBB = MBB;
+  MachineBasicBlock *DoneMBB  = splitBlockAfter(MI, MBB);
+  MachineBasicBlock *LoopMBB  = emitBlockAfter(StartMBB);
+  MachineBasicBlock *SetMBB   = emitBlockAfter(LoopMBB);
+
+  //  StartMBB:
+  //   ...
+  //   %OrigOldVal     = L Disp(%Base)
+  //   # fall through to LoopMBB
+  MBB = StartMBB;
+  BuildMI(MBB, DL, TII->get(LOpcode), OrigOldVal)
+    .addOperand(Base).addImm(Disp).addReg(0);
+  MBB->addSuccessor(LoopMBB);
+
+  //  LoopMBB:
+  //   %OldVal       = phi [ %OrigOldVal, StartMBB ], [ %RetryOldVal, SetMBB ]
+  //   %CmpVal       = phi [ %OrigCmpVal, StartMBB ], [ %RetryCmpVal, SetMBB ]
+  //   %SwapVal      = phi [ %OrigSwapVal, StartMBB ], [ %RetrySwapVal, SetMBB ]
+  //   %Dest         = RLL %OldVal, BitSize(%BitShift)
+  //                     ^^ The low BitSize bits contain the field
+  //                        of interest.
+  //   %RetryCmpVal  = RISBG32 %CmpVal, %Dest, 32, 63-BitSize, 0
+  //                     ^^ Replace the upper 32-BitSize bits of the
+  //                        comparison value with those that we loaded,
+  //                        so that we can use a full word comparison.
+  //   CR %Dest, %RetryCmpVal
+  //   JNE DoneMBB
+  //   # Fall through to SetMBB
+  MBB = LoopMBB;
+  BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
+    .addReg(OrigOldVal).addMBB(StartMBB)
+    .addReg(RetryOldVal).addMBB(SetMBB);
+  BuildMI(MBB, DL, TII->get(SystemZ::PHI), CmpVal)
+    .addReg(OrigCmpVal).addMBB(StartMBB)
+    .addReg(RetryCmpVal).addMBB(SetMBB);
+  BuildMI(MBB, DL, TII->get(SystemZ::PHI), SwapVal)
+    .addReg(OrigSwapVal).addMBB(StartMBB)
+    .addReg(RetrySwapVal).addMBB(SetMBB);
+  BuildMI(MBB, DL, TII->get(SystemZ::RLL), Dest)
+    .addReg(OldVal).addReg(BitShift).addImm(BitSize);
+  BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetryCmpVal)
+    .addReg(CmpVal).addReg(Dest).addImm(32).addImm(63 - BitSize).addImm(0);
+  BuildMI(MBB, DL, TII->get(SystemZ::CR))
+    .addReg(Dest).addReg(RetryCmpVal);
+  BuildMI(MBB, DL, TII->get(SystemZ::BRCL)).addImm(MaskNE).addMBB(DoneMBB);
+  MBB->addSuccessor(DoneMBB);
+  MBB->addSuccessor(SetMBB);
+
+  //  SetMBB:
+  //   %RetrySwapVal = RISBG32 %SwapVal, %Dest, 32, 63-BitSize, 0
+  //                      ^^ Replace the upper 32-BitSize bits of the new
+  //                         value with those that we loaded.
+  //   %StoreVal    = RLL %RetrySwapVal, -BitSize(%NegBitShift)
+  //                      ^^ Rotate the new field to its proper position.
+  //   %RetryOldVal = CS %OldVal, %StoreVal, Disp(%Base)
+  //   JNE LoopMBB
+  //   # fall through to DoneMBB
+  MBB = SetMBB;
+  BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetrySwapVal)
+    .addReg(SwapVal).addReg(Dest).addImm(32).addImm(63 - BitSize).addImm(0);
+  BuildMI(MBB, DL, TII->get(SystemZ::RLL), StoreVal)
+    .addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize);
+  BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal)
+    .addReg(OldVal).addReg(StoreVal).addOperand(Base).addImm(Disp);
+  BuildMI(MBB, DL, TII->get(SystemZ::BRCL)).addImm(MaskNE).addMBB(LoopMBB);
+  MBB->addSuccessor(LoopMBB);
+  MBB->addSuccessor(DoneMBB);
+
+  MI->eraseFromParent();
+  return DoneMBB;
+}
+
+// Expand pseudo MI, which widens a GR32 or GR64 source (operand 1) into a
+// GR128 result (operand 0).  ClearEven is true if the high half of the
+// GR128 must be zeroed and false if it may stay undefined.  SubReg is the
+// subregister of the result that receives the source (e.g. subreg_low).
+MachineBasicBlock *
+SystemZTargetLowering::emitExt128(MachineInstr *MI,
+                                  MachineBasicBlock *MBB,
+                                  bool ClearEven, unsigned SubReg) const {
+  const SystemZInstrInfo *TII = TM.getInstrInfo();
+  MachineRegisterInfo &Regs = MBB->getParent()->getRegInfo();
+  const TargetRegisterClass *WideRC = &SystemZ::GR128BitRegClass;
+  DebugLoc Loc = MI->getDebugLoc();
+
+  unsigned Result = MI->getOperand(0).getReg();
+  unsigned Source = MI->getOperand(1).getReg();
+
+  // Start from an undefined GR128 value.
+  unsigned Wide = Regs.createVirtualRegister(WideRC);
+  BuildMI(*MBB, MI, Loc, TII->get(TargetOpcode::IMPLICIT_DEF), Wide);
+  if (ClearEven) {
+    // Build a zero GR64 and insert it as the high half.
+    unsigned Cleared = Regs.createVirtualRegister(WideRC);
+    unsigned Zero    = Regs.createVirtualRegister(&SystemZ::GR64BitRegClass);
+    BuildMI(*MBB, MI, Loc, TII->get(SystemZ::LLILL), Zero).addImm(0);
+    BuildMI(*MBB, MI, Loc, TII->get(TargetOpcode::INSERT_SUBREG), Cleared)
+      .addReg(Wide).addReg(Zero).addImm(SystemZ::subreg_high);
+    Wide = Cleared;
+  }
+  BuildMI(*MBB, MI, Loc, TII->get(TargetOpcode::INSERT_SUBREG), Result)
+    .addReg(Wide).addReg(Source).addImm(SubReg);
+
+  MI->eraseFromParent();
+  return MBB;
+}
+
+MachineBasicBlock *SystemZTargetLowering::
+EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const {
+  switch (MI->getOpcode()) {
+  case SystemZ::Select32:
+  case SystemZ::SelectF32:
+  case SystemZ::Select64:
+  case SystemZ::SelectF64:
+  case SystemZ::SelectF128:
+    return emitSelect(MI, MBB);
+  // Extensions to GR128; the bool argument is emitExt128's ClearEven.
+  case SystemZ::AEXT128_64:
+    return emitExt128(MI, MBB, false, SystemZ::subreg_low);
+  case SystemZ::ZEXT128_32:
+    return emitExt128(MI, MBB, true, SystemZ::subreg_low32);
+  case SystemZ::ZEXT128_64:
+    return emitExt128(MI, MBB, true, SystemZ::subreg_low);
+  // Atomic swaps: BinOpcode is 0.  BitSize 0 is passed for partword (W) forms.
+  case SystemZ::ATOMIC_SWAPW:
+    return emitAtomicLoadBinary(MI, MBB, 0, 0);
+  case SystemZ::ATOMIC_SWAP_32:
+    return emitAtomicLoadBinary(MI, MBB, 0, 32);
+  case SystemZ::ATOMIC_SWAP_64:
+    return emitAtomicLoadBinary(MI, MBB, 0, 64);
+  // Atomic additions.
+  case SystemZ::ATOMIC_LOADW_AR:
+    return emitAtomicLoadBinary(MI, MBB, SystemZ::AR, 0);
+  case SystemZ::ATOMIC_LOADW_AFI:
+    return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI, 0);
+  case SystemZ::ATOMIC_LOAD_AR:
+    return emitAtomicLoadBinary(MI, MBB, SystemZ::AR, 32);
+  case SystemZ::ATOMIC_LOAD_AHI:
+    return emitAtomicLoadBinary(MI, MBB, SystemZ::AHI, 32);
+  case SystemZ::ATOMIC_LOAD_AFI:
+    return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI, 32);
+  case SystemZ::ATOMIC_LOAD_AGR:
+    return emitAtomicLoadBinary(MI, MBB, SystemZ::AGR, 64);
+  case SystemZ::ATOMIC_LOAD_AGHI:
+    return emitAtomicLoadBinary(MI, MBB, SystemZ::AGHI, 64);
+  case SystemZ::ATOMIC_LOAD_AGFI:
+    return emitAtomicLoadBinary(MI, MBB, SystemZ::AGFI, 64);
+  // Atomic subtractions.
+  case SystemZ::ATOMIC_LOADW_SR:
+    return emitAtomicLoadBinary(MI, MBB, SystemZ::SR, 0);
+  case SystemZ::ATOMIC_LOAD_SR:
+    return emitAtomicLoadBinary(MI, MBB, SystemZ::SR, 32);
+  case SystemZ::ATOMIC_LOAD_SGR:
+    return emitAtomicLoadBinary(MI, MBB, SystemZ::SGR, 64);
+  // Atomic ANDs.
+  case SystemZ::ATOMIC_LOADW_NR:
+    return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0);
+  case SystemZ::ATOMIC_LOADW_NILH:
+    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH32, 0);
+  case SystemZ::ATOMIC_LOAD_NR:
+    return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32);
+  case SystemZ::ATOMIC_LOAD_NILL32:
+    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL32, 32);
+  case SystemZ::ATOMIC_LOAD_NILH32:
+    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH32, 32);
+  case SystemZ::ATOMIC_LOAD_NILF32:
+    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF32, 32);
+  case SystemZ::ATOMIC_LOAD_NGR:
+    return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64);
+  case SystemZ::ATOMIC_LOAD_NILL:
+    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 64);
+  case SystemZ::ATOMIC_LOAD_NILH:
+    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 64);
+  case SystemZ::ATOMIC_LOAD_NIHL:
+    return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL, 64);
+  case SystemZ::ATOMIC_LOAD_NIHH:
+    return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH, 64);
+  case SystemZ::ATOMIC_LOAD_NILF:
+    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 64);
+  case SystemZ::ATOMIC_LOAD_NIHF:
+    return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF, 64);
+  // Atomic ORs.
+  case SystemZ::ATOMIC_LOADW_OR:
+    return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 0);
+  case SystemZ::ATOMIC_LOADW_OILH:
+    return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH32, 0);
+  case SystemZ::ATOMIC_LOAD_OR:
+    return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 32);
+  case SystemZ::ATOMIC_LOAD_OILL32:
+    return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL32, 32);
+  case SystemZ::ATOMIC_LOAD_OILH32:
+    return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH32, 32);
+  case SystemZ::ATOMIC_LOAD_OILF32:
+    return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF32, 32);
+  case SystemZ::ATOMIC_LOAD_OGR:
+    return emitAtomicLoadBinary(MI, MBB, SystemZ::OGR, 64);
+  case SystemZ::ATOMIC_LOAD_OILL:
+    return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL, 64);
+  case SystemZ::ATOMIC_LOAD_OILH:
+    return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH, 64);
+  case SystemZ::ATOMIC_LOAD_OIHL:
+    return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHL, 64);
+  case SystemZ::ATOMIC_LOAD_OIHH:
+    return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHH, 64);
+  case SystemZ::ATOMIC_LOAD_OILF:
+    return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF, 64);
+  case SystemZ::ATOMIC_LOAD_OIHF:
+    return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHF, 64);
+  // Atomic XORs.
+  case SystemZ::ATOMIC_LOADW_XR:
+    return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 0);
+  case SystemZ::ATOMIC_LOADW_XILF:
+    return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF32, 0);
+  case SystemZ::ATOMIC_LOAD_XR:
+    return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 32);
+  case SystemZ::ATOMIC_LOAD_XILF32:
+    return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF32, 32);
+  case SystemZ::ATOMIC_LOAD_XGR:
+    return emitAtomicLoadBinary(MI, MBB, SystemZ::XGR, 64);
+  case SystemZ::ATOMIC_LOAD_XILF:
+    return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF, 64);
+  case SystemZ::ATOMIC_LOAD_XIHF:
+    return emitAtomicLoadBinary(MI, MBB, SystemZ::XIHF, 64);
+  // Atomic ANDs with Invert set (the "i" pseudos).
+  case SystemZ::ATOMIC_LOADW_NRi:
+    return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0, true);
+  case SystemZ::ATOMIC_LOADW_NILHi:
+    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH32, 0, true);
+  case SystemZ::ATOMIC_LOAD_NRi:
+    return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32, true);
+  case SystemZ::ATOMIC_LOAD_NILL32i:
+    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL32, 32, true);
+  case SystemZ::ATOMIC_LOAD_NILH32i:
+    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH32, 32, true);
+  case SystemZ::ATOMIC_LOAD_NILF32i:
+    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF32, 32, true);
+  case SystemZ::ATOMIC_LOAD_NGRi:
+    return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64, true);
+  case SystemZ::ATOMIC_LOAD_NILLi:
+    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 64, true);
+  case SystemZ::ATOMIC_LOAD_NILHi:
+    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 64, true);
+  case SystemZ::ATOMIC_LOAD_NIHLi:
+    return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL, 64, true);
+  case SystemZ::ATOMIC_LOAD_NIHHi:
+    return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH, 64, true);
+  case SystemZ::ATOMIC_LOAD_NILFi:
+    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 64, true);
+  case SystemZ::ATOMIC_LOAD_NIHFi:
+    return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF, 64, true);
+  // Atomic signed minimum: keep the old value on CCMASK_CMP_LE.
+  case SystemZ::ATOMIC_LOADW_MIN:
+    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
+                                SystemZ::CCMASK_CMP_LE, 0);
+  case SystemZ::ATOMIC_LOAD_MIN_32:
+    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
+                                SystemZ::CCMASK_CMP_LE, 32);
+  case SystemZ::ATOMIC_LOAD_MIN_64:
+    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CGR,
+                                SystemZ::CCMASK_CMP_LE, 64);
+  // Atomic signed maximum: keep the old value on CCMASK_CMP_GE.
+  case SystemZ::ATOMIC_LOADW_MAX:
+    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
+                                SystemZ::CCMASK_CMP_GE, 0);
+  case SystemZ::ATOMIC_LOAD_MAX_32:
+    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
+                                SystemZ::CCMASK_CMP_GE, 32);
+  case SystemZ::ATOMIC_LOAD_MAX_64:
+    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CGR,
+                                SystemZ::CCMASK_CMP_GE, 64);
+  // Atomic unsigned minimum, using CLR and CLGR as the compare.
+  case SystemZ::ATOMIC_LOADW_UMIN:
+    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
+                                SystemZ::CCMASK_CMP_LE, 0);
+  case SystemZ::ATOMIC_LOAD_UMIN_32:
+    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
+                                SystemZ::CCMASK_CMP_LE, 32);
+  case SystemZ::ATOMIC_LOAD_UMIN_64:
+    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLGR,
+                                SystemZ::CCMASK_CMP_LE, 64);
+  // Atomic unsigned maximum, using CLR and CLGR as the compare.
+  case SystemZ::ATOMIC_LOADW_UMAX:
+    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
+                                SystemZ::CCMASK_CMP_GE, 0);
+  case SystemZ::ATOMIC_LOAD_UMAX_32:
+    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
+                                SystemZ::CCMASK_CMP_GE, 32);
+  case SystemZ::ATOMIC_LOAD_UMAX_64:
+    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLGR,
+                                SystemZ::CCMASK_CMP_GE, 64);
+  // Partword compare-and-swap.
+  case SystemZ::ATOMIC_CMP_SWAPW:
+    return emitAtomicCmpSwapW(MI, MBB);
+  default:
+    llvm_unreachable("Unexpected instr type to insert");
+  }
+}
diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h
new file mode 100644
index 0000000..eea820c
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZISelLowering.h
@@ -0,0 +1,212 @@
+//===-- SystemZISelLowering.h - SystemZ DAG lowering interface --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that SystemZ uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_SystemZ_ISELLOWERING_H
+#define LLVM_TARGET_SystemZ_ISELLOWERING_H
+
+#include "SystemZ.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+namespace SystemZISD {
+  enum {
+    FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+    // Return with a flag operand.  Operand 0 is the chain operand.
+    RET_FLAG,
+
+    // Calls a function.  Operand 0 is the chain operand and operand 1
+    // is the target address.  The arguments start at operand 2.
+    // There is an optional glue operand at the end.
+    CALL,
+
+    // Wraps a TargetGlobalAddress that should be loaded using PC-relative
+    // accesses (LARL).  Operand 0 is the address.
+    PCREL_WRAPPER,
+
+    // Signed integer and floating-point comparisons.  The operands are the
+    // two values to compare.
+    CMP,
+
+    // Likewise unsigned integer comparison.
+    UCMP,
+
+    // Branches if a condition is true.  Operand 0 is the chain operand;
+    // operand 1 is the 4-bit condition-code mask, with bit N in
+    // big-endian order meaning "branch if CC=N"; operand 2 is the
+    // target block and operand 3 is the flag operand.
+    BR_CCMASK,
+
+    // Selects between operand 0 and operand 1.  Operand 2 is the
+    // mask of condition-code values for which operand 0 should be
+    // chosen over operand 1; it has the same form as BR_CCMASK.
+    // Operand 3 is the flag operand.
+    SELECT_CCMASK,
+
+    // Evaluates to the gap between the stack pointer and the
+    // base of the dynamically-allocatable area.
+    ADJDYNALLOC,
+
+    // Extracts the value of a 32-bit access register.  Operand 0 is
+    // the number of the register.
+    EXTRACT_ACCESS,
+
+    // Wrappers around the ISD opcodes of the same name.  The output and
+    // first input operands are GR128s.  The trailing numbers are the
+    // widths of the second operand in bits.
+    UMUL_LOHI64,
+    SDIVREM64,
+    UDIVREM32,
+    UDIVREM64,
+
+    // Wrappers around the inner loop of an 8- or 16-bit ATOMIC_SWAP or
+    // ATOMIC_LOAD_<op>.
+    //
+    // Operand 0: the address of the containing 32-bit-aligned field
+    // Operand 1: the second operand of <op>, in the high bits of an i32
+    //            for everything except ATOMIC_SWAPW
+    // Operand 2: how many bits to rotate the i32 left to bring the first
+    //            operand into the high bits
+    // Operand 3: the negative of operand 2, for rotating the other way
+    // Operand 4: the width of the field in bits (8 or 16)
+    ATOMIC_SWAPW = ISD::FIRST_TARGET_MEMORY_OPCODE,
+    ATOMIC_LOADW_ADD,
+    ATOMIC_LOADW_SUB,
+    ATOMIC_LOADW_AND,
+    ATOMIC_LOADW_OR,
+    ATOMIC_LOADW_XOR,
+    ATOMIC_LOADW_NAND,
+    ATOMIC_LOADW_MIN,
+    ATOMIC_LOADW_MAX,
+    ATOMIC_LOADW_UMIN,
+    ATOMIC_LOADW_UMAX,
+
+    // A wrapper around the inner loop of an ATOMIC_CMP_SWAP.
+    //
+    // Operand 0: the address of the containing 32-bit-aligned field
+    // Operand 1: the compare value, in the low bits of an i32
+    // Operand 2: the swap value, in the low bits of an i32
+    // Operand 3: how many bits to rotate the i32 left to bring the first
+    //            operand into the high bits
+    // Operand 4: the negative of operand 3, for rotating the other way
+    // Operand 5: the width of the field in bits (8 or 16)
+    ATOMIC_CMP_SWAPW
+  };
+} // end namespace SystemZISD
+
+class SystemZSubtarget;
+class SystemZTargetMachine;
+
+class SystemZTargetLowering : public TargetLowering {
+public:
+  explicit SystemZTargetLowering(SystemZTargetMachine &TM);
+
+  // Override TargetLowering; LLVM_OVERRIDE catches signature drift.
+  virtual MVT getScalarShiftAmountTy(EVT LHSTy) const LLVM_OVERRIDE {
+    return MVT::i32;
+  }
+  virtual EVT getSetCCResultType(EVT VT) const LLVM_OVERRIDE {
+    return MVT::i32;
+  }
+  virtual bool isFMAFasterThanMulAndAdd(EVT) const LLVM_OVERRIDE {
+    return true;
+  }
+  virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const LLVM_OVERRIDE;
+  virtual const char *getTargetNodeName(unsigned Opcode) const LLVM_OVERRIDE;
+  virtual std::pair<unsigned, const TargetRegisterClass *>
+    getRegForInlineAsmConstraint(const std::string &Constraint,
+                                 EVT VT) const LLVM_OVERRIDE;
+  virtual TargetLowering::ConstraintType
+    getConstraintType(const std::string &Constraint) const LLVM_OVERRIDE;
+  virtual TargetLowering::ConstraintWeight
+    getSingleConstraintMatchWeight(AsmOperandInfo &info,
+                                   const char *constraint) const LLVM_OVERRIDE;
+  virtual void
+    LowerAsmOperandForConstraint(SDValue Op,
+                                 std::string &Constraint,
+                                 std::vector<SDValue> &Ops,
+                                 SelectionDAG &DAG) const LLVM_OVERRIDE;
+  virtual MachineBasicBlock *
+    EmitInstrWithCustomInserter(MachineInstr *MI,
+                                MachineBasicBlock *BB) const LLVM_OVERRIDE;
+  virtual SDValue LowerOperation(SDValue Op,
+                                 SelectionDAG &DAG) const LLVM_OVERRIDE;
+  virtual SDValue
+    LowerFormalArguments(SDValue Chain,
+                         CallingConv::ID CallConv, bool isVarArg,
+                         const SmallVectorImpl<ISD::InputArg> &Ins,
+                         DebugLoc DL, SelectionDAG &DAG,
+                         SmallVectorImpl<SDValue> &InVals) const LLVM_OVERRIDE;
+  virtual SDValue
+    LowerCall(CallLoweringInfo &CLI,
+              SmallVectorImpl<SDValue> &InVals) const LLVM_OVERRIDE;
+
+  virtual SDValue
+    LowerReturn(SDValue Chain,
+                CallingConv::ID CallConv, bool IsVarArg,
+                const SmallVectorImpl<ISD::OutputArg> &Outs,
+                const SmallVectorImpl<SDValue> &OutVals,
+                DebugLoc DL, SelectionDAG &DAG) const LLVM_OVERRIDE;
+
+private:
+  const SystemZSubtarget &Subtarget;
+  const SystemZTargetMachine &TM;
+
+  // Implement LowerOperation for individual opcodes.
+  SDValue lowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerGlobalAddress(GlobalAddressSDNode *Node,
+                             SelectionDAG &DAG) const;
+  SDValue lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
+                                SelectionDAG &DAG) const;
+  SDValue lowerBlockAddress(BlockAddressSDNode *Node,
+                            SelectionDAG &DAG) const;
+  SDValue lowerJumpTable(JumpTableSDNode *JT, SelectionDAG &DAG) const;
+  SDValue lowerConstantPool(ConstantPoolSDNode *CP, SelectionDAG &DAG) const;
+  SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerSDIVREM(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerOR(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG,
+                           unsigned Opcode) const;
+  SDValue lowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const;
+
+  // Implement EmitInstrWithCustomInserter for individual operation types.
+  MachineBasicBlock *emitSelect(MachineInstr *MI,
+                                MachineBasicBlock *BB) const;
+  MachineBasicBlock *emitExt128(MachineInstr *MI,
+                                MachineBasicBlock *MBB,
+                                bool ClearEven, unsigned SubReg) const;
+  MachineBasicBlock *emitAtomicLoadBinary(MachineInstr *MI,
+                                          MachineBasicBlock *BB,
+                                          unsigned BinOpcode, unsigned BitSize,
+                                          bool Invert = false) const;
+  MachineBasicBlock *emitAtomicLoadMinMax(MachineInstr *MI,
+                                          MachineBasicBlock *MBB,
+                                          unsigned CompareOpcode,
+                                          unsigned KeepOldMask,
+                                          unsigned BitSize) const;
+  MachineBasicBlock *emitAtomicCmpSwapW(MachineInstr *MI,
+                                        MachineBasicBlock *BB) const;
+};
+} // end namespace llvm
+
+#endif // LLVM_TARGET_SystemZ_ISELLOWERING_H
diff --git a/lib/Target/SystemZ/SystemZInstrBuilder.h b/lib/Target/SystemZ/SystemZInstrBuilder.h
new file mode 100644
index 0000000..fb699b9
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZInstrBuilder.h
@@ -0,0 +1,48 @@
+//===-- SystemZInstrBuilder.h - Functions to aid building insts -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file exposes functions that may be used with BuildMI from the
+// MachineInstrBuilder.h file to handle SystemZ'isms in a clean way.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SYSTEMZINSTRBUILDER_H
+#define SYSTEMZINSTRBUILDER_H
+
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+
+namespace llvm {
+
+/// Append a BDX memory reference for frame object FI to MIB: the frame
+/// index, displacement 0 and register 0, plus a memory operand whose
+/// load/store flags are taken from the instruction's description.
+static inline const MachineInstrBuilder &
+addFrameReference(const MachineInstrBuilder &MIB, int FI) {
+  MachineInstr *Inst = MIB;
+  MachineFunction &Func = *Inst->getParent()->getParent();
+  MachineFrameInfo *FrameInfo = Func.getFrameInfo();
+  const MCInstrDesc &Desc = Inst->getDesc();
+  const int64_t Disp = 0;
+  unsigned AccessFlags = Desc.mayLoad() ? MachineMemOperand::MOLoad : 0;
+  if (Desc.mayStore())
+    AccessFlags |= MachineMemOperand::MOStore;
+  MachinePointerInfo PtrInfo(PseudoSourceValue::getFixedStack(FI), Disp);
+  MachineMemOperand *MemOp =
+    Func.getMachineMemOperand(PtrInfo, AccessFlags,
+                              FrameInfo->getObjectSize(FI),
+                              FrameInfo->getObjectAlignment(FI));
+  return MIB.addFrameIndex(FI).addImm(Disp).addReg(0).addMemOperand(MemOp);
+}
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/SystemZ/SystemZInstrFP.td b/lib/Target/SystemZ/SystemZInstrFP.td
new file mode 100644
index 0000000..7c9f0e6
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZInstrFP.td
@@ -0,0 +1,318 @@
+//==- SystemZInstrFP.td - Floating-point SystemZ instructions --*- tblgen-*-==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Control-flow instructions
+//===----------------------------------------------------------------------===//
+
+// C's ?: operator for floating-point operands.
+def SelectF32  : SelectWrapper<FP32>;
+def SelectF64  : SelectWrapper<FP64>;
+def SelectF128 : SelectWrapper<FP128>;
+
+//===----------------------------------------------------------------------===//
+// Move instructions
+//===----------------------------------------------------------------------===//
+
+// Load zero.
+let neverHasSideEffects = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
+  def LZER : InherentRRE<"lzer", 0xB374, FP32,  (fpimm0)>;
+  def LZDR : InherentRRE<"lzdr", 0xB375, FP64,  (fpimm0)>;
+  def LZXR : InherentRRE<"lzxr", 0xB376, FP128, (fpimm0)>;
+}
+
+// Moves between two floating-point registers.
+let neverHasSideEffects = 1 in {
+  def LER : UnaryRR <"ler", 0x38,   null_frag, FP32,  FP32>;
+  def LDR : UnaryRR <"ldr", 0x28,   null_frag, FP64,  FP64>;
+  def LXR : UnaryRRE<"lxr", 0xB365, null_frag, FP128, FP128>;
+}
+
+// Moves between 64-bit integer and floating-point registers.
+def LGDR : UnaryRRE<"lgdr", 0xB3CD, bitconvert, GR64, FP64>;
+def LDGR : UnaryRRE<"ldgr", 0xB3C1, bitconvert, FP64, GR64>;
+
+// fcopysign with an FP32 result.
+let isCodeGenOnly = 1 in {
+  def CPSDRss : BinaryRevRRF<"cpsdr", 0xB372, fcopysign, FP32, FP32>;
+  def CPSDRsd : BinaryRevRRF<"cpsdr", 0xB372, fcopysign, FP32, FP64>;
+}
+
+// The sign of an FP128 is in the high register.  Give the CPSDRsd
+// operands in R1, R2, R3 order.
+def : Pat<(fcopysign FP32:$src1, FP128:$src2),
+          (CPSDRsd (EXTRACT_SUBREG FP128:$src2, subreg_high), FP32:$src1)>;
+
+// fcopysign with an FP64 result.
+let isCodeGenOnly = 1 in
+  def CPSDRds : BinaryRevRRF<"cpsdr", 0xB372, fcopysign, FP64, FP32>;
+def CPSDRdd : BinaryRevRRF<"cpsdr", 0xB372, fcopysign, FP64, FP64>;
+
+// The sign of an FP128 is in the high register.  Give the CPSDRdd
+// operands in R1, R2, R3 order.
+def : Pat<(fcopysign FP64:$src1, FP128:$src2),
+          (CPSDRdd (EXTRACT_SUBREG FP128:$src2, subreg_high), FP64:$src1)>;
+
+// fcopysign with an FP128 result.  Use "upper" as the high half and leave
+// the low half as-is.
+class CopySign128<RegisterOperand cls, dag upper>
+  : Pat<(fcopysign FP128:$src1, cls:$src2),
+        (INSERT_SUBREG FP128:$src1, upper, subreg_high)>;
+
+// Give the CPSDR* operands in R1, R2, R3 order.
+def : CopySign128<FP32,  (CPSDRds FP32:$src2,
+                                  (EXTRACT_SUBREG FP128:$src1, subreg_high))>;
+def : CopySign128<FP64,  (CPSDRdd FP64:$src2,
+                                  (EXTRACT_SUBREG FP128:$src1, subreg_high))>;
+def : CopySign128<FP128, (CPSDRdd (EXTRACT_SUBREG FP128:$src2, subreg_high),
+                                  (EXTRACT_SUBREG FP128:$src1, subreg_high))>;
+
+//===----------------------------------------------------------------------===//
+// Load instructions
+//===----------------------------------------------------------------------===//
+
+let canFoldAsLoad = 1, SimpleBDXLoad = 1 in {
+  defm LE : UnaryRXPair<"le", 0x78, 0xED64, load, FP32>;
+  defm LD : UnaryRXPair<"ld", 0x68, 0xED65, load, FP64>;
+
+  // These instructions are split after register allocation, so we don't
+  // want a custom inserter.
+  let Has20BitOffset = 1, HasIndex = 1, Is128Bit = 1 in {
+    def LX : Pseudo<(outs FP128:$dst), (ins bdxaddr20only128:$src),
+                     [(set FP128:$dst, (load bdxaddr20only128:$src))]>;
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// Store instructions
+//===----------------------------------------------------------------------===//
+
+let SimpleBDXStore = 1 in {
+  defm STE : StoreRXPair<"ste", 0x70, 0xED66, store, FP32>;
+  defm STD : StoreRXPair<"std", 0x60, 0xED67, store, FP64>;
+
+  // These instructions are split after register allocation, so we don't
+  // want a custom inserter.
+  let Has20BitOffset = 1, HasIndex = 1, Is128Bit = 1 in {
+    def STX : Pseudo<(outs), (ins FP128:$src, bdxaddr20only128:$dst),
+                     [(store FP128:$src, bdxaddr20only128:$dst)]>;
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// Conversion instructions
+//===----------------------------------------------------------------------===//
+
+// Convert floating-point values to narrower representations, rounding
+// according to the current mode.  The destination of LEXBR and LDXBR
+// is a 128-bit value, but only the first register of the pair is used.
+def LEDBR : UnaryRRE<"ledbr", 0xB344, fround,    FP32,  FP64>;
+def LEXBR : UnaryRRE<"lexbr", 0xB346, null_frag, FP128, FP128>;
+def LDXBR : UnaryRRE<"ldxbr", 0xB345, null_frag, FP128, FP128>;
+
+def : Pat<(f32 (fround FP128:$src)),
+          (EXTRACT_SUBREG (LEXBR FP128:$src), subreg_32bit)>;
+def : Pat<(f64 (fround FP128:$src)),
+          (EXTRACT_SUBREG (LDXBR FP128:$src), subreg_high)>;
+
+// Extend register floating-point values to wider representations.
+def LDEBR : UnaryRRE<"ldebr", 0xB304, fextend, FP64,  FP32>;
+def LXEBR : UnaryRRE<"lxebr", 0xB306, fextend, FP128, FP32>;
+def LXDBR : UnaryRRE<"lxdbr", 0xB305, fextend, FP128, FP64>;
+
+// Extend memory floating-point values to wider representations.
+def LDEB : UnaryRXE<"ldeb", 0xED04, extloadf32, FP64>;
+def LXEB : UnaryRXE<"lxeb", 0xED06, extloadf32, FP128>;
+def LXDB : UnaryRXE<"lxdb", 0xED05, extloadf64, FP128>;
+
+// Convert a signed integer register value to a floating-point one.
+let Defs = [PSW] in {
+  def CEFBR : UnaryRRE<"cefbr", 0xB394, sint_to_fp, FP32,  GR32>;
+  def CDFBR : UnaryRRE<"cdfbr", 0xB395, sint_to_fp, FP64,  GR32>;
+  def CXFBR : UnaryRRE<"cxfbr", 0xB396, sint_to_fp, FP128, GR32>;
+
+  def CEGBR : UnaryRRE<"cegbr", 0xB3A4, sint_to_fp, FP32,  GR64>;
+  def CDGBR : UnaryRRE<"cdgbr", 0xB3A5, sint_to_fp, FP64,  GR64>;
+  def CXGBR : UnaryRRE<"cxgbr", 0xB3A6, sint_to_fp, FP128, GR64>;
+}
+
+// Convert a floating-point register value to a signed integer value,
+// with the second operand (modifier M3) specifying the rounding mode.
+let Defs = [PSW] in {
+  def CFEBR : UnaryRRF<"cfebr", 0xB398, GR32, FP32>;
+  def CFDBR : UnaryRRF<"cfdbr", 0xB399, GR32, FP64>;
+  def CFXBR : UnaryRRF<"cfxbr", 0xB39A, GR32, FP128>;
+
+  def CGEBR : UnaryRRF<"cgebr", 0xB3A8, GR64, FP32>;
+  def CGDBR : UnaryRRF<"cgdbr", 0xB3A9, GR64, FP64>;
+  def CGXBR : UnaryRRF<"cgxbr", 0xB3AA, GR64, FP128>;
+}
+
+// fp_to_sint always rounds towards zero, which is modifier value 5.
+def : Pat<(i32 (fp_to_sint FP32:$src)),  (CFEBR FP32:$src,  5)>;
+def : Pat<(i32 (fp_to_sint FP64:$src)),  (CFDBR FP64:$src,  5)>;
+def : Pat<(i32 (fp_to_sint FP128:$src)), (CFXBR FP128:$src, 5)>;
+
+def : Pat<(i64 (fp_to_sint FP32:$src)),  (CGEBR FP32:$src,  5)>;
+def : Pat<(i64 (fp_to_sint FP64:$src)),  (CGDBR FP64:$src,  5)>;
+def : Pat<(i64 (fp_to_sint FP128:$src)), (CGXBR FP128:$src, 5)>;
+
+//===----------------------------------------------------------------------===//
+// Unary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Negation (Load Complement).
+let Defs = [PSW] in {
+  def LCEBR : UnaryRRE<"lcebr", 0xB303, fneg, FP32,  FP32>;
+  def LCDBR : UnaryRRE<"lcdbr", 0xB313, fneg, FP64,  FP64>;
+  def LCXBR : UnaryRRE<"lcxbr", 0xB343, fneg, FP128, FP128>;
+}
+
+// Absolute value (Load Positive).
+let Defs = [PSW] in {
+  def LPEBR : UnaryRRE<"lpebr", 0xB300, fabs, FP32,  FP32>;
+  def LPDBR : UnaryRRE<"lpdbr", 0xB310, fabs, FP64,  FP64>;
+  def LPXBR : UnaryRRE<"lpxbr", 0xB340, fabs, FP128, FP128>;
+}
+
+// Negative absolute value (Load Negative).
+let Defs = [PSW] in {
+  def LNEBR : UnaryRRE<"lnebr", 0xB301, fnabs, FP32,  FP32>;
+  def LNDBR : UnaryRRE<"lndbr", 0xB311, fnabs, FP64,  FP64>;
+  def LNXBR : UnaryRRE<"lnxbr", 0xB341, fnabs, FP128, FP128>;
+}
+
+// Square root.
+def SQEBR : UnaryRRE<"sqebr", 0xB314, fsqrt, FP32,  FP32>;
+def SQDBR : UnaryRRE<"sqdbr", 0xB315, fsqrt, FP64,  FP64>;
+def SQXBR : UnaryRRE<"sqxbr", 0xB316, fsqrt, FP128, FP128>;
+
+def SQEB : UnaryRXE<"sqeb", 0xED14, loadu<fsqrt>, FP32>;
+def SQDB : UnaryRXE<"sqdb", 0xED15, loadu<fsqrt>, FP64>;
+
+// Round to an integer, with the second operand (modifier M3) specifying
+// the rounding mode.
+//
+// These forms always check for inexact conditions.  z196 added versions
+// that allow this to be suppressed (as for fnearbyint), but we don't yet
+// support -march=z196.
+let Defs = [PSW] in {
+  def FIEBR : UnaryRRF<"fiebr", 0xB357, FP32,  FP32>;
+  def FIDBR : UnaryRRF<"fidbr", 0xB35F, FP64,  FP64>;
+  def FIXBR : UnaryRRF<"fixbr", 0xB347, FP128, FP128>;
+}
+
+// frint rounds according to the current mode (modifier 0) and detects
+// inexact conditions.
+def : Pat<(frint FP32:$src),  (FIEBR FP32:$src,  0)>;
+def : Pat<(frint FP64:$src),  (FIDBR FP64:$src,  0)>;
+def : Pat<(frint FP128:$src), (FIXBR FP128:$src, 0)>;
+
+//===----------------------------------------------------------------------===//
+// Binary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Addition.
+let Defs = [PSW] in {
+  let isCommutable = 1 in {
+    def AEBR : BinaryRRE<"aebr", 0xB30A, fadd, FP32,  FP32>;
+    def ADBR : BinaryRRE<"adbr", 0xB31A, fadd, FP64,  FP64>;
+    def AXBR : BinaryRRE<"axbr", 0xB34A, fadd, FP128, FP128>;
+  }
+  def AEB : BinaryRXE<"aeb", 0xED0A, fadd, FP32, load>;
+  def ADB : BinaryRXE<"adb", 0xED1A, fadd, FP64, load>;
+}
+
+// Subtraction.
+let Defs = [PSW] in {
+  def SEBR : BinaryRRE<"sebr", 0xB30B, fsub, FP32,  FP32>;
+  def SDBR : BinaryRRE<"sdbr", 0xB31B, fsub, FP64,  FP64>;
+  def SXBR : BinaryRRE<"sxbr", 0xB34B, fsub, FP128, FP128>;
+
+  def SEB : BinaryRXE<"seb",  0xED0B, fsub, FP32, load>;
+  def SDB : BinaryRXE<"sdb",  0xED1B, fsub, FP64, load>;
+}
+
+// Multiplication.
+let isCommutable = 1 in {
+  def MEEBR : BinaryRRE<"meebr", 0xB317, fmul, FP32,  FP32>;
+  def MDBR  : BinaryRRE<"mdbr",  0xB31C, fmul, FP64,  FP64>;
+  def MXBR  : BinaryRRE<"mxbr",  0xB34C, fmul, FP128, FP128>;
+}
+def MEEB : BinaryRXE<"meeb", 0xED17, fmul, FP32, load>;
+def MDB  : BinaryRXE<"mdb",  0xED1C, fmul, FP64, load>;
+
+// f64 multiplication of two FP32 registers.
+def MDEBR : BinaryRRE<"mdebr", 0xB30C, null_frag, FP64, FP32>;
+def : Pat<(fmul (f64 (fextend FP32:$src1)), (f64 (fextend FP32:$src2))),
+          (MDEBR (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
+                                FP32:$src1, subreg_32bit), FP32:$src2)>;
+
+// f64 multiplication of an FP32 register and an f32 memory.
+def MDEB : BinaryRXE<"mdeb", 0xED0C, null_frag, FP64, load>;
+def : Pat<(fmul (f64 (fextend FP32:$src1)),
+                (f64 (extloadf32 bdxaddr12only:$addr))),
+          (MDEB (INSERT_SUBREG (f64 (IMPLICIT_DEF)), FP32:$src1, subreg_32bit),
+                bdxaddr12only:$addr)>;
+
+// f128 multiplication of two FP64 registers.
+def MXDBR : BinaryRRE<"mxdbr", 0xB307, null_frag, FP128, FP64>;
+def : Pat<(fmul (f128 (fextend FP64:$src1)), (f128 (fextend FP64:$src2))),
+          (MXDBR (INSERT_SUBREG (f128 (IMPLICIT_DEF)),
+                                FP64:$src1, subreg_high), FP64:$src2)>;
+
+// f128 multiplication of an FP64 register and an f64 memory.
+def MXDB : BinaryRXE<"mxdb", 0xED07, null_frag, FP128, load>;
+def : Pat<(fmul (f128 (fextend FP64:$src1)),
+                (f128 (extloadf64 bdxaddr12only:$addr))),
+          (MXDB (INSERT_SUBREG (f128 (IMPLICIT_DEF)), FP64:$src1, subreg_high),
+                bdxaddr12only:$addr)>;
+
+// Fused multiply-add.
+def MAEBR : TernaryRRD<"maebr", 0xB30E, z_fma, FP32>;
+def MADBR : TernaryRRD<"madbr", 0xB31E, z_fma, FP64>;
+
+def MAEB : TernaryRXF<"maeb", 0xED0E, z_fma, FP32, load>;
+def MADB : TernaryRXF<"madb", 0xED1E, z_fma, FP64, load>;
+
+// Fused multiply-subtract.
+def MSEBR : TernaryRRD<"msebr", 0xB30F, z_fms, FP32>;
+def MSDBR : TernaryRRD<"msdbr", 0xB31F, z_fms, FP64>;
+
+def MSEB : TernaryRXF<"mseb", 0xED0F, z_fms, FP32, load>;
+def MSDB : TernaryRXF<"msdb", 0xED1F, z_fms, FP64, load>;
+
+// Division.
+def DEBR : BinaryRRE<"debr", 0xB30D, fdiv, FP32,  FP32>;
+def DDBR : BinaryRRE<"ddbr", 0xB31D, fdiv, FP64,  FP64>;
+def DXBR : BinaryRRE<"dxbr", 0xB34D, fdiv, FP128, FP128>;
+
+def DEB : BinaryRXE<"deb", 0xED0D, fdiv, FP32, load>;
+def DDB : BinaryRXE<"ddb", 0xED1D, fdiv, FP64, load>;
+
+//===----------------------------------------------------------------------===//
+// Comparisons
+//===----------------------------------------------------------------------===//
+
+let Defs = [PSW] in {
+  def CEBR : CompareRRE<"cebr", 0xB309, z_cmp, FP32,  FP32>;
+  def CDBR : CompareRRE<"cdbr", 0xB319, z_cmp, FP64,  FP64>;
+  def CXBR : CompareRRE<"cxbr", 0xB349, z_cmp, FP128, FP128>;
+
+  def CEB : CompareRXE<"ceb", 0xED09, z_cmp, FP32, load>;
+  def CDB : CompareRXE<"cdb", 0xED19, z_cmp, FP64, load>;
+}
+
+//===----------------------------------------------------------------------===//
+// Peepholes
+//===----------------------------------------------------------------------===//
+
+def : Pat<(f32  fpimmneg0), (LCEBR (LZER))>;
+def : Pat<(f64  fpimmneg0), (LCDBR (LZDR))>;
+def : Pat<(f128 fpimmneg0), (LCXBR (LZXR))>;
diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td
new file mode 100644
index 0000000..b32b7eb
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -0,0 +1,987 @@
+//==- SystemZInstrFormats.td - SystemZ Instruction Formats --*- tablegen -*-==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Basic SystemZ instruction definition
+//===----------------------------------------------------------------------===//
+
+class InstSystemZ<int size, dag outs, dag ins, string asmstr,
+                  list<dag> pattern> : Instruction {
+  let Namespace = "SystemZ";
+
+  dag OutOperandList = outs;
+  dag InOperandList = ins;
+  let Size = size;
+  let Pattern = pattern;
+  let AsmString = asmstr;
+
+  // Used to identify a group of related instructions, such as ST and STY.
+  string Function = "";
+
+  // "12" for an instruction that has a ...Y equivalent, "20" for that
+  // ...Y equivalent.
+  string PairType = "none";
+
+  // True if this instruction is a simple D(X,B) load of a register
+  // (with no sign or zero extension).
+  bit SimpleBDXLoad = 0;
+
+  // True if this instruction is a simple D(X,B) store of a register
+  // (with no truncation).
+  bit SimpleBDXStore = 0;
+
+  // True if this instruction has a 20-bit displacement field.
+  bit Has20BitOffset = 0;
+
+  // True if addresses in this instruction have an index register.
+  bit HasIndex = 0;
+
+  // True if this is a 128-bit pseudo instruction that combines two 64-bit
+  // operations.
+  bit Is128Bit = 0;
+
+  let TSFlags{0} = SimpleBDXLoad;
+  let TSFlags{1} = SimpleBDXStore;
+  let TSFlags{2} = Has20BitOffset;
+  let TSFlags{3} = HasIndex;
+  let TSFlags{4} = Is128Bit;
+}
+
+//===----------------------------------------------------------------------===//
+// Mappings between instructions
+//===----------------------------------------------------------------------===//
+
+// Return the version of an instruction that has an unsigned 12-bit
+// displacement.
+def getDisp12Opcode : InstrMapping {
+  let FilterClass = "InstSystemZ";
+  let RowFields = ["Function"];
+  let ColFields = ["PairType"];
+  let KeyCol = ["20"];
+  let ValueCols = [["12"]];
+}
+
+// Return the version of an instruction that has a signed 20-bit displacement.
+def getDisp20Opcode : InstrMapping {
+  let FilterClass = "InstSystemZ";
+  let RowFields = ["Function"];
+  let ColFields = ["PairType"];
+  let KeyCol = ["12"];
+  let ValueCols = [["20"]];
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction formats
+//===----------------------------------------------------------------------===//
+//
+// Formats are specified using operand field declarations of the form:
+//
+//   bits<4> Rn : register input or output for operand n
+//   bits<m> In : immediate value of width m for operand n
+//   bits<4> Bn : base register for address operand n
+//   bits<m> Dn : displacement value of width m for address operand n
+//   bits<4> Xn : index register for address operand n
+//   bits<4> Mn : mode value for operand n
+//
+// The operand numbers ("n" in the list above) follow the architecture manual,
+// but the fields are always declared in assembly order, so there are some
+// cases where operand "2" comes after operand "3".  For address operands,
+// the base register field is declared first, followed by the displacement,
+// followed by the index (if any).  This matches the bdaddr* and bdxaddr*
+// orders.
+//
+//===----------------------------------------------------------------------===//
+
+class InstRI<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+  : InstSystemZ<4, outs, ins, asmstr, pattern> {
+  field bits<32> Inst;
+
+  bits<4> R1;
+  bits<16> I2;
+
+  let Inst{31-24} = op{11-4};
+  let Inst{23-20} = R1;
+  let Inst{19-16} = op{3-0};
+  let Inst{15-0}  = I2;
+}
+
+class InstRIEf<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+  : InstSystemZ<6, outs, ins, asmstr, pattern> {
+  field bits<48> Inst;
+
+  bits<4> R1;
+  bits<4> R2;
+  bits<8> I3;
+  bits<8> I4;
+  bits<8> I5;
+
+  let Inst{47-40} = op{15-8};
+  let Inst{39-36} = R1;
+  let Inst{35-32} = R2;
+  let Inst{31-24} = I3;
+  let Inst{23-16} = I4;
+  let Inst{15-8}  = I5;
+  let Inst{7-0}   = op{7-0};
+}
+
+class InstRIL<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+  : InstSystemZ<6, outs, ins, asmstr, pattern> {
+  field bits<48> Inst;
+
+  bits<4> R1;
+  bits<32> I2;
+
+  let Inst{47-40} = op{11-4};
+  let Inst{39-36} = R1;
+  let Inst{35-32} = op{3-0};
+  let Inst{31-0}  = I2;
+}
+
+class InstRR<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+  : InstSystemZ<2, outs, ins, asmstr, pattern> {
+  field bits<16> Inst;
+
+  bits<4> R1;
+  bits<4> R2;
+
+  let Inst{15-8} = op;
+  let Inst{7-4}  = R1;
+  let Inst{3-0}  = R2;
+}
+
+class InstRRD<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+  : InstSystemZ<4, outs, ins, asmstr, pattern> {
+  field bits<32> Inst;
+
+  bits<4> R1;
+  bits<4> R3;
+  bits<4> R2;
+
+  let Inst{31-16} = op;
+  let Inst{15-12} = R1;
+  let Inst{11-8}  = 0;
+  let Inst{7-4}   = R3;
+  let Inst{3-0}   = R2;
+}
+
+class InstRRE<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+  : InstSystemZ<4, outs, ins, asmstr, pattern> {
+  field bits<32> Inst;
+
+  bits<4> R1;
+  bits<4> R2;
+
+  let Inst{31-16} = op;
+  let Inst{15-8}  = 0;
+  let Inst{7-4}   = R1;
+  let Inst{3-0}   = R2;
+}
+
+class InstRRF<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+  : InstSystemZ<4, outs, ins, asmstr, pattern> {
+  field bits<32> Inst;
+
+  bits<4> R1;
+  bits<4> R2;
+  bits<4> R3;
+
+  let Inst{31-16} = op;
+  let Inst{15-12} = R3;
+  let Inst{11-8}  = 0;
+  let Inst{7-4}   = R1;
+  let Inst{3-0}   = R2;
+}
+
+class InstRX<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+  : InstSystemZ<4, outs, ins, asmstr, pattern> {
+  field bits<32> Inst;
+
+  bits<4> R1;
+  bits<4> B2;
+  bits<12> D2;
+  bits<4> X2;
+
+  let Inst{31-24} = op;
+  let Inst{23-20} = R1;
+  let Inst{19-16} = X2;
+  let Inst{15-12} = B2;
+  let Inst{11-0}  = D2;
+
+  let HasIndex = 1;
+}
+
+class InstRXE<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+  : InstSystemZ<6, outs, ins, asmstr, pattern> {
+  field bits<48> Inst;
+
+  bits<4> R1;
+  bits<4> B2;
+  bits<12> D2;
+  bits<4> X2;
+
+  let Inst{47-40} = op{15-8};
+  let Inst{39-36} = R1;
+  let Inst{35-32} = X2;
+  let Inst{31-28} = B2;
+  let Inst{27-16} = D2;
+  let Inst{15-8}  = 0;
+  let Inst{7-0}   = op{7-0};
+
+  let HasIndex = 1;
+}
+
+class InstRXF<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+  : InstSystemZ<6, outs, ins, asmstr, pattern> {
+  field bits<48> Inst;
+
+  bits<4> R1;
+  bits<4> R3;
+  bits<4> B2;
+  bits<12> D2;
+  bits<4> X2;
+
+  let Inst{47-40} = op{15-8};
+  let Inst{39-36} = R3;
+  let Inst{35-32} = X2;
+  let Inst{31-28} = B2;
+  let Inst{27-16} = D2;
+  let Inst{15-12} = R1;
+  let Inst{11-8}  = 0;
+  let Inst{7-0}   = op{7-0};
+
+  let HasIndex = 1;
+}
+
+class InstRXY<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+  : InstSystemZ<6, outs, ins, asmstr, pattern> {
+  field bits<48> Inst;
+
+  bits<4> R1;
+  bits<4> B2;
+  bits<20> D2;
+  bits<4> X2;
+
+  let Inst{47-40} = op{15-8};
+  let Inst{39-36} = R1;
+  let Inst{35-32} = X2;
+  let Inst{31-28} = B2;
+  let Inst{27-16} = D2{11-0};  // Low 12 bits of the displacement.
+  let Inst{15-8}  = D2{19-12}; // High 8 bits of the displacement.
+  let Inst{7-0}   = op{7-0};
+
+  let Has20BitOffset = 1;
+  let HasIndex = 1;
+}
+
+class InstRS<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+  : InstSystemZ<4, outs, ins, asmstr, pattern> {
+  field bits<32> Inst;
+
+  bits<4> R1;
+  bits<4> R3;
+  bits<4> B2;
+  bits<12> D2;
+
+  let Inst{31-24} = op;
+  let Inst{23-20} = R1;
+  let Inst{19-16} = R3;
+  let Inst{15-12} = B2;
+  let Inst{11-0}  = D2;
+}
+
+class InstRSY<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+  : InstSystemZ<6, outs, ins, asmstr, pattern> {
+  field bits<48> Inst;
+
+  bits<4> R1;
+  bits<4> R3;
+  bits<4> B2;
+  bits<20> D2;
+
+  let Inst{47-40} = op{15-8};
+  let Inst{39-36} = R1;
+  let Inst{35-32} = R3;
+  let Inst{31-28} = B2;
+  let Inst{27-16} = D2{11-0};  // Low 12 bits of the displacement.
+  let Inst{15-8}  = D2{19-12}; // High 8 bits of the displacement.
+  let Inst{7-0}   = op{7-0};
+
+  let Has20BitOffset = 1;
+}
+
+class InstSI<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+  : InstSystemZ<4, outs, ins, asmstr, pattern> {
+  field bits<32> Inst;
+
+  bits<4> B1;
+  bits<12> D1;
+  bits<8> I2;
+
+  let Inst{31-24} = op;
+  let Inst{23-16} = I2;
+  let Inst{15-12} = B1;
+  let Inst{11-0}  = D1;
+}
+
+class InstSIL<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+  : InstSystemZ<6, outs, ins, asmstr, pattern> {
+  field bits<48> Inst;
+
+  bits<4> B1;
+  bits<12> D1;
+  bits<16> I2;
+
+  let Inst{47-32} = op;
+  let Inst{31-28} = B1;
+  let Inst{27-16} = D1;
+  let Inst{15-0}  = I2;
+}
+
+class InstSIY<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+  : InstSystemZ<6, outs, ins, asmstr, pattern> {
+  field bits<48> Inst;
+
+  bits<4> B1;
+  bits<20> D1;
+  bits<8> I2;
+
+  let Inst{47-40} = op{15-8};
+  let Inst{39-32} = I2;
+  let Inst{31-28} = B1;
+  let Inst{27-16} = D1{11-0};  // Low 12 bits of the displacement.
+  let Inst{15-8}  = D1{19-12}; // High 8 bits of the displacement.
+  let Inst{7-0}   = op{7-0};
+
+  let Has20BitOffset = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction definitions with semantics
+//===----------------------------------------------------------------------===//
+//
+// These classes have the form <Category><Format>, where <Format> is one
+// of the formats defined above and where <Category> describes the inputs
+// and outputs.  <Category> can be one of:
+//
+//   Inherent:
+//     One register output operand and no input operands.
+//
+//   Store:
+//     One register or immediate input operand and one address input operand.
+//     The instruction stores the first operand to the address.
+//
+//     This category is used for both pure and truncating stores.
+//
+//   LoadMultiple:
+//     One address input operand and two explicit output operands.
+//     The instruction loads a range of registers from the address,
+//     with the explicit operands giving the first and last register
+//     to load.  Other loaded registers are added as implicit definitions.
+//
+//   StoreMultiple:
+//     Two explicit input register operands and an address operand.
+//     The instruction stores a range of registers to the address,
+//     with the explicit operands giving the first and last register
+//     to store.  Other stored registers are added as implicit uses.
+//
+//   Unary:
+//     One register output operand and one input operand.  The input
+//     operand may be a register, immediate or memory.
+//
+//   Binary:
+//     One register output operand and two input operands.  The first
+//     input operand is always a register and the second may be a register,
+//     immediate or memory.
+//
+//   Shift:
+//     One register output operand and two input operands.  The first
+//     input operand is a register and the second has the same form as
+//     an address (although it isn't actually used to address memory).
+//
+//   Compare:
+//     Two input operands.  The first operand is always a register,
+//     the second may be a register, immediate or memory.
+//
+//   Ternary:
+//     One register output operand and three register or memory input operands.
+//
+//   CmpSwap:
+//     One output operand and three input operands.  The first two
+//     operands are registers and the third is an address.  The instruction
+//     both reads from and writes to the address.
+//
+//   RotateSelect:
+//     One output operand and five input operands.  The first two operands
+//     are registers and the other three are immediates.
+//
+// The format determines which input operands are tied to output operands,
+// and also determines the shape of any address operand.
+//
+// Multiclasses of the form <Category><Format>Pair define two instructions,
+// one with <Category><Format> and one with <Category><Format>Y.  The name
+// of the first instruction has no suffix, the name of the second has
+// an extra "y".
+//
+//===----------------------------------------------------------------------===//
+
+class InherentRRE<string mnemonic, bits<16> opcode, RegisterOperand cls,
+                  dag src>
+  : InstRRE<opcode, (outs cls:$dst), (ins),
+            mnemonic#"\t$dst",
+            [(set cls:$dst, src)]> {
+  let R2 = 0;
+}
+
+class LoadMultipleRSY<string mnemonic, bits<16> opcode, RegisterOperand cls>
+  : InstRSY<opcode, (outs cls:$dst1, cls:$dst2), (ins bdaddr20only:$addr),
+            mnemonic#"\t$dst1, $dst2, $addr", []> {
+  let mayLoad = 1;
+}
+
+class StoreRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator,
+                 RegisterOperand cls>
+  : InstRIL<opcode, (outs), (ins cls:$src, pcrel32:$addr),
+            mnemonic#"\t$src, $addr",
+            [(operator cls:$src, pcrel32:$addr)]> {
+  let mayStore = 1;
+  // We want PC-relative addresses to be tried ahead of BD and BDX addresses.
+  // However, BDXs have two extra operands and are therefore 6 units more
+  // complex.
+  let AddedComplexity = 7;
+}
+
+class StoreRX<string mnemonic, bits<8> opcode, SDPatternOperator operator,
+              RegisterOperand cls, AddressingMode mode = bdxaddr12only>
+  : InstRX<opcode, (outs), (ins cls:$src, mode:$addr),
+           mnemonic#"\t$src, $addr",
+           [(operator cls:$src, mode:$addr)]> {
+  let mayStore = 1;
+}
+
+class StoreRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+               RegisterOperand cls, AddressingMode mode = bdxaddr20only>
+  : InstRXY<opcode, (outs), (ins cls:$src, mode:$addr),
+            mnemonic#"\t$src, $addr",
+            [(operator cls:$src, mode:$addr)]> {
+  let mayStore = 1;
+}
+
+multiclass StoreRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode,
+                       SDPatternOperator operator, RegisterOperand cls> {
+  let Function = mnemonic ## #cls in {
+    let PairType = "12" in
+      def "" : StoreRX<mnemonic, rxOpcode, operator, cls, bdxaddr12pair>;
+    let PairType = "20" in
+      def Y  : StoreRXY<mnemonic#"y", rxyOpcode, operator, cls, bdxaddr20pair>;
+  }
+}
+
+class StoreMultipleRSY<string mnemonic, bits<16> opcode, RegisterOperand cls>
+  : InstRSY<opcode, (outs), (ins cls:$from, cls:$to, bdaddr20only:$addr),
+            mnemonic#"\t$from, $to, $addr", []> {
+  let mayStore = 1;
+}
+
+class StoreSI<string mnemonic, bits<8> opcode, SDPatternOperator operator,
+              Immediate imm, AddressingMode mode = bdaddr12only>
+  : InstSI<opcode, (outs), (ins mode:$addr, imm:$src),
+           mnemonic#"\t$addr, $src",
+           [(operator imm:$src, mode:$addr)]> {
+  let mayStore = 1;
+}
+
+class StoreSIY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+               Immediate imm, AddressingMode mode = bdaddr20only>
+  : InstSIY<opcode, (outs), (ins mode:$addr, imm:$src),
+            mnemonic#"\t$addr, $src",
+            [(operator imm:$src, mode:$addr)]> {
+  let mayStore = 1;
+}
+
+class StoreSIL<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+               Immediate imm>
+  : InstSIL<opcode, (outs), (ins bdaddr12only:$addr, imm:$src),
+            mnemonic#"\t$addr, $src",
+            [(operator imm:$src, bdaddr12only:$addr)]> {
+  let mayStore = 1;
+}
+
+multiclass StoreSIPair<string mnemonic, bits<8> siOpcode, bits<16> siyOpcode,
+                       SDPatternOperator operator, Immediate imm> {
+  let Function = mnemonic in {
+    let PairType = "12" in
+      def "" : StoreSI<mnemonic, siOpcode, operator, imm, bdaddr12pair>;
+    let PairType = "20" in
+      def Y  : StoreSIY<mnemonic#"y", siyOpcode, operator, imm, bdaddr20pair>;
+  }
+}
+
+class UnaryRR<string mnemonic, bits<8> opcode, SDPatternOperator operator,
+              RegisterOperand cls1, RegisterOperand cls2>
+  : InstRR<opcode, (outs cls1:$dst), (ins cls2:$src),
+           mnemonic#"\t$dst, $src",
+           [(set cls1:$dst, (operator cls2:$src))]>;
+
+class UnaryRRE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+               RegisterOperand cls1, RegisterOperand cls2>
+  : InstRRE<opcode, (outs cls1:$dst), (ins cls2:$src),
+            mnemonic#"\t$dst, $src",
+            [(set cls1:$dst, (operator cls2:$src))]>;
+
+class UnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1,
+               RegisterOperand cls2>
+  : InstRRF<opcode, (outs cls1:$dst), (ins cls2:$src, uimm8zx4:$mode),
+            mnemonic#"\t$dst, $mode, $src", []>;
+
+class UnaryRI<string mnemonic, bits<12> opcode, SDPatternOperator operator,
+              RegisterOperand cls, Immediate imm>
+  : InstRI<opcode, (outs cls:$dst), (ins imm:$src),
+           mnemonic#"\t$dst, $src",
+           [(set cls:$dst, (operator imm:$src))]>;
+
+class UnaryRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator,
+               RegisterOperand cls, Immediate imm>
+  : InstRIL<opcode, (outs cls:$dst), (ins imm:$src),
+            mnemonic#"\t$dst, $src",
+            [(set cls:$dst, (operator imm:$src))]>;
+
+class UnaryRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator,
+                 RegisterOperand cls>
+  : InstRIL<opcode, (outs cls:$dst), (ins pcrel32:$addr),
+            mnemonic#"\t$dst, $addr",
+            [(set cls:$dst, (operator pcrel32:$addr))]> {
+  let mayLoad = 1;
+  // We want PC-relative addresses to be tried ahead of BD and BDX addresses.
+  // However, BDXs have two extra operands and are therefore 6 units more
+  // complex.
+  let AddedComplexity = 7;
+}
+
+class UnaryRX<string mnemonic, bits<8> opcode, SDPatternOperator operator,
+              RegisterOperand cls, AddressingMode mode = bdxaddr12only>
+  : InstRX<opcode, (outs cls:$dst), (ins mode:$addr),
+           mnemonic#"\t$dst, $addr",
+           [(set cls:$dst, (operator mode:$addr))]> {
+  let mayLoad = 1;
+}
+
+class UnaryRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+               RegisterOperand cls>
+  : InstRXE<opcode, (outs cls:$dst), (ins bdxaddr12only:$addr),
+            mnemonic#"\t$dst, $addr",
+            [(set cls:$dst, (operator bdxaddr12only:$addr))]> {
+  let mayLoad = 1;
+}
+
+class UnaryRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+               RegisterOperand cls, AddressingMode mode = bdxaddr20only>
+  : InstRXY<opcode, (outs cls:$dst), (ins mode:$addr),
+            mnemonic#"\t$dst, $addr",
+            [(set cls:$dst, (operator mode:$addr))]> {
+  let mayLoad = 1;
+}
+
+multiclass UnaryRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode,
+                       SDPatternOperator operator, RegisterOperand cls> {
+  let Function = mnemonic ## #cls in {
+    let PairType = "12" in
+      def "" : UnaryRX<mnemonic, rxOpcode, operator, cls, bdxaddr12pair>;
+    let PairType = "20" in
+      def Y  : UnaryRXY<mnemonic#"y", rxyOpcode, operator, cls, bdxaddr20pair>;
+  }
+}
+
+class BinaryRR<string mnemonic, bits<8> opcode, SDPatternOperator operator,
+               RegisterOperand cls1, RegisterOperand cls2>
+  : InstRR<opcode, (outs cls1:$dst), (ins cls1:$src1, cls2:$src2),
+           mnemonic#"\t$dst, $src2",
+           [(set cls1:$dst, (operator cls1:$src1, cls2:$src2))]> {
+  let Constraints = "$src1 = $dst";
+  let DisableEncoding = "$src1";
+}
+
+class BinaryRRE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+                RegisterOperand cls1, RegisterOperand cls2>
+  : InstRRE<opcode, (outs cls1:$dst), (ins cls1:$src1, cls2:$src2),
+            mnemonic#"\t$dst, $src2",
+            [(set cls1:$dst, (operator cls1:$src1, cls2:$src2))]> {
+  let Constraints = "$src1 = $dst";
+  let DisableEncoding = "$src1";
+}
+
+// Here the assembly and dag operands are in natural order,
+// but the first input operand maps to R3 and the second to R2.
+// This is used for "CPSDR R1, R3, R2", which is equivalent to
+// R1 = copysign (R3, R2).
+//
+// Direct uses of the instruction must pass operands in encoding order --
+// R1, R2, R3 -- so they must pass the source operands in reverse order.
+class BinaryRevRRF<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+                   RegisterOperand cls1, RegisterOperand cls2>
+  : InstRRF<opcode, (outs cls1:$dst), (ins cls2:$src2, cls1:$src1),
+            mnemonic#"\t$dst, $src1, $src2",
+            [(set cls1:$dst, (operator cls1:$src1, cls2:$src2))]>;
+
+class BinaryRI<string mnemonic, bits<12> opcode, SDPatternOperator operator,
+               RegisterOperand cls, Immediate imm>
+  : InstRI<opcode, (outs cls:$dst), (ins cls:$src1, imm:$src2),
+           mnemonic#"\t$dst, $src2",
+           [(set cls:$dst, (operator cls:$src1, imm:$src2))]> {
+  let Constraints = "$src1 = $dst";
+  let DisableEncoding = "$src1";
+}
+
+class BinaryRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator,
+                RegisterOperand cls, Immediate imm>
+  : InstRIL<opcode, (outs cls:$dst), (ins cls:$src1, imm:$src2),
+            mnemonic#"\t$dst, $src2",
+            [(set cls:$dst, (operator cls:$src1, imm:$src2))]> {
+  let Constraints = "$src1 = $dst";
+  let DisableEncoding = "$src1";
+}
+
+class BinaryRX<string mnemonic, bits<8> opcode, SDPatternOperator operator,
+               RegisterOperand cls, SDPatternOperator load,
+               AddressingMode mode = bdxaddr12only>
+  : InstRX<opcode, (outs cls:$dst), (ins cls:$src1, mode:$src2),
+           mnemonic#"\t$dst, $src2",
+           [(set cls:$dst, (operator cls:$src1, (load mode:$src2)))]> {
+  let Constraints = "$src1 = $dst";
+  let DisableEncoding = "$src1";
+  let mayLoad = 1;
+}
+
+class BinaryRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+                  RegisterOperand cls, SDPatternOperator load>
+  : InstRXE<opcode, (outs cls:$dst), (ins cls:$src1, bdxaddr12only:$src2),
+            mnemonic#"\t$dst, $src2",
+            [(set cls:$dst, (operator cls:$src1,
+                                      (load bdxaddr12only:$src2)))]> {
+  let Constraints = "$src1 = $dst";
+  let DisableEncoding = "$src1";
+  let mayLoad = 1;
+}
+
+class BinaryRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+                RegisterOperand cls, SDPatternOperator load,
+                AddressingMode mode = bdxaddr20only>
+  : InstRXY<opcode, (outs cls:$dst), (ins cls:$src1, mode:$src2),
+            mnemonic#"\t$dst, $src2",
+            [(set cls:$dst, (operator cls:$src1, (load mode:$src2)))]> {
+  let Constraints = "$src1 = $dst";
+  let DisableEncoding = "$src1";
+  let mayLoad = 1;
+}
+
+multiclass BinaryRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode,
+                        SDPatternOperator operator, RegisterOperand cls,
+                        SDPatternOperator load> {
+  let Function = mnemonic ## #cls in {
+    let PairType = "12" in
+      def "" : BinaryRX<mnemonic, rxOpcode, operator, cls, load, bdxaddr12pair>;
+    let PairType = "20" in
+      def Y  : BinaryRXY<mnemonic#"y", rxyOpcode, operator, cls, load,
+                         bdxaddr20pair>;
+  }
+}
+
+class BinarySI<string mnemonic, bits<8> opcode, SDPatternOperator operator,
+               Operand imm, AddressingMode mode = bdaddr12only>
+  : InstSI<opcode, (outs), (ins mode:$addr, imm:$src),
+           mnemonic#"\t$addr, $src",
+           [(store (operator (load mode:$addr), imm:$src), mode:$addr)]> {
+  let mayLoad = 1;
+  let mayStore = 1;
+}
+
+class BinarySIY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+                Operand imm, AddressingMode mode = bdaddr20only>
+  : InstSIY<opcode, (outs), (ins mode:$addr, imm:$src),
+            mnemonic#"\t$addr, $src",
+            [(store (operator (load mode:$addr), imm:$src), mode:$addr)]> {
+  let mayLoad = 1;
+  let mayStore = 1;
+}
+
+multiclass BinarySIPair<string mnemonic, bits<8> siOpcode,
+                        bits<16> siyOpcode, SDPatternOperator operator,
+                        Operand imm> {
+  let Function = mnemonic in {  // No register class to append here.
+    let PairType = "12" in
+      def "" : BinarySI<mnemonic, siOpcode, operator, imm, bdaddr12pair>;
+    let PairType = "20" in
+      def Y  : BinarySIY<mnemonic#"y", siyOpcode, operator, imm, bdaddr20pair>;
+  }
+}
+
+class ShiftRS<string mnemonic, bits<8> opcode, SDPatternOperator operator,
+              RegisterOperand cls, AddressingMode mode>
+  : InstRS<opcode, (outs cls:$dst), (ins cls:$src1, mode:$src2),
+           mnemonic#"\t$dst, $src2",
+           [(set cls:$dst, (operator cls:$src1, mode:$src2))]> {
+  let R3 = 0;
+  let Constraints = "$src1 = $dst";
+  let DisableEncoding = "$src1";
+}
+
+class ShiftRSY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+               RegisterOperand cls, AddressingMode mode>
+  : InstRSY<opcode, (outs cls:$dst), (ins cls:$src1, mode:$src2),
+            mnemonic#"\t$dst, $src1, $src2",
+            [(set cls:$dst, (operator cls:$src1, mode:$src2))]>;
+
+class CompareRR<string mnemonic, bits<8> opcode, SDPatternOperator operator,
+                RegisterOperand cls1, RegisterOperand cls2>
+  : InstRR<opcode, (outs), (ins cls1:$src1, cls2:$src2),
+           mnemonic#"\t$src1, $src2",
+           [(operator cls1:$src1, cls2:$src2)]>;
+
+class CompareRRE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+                 RegisterOperand cls1, RegisterOperand cls2>
+  : InstRRE<opcode, (outs), (ins cls1:$src1, cls2:$src2),
+            mnemonic#"\t$src1, $src2",
+            [(operator cls1:$src1, cls2:$src2)]>;
+
+class CompareRI<string mnemonic, bits<12> opcode, SDPatternOperator operator,
+                RegisterOperand cls, Immediate imm>
+  : InstRI<opcode, (outs), (ins cls:$src1, imm:$src2),
+           mnemonic#"\t$src1, $src2",
+           [(operator cls:$src1, imm:$src2)]>;
+
+class CompareRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator,
+                 RegisterOperand cls, Immediate imm>
+  : InstRIL<opcode, (outs), (ins cls:$src1, imm:$src2),
+            mnemonic#"\t$src1, $src2",
+            [(operator cls:$src1, imm:$src2)]>;
+
+class CompareRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator,
+                   RegisterOperand cls, SDPatternOperator load>
+  : InstRIL<opcode, (outs), (ins cls:$src1, pcrel32:$src2),
+            mnemonic#"\t$src1, $src2",
+            [(operator cls:$src1, (load pcrel32:$src2))]> {
+  let mayLoad = 1;
+  // We want PC-relative addresses to be tried ahead of BD and BDX addresses.
+  // However, BDXs have two extra operands and are therefore 6 units more
+  // complex.
+  let AddedComplexity = 7;
+}
+
+class CompareRX<string mnemonic, bits<8> opcode, SDPatternOperator operator,
+                RegisterOperand cls, SDPatternOperator load,
+                AddressingMode mode = bdxaddr12only>
+  : InstRX<opcode, (outs), (ins cls:$src1, mode:$src2),
+           mnemonic#"\t$src1, $src2",
+           [(operator cls:$src1, (load mode:$src2))]> {
+  let mayLoad = 1;
+}
+
+class CompareRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+                 RegisterOperand cls, SDPatternOperator load>
+  : InstRXE<opcode, (outs), (ins cls:$src1, bdxaddr12only:$src2),
+            mnemonic#"\t$src1, $src2",
+            [(operator cls:$src1, (load bdxaddr12only:$src2))]> {
+  let mayLoad = 1;
+}
+
+class CompareRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+                 RegisterOperand cls, SDPatternOperator load,
+                 AddressingMode mode = bdxaddr20only>
+  : InstRXY<opcode, (outs), (ins cls:$src1, mode:$src2),
+            mnemonic#"\t$src1, $src2",
+            [(operator cls:$src1, (load mode:$src2))]> {
+  let mayLoad = 1;
+}
+
+multiclass CompareRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode,
+                         SDPatternOperator operator, RegisterOperand cls,
+                         SDPatternOperator load> {
+  let Function = mnemonic ## #cls in {
+    let PairType = "12" in
+      def "" : CompareRX<mnemonic, rxOpcode, operator, cls,
+                         load, bdxaddr12pair>;
+    let PairType = "20" in
+      def Y  : CompareRXY<mnemonic#"y", rxyOpcode, operator, cls,
+                          load, bdxaddr20pair>;
+  }
+}
+
+class CompareSI<string mnemonic, bits<8> opcode, SDPatternOperator operator,
+                SDPatternOperator load, Immediate imm,
+                AddressingMode mode = bdaddr12only>
+  : InstSI<opcode, (outs), (ins mode:$addr, imm:$src),
+           mnemonic#"\t$addr, $src",
+           [(operator (load mode:$addr), imm:$src)]> {
+  let mayLoad = 1;
+}
+
+class CompareSIL<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+                 SDPatternOperator load, Immediate imm>
+  : InstSIL<opcode, (outs), (ins bdaddr12only:$addr, imm:$src),
+            mnemonic#"\t$addr, $src",
+            [(operator (load bdaddr12only:$addr), imm:$src)]> {
+  let mayLoad = 1;
+}
+
+class CompareSIY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+                 SDPatternOperator load, Immediate imm,
+                 AddressingMode mode = bdaddr20only>
+  : InstSIY<opcode, (outs), (ins mode:$addr, imm:$src),
+            mnemonic#"\t$addr, $src",
+            [(operator (load mode:$addr), imm:$src)]> {
+  let mayLoad = 1;
+}
+
+multiclass CompareSIPair<string mnemonic, bits<8> siOpcode, bits<16> siyOpcode,
+                         SDPatternOperator operator, SDPatternOperator load,
+                         Immediate imm> {
+  let Function = mnemonic in {
+    let PairType = "12" in
+      def "" : CompareSI<mnemonic, siOpcode, operator, load, imm, bdaddr12pair>;
+    let PairType = "20" in
+      def Y  : CompareSIY<mnemonic#"y", siyOpcode, operator, load, imm,
+                          bdaddr20pair>;
+  }
+}
+
+class TernaryRRD<string mnemonic, bits<16> opcode,
+                 SDPatternOperator operator, RegisterOperand cls>
+  : InstRRD<opcode, (outs cls:$dst), (ins cls:$src1, cls:$src2, cls:$src3),
+            mnemonic#"\t$dst, $src2, $src3",
+            [(set cls:$dst, (operator cls:$src1, cls:$src2, cls:$src3))]> {
+  let Constraints = "$src1 = $dst";
+  let DisableEncoding = "$src1";
+}
+
+class TernaryRXF<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+                 RegisterOperand cls, SDPatternOperator load>
+  : InstRXF<opcode, (outs cls:$dst),
+            (ins cls:$src1, cls:$src2, bdxaddr12only:$src3),
+            mnemonic#"\t$dst, $src2, $src3",
+            [(set cls:$dst, (operator cls:$src1, cls:$src2,
+                                      (load bdxaddr12only:$src3)))]> {
+  let Constraints = "$src1 = $dst";
+  let DisableEncoding = "$src1";
+  let mayLoad = 1;
+}
+
+class CmpSwapRS<string mnemonic, bits<8> opcode, SDPatternOperator operator,
+                RegisterOperand cls, AddressingMode mode = bdaddr12only>
+  : InstRS<opcode, (outs cls:$dst), (ins cls:$old, cls:$new, mode:$ptr),
+           mnemonic#"\t$dst, $new, $ptr",
+           [(set cls:$dst, (operator mode:$ptr, cls:$old, cls:$new))]> {
+  let Constraints = "$old = $dst";
+  let DisableEncoding = "$old";
+  let mayLoad = 1;
+  let mayStore = 1;
+}
+
+class CmpSwapRSY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+                 RegisterOperand cls, AddressingMode mode = bdaddr20only>
+  : InstRSY<opcode, (outs cls:$dst), (ins cls:$old, cls:$new, mode:$ptr),
+            mnemonic#"\t$dst, $new, $ptr",
+            [(set cls:$dst, (operator mode:$ptr, cls:$old, cls:$new))]> {
+  let Constraints = "$old = $dst";
+  let DisableEncoding = "$old";
+  let mayLoad = 1;
+  let mayStore = 1;
+}
+
+multiclass CmpSwapRSPair<string mnemonic, bits<8> rsOpcode, bits<16> rsyOpcode,
+                         SDPatternOperator operator, RegisterOperand cls> {
+  let Function = mnemonic ## #cls in {
+    let PairType = "12" in
+      def "" : CmpSwapRS<mnemonic, rsOpcode, operator, cls, bdaddr12pair>;
+    let PairType = "20" in
+      def Y  : CmpSwapRSY<mnemonic#"y", rsyOpcode, operator, cls, bdaddr20pair>;
+  }
+}
+
+class RotateSelectRIEf<string mnemonic, bits<16> opcode, RegisterOperand cls1,
+                       RegisterOperand cls2>
+  : InstRIEf<opcode, (outs cls1:$dst),
+             (ins cls1:$src1, cls2:$src2,
+                  uimm8zx6:$imm1, uimm8zx6:$imm2, uimm8zx6:$imm3),
+             mnemonic#"\t$dst, $src2, $imm1, $imm2, $imm3", []> {
+  let Constraints = "$src1 = $dst";
+  let DisableEncoding = "$src1";
+}
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions
+//===----------------------------------------------------------------------===//
+//
+// Convenience instructions that get lowered to real instructions
+// by either SystemZTargetLowering::EmitInstrWithCustomInserter()
+// or SystemZInstrInfo::expandPostRAPseudo().
+//
+//===----------------------------------------------------------------------===//
+
+class Pseudo<dag outs, dag ins, list<dag> pattern>
+  : InstSystemZ<0, outs, ins, "", pattern> {
+  let isPseudo = 1;
+  let isCodeGenOnly = 1;
+}
+
+// Implements "$dst = $cc & (8 >> CC) ? $src1 : $src2", where CC is
+// the value of the PSW's 2-bit condition code field.
+class SelectWrapper<RegisterOperand cls>
+  : Pseudo<(outs cls:$dst), (ins cls:$src1, cls:$src2, i8imm:$cc),
+           [(set cls:$dst, (z_select_ccmask cls:$src1, cls:$src2, imm:$cc))]> {
+  let usesCustomInserter = 1;
+  // Although the instructions used by these nodes do not in themselves
+  // change the PSW, the insertion requires new blocks, and the PSW cannot
+  // be live across them.
+  let Defs = [PSW];
+  let Uses = [PSW];
+}
+
+// OPERATOR is ATOMIC_SWAP or an ATOMIC_LOAD_* operation.  PAT and OPERAND
+// describe the second (non-memory) operand.
+class AtomicLoadBinary<SDPatternOperator operator, RegisterOperand cls,
+                       dag pat, DAGOperand operand>
+  : Pseudo<(outs cls:$dst), (ins bdaddr20only:$ptr, operand:$src2),
+           [(set cls:$dst, (operator bdaddr20only:$ptr, pat))]> {
+  let Defs = [PSW];
+  let Has20BitOffset = 1;
+  let mayLoad = 1;
+  let mayStore = 1;
+  let usesCustomInserter = 1;
+}
+
+// Specializations of AtomicLoadBinary.
+class AtomicLoadBinaryReg32<SDPatternOperator operator>
+  : AtomicLoadBinary<operator, GR32, (i32 GR32:$src2), GR32>;
+class AtomicLoadBinaryImm32<SDPatternOperator operator, Immediate imm>
+  : AtomicLoadBinary<operator, GR32, (i32 imm:$src2), imm>;
+class AtomicLoadBinaryReg64<SDPatternOperator operator>
+  : AtomicLoadBinary<operator, GR64, (i64 GR64:$src2), GR64>;
+class AtomicLoadBinaryImm64<SDPatternOperator operator, Immediate imm>
+  : AtomicLoadBinary<operator, GR64, (i64 imm:$src2), imm>;
+
+// OPERATOR is ATOMIC_SWAPW or an ATOMIC_LOADW_* operation.  PAT and OPERAND
+// describe the second (non-memory) operand.
+class AtomicLoadWBinary<SDPatternOperator operator, dag pat,
+                        DAGOperand operand>
+  : Pseudo<(outs GR32:$dst),
+           (ins bdaddr20only:$ptr, operand:$src2, ADDR32:$bitshift,
+                ADDR32:$negbitshift, uimm32:$bitsize),
+           [(set GR32:$dst, (operator bdaddr20only:$ptr, pat, ADDR32:$bitshift,
+                                      ADDR32:$negbitshift, uimm32:$bitsize))]> {
+  let Defs = [PSW];
+  let Has20BitOffset = 1;
+  let mayLoad = 1;
+  let mayStore = 1;
+  let usesCustomInserter = 1;
+}
+
+// Specializations of AtomicLoadWBinary.
+class AtomicLoadWBinaryReg<SDPatternOperator operator>
+  : AtomicLoadWBinary<operator, (i32 GR32:$src2), GR32>;
+class AtomicLoadWBinaryImm<SDPatternOperator operator, Immediate imm>
+  : AtomicLoadWBinary<operator, (i32 imm:$src2), imm>;
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp
new file mode 100644
index 0000000..0718c83
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -0,0 +1,444 @@
+//===-- SystemZInstrInfo.cpp - SystemZ instruction information ------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SystemZ implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZInstrInfo.h"
+#include "SystemZInstrBuilder.h"
+
+#define GET_INSTRINFO_CTOR
+#define GET_INSTRMAP_INFO
+#include "SystemZGenInstrInfo.inc"
+
+using namespace llvm;
+
+SystemZInstrInfo::SystemZInstrInfo(SystemZTargetMachine &tm)
+  : SystemZGenInstrInfo(SystemZ::ADJCALLSTACKDOWN, SystemZ::ADJCALLSTACKUP),
+    RI(tm, *this) {
+}
+
+// MI is a 128-bit load or store.  Split it into two 64-bit loads or stores,
+// each having the opcode given by NewOpcode.
+void SystemZInstrInfo::splitMove(MachineBasicBlock::iterator MI,
+                                 unsigned NewOpcode) const {
+  MachineBasicBlock *MBB = MI->getParent();
+  MachineFunction &MF = *MBB->getParent();
+
+  // Get two load or store instructions.  Use the original instruction for one
+  // of them (arbitrarily the second here) and create a clone for the other.
+  MachineInstr *EarlierMI = MF.CloneMachineInstr(MI);
+  MBB->insert(MI, EarlierMI);
+
+  // Set up the two 64-bit registers.
+  MachineOperand &HighRegOp = EarlierMI->getOperand(0);
+  MachineOperand &LowRegOp = MI->getOperand(0);
+  HighRegOp.setReg(RI.getSubReg(HighRegOp.getReg(), SystemZ::subreg_high));
+  LowRegOp.setReg(RI.getSubReg(LowRegOp.getReg(), SystemZ::subreg_low));
+
+  // The address in the first (high) instruction is already correct.
+  // Adjust the offset in the second (low) instruction.
+  MachineOperand &HighOffsetOp = EarlierMI->getOperand(2);
+  MachineOperand &LowOffsetOp = MI->getOperand(2);
+  LowOffsetOp.setImm(LowOffsetOp.getImm() + 8);
+
+  // Set the opcodes.
+  unsigned HighOpcode = getOpcodeForOffset(NewOpcode, HighOffsetOp.getImm());
+  unsigned LowOpcode = getOpcodeForOffset(NewOpcode, LowOffsetOp.getImm());
+  assert(HighOpcode && LowOpcode && "Both offsets should be in range");
+
+  EarlierMI->setDesc(get(HighOpcode));
+  MI->setDesc(get(LowOpcode));
+}
+
+// Split ADJDYNALLOC instruction MI.
+void SystemZInstrInfo::splitAdjDynAlloc(MachineBasicBlock::iterator MI) const {
+  MachineBasicBlock *MBB = MI->getParent();
+  MachineFunction &MF = *MBB->getParent();
+  MachineFrameInfo *MFFrame = MF.getFrameInfo();
+  MachineOperand &OffsetMO = MI->getOperand(2);
+
+  uint64_t Offset = (MFFrame->getMaxCallFrameSize() +
+                     SystemZMC::CallFrameSize +
+                     OffsetMO.getImm());
+  unsigned NewOpcode = getOpcodeForOffset(SystemZ::LA, Offset);
+  assert(NewOpcode && "No support for huge argument lists yet");
+  MI->setDesc(get(NewOpcode));
+  OffsetMO.setImm(Offset);
+}
+
+// If MI is a simple load or store for a frame object, return the register
+// it loads or stores and set FrameIndex to the index of the frame object.
+// Return 0 otherwise.  Flag is SimpleBDXLoad for loads and SimpleBDXStore
+// for stores.
+static unsigned isSimpleMove(const MachineInstr *MI, int &FrameIndex,
+                             int Flag) {
+  const MCInstrDesc &MCID = MI->getDesc();
+  if ((MCID.TSFlags & Flag) &&
+      MI->getOperand(1).isFI() &&
+      MI->getOperand(2).getImm() == 0 &&
+      MI->getOperand(3).getReg() == 0) {
+    FrameIndex = MI->getOperand(1).getIndex();
+    return MI->getOperand(0).getReg();
+  }
+  return 0;
+}
+
+unsigned SystemZInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+                                               int &FrameIndex) const {
+  return isSimpleMove(MI, FrameIndex, SystemZII::SimpleBDXLoad);
+}
+
+unsigned SystemZInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+                                              int &FrameIndex) const {
+  return isSimpleMove(MI, FrameIndex, SystemZII::SimpleBDXStore);
+}
+
+bool SystemZInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+                                     MachineBasicBlock *&TBB,
+                                     MachineBasicBlock *&FBB,
+                                     SmallVectorImpl<MachineOperand> &Cond,
+                                     bool AllowModify) const {
+  // Return true if MBB's terminators cannot be analyzed.  Mostly boilerplate.
+
+  // Start from the bottom of the block and work up, examining the
+  // terminator instructions.
+  MachineBasicBlock::iterator I = MBB.end();
+  while (I != MBB.begin()) {
+    --I;
+    if (I->isDebugValue())
+      continue;
+
+    // Working from the bottom, when we see a non-terminator instruction, we're
+    // done.
+    if (!isUnpredicatedTerminator(I))
+      break;
+
+    // A terminator that isn't a branch can't easily be handled by this
+    // analysis.
+    unsigned ThisCond;
+    const MachineOperand *ThisTarget;
+    if (!isBranch(I, ThisCond, ThisTarget))
+      return true;
+
+    // Can't handle indirect branches.
+    if (!ThisTarget->isMBB())
+      return true;
+
+    if (ThisCond == SystemZ::CCMASK_ANY) {
+      // Handle unconditional branches.
+      if (!AllowModify) {
+        TBB = ThisTarget->getMBB();
+        continue;
+      }
+
+      // If the block has any instructions after a JMP, delete them.
+      while (llvm::next(I) != MBB.end())
+        llvm::next(I)->eraseFromParent();
+
+      Cond.clear();
+      FBB = 0;
+
+      // Delete the JMP if it's equivalent to a fall-through.
+      if (MBB.isLayoutSuccessor(ThisTarget->getMBB())) {
+        TBB = 0;
+        I->eraseFromParent();
+        I = MBB.end();
+        continue;
+      }
+
+      // TBB is used to indicate the unconditional destination.
+      TBB = ThisTarget->getMBB();
+      continue;
+    }
+
+    // Working from the bottom, handle the first conditional branch.
+    if (Cond.empty()) {
+      // FIXME: add X86-style branch swap
+      FBB = TBB;
+      TBB = ThisTarget->getMBB();
+      Cond.push_back(MachineOperand::CreateImm(ThisCond));
+      continue;
+    }
+
+    // Handle subsequent conditional branches.
+    assert(Cond.size() == 1);
+    assert(TBB);
+
+    // Only handle the case where all conditional branches branch to the same
+    // destination.
+    if (TBB != ThisTarget->getMBB())
+      return true;
+
+    // If the conditions are the same, we can leave them alone.  Otherwise
+    // give up: Cond can only describe one condition.
+    // FIXME: Try combining conditions like X86 does.  Should be easy on Z!
+    unsigned OldCond = Cond[0].getImm();
+    if (OldCond != ThisCond)
+      return true;
+  }
+
+  return false;
+}
+
+unsigned SystemZInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+  // Most of the code and comments here are boilerplate.
+  MachineBasicBlock::iterator I = MBB.end();
+  unsigned Count = 0;
+
+  while (I != MBB.begin()) {
+    --I;
+    if (I->isDebugValue())
+      continue;
+    unsigned Cond;
+    const MachineOperand *Target;
+    if (!isBranch(I, Cond, Target))
+      break;
+    if (!Target->isMBB())
+      break;
+    // Remove the branch.
+    I->eraseFromParent();
+    I = MBB.end();
+    ++Count;
+  }
+
+  return Count;
+}
+
+unsigned
+SystemZInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+                               MachineBasicBlock *FBB,
+                               const SmallVectorImpl<MachineOperand> &Cond,
+                               DebugLoc DL) const {
+  // In this function we output 32-bit branches, which should always
+  // have enough range.  They can be shortened and relaxed by later code
+  // in the pipeline, if desired.
+
+  // Shouldn't be a fall through.
+  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+  assert((Cond.size() == 1 || Cond.size() == 0) &&
+         "SystemZ branch conditions have one component!");
+
+  if (Cond.empty()) {
+    // Unconditional branch?
+    assert(!FBB && "Unconditional branch with multiple successors!");
+    BuildMI(&MBB, DL, get(SystemZ::JG)).addMBB(TBB);
+    return 1;
+  }
+
+  // Conditional branch.
+  unsigned Count = 0;
+  unsigned CC = Cond[0].getImm();
+  BuildMI(&MBB, DL, get(SystemZ::BRCL)).addImm(CC).addMBB(TBB);
+  ++Count;
+
+  if (FBB) {
+    // Two-way Conditional branch. Insert the second branch.
+    BuildMI(&MBB, DL, get(SystemZ::JG)).addMBB(FBB);
+    ++Count;
+  }
+  return Count;
+}
+
+void
+SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+                              MachineBasicBlock::iterator MBBI, DebugLoc DL,
+                              unsigned DestReg, unsigned SrcReg,
+                              bool KillSrc) const {
+  // Copy 128-bit GPRs (ADDR128 included) as two 64-bit halves, high first.
+  if (SystemZ::GR128BitRegClass.contains(DestReg, SrcReg)) {
+    copyPhysReg(MBB, MBBI, DL, RI.getSubReg(DestReg, SystemZ::subreg_high),
+                RI.getSubReg(SrcReg, SystemZ::subreg_high), KillSrc);
+    copyPhysReg(MBB, MBBI, DL, RI.getSubReg(DestReg, SystemZ::subreg_low),
+                RI.getSubReg(SrcReg, SystemZ::subreg_low), KillSrc);
+    return;
+  }
+
+  // Every other register class is copied with a single move instruction.
+  unsigned CopyOpcode;
+  if (SystemZ::GR32BitRegClass.contains(DestReg, SrcReg))
+    CopyOpcode = SystemZ::LR;
+  else if (SystemZ::GR64BitRegClass.contains(DestReg, SrcReg))
+    CopyOpcode = SystemZ::LGR;
+  else if (SystemZ::FP32BitRegClass.contains(DestReg, SrcReg))
+    CopyOpcode = SystemZ::LER;
+  else if (SystemZ::FP64BitRegClass.contains(DestReg, SrcReg))
+    CopyOpcode = SystemZ::LDR;
+  else if (SystemZ::FP128BitRegClass.contains(DestReg, SrcReg))
+    CopyOpcode = SystemZ::LXR;
+  else
+    llvm_unreachable("Impossible reg-to-reg copy");
+
+  BuildMI(MBB, MBBI, DL, get(CopyOpcode), DestReg)
+    .addReg(SrcReg, getKillRegState(KillSrc));
+}
+
+void
+SystemZInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+                                      MachineBasicBlock::iterator MBBI,
+                                      unsigned SrcReg, bool isKill,
+                                      int FrameIdx,
+                                      const TargetRegisterClass *RC,
+                                      const TargetRegisterInfo *TRI) const {
+  DebugLoc DL;
+  if (MBBI != MBB.end())
+    DL = MBBI->getDebugLoc();
+  // Emit one instruction even for 128-bit moves; they are split post-RA.
+  unsigned LoadOpc, StoreOpc;
+  getLoadStoreOpcodes(RC, LoadOpc, StoreOpc);
+  addFrameReference(BuildMI(MBB, MBBI, DL, get(StoreOpc))
+                    .addReg(SrcReg, getKillRegState(isKill)), FrameIdx);
+}
+
+void
+SystemZInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+                                       MachineBasicBlock::iterator MBBI,
+                                       unsigned DestReg, int FrameIdx,
+                                       const TargetRegisterClass *RC,
+                                       const TargetRegisterInfo *TRI) const {
+  DebugLoc DL;
+  if (MBBI != MBB.end())
+    DL = MBBI->getDebugLoc();
+  // Emit one instruction even for 128-bit moves; they are split post-RA.
+  unsigned LoadOpc, StoreOpc;
+  getLoadStoreOpcodes(RC, LoadOpc, StoreOpc);
+  addFrameReference(BuildMI(MBB, MBBI, DL, get(LoadOpc), DestReg),
+                    FrameIdx);
+}
+
+bool
+SystemZInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
+  switch (MI->getOpcode()) {
+  case SystemZ::L128:
+    splitMove(MI, SystemZ::LG);
+    return true;
+
+  case SystemZ::ST128:
+    splitMove(MI, SystemZ::STG);
+    return true;
+
+  case SystemZ::LX:
+    splitMove(MI, SystemZ::LD);
+    return true;
+
+  case SystemZ::STX:
+    splitMove(MI, SystemZ::STD);
+    return true;
+
+  case SystemZ::ADJDYNALLOC:
+    splitAdjDynAlloc(MI);
+    return true;
+
+  default:
+    return false;
+  }
+}
+
+bool SystemZInstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+  assert(Cond.size() == 1 && "Invalid branch condition!");
+  Cond[0].setImm(Cond[0].getImm() ^ SystemZ::CCMASK_ANY);
+  return false;
+}
+
+bool SystemZInstrInfo::isBranch(const MachineInstr *MI, unsigned &Cond,
+                                const MachineOperand *&Target) const {
+  switch (MI->getOpcode()) {
+  case SystemZ::BR:
+  case SystemZ::J:
+  case SystemZ::JG:
+    Cond = SystemZ::CCMASK_ANY;
+    Target = &MI->getOperand(0);
+    return true;
+
+  case SystemZ::BRC:
+  case SystemZ::BRCL:
+    Cond = MI->getOperand(0).getImm();
+    Target = &MI->getOperand(1);
+    return true;
+
+  default:
+    assert(!MI->getDesc().isBranch() && "Unknown branch opcode");
+    return false;
+  }
+}
+
+void SystemZInstrInfo::getLoadStoreOpcodes(const TargetRegisterClass *RC,
+                                           unsigned &LoadOpcode,
+                                           unsigned &StoreOpcode) const {
+  if (RC == &SystemZ::GR32BitRegClass || RC == &SystemZ::ADDR32BitRegClass) {
+    LoadOpcode = SystemZ::L;
+    StoreOpcode = SystemZ::ST32;
+  } else if (RC == &SystemZ::GR64BitRegClass ||
+             RC == &SystemZ::ADDR64BitRegClass) {
+    LoadOpcode = SystemZ::LG;
+    StoreOpcode = SystemZ::STG;
+  } else if (RC == &SystemZ::GR128BitRegClass ||
+             RC == &SystemZ::ADDR128BitRegClass) {
+    LoadOpcode = SystemZ::L128;
+    StoreOpcode = SystemZ::ST128;
+  } else if (RC == &SystemZ::FP32BitRegClass) {
+    LoadOpcode = SystemZ::LE;
+    StoreOpcode = SystemZ::STE;
+  } else if (RC == &SystemZ::FP64BitRegClass) {
+    LoadOpcode = SystemZ::LD;
+    StoreOpcode = SystemZ::STD;
+  } else if (RC == &SystemZ::FP128BitRegClass) {
+    LoadOpcode = SystemZ::LX;
+    StoreOpcode = SystemZ::STX;
+  } else
+    llvm_unreachable("Unsupported regclass to load or store");
+}
+
+unsigned SystemZInstrInfo::getOpcodeForOffset(unsigned Opcode,
+                                              int64_t Offset) const {
+  const MCInstrDesc &MCID = get(Opcode);
+  int64_t Offset2 = (MCID.TSFlags & SystemZII::Is128Bit ? Offset + 8 : Offset);
+  if (isUInt<12>(Offset) && isUInt<12>(Offset2)) {
+    // Get the instruction to use for unsigned 12-bit displacements.
+    int Disp12Opcode = SystemZ::getDisp12Opcode(Opcode);
+    if (Disp12Opcode >= 0)
+      return Disp12Opcode;
+
+    // All address-related instructions can use unsigned 12-bit
+    // displacements.
+    return Opcode;
+  }
+  if (isInt<20>(Offset) && isInt<20>(Offset2)) {
+    // Get the instruction to use for signed 20-bit displacements.
+    int Disp20Opcode = SystemZ::getDisp20Opcode(Opcode);
+    if (Disp20Opcode >= 0)
+      return Disp20Opcode;
+
+    // Check whether Opcode allows signed 20-bit displacements.
+    if (MCID.TSFlags & SystemZII::Has20BitOffset)
+      return Opcode;
+  }
+  return 0;
+}
+
+void SystemZInstrInfo::loadImmediate(MachineBasicBlock &MBB,
+                                     MachineBasicBlock::iterator MBBI,
+                                     unsigned Reg, uint64_t Value) const {
+  DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+  unsigned Opcode;
+  if (isInt<16>(Value))
+    Opcode = SystemZ::LGHI;
+  else if (SystemZ::isImmLL(Value))
+    Opcode = SystemZ::LLILL;
+  else if (SystemZ::isImmLH(Value)) {
+    Opcode = SystemZ::LLILH;
+    Value >>= 16;
+  } else {
+    assert(isInt<32>(Value) && "Huge values not handled yet");
+    Opcode = SystemZ::LGFI;
+  }
+  BuildMI(MBB, MBBI, DL, get(Opcode), Reg).addImm(Value);
+}
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h
new file mode 100644
index 0000000..0fc4761
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -0,0 +1,123 @@
+//===-- SystemZInstrInfo.h - SystemZ instruction information ----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SystemZ implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_SYSTEMZINSTRINFO_H
+#define LLVM_TARGET_SYSTEMZINSTRINFO_H
+
+#include "SystemZ.h"
+#include "SystemZRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "SystemZGenInstrInfo.inc"
+
+namespace llvm {
+
+class SystemZTargetMachine;
+
+namespace SystemZII {
+  enum {
+    // Instruction TSFlags bits.  See comments in SystemZInstrFormats.td.
+    SimpleBDXLoad  = (1 << 0),
+    SimpleBDXStore = (1 << 1),
+    Has20BitOffset = (1 << 2),
+    HasIndex       = (1 << 3),
+    Is128Bit       = (1 << 4)
+  };
+  // SystemZ MachineOperand target flags.
+  enum {
+    // Mask of the bits that hold the symbol modifier (access model).
+    MO_SYMBOL_MODIFIER = (1 << 0),
+
+    // @GOT (aka @GOTENT).  The only modifier so far, hence the same bit.
+    MO_GOT = (1 << 0)
+  };
+}
+
+class SystemZInstrInfo : public SystemZGenInstrInfo {
+  const SystemZRegisterInfo RI;
+
+  void splitMove(MachineBasicBlock::iterator MI, unsigned NewOpcode) const;
+  void splitAdjDynAlloc(MachineBasicBlock::iterator MI) const;
+
+public:
+  explicit SystemZInstrInfo(SystemZTargetMachine &TM);
+
+  // Overrides of TargetInstrInfo hooks.
+  virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+                                       int &FrameIndex) const LLVM_OVERRIDE;
+  virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+                                      int &FrameIndex) const LLVM_OVERRIDE;
+  virtual bool AnalyzeBranch(MachineBasicBlock &MBB,
+                             MachineBasicBlock *&TBB,
+                             MachineBasicBlock *&FBB,
+                             SmallVectorImpl<MachineOperand> &Cond,
+                             bool AllowModify) const LLVM_OVERRIDE;
+  virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const LLVM_OVERRIDE;
+  virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+                                MachineBasicBlock *FBB,
+                                const SmallVectorImpl<MachineOperand> &Cond,
+                                DebugLoc DL) const LLVM_OVERRIDE;
+  virtual void copyPhysReg(MachineBasicBlock &MBB,
+                           MachineBasicBlock::iterator MBBI, DebugLoc DL,
+                           unsigned DestReg, unsigned SrcReg,
+                           bool KillSrc) const LLVM_OVERRIDE;
+  virtual void
+    storeRegToStackSlot(MachineBasicBlock &MBB,
+                        MachineBasicBlock::iterator MBBI,
+                        unsigned SrcReg, bool isKill, int FrameIndex,
+                        const TargetRegisterClass *RC,
+                        const TargetRegisterInfo *TRI) const LLVM_OVERRIDE;
+  virtual void
+    loadRegFromStackSlot(MachineBasicBlock &MBB,
+                         MachineBasicBlock::iterator MBBI,
+                         unsigned DestReg, int FrameIdx,
+                         const TargetRegisterClass *RC,
+                         const TargetRegisterInfo *TRI) const LLVM_OVERRIDE;
+  virtual bool
+    expandPostRAPseudo(MachineBasicBlock::iterator MBBI) const LLVM_OVERRIDE;
+  virtual bool
+    ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const
+    LLVM_OVERRIDE;
+
+  // Return the SystemZRegisterInfo, which this class owns.
+  const SystemZRegisterInfo &getRegisterInfo() const { return RI; }
+
+  // Return true if MI is a conditional or unconditional branch.
+  // When returning true, set Cond to the mask of condition-code
+  // values on which the instruction will branch, and set Target
+  // to the operand that contains the branch target.  This target
+  // can be a register or a basic block.
+  bool isBranch(const MachineInstr *MI, unsigned &Cond,
+                const MachineOperand *&Target) const;
+
+  // Get the load and store opcodes for register class RC (must be supported).
+  void getLoadStoreOpcodes(const TargetRegisterClass *RC,
+                           unsigned &LoadOpcode, unsigned &StoreOpcode) const;
+
+  // Opcode is the opcode of an instruction that has an address operand,
+  // and the caller wants to perform that instruction's operation on an
+  // address that has displacement Offset.  Return the opcode of a suitable
+  // instruction (which might be Opcode itself) or 0 if no such instruction
+  // exists.
+  unsigned getOpcodeForOffset(unsigned Opcode, int64_t Offset) const;
+
+  // Emit code before MBBI in MBB to move immediate value Value into physical
+  // register Reg.  Values needing more than one instruction are unsupported.
+  void loadImmediate(MachineBasicBlock &MBB,
+                     MachineBasicBlock::iterator MBBI,
+                     unsigned Reg, uint64_t Value) const;
+};
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td
new file mode 100644
index 0000000..7ffa382
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -0,0 +1,955 @@
+//===-- SystemZInstrInfo.td - General SystemZ instructions ----*- tblgen-*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Stack allocation
+//===----------------------------------------------------------------------===//
+
+def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i64imm:$amt),
+                              [(callseq_start timm:$amt)]>;
+def ADJCALLSTACKUP   : Pseudo<(outs), (ins i64imm:$amt1, i64imm:$amt2),
+                              [(callseq_end timm:$amt1, timm:$amt2)]>;
+
+let neverHasSideEffects = 1 in {
+  // ADJDYNALLOC takes as input the value of the stack pointer after a
+  // dynamic allocation has been made.  It sets $dst to the address of the
+  // dynamically-allocated area itself, skipping the outgoing arguments.
+  //
+  // This expands to an LA or LAY instruction.  We restrict the offset
+  // to the range of LA (hence dynalloc12only) and keep the LAY range in
+  // reserve for when the size of the outgoing arguments is added.
+  def ADJDYNALLOC : Pseudo<(outs GR64:$dst), (ins dynalloc12only:$src),
+                           [(set GR64:$dst, dynalloc12only:$src)]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Control flow instructions
+//===----------------------------------------------------------------------===//
+
+// A return, encoded like BR (opcode 0x07) with operands hard-coded: R1 is the
+// condition-code mask (all 1s) and R2 is the target address register, %r14.
+let isReturn = 1, isTerminator = 1, isBarrier = 1, hasCtrlDep = 1,
+    R1 = 15, R2 = 14, isCodeGenOnly = 1 in {
+  def RET : InstRR<0x07, (outs), (ins), "br\t%r14", [(z_retflag)]>;
+}
+
+// Unconditional branches.  R1 is the condition-code mask (all 1s).
+let isBranch = 1, isTerminator = 1, isBarrier = 1, R1 = 15 in {
+  let isIndirectBranch = 1 in
+    def BR : InstRR<0x07, (outs), (ins ADDR64:$dst),
+                    "br\t$dst", [(brind ADDR64:$dst)]>;
+
+  // An assembler extended mnemonic for BRC.  J is for codegen and AsmJ for
+  // the asm parser, so that we don't relax Js to external symbols into JGs.
+  let isCodeGenOnly = 1 in
+    def J : InstRI<0xA74, (outs), (ins brtarget16:$dst), "j\t$dst", []>;
+  let isAsmParserOnly = 1 in
+    def AsmJ : InstRI<0xA74, (outs), (ins brtarget16:$dst), "j\t$dst", []>;
+
+  // An assembler extended mnemonic for BRCL.  (The extension is "G"
+  // rather than "L" because "JL" is "Jump if Less".)
+  def JG : InstRIL<0xC04, (outs), (ins brtarget32:$dst),
+                   "jg\t$dst", [(br bb:$dst)]>;
+}
+
+// Conditional branches.  It's easier for LLVM to handle these in their raw
+// BRC/BRCL form, with the 4-bit condition-code mask as the first operand.
+// The codegen-only BRC/BRCL print friendlier mnemonics such as JE and JLH;
+// the parser-only AsmBRC/AsmBRCL accept the raw "brc"/"brcl" syntax.
+multiclass CondBranches<Operand imm, string short, string long> {
+  let isBranch = 1, isTerminator = 1, Uses = [PSW] in {
+    def "" : InstRI<0xA74, (outs), (ins imm:$cond, brtarget16:$dst), short, []>;
+    def L  : InstRIL<0xC04, (outs), (ins imm:$cond, brtarget32:$dst), long, []>;
+  }
+}
+let isCodeGenOnly = 1 in
+  defm BRC : CondBranches<cond4, "j$cond\t$dst", "jg$cond\t$dst">;
+let isAsmParserOnly = 1 in
+  defm AsmBRC : CondBranches<uimm8zx4, "brc\t$cond, $dst", "brcl\t$cond, $dst">;
+
+def : Pat<(z_br_ccmask cond4:$cond, bb:$dst), (BRCL cond4:$cond, bb:$dst)>;
+
+// Define AsmParser extended mnemonics, one pair (J*/JG*) per condition mask.
+multiclass CondExtendedMnemonic<bits<4> Cond, string name> {
+  let R1 = Cond in {
+    def "" : InstRI<0xA74, (outs), (ins brtarget16:$dst),
+                    "j"##name##"\t$dst", []>;
+    def L  : InstRIL<0xC04, (outs), (ins brtarget32:$dst),
+                    "jg"##name##"\t$dst", []>;
+  }
+}
+let isAsmParserOnly = 1 in {
+  defm AsmJO   : CondExtendedMnemonic<1,  "o">;
+  defm AsmJH   : CondExtendedMnemonic<2,  "h">;
+  defm AsmJNLE : CondExtendedMnemonic<3,  "nle">;
+  defm AsmJL   : CondExtendedMnemonic<4,  "l">;
+  defm AsmJNHE : CondExtendedMnemonic<5,  "nhe">;
+  defm AsmJLH  : CondExtendedMnemonic<6,  "lh">;
+  defm AsmJNE  : CondExtendedMnemonic<7,  "ne">;
+  defm AsmJE   : CondExtendedMnemonic<8,  "e">;
+  defm AsmJNLH : CondExtendedMnemonic<9,  "nlh">;
+  defm AsmJHE  : CondExtendedMnemonic<10, "he">;
+  defm AsmJNL  : CondExtendedMnemonic<11, "nl">;
+  defm AsmJLE  : CondExtendedMnemonic<12, "le">;
+  defm AsmJNH  : CondExtendedMnemonic<13, "nh">;
+  defm AsmJNO  : CondExtendedMnemonic<14, "no">;
+}
+
+def Select32 : SelectWrapper<GR32>;
+def Select64 : SelectWrapper<GR64>;
+
+//===----------------------------------------------------------------------===//
+// Call instructions
+//===----------------------------------------------------------------------===//
+
+// Defs lists the call-clobbered registers.  R1 = 14 hard-codes %r14.
+let isCall = 1, Defs = [R0D, R1D, R2D, R3D, R4D, R5D, R14D,
+                        F0D, F1D, F2D, F3D, F4D, F5D, F6D, F7D],
+    R1 = 14, isCodeGenOnly = 1 in {
+  def BRAS  : InstRI<0xA75, (outs), (ins pcrel16call:$dst, variable_ops),
+                     "bras\t%r14, $dst", []>;
+  def BRASL : InstRIL<0xC05, (outs), (ins pcrel32call:$dst, variable_ops),
+                      "brasl\t%r14, $dst", [(z_call pcrel32call:$dst)]>;
+  def BASR  : InstRR<0x0D, (outs), (ins ADDR64:$dst, variable_ops),
+                     "basr\t%r14, $dst", [(z_call ADDR64:$dst)]>;
+}
+
+// Define the general form of the call instructions for the asm parser.
+// These instructions don't hard-code %r14 as the return address register.
+let isAsmParserOnly = 1 in {
+  def AsmBRAS  : InstRI<0xA75, (outs), (ins GR64:$save, brtarget16:$dst),
+                        "bras\t$save, $dst", []>;
+  def AsmBRASL : InstRIL<0xC05, (outs), (ins GR64:$save, brtarget32:$dst),
+                        "brasl\t$save, $dst", []>;
+  def AsmBASR  : InstRR<0x0D, (outs), (ins GR64:$save, ADDR64:$dst),
+                        "basr\t$save, $dst", []>;
+}
+
+//===----------------------------------------------------------------------===//
+// Move instructions
+//===----------------------------------------------------------------------===//
+
+// Register moves.  These have no selection pattern (null_frag).
+let neverHasSideEffects = 1 in {
+  def LR  : UnaryRR <"lr",  0x18,   null_frag, GR32, GR32>;
+  def LGR : UnaryRRE<"lgr", 0xB904, null_frag, GR64, GR64>;
+}
+
+// Immediate moves.
+let neverHasSideEffects = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
+  // 16-bit sign-extended immediates.
+  def LHI  : UnaryRI<"lhi",  0xA78, bitconvert, GR32, imm32sx16>;
+  def LGHI : UnaryRI<"lghi", 0xA79, bitconvert, GR64, imm64sx16>;
+
+  // Other 16-bit immediates (one opcode per 16-bit field: LL, LH, HL, HH).
+  def LLILL : UnaryRI<"llill", 0xA5F, bitconvert, GR64, imm64ll16>;
+  def LLILH : UnaryRI<"llilh", 0xA5E, bitconvert, GR64, imm64lh16>;
+  def LLIHL : UnaryRI<"llihl", 0xA5D, bitconvert, GR64, imm64hl16>;
+  def LLIHH : UnaryRI<"llihh", 0xA5C, bitconvert, GR64, imm64hh16>;
+
+  // 32-bit immediates.
+  def LGFI  : UnaryRIL<"lgfi",  0xC01, bitconvert, GR64, imm64sx32>;
+  def LLILF : UnaryRIL<"llilf", 0xC0F, bitconvert, GR64, imm64lf32>;
+  def LLIHF : UnaryRIL<"llihf", 0xC0E, bitconvert, GR64, imm64hf32>;
+}
+
+// Register loads.
+let canFoldAsLoad = 1, SimpleBDXLoad = 1 in {
+  defm L   : UnaryRXPair<"l", 0x58, 0xE358, load, GR32>;
+  def  LRL : UnaryRILPC<"lrl", 0xC4D, aligned_load, GR32>;
+
+  def LG   : UnaryRXY<"lg", 0xE304, load, GR64>;
+  def LGRL : UnaryRILPC<"lgrl", 0xC48, aligned_load, GR64>;
+
+  // L128 is a pseudo that is split after register allocation, so we
+  // don't want a custom inserter.
+  let Has20BitOffset = 1, HasIndex = 1, Is128Bit = 1 in {
+    def L128 : Pseudo<(outs GR128:$dst), (ins bdxaddr20only128:$src),
+                      [(set GR128:$dst, (load bdxaddr20only128:$src))]>;
+  }
+}
+
+// Register stores.
+let SimpleBDXStore = 1 in {
+  let isCodeGenOnly = 1 in {
+    defm ST32   : StoreRXPair<"st", 0x50, 0xE350, store, GR32>;
+    def  STRL32 : StoreRILPC<"strl", 0xC4F, aligned_store, GR32>;
+  }
+
+  def STG   : StoreRXY<"stg", 0xE324, store, GR64>;
+  def STGRL : StoreRILPC<"stgrl", 0xC4B, aligned_store, GR64>;
+
+  // ST128 is a pseudo that is split after register allocation, so we
+  // don't want a custom inserter.
+  let Has20BitOffset = 1, HasIndex = 1, Is128Bit = 1 in {
+    def ST128 : Pseudo<(outs), (ins GR128:$src, bdxaddr20only128:$dst),
+                       [(store GR128:$src, bdxaddr20only128:$dst)]>;
+  }
+}
+
+// 8-bit immediate stores to 8-bit fields.
+defm MVI : StoreSIPair<"mvi", 0x92, 0xEB52, truncstorei8, imm32zx8trunc>;
+
+// Sign-extended 16-bit immediate stores to 16-, 32- or 64-bit fields.
+def MVHHI : StoreSIL<"mvhhi", 0xE544, truncstorei16, imm32sx16trunc>;
+def MVHI  : StoreSIL<"mvhi",  0xE54C, store,         imm32sx16>;
+def MVGHI : StoreSIL<"mvghi", 0xE548, store,         imm64sx16>;
+
+//===----------------------------------------------------------------------===//
+// Sign extensions
+//===----------------------------------------------------------------------===//
+
+// 32-bit extensions from registers.
+let neverHasSideEffects = 1 in {
+  def LBR : UnaryRRE<"lbr", 0xB926, sext8,  GR32, GR32>;
+  def LHR : UnaryRRE<"lhr", 0xB927, sext16, GR32, GR32>;
+}
+
+// 64-bit extensions from registers.
+let neverHasSideEffects = 1 in {
+  def LGBR : UnaryRRE<"lgbr", 0xB906, sext8,  GR64, GR64>;
+  def LGHR : UnaryRRE<"lghr", 0xB907, sext16, GR64, GR64>;
+  def LGFR : UnaryRRE<"lgfr", 0xB914, sext32, GR64, GR32>;
+}
+
+// Match 32-to-64-bit sign extensions in which the source is already in a
+// 64-bit register, by sign-extending its 32-bit subregister with LGFR.
+def : Pat<(sext_inreg GR64:$src, i32),
+          (LGFR (EXTRACT_SUBREG GR64:$src, subreg_32bit))>;
+
+// 32-bit extensions from memory.
+def  LB   : UnaryRXY<"lb", 0xE376, sextloadi8, GR32>;
+defm LH   : UnaryRXPair<"lh", 0x48, 0xE378, sextloadi16, GR32>;
+def  LHRL : UnaryRILPC<"lhrl", 0xC45, aligned_sextloadi16, GR32>;
+
+// 64-bit extensions from memory.
+def LGB   : UnaryRXY<"lgb", 0xE377, sextloadi8,  GR64>;
+def LGH   : UnaryRXY<"lgh", 0xE315, sextloadi16, GR64>;
+def LGF   : UnaryRXY<"lgf", 0xE314, sextloadi32, GR64>;
+def LGHRL : UnaryRILPC<"lghrl", 0xC44, aligned_sextloadi16, GR64>;
+def LGFRL : UnaryRILPC<"lgfrl", 0xC4C, aligned_sextloadi32, GR64>;
+
+// If the sign of a load-extend operation doesn't matter, use the signed ones.
+// There's not really much to choose between the sign and zero extensions,
+// but LH is more compact than LLH for small offsets.
+def : Pat<(i32 (extloadi8  bdxaddr20only:$src)), (LB  bdxaddr20only:$src)>;
+def : Pat<(i32 (extloadi16 bdxaddr12pair:$src)), (LH  bdxaddr12pair:$src)>;
+def : Pat<(i32 (extloadi16 bdxaddr20pair:$src)), (LHY bdxaddr20pair:$src)>;
+
+def : Pat<(i64 (extloadi8  bdxaddr20only:$src)), (LGB bdxaddr20only:$src)>;
+def : Pat<(i64 (extloadi16 bdxaddr20only:$src)), (LGH bdxaddr20only:$src)>;
+def : Pat<(i64 (extloadi32 bdxaddr20only:$src)), (LGF bdxaddr20only:$src)>;
+
+//===----------------------------------------------------------------------===//
+// Zero extensions
+//===----------------------------------------------------------------------===//
+
+// 32-bit extensions from registers.
+let neverHasSideEffects = 1 in {
+  def LLCR : UnaryRRE<"llcr", 0xB994, zext8,  GR32, GR32>;
+  def LLHR : UnaryRRE<"llhr", 0xB995, zext16, GR32, GR32>;
+}
+
+// 64-bit extensions from registers.
+let neverHasSideEffects = 1 in {
+  def LLGCR : UnaryRRE<"llgcr", 0xB984, zext8,  GR64, GR64>;
+  def LLGHR : UnaryRRE<"llghr", 0xB985, zext16, GR64, GR64>;
+  def LLGFR : UnaryRRE<"llgfr", 0xB916, zext32, GR64, GR32>;
+}
+
+// Match 32-to-64-bit zero extensions in which the source is already in a
+// 64-bit register, by zero-extending its 32-bit subregister with LLGFR.
+def : Pat<(and GR64:$src, 0xffffffff),
+          (LLGFR (EXTRACT_SUBREG GR64:$src, subreg_32bit))>;
+
+// 32-bit extensions from memory.
+def LLC   : UnaryRXY<"llc", 0xE394, zextloadi8,  GR32>;
+def LLH   : UnaryRXY<"llh", 0xE395, zextloadi16, GR32>;
+def LLHRL : UnaryRILPC<"llhrl", 0xC42, aligned_zextloadi16, GR32>;
+
+// 64-bit extensions from memory.
+def LLGC   : UnaryRXY<"llgc", 0xE390, zextloadi8,  GR64>;
+def LLGH   : UnaryRXY<"llgh", 0xE391, zextloadi16, GR64>;
+def LLGF   : UnaryRXY<"llgf", 0xE316, zextloadi32, GR64>;
+def LLGHRL : UnaryRILPC<"llghrl", 0xC46, aligned_zextloadi16, GR64>;
+def LLGFRL : UnaryRILPC<"llgfrl", 0xC4E, aligned_zextloadi32, GR64>;
+
+//===----------------------------------------------------------------------===//
+// Truncations
+//===----------------------------------------------------------------------===//
+
+// Truncations of 64-bit registers to 32-bit registers (a subreg extract).
+def : Pat<(i32 (trunc GR64:$src)),
+          (EXTRACT_SUBREG GR64:$src, subreg_32bit)>;
+
+// Truncations of 32-bit registers to memory (codegen-only GR32 variants).
+let isCodeGenOnly = 1 in {
+  defm STC32   : StoreRXPair<"stc", 0x42, 0xE372, truncstorei8,  GR32>;
+  defm STH32   : StoreRXPair<"sth", 0x40, 0xE370, truncstorei16, GR32>;
+  def  STHRL32 : StoreRILPC<"sthrl", 0xC47, aligned_truncstorei16, GR32>;
+}
+
+// Truncations of 64-bit registers to memory.
+defm STC   : StoreRXPair<"stc", 0x42, 0xE372, truncstorei8,  GR64>;
+defm STH   : StoreRXPair<"sth", 0x40, 0xE370, truncstorei16, GR64>;
+def  STHRL : StoreRILPC<"sthrl", 0xC47, aligned_truncstorei16, GR64>;
+defm ST    : StoreRXPair<"st", 0x50, 0xE350, truncstorei32, GR64>;
+def  STRL  : StoreRILPC<"strl", 0xC4F, aligned_truncstorei32, GR64>;
+
+//===----------------------------------------------------------------------===//
+// Multi-register moves
+//===----------------------------------------------------------------------===//
+
+// Multi-register loads (64-bit registers only).
+def LMG : LoadMultipleRSY<"lmg", 0xEB04, GR64>;
+
+// Multi-register stores (64-bit registers only).
+def STMG : StoreMultipleRSY<"stmg", 0xEB24, GR64>;
+
+//===----------------------------------------------------------------------===//
+// Byte swaps
+//===----------------------------------------------------------------------===//
+
+// Byte-swapping register moves.
+let neverHasSideEffects = 1 in {
+  def LRVR  : UnaryRRE<"lrvr",  0xB91F, bswap, GR32, GR32>;
+  def LRVGR : UnaryRRE<"lrvgr", 0xB90F, bswap, GR64, GR64>;
+}
+
+// Byte-swapping loads.  loadu<bswap> presumably matches bswap of a load.
+def LRV  : UnaryRXY<"lrv",  0xE31E, loadu<bswap>, GR32>;
+def LRVG : UnaryRXY<"lrvg", 0xE30F, loadu<bswap>, GR64>;
+
+// Byte-swapping stores.  storeu<bswap> presumably matches a store of bswap.
+def STRV  : StoreRXY<"strv",  0xE33E, storeu<bswap>, GR32>;
+def STRVG : StoreRXY<"strvg", 0xE32F, storeu<bswap>, GR64>;
+
+//===----------------------------------------------------------------------===//
+// Load address instructions
+//===----------------------------------------------------------------------===//
+
+// Load BDX-style addresses: LA takes 12-bit and LAY 20-bit displacements.
+let neverHasSideEffects = 1, Function = "la" in {
+  let PairType = "12" in
+    def LA : InstRX<0x41, (outs GR64:$dst), (ins laaddr12pair:$src),
+                    "la\t$dst, $src",
+                    [(set GR64:$dst, laaddr12pair:$src)]>;
+  let PairType = "20" in
+    def LAY : InstRXY<0xE371, (outs GR64:$dst), (ins laaddr20pair:$src),
+                      "lay\t$dst, $src",
+                      [(set GR64:$dst, laaddr20pair:$src)]>;
+}
+
+// Load a PC-relative address.  There's no version of this instruction
+// with a 16-bit offset, so there's no relaxation.
+let neverHasSideEffects = 1 in {
+  def LARL : InstRIL<0xC00, (outs GR64:$dst), (ins pcrel32:$src),
+                     "larl\t$dst, $src",
+                     [(set GR64:$dst, pcrel32:$src)]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Negation
+//===----------------------------------------------------------------------===//
+
+let Defs = [PSW] in {
+  def LCR   : UnaryRR <"lcr",   0x13,   ineg,      GR32, GR32>;
+  def LCGR  : UnaryRRE<"lcgr",  0xB903, ineg,      GR64, GR64>;
+  def LCGFR : UnaryRRE<"lcgfr", 0xB913, null_frag, GR64, GR32>;
+}
+defm : SXU<ineg, LCGFR>;
+
+//===----------------------------------------------------------------------===//
+// Insertion
+//===----------------------------------------------------------------------===//
+
+let isCodeGenOnly = 1 in
+  defm IC32 : BinaryRXPair<"ic", 0x43, 0xE373, inserti8, GR32, zextloadi8>;
+defm IC : BinaryRXPair<"ic", 0x43, 0xE373, inserti8, GR64, zextloadi8>;
+
+defm : InsertMem<"inserti8", IC32,  GR32, zextloadi8, bdxaddr12pair>;
+defm : InsertMem<"inserti8", IC32Y, GR32, zextloadi8, bdxaddr20pair>;
+
+defm : InsertMem<"inserti8", IC,  GR64, zextloadi8, bdxaddr12pair>;
+defm : InsertMem<"inserti8", ICY, GR64, zextloadi8, bdxaddr20pair>;
+
+// Insertions of a 16-bit immediate, leaving other bits unaffected.
+// We don't have or_as_insert equivalents of these operations because
+// OI is available instead.
+let isCodeGenOnly = 1 in {
+  def IILL32 : BinaryRI<"iill", 0xA53, insertll, GR32, imm32ll16>;
+  def IILH32 : BinaryRI<"iilh", 0xA52, insertlh, GR32, imm32lh16>;
+}
+def IILL : BinaryRI<"iill", 0xA53, insertll, GR64, imm64ll16>;
+def IILH : BinaryRI<"iilh", 0xA52, insertlh, GR64, imm64lh16>;
+def IIHL : BinaryRI<"iihl", 0xA51, inserthl, GR64, imm64hl16>;
+def IIHH : BinaryRI<"iihh", 0xA50, inserthh, GR64, imm64hh16>;
+
+// Insertions of a 32-bit immediate, leaving other bits unaffected.  For
+// GR32s this is a general full-width move.  (We use IILF rather than
+// something like LLILF for 32-bit moves because IILF leaves the upper
+// 32 bits of the GR64 unchanged.)
+let isCodeGenOnly = 1 in {
+  def IILF32 : UnaryRIL<"iilf", 0xC09, bitconvert, GR32, uimm32>;
+}
+def IILF : BinaryRIL<"iilf", 0xC09, insertlf, GR64, imm64lf32>;
+def IIHF : BinaryRIL<"iihf", 0xC08, inserthf, GR64, imm64hf32>;
+
+// An alternative model of inserthf, with the first operand being
+// a zero-extended value.
+def : Pat<(or (zext32 GR32:$src), imm64hf32:$imm),
+          (IIHF (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_32bit),
+                imm64hf32:$imm)>;
+
+//===----------------------------------------------------------------------===//
+// Addition
+//===----------------------------------------------------------------------===//
+
+// Plain addition.
+let Defs = [PSW] in {
+  // Addition of a register.
+  let isCommutable = 1 in {
+    def AR  : BinaryRR <"ar",  0x1A,   add, GR32, GR32>;
+    def AGR : BinaryRRE<"agr", 0xB908, add, GR64, GR64>;
+  }
+  def AGFR : BinaryRRE<"agfr", 0xB918, null_frag, GR64, GR32>;
+
+  // Addition of signed 16-bit immediates.
+  def AHI  : BinaryRI<"ahi",  0xA7A, add, GR32, imm32sx16>;
+  def AGHI : BinaryRI<"aghi", 0xA7B, add, GR64, imm64sx16>;
+
+  // Addition of signed 32-bit immediates.
+  def AFI  : BinaryRIL<"afi",  0xC29, add, GR32, simm32>;
+  def AGFI : BinaryRIL<"agfi", 0xC28, add, GR64, imm64sx32>;
+
+  // Addition of memory.
+  defm AH  : BinaryRXPair<"ah", 0x4A, 0xE37A, add, GR32, sextloadi16>;
+  defm A   : BinaryRXPair<"a",  0x5A, 0xE35A, add, GR32, load>;
+  def  AGF : BinaryRXY<"agf", 0xE318, add, GR64, sextloadi32>;
+  def  AG  : BinaryRXY<"ag",  0xE308, add, GR64, load>;
+
+  // Addition of a signed 8-bit immediate to memory.
+  def ASI  : BinarySIY<"asi",  0xEB6A, add, imm32sx8>;
+  def AGSI : BinarySIY<"agsi", 0xEB7A, add, imm64sx8>;
+}
+defm : SXB<add, GR64, AGFR>;
+
+// Addition producing a carry.
+let Defs = [PSW] in {
+  // Addition of a register.
+  let isCommutable = 1 in {
+    def ALR  : BinaryRR <"alr",  0x1E,   addc, GR32, GR32>;
+    def ALGR : BinaryRRE<"algr", 0xB90A, addc, GR64, GR64>;
+  }
+  def ALGFR : BinaryRRE<"algfr", 0xB91A, null_frag, GR64, GR32>;
+
+  // Addition of unsigned 32-bit immediates.
+  def ALFI  : BinaryRIL<"alfi",  0xC2B, addc, GR32, uimm32>;
+  def ALGFI : BinaryRIL<"algfi", 0xC2A, addc, GR64, imm64zx32>;
+
+  // Addition of memory.
+  defm AL   : BinaryRXPair<"al", 0x5E, 0xE35E, addc, GR32, load>;
+  def  ALGF : BinaryRXY<"algf", 0xE31A, addc, GR64, zextloadi32>;
+  def  ALG  : BinaryRXY<"alg",  0xE30A, addc, GR64, load>;
+}
+defm : ZXB<addc, GR64, ALGFR>;
+
+// Addition producing and using a carry.
+let Defs = [PSW], Uses = [PSW] in {
+  // Addition of a register.
+  def ALCR  : BinaryRRE<"alcr",  0xB998, adde, GR32, GR32>;
+  def ALCGR : BinaryRRE<"alcgr", 0xB988, adde, GR64, GR64>;
+
+  // Addition of memory.
+  def ALC  : BinaryRXY<"alc",  0xE398, adde, GR32, load>;
+  def ALCG : BinaryRXY<"alcg", 0xE388, adde, GR64, load>;
+}
+
+//===----------------------------------------------------------------------===//
+// Subtraction
+//===----------------------------------------------------------------------===//
+
+// Plain subtraction.  Although immediate forms exist, we use the
+// add-immediate instruction instead.
+let Defs = [PSW] in {
+  // Subtraction of a register.
+  def SR   : BinaryRR <"sr",   0x1B,   sub,       GR32, GR32>;
+  def SGFR : BinaryRRE<"sgfr", 0xB919, null_frag, GR64, GR32>;
+  def SGR  : BinaryRRE<"sgr",  0xB909, sub,       GR64, GR64>;
+
+  // Subtraction of memory.
+  defm S   : BinaryRXPair<"s", 0x5B, 0xE35B, sub, GR32, load>;
+  def  SGF : BinaryRXY<"sgf", 0xE319, sub, GR64, sextloadi32>;
+  def  SG  : BinaryRXY<"sg",  0xE309, sub, GR64, load>;
+}
+defm : SXB<sub, GR64, SGFR>;
+
+// Subtraction producing a carry.
+let Defs = [PSW] in {
+  // Subtraction of a register.
+  def SLR   : BinaryRR <"slr",   0x1F,   subc,      GR32, GR32>;
+  def SLGFR : BinaryRRE<"slgfr", 0xB91B, null_frag, GR64, GR32>;
+  def SLGR  : BinaryRRE<"slgr",  0xB90B, subc,      GR64, GR64>;
+
+  // Subtraction of unsigned 32-bit immediates.  These don't match
+  // subc because we prefer addc for constants.
+  def SLFI  : BinaryRIL<"slfi",  0xC25, null_frag, GR32, uimm32>;
+  def SLGFI : BinaryRIL<"slgfi", 0xC24, null_frag, GR64, imm64zx32>;
+
+  // Subtraction of memory.
+  defm SL   : BinaryRXPair<"sl", 0x5F, 0xE35F, subc, GR32, load>;
+  def  SLGF : BinaryRXY<"slgf", 0xE31B, subc, GR64, zextloadi32>;
+  def  SLG  : BinaryRXY<"slg",  0xE30B, subc, GR64, load>;
+}
+defm : ZXB<subc, GR64, SLGFR>;
+
+// Subtraction producing and using a carry.
+let Defs = [PSW], Uses = [PSW] in {
+  // Subtraction of a register.  NOTE: SLGBR's mnemonic is "slbgr".
+  def SLBR  : BinaryRRE<"slbr",  0xB999, sube, GR32, GR32>;
+  def SLGBR : BinaryRRE<"slbgr", 0xB989, sube, GR64, GR64>;
+
+  // Subtraction of memory.
+  def SLB  : BinaryRXY<"slb",  0xE399, sube, GR32, load>;
+  def SLBG : BinaryRXY<"slbg", 0xE389, sube, GR64, load>;
+}
+
+//===----------------------------------------------------------------------===//
+// AND
+//===----------------------------------------------------------------------===//
+
+let Defs = [PSW] in {
+  // ANDs of a register.
+  let isCommutable = 1 in {
+    def NR  : BinaryRR <"nr",  0x14,   and, GR32, GR32>;
+    def NGR : BinaryRRE<"ngr", 0xB980, and, GR64, GR64>;
+  }
+
+  // ANDs of a 16-bit immediate, leaving other bits unaffected.
+  let isCodeGenOnly = 1 in {
+    def NILL32 : BinaryRI<"nill", 0xA57, and, GR32, imm32ll16c>;
+    def NILH32 : BinaryRI<"nilh", 0xA56, and, GR32, imm32lh16c>;
+  }
+  def NILL : BinaryRI<"nill", 0xA57, and, GR64, imm64ll16c>;
+  def NILH : BinaryRI<"nilh", 0xA56, and, GR64, imm64lh16c>;
+  def NIHL : BinaryRI<"nihl", 0xA55, and, GR64, imm64hl16c>;
+  def NIHH : BinaryRI<"nihh", 0xA54, and, GR64, imm64hh16c>;
+
+  // ANDs of a 32-bit immediate, leaving other bits unaffected.
+  let isCodeGenOnly = 1 in
+    def NILF32 : BinaryRIL<"nilf", 0xC0B, and, GR32, uimm32>;
+  def NILF : BinaryRIL<"nilf", 0xC0B, and, GR64, imm64lf32c>;
+  def NIHF : BinaryRIL<"nihf", 0xC0A, and, GR64, imm64hf32c>;
+
+  // ANDs of memory.
+  defm N  : BinaryRXPair<"n", 0x54, 0xE354, and, GR32, load>;
+  def  NG : BinaryRXY<"ng", 0xE380, and, GR64, load>;
+
+  // AND to memory (null_frag here; see the RMWIByte defms below).
+  defm NI : BinarySIPair<"ni", 0x94, 0xEB54, null_frag, uimm8>;
+}
+defm : RMWIByte<and, bdaddr12pair, NI>;
+defm : RMWIByte<and, bdaddr20pair, NIY>;
+
+//===----------------------------------------------------------------------===//
+// OR
+//===----------------------------------------------------------------------===//
+
+let Defs = [PSW] in {
+  // ORs of a register.
+  let isCommutable = 1 in {
+    def OR  : BinaryRR <"or",  0x16,   or, GR32, GR32>;
+    def OGR : BinaryRRE<"ogr", 0xB981, or, GR64, GR64>;
+  }
+
+  // ORs of a 16-bit immediate, leaving other bits unaffected.
+  let isCodeGenOnly = 1 in {
+    def OILL32 : BinaryRI<"oill", 0xA5B, or, GR32, imm32ll16>;
+    def OILH32 : BinaryRI<"oilh", 0xA5A, or, GR32, imm32lh16>;
+  }
+  def OILL : BinaryRI<"oill", 0xA5B, or, GR64, imm64ll16>;
+  def OILH : BinaryRI<"oilh", 0xA5A, or, GR64, imm64lh16>;
+  def OIHL : BinaryRI<"oihl", 0xA59, or, GR64, imm64hl16>;
+  def OIHH : BinaryRI<"oihh", 0xA58, or, GR64, imm64hh16>;
+
+  // ORs of a 32-bit immediate, leaving other bits unaffected.
+  let isCodeGenOnly = 1 in
+    def OILF32 : BinaryRIL<"oilf", 0xC0D, or, GR32, uimm32>;
+  def OILF : BinaryRIL<"oilf", 0xC0D, or, GR64, imm64lf32>;
+  def OIHF : BinaryRIL<"oihf", 0xC0C, or, GR64, imm64hf32>;
+
+  // ORs of memory.
+  defm O  : BinaryRXPair<"o", 0x56, 0xE356, or, GR32, load>;
+  def  OG : BinaryRXY<"og", 0xE381, or, GR64, load>;
+
+  // OR to memory (null_frag here; see the RMWIByte defms below).
+  defm OI : BinarySIPair<"oi", 0x96, 0xEB56, null_frag, uimm8>;
+}
+defm : RMWIByte<or, bdaddr12pair, OI>;
+defm : RMWIByte<or, bdaddr20pair, OIY>;
+
+//===----------------------------------------------------------------------===//
+// XOR
+//===----------------------------------------------------------------------===//
+
+let Defs = [PSW] in {
+  // XORs of a register.
+  let isCommutable = 1 in {
+    def XR  : BinaryRR <"xr",  0x17,   xor, GR32, GR32>;
+    def XGR : BinaryRRE<"xgr", 0xB982, xor, GR64, GR64>;
+  }
+
+  // XORs of a 32-bit immediate, leaving other bits unaffected.
+  let isCodeGenOnly = 1 in
+    def XILF32 : BinaryRIL<"xilf", 0xC07, xor, GR32, uimm32>;
+  def XILF : BinaryRIL<"xilf", 0xC07, xor, GR64, imm64lf32>;
+  def XIHF : BinaryRIL<"xihf", 0xC06, xor, GR64, imm64hf32>;
+
+  // XORs of memory.
+  defm X  : BinaryRXPair<"x",0x57, 0xE357, xor, GR32, load>;
+  def  XG : BinaryRXY<"xg", 0xE382, xor, GR64, load>;
+
+  // XOR to memory (null_frag here; see the RMWIByte defms below).
+  defm XI : BinarySIPair<"xi", 0x97, 0xEB57, null_frag, uimm8>;
+}
+defm : RMWIByte<xor, bdaddr12pair, XI>;
+defm : RMWIByte<xor, bdaddr20pair, XIY>;
+
+//===----------------------------------------------------------------------===//
+// Multiplication
+//===----------------------------------------------------------------------===//
+
+// Multiplication of a register.
+let isCommutable = 1 in {
+  def MSR  : BinaryRRE<"msr",  0xB252, mul, GR32, GR32>;
+  def MSGR : BinaryRRE<"msgr", 0xB90C, mul, GR64, GR64>;
+}
+def MSGFR : BinaryRRE<"msgfr", 0xB91C, null_frag, GR64, GR32>;
+defm : SXB<mul, GR64, MSGFR>;
+
+// Multiplication of a signed 16-bit immediate.
+def MHI  : BinaryRI<"mhi",  0xA7C, mul, GR32, imm32sx16>;
+def MGHI : BinaryRI<"mghi", 0xA7D, mul, GR64, imm64sx16>;
+
+// Multiplication of a signed 32-bit immediate.
+def MSFI  : BinaryRIL<"msfi",  0xC21, mul, GR32, simm32>;
+def MSGFI : BinaryRIL<"msgfi", 0xC20, mul, GR64, imm64sx32>;
+
+// Multiplication of memory.
+defm MH   : BinaryRXPair<"mh", 0x4C, 0xE37C, mul, GR32, sextloadi16>;
+defm MS   : BinaryRXPair<"ms", 0x71, 0xE351, mul, GR32, load>;
+def  MSGF : BinaryRXY<"msgf", 0xE31C, mul, GR64, sextloadi32>;
+def  MSG  : BinaryRXY<"msg",  0xE30C, mul, GR64, load>;
+
+// Multiplication of a register, producing two results in a GR128.
+def MLGR : BinaryRRE<"mlgr", 0xB986, z_umul_lohi64, GR128, GR64>;
+
+// Multiplication of memory, producing two results in a GR128.
+def MLG : BinaryRXY<"mlg", 0xE386, z_umul_lohi64, GR128, load>;
+
+//===----------------------------------------------------------------------===//
+// Division and remainder
+//===----------------------------------------------------------------------===//
+
+// Division and remainder, from registers.  DSGFR is pattern-less (null_frag).
+def DSGFR : BinaryRRE<"dsgfr", 0xB91D, null_frag,   GR128, GR32>;
+def DSGR  : BinaryRRE<"dsgr",  0xB90D, z_sdivrem64, GR128, GR64>;
+def DLR   : BinaryRRE<"dlr",   0xB997, z_udivrem32, GR128, GR32>;
+def DLGR  : BinaryRRE<"dlgr",  0xB987, z_udivrem64, GR128, GR64>;
+defm : SXB<z_sdivrem64, GR128, DSGFR>;
+
+// Division and remainder, from memory (plain or sign-extending loads).
+def DSGF : BinaryRXY<"dsgf", 0xE31D, z_sdivrem64, GR128, sextloadi32>;
+def DSG  : BinaryRXY<"dsg",  0xE30D, z_sdivrem64, GR128, load>;
+def DL   : BinaryRXY<"dl",   0xE397, z_udivrem32, GR128, load>;
+def DLG  : BinaryRXY<"dlg",  0xE387, z_udivrem64, GR128, load>;
+
+//===----------------------------------------------------------------------===//
+// Shifts
+//===----------------------------------------------------------------------===//
+
+// Shift left.
+let neverHasSideEffects = 1 in {
+  def SLL  : ShiftRS <"sll",  0x89,   shl, GR32, shift12only>;
+  def SLLG : ShiftRSY<"sllg", 0xEB0D, shl, GR64, shift20only>;
+}
+
+// Logical shift right.
+let neverHasSideEffects = 1 in {
+  def SRL  : ShiftRS <"srl",  0x88,   srl, GR32, shift12only>;
+  def SRLG : ShiftRSY<"srlg", 0xEB0C, srl, GR64, shift20only>;
+}
+
+// Arithmetic shift right.  Unlike the shifts above, these list PSW in Defs.
+let Defs = [PSW] in {
+  def SRA  : ShiftRS <"sra",  0x8A,   sra, GR32, shift12only>;
+  def SRAG : ShiftRSY<"srag", 0xEB0A, sra, GR64, shift20only>;
+}
+
+// Rotate left.
+let neverHasSideEffects = 1 in {
+  def RLL  : ShiftRSY<"rll",  0xEB1D, rotl, GR32, shift20only>;
+  def RLLG : ShiftRSY<"rllg", 0xEB1C, rotl, GR64, shift20only>;
+}
+
+// Rotate second operand left and insert selected bits into first operand.
+// These can act like 32-bit operands provided that the constant start and
+// end bits (operands 2 and 3) are in the range [32, 64).
+let Defs = [PSW] in {
+  let isCodeGenOnly = 1 in
+    def RISBG32 : RotateSelectRIEf<"risbg",  0xEC55, GR32, GR32>;
+  def RISBG : RotateSelectRIEf<"risbg",  0xEC55, GR64, GR64>;
+}
+
+//===----------------------------------------------------------------------===//
+// Comparison
+//===----------------------------------------------------------------------===//
+
+// Signed comparisons.
+let Defs = [PSW] in {
+  // Comparison with a register.  CGFR itself is pattern-less (null_frag).
+  def CR   : CompareRR <"cr",   0x19,   z_cmp,     GR32, GR32>;
+  def CGFR : CompareRRE<"cgfr", 0xB930, null_frag, GR64, GR32>;
+  def CGR  : CompareRRE<"cgr",  0xB920, z_cmp,     GR64, GR64>;
+
+  // Comparison with a signed 16-bit immediate.
+  def CHI  : CompareRI<"chi",  0xA7E, z_cmp, GR32, imm32sx16>;
+  def CGHI : CompareRI<"cghi", 0xA7F, z_cmp, GR64, imm64sx16>;
+
+  // Comparison with a signed 32-bit immediate.
+  def CFI  : CompareRIL<"cfi",  0xC2D, z_cmp, GR32, simm32>;
+  def CGFI : CompareRIL<"cgfi", 0xC2C, z_cmp, GR64, imm64sx32>;
+
+  // Comparison with memory.
+  defm CH    : CompareRXPair<"ch", 0x49, 0xE379, z_cmp, GR32, sextloadi16>;
+  defm C     : CompareRXPair<"c",  0x59, 0xE359, z_cmp, GR32, load>;
+  def  CGH   : CompareRXY<"cgh", 0xE334, z_cmp, GR64, sextloadi16>;
+  def  CGF   : CompareRXY<"cgf", 0xE330, z_cmp, GR64, sextloadi32>;
+  def  CG    : CompareRXY<"cg",  0xE320, z_cmp, GR64, load>;
+  def  CHRL  : CompareRILPC<"chrl",  0xC65, z_cmp, GR32, aligned_sextloadi16>;
+  def  CRL   : CompareRILPC<"crl",   0xC6D, z_cmp, GR32, aligned_load>;
+  def  CGHRL : CompareRILPC<"cghrl", 0xC64, z_cmp, GR64, aligned_sextloadi16>;
+  def  CGFRL : CompareRILPC<"cgfrl", 0xC6C, z_cmp, GR64, aligned_sextloadi32>;
+  def  CGRL  : CompareRILPC<"cgrl",  0xC68, z_cmp, GR64, aligned_load>;
+
+  // Comparison between memory and a signed 16-bit immediate.
+  def CHHSI : CompareSIL<"chhsi", 0xE554, z_cmp, sextloadi16, imm32sx16>;
+  def CHSI  : CompareSIL<"chsi",  0xE55C, z_cmp, load,        imm32sx16>;
+  def CGHSI : CompareSIL<"cghsi", 0xE558, z_cmp, load,        imm64sx16>;
+}
+defm : SXB<z_cmp, GR64, CGFR>;
+
+// Unsigned comparisons.
+let Defs = [PSW] in {
+  // Comparison with a register.  CLGFR itself is pattern-less (null_frag).
+  def CLR   : CompareRR <"clr",   0x15,   z_ucmp,    GR32, GR32>;
+  def CLGFR : CompareRRE<"clgfr", 0xB931, null_frag, GR64, GR32>;
+  def CLGR  : CompareRRE<"clgr",  0xB921, z_ucmp,    GR64, GR64>;
+
+  // Comparison with an unsigned 32-bit immediate.
+  def CLFI  : CompareRIL<"clfi",  0xC2F, z_ucmp, GR32, uimm32>;
+  def CLGFI : CompareRIL<"clgfi", 0xC2E, z_ucmp, GR64, imm64zx32>;
+
+  // Comparison with memory.
+  defm CL     : CompareRXPair<"cl", 0x55, 0xE355, z_ucmp, GR32, load>;
+  def  CLGF   : CompareRXY<"clgf", 0xE331, z_ucmp, GR64, zextloadi32>;
+  def  CLG    : CompareRXY<"clg",  0xE321, z_ucmp, GR64, load>;
+  def  CLHRL  : CompareRILPC<"clhrl",  0xC67, z_ucmp, GR32,
+                             aligned_zextloadi16>;
+  def  CLRL   : CompareRILPC<"clrl",   0xC6F, z_ucmp, GR32,
+                             aligned_load>;
+  def  CLGHRL : CompareRILPC<"clghrl", 0xC66, z_ucmp, GR64,
+                             aligned_zextloadi16>;
+  def  CLGFRL : CompareRILPC<"clgfrl", 0xC6E, z_ucmp, GR64,
+                             aligned_zextloadi32>;
+  def  CLGRL  : CompareRILPC<"clgrl",  0xC6A, z_ucmp, GR64,
+                             aligned_load>;
+
+  // Comparison between memory and an unsigned 8-bit immediate.
+  defm CLI : CompareSIPair<"cli", 0x95, 0xEB55, z_ucmp, zextloadi8, imm32zx8>;
+
+  // Comparison between memory and an unsigned 16-bit immediate.
+  def CLHHSI : CompareSIL<"clhhsi", 0xE555, z_ucmp, zextloadi16, imm32zx16>;
+  def CLFHSI : CompareSIL<"clfhsi", 0xE55D, z_ucmp, load,        imm32zx16>;
+  def CLGHSI : CompareSIL<"clghsi", 0xE559, z_ucmp, load,        imm64zx16>;
+}
+defm : ZXB<z_ucmp, GR64, CLGFR>;
+
+//===----------------------------------------------------------------------===//
+// Atomic operations
+//===----------------------------------------------------------------------===//
+// Atomic swap.
+def ATOMIC_SWAPW        : AtomicLoadWBinaryReg<z_atomic_swapw>;
+def ATOMIC_SWAP_32      : AtomicLoadBinaryReg32<atomic_swap_32>;
+def ATOMIC_SWAP_64      : AtomicLoadBinaryReg64<atomic_swap_64>;
+// Atomic add of a register or immediate.
+def ATOMIC_LOADW_AR     : AtomicLoadWBinaryReg<z_atomic_loadw_add>;
+def ATOMIC_LOADW_AFI    : AtomicLoadWBinaryImm<z_atomic_loadw_add, simm32>;
+def ATOMIC_LOAD_AR      : AtomicLoadBinaryReg32<atomic_load_add_32>;
+def ATOMIC_LOAD_AHI     : AtomicLoadBinaryImm32<atomic_load_add_32, imm32sx16>;
+def ATOMIC_LOAD_AFI     : AtomicLoadBinaryImm32<atomic_load_add_32, simm32>;
+def ATOMIC_LOAD_AGR     : AtomicLoadBinaryReg64<atomic_load_add_64>;
+def ATOMIC_LOAD_AGHI    : AtomicLoadBinaryImm64<atomic_load_add_64, imm64sx16>;
+def ATOMIC_LOAD_AGFI    : AtomicLoadBinaryImm64<atomic_load_add_64, imm64sx32>;
+// Atomic subtract of a register.
+def ATOMIC_LOADW_SR     : AtomicLoadWBinaryReg<z_atomic_loadw_sub>;
+def ATOMIC_LOAD_SR      : AtomicLoadBinaryReg32<atomic_load_sub_32>;
+def ATOMIC_LOAD_SGR     : AtomicLoadBinaryReg64<atomic_load_sub_64>;
+// Atomic AND of a register or immediate.
+def ATOMIC_LOADW_NR     : AtomicLoadWBinaryReg<z_atomic_loadw_and>;
+def ATOMIC_LOADW_NILH   : AtomicLoadWBinaryImm<z_atomic_loadw_and, imm32lh16c>;
+def ATOMIC_LOAD_NR      : AtomicLoadBinaryReg32<atomic_load_and_32>;
+def ATOMIC_LOAD_NILL32  : AtomicLoadBinaryImm32<atomic_load_and_32, imm32ll16c>;
+def ATOMIC_LOAD_NILH32  : AtomicLoadBinaryImm32<atomic_load_and_32, imm32lh16c>;
+def ATOMIC_LOAD_NILF32  : AtomicLoadBinaryImm32<atomic_load_and_32, uimm32>;
+def ATOMIC_LOAD_NGR     : AtomicLoadBinaryReg64<atomic_load_and_64>;
+def ATOMIC_LOAD_NILL    : AtomicLoadBinaryImm64<atomic_load_and_64, imm64ll16c>;
+def ATOMIC_LOAD_NILH    : AtomicLoadBinaryImm64<atomic_load_and_64, imm64lh16c>;
+def ATOMIC_LOAD_NIHL    : AtomicLoadBinaryImm64<atomic_load_and_64, imm64hl16c>;
+def ATOMIC_LOAD_NIHH    : AtomicLoadBinaryImm64<atomic_load_and_64, imm64hh16c>;
+def ATOMIC_LOAD_NILF    : AtomicLoadBinaryImm64<atomic_load_and_64, imm64lf32c>;
+def ATOMIC_LOAD_NIHF    : AtomicLoadBinaryImm64<atomic_load_and_64, imm64hf32c>;
+// Atomic OR of a register or immediate.
+def ATOMIC_LOADW_OR     : AtomicLoadWBinaryReg<z_atomic_loadw_or>;
+def ATOMIC_LOADW_OILH   : AtomicLoadWBinaryImm<z_atomic_loadw_or, imm32lh16>;
+def ATOMIC_LOAD_OR      : AtomicLoadBinaryReg32<atomic_load_or_32>;
+def ATOMIC_LOAD_OILL32  : AtomicLoadBinaryImm32<atomic_load_or_32, imm32ll16>;
+def ATOMIC_LOAD_OILH32  : AtomicLoadBinaryImm32<atomic_load_or_32, imm32lh16>;
+def ATOMIC_LOAD_OILF32  : AtomicLoadBinaryImm32<atomic_load_or_32, uimm32>;
+def ATOMIC_LOAD_OGR     : AtomicLoadBinaryReg64<atomic_load_or_64>;
+def ATOMIC_LOAD_OILL    : AtomicLoadBinaryImm64<atomic_load_or_64, imm64ll16>;
+def ATOMIC_LOAD_OILH    : AtomicLoadBinaryImm64<atomic_load_or_64, imm64lh16>;
+def ATOMIC_LOAD_OIHL    : AtomicLoadBinaryImm64<atomic_load_or_64, imm64hl16>;
+def ATOMIC_LOAD_OIHH    : AtomicLoadBinaryImm64<atomic_load_or_64, imm64hh16>;
+def ATOMIC_LOAD_OILF    : AtomicLoadBinaryImm64<atomic_load_or_64, imm64lf32>;
+def ATOMIC_LOAD_OIHF    : AtomicLoadBinaryImm64<atomic_load_or_64, imm64hf32>;
+// Atomic XOR of a register or immediate.
+def ATOMIC_LOADW_XR     : AtomicLoadWBinaryReg<z_atomic_loadw_xor>;
+def ATOMIC_LOADW_XILF   : AtomicLoadWBinaryImm<z_atomic_loadw_xor, uimm32>;
+def ATOMIC_LOAD_XR      : AtomicLoadBinaryReg32<atomic_load_xor_32>;
+def ATOMIC_LOAD_XILF32  : AtomicLoadBinaryImm32<atomic_load_xor_32, uimm32>;
+def ATOMIC_LOAD_XGR     : AtomicLoadBinaryReg64<atomic_load_xor_64>;
+def ATOMIC_LOAD_XILF    : AtomicLoadBinaryImm64<atomic_load_xor_64, imm64lf32>;
+def ATOMIC_LOAD_XIHF    : AtomicLoadBinaryImm64<atomic_load_xor_64, imm64hf32>;
+// Atomic NAND of a register or immediate.
+def ATOMIC_LOADW_NRi    : AtomicLoadWBinaryReg<z_atomic_loadw_nand>;
+def ATOMIC_LOADW_NILHi  : AtomicLoadWBinaryImm<z_atomic_loadw_nand,
+                                               imm32lh16c>;
+def ATOMIC_LOAD_NRi     : AtomicLoadBinaryReg32<atomic_load_nand_32>;
+def ATOMIC_LOAD_NILL32i : AtomicLoadBinaryImm32<atomic_load_nand_32,
+                                                imm32ll16c>;
+def ATOMIC_LOAD_NILH32i : AtomicLoadBinaryImm32<atomic_load_nand_32,
+                                                imm32lh16c>;
+def ATOMIC_LOAD_NILF32i : AtomicLoadBinaryImm32<atomic_load_nand_32, uimm32>;
+def ATOMIC_LOAD_NGRi    : AtomicLoadBinaryReg64<atomic_load_nand_64>;
+def ATOMIC_LOAD_NILLi   : AtomicLoadBinaryImm64<atomic_load_nand_64,
+                                                imm64ll16c>;
+def ATOMIC_LOAD_NILHi   : AtomicLoadBinaryImm64<atomic_load_nand_64,
+                                                imm64lh16c>;
+def ATOMIC_LOAD_NIHLi   : AtomicLoadBinaryImm64<atomic_load_nand_64,
+                                                imm64hl16c>;
+def ATOMIC_LOAD_NIHHi   : AtomicLoadBinaryImm64<atomic_load_nand_64,
+                                                imm64hh16c>;
+def ATOMIC_LOAD_NILFi   : AtomicLoadBinaryImm64<atomic_load_nand_64,
+                                                imm64lf32c>;
+def ATOMIC_LOAD_NIHFi   : AtomicLoadBinaryImm64<atomic_load_nand_64,
+                                                imm64hf32c>;
+// Atomic minimum (atomic_load_min_* and its W form).
+def ATOMIC_LOADW_MIN    : AtomicLoadWBinaryReg<z_atomic_loadw_min>;
+def ATOMIC_LOAD_MIN_32  : AtomicLoadBinaryReg32<atomic_load_min_32>;
+def ATOMIC_LOAD_MIN_64  : AtomicLoadBinaryReg64<atomic_load_min_64>;
+// Atomic maximum (atomic_load_max_* and its W form).
+def ATOMIC_LOADW_MAX    : AtomicLoadWBinaryReg<z_atomic_loadw_max>;
+def ATOMIC_LOAD_MAX_32  : AtomicLoadBinaryReg32<atomic_load_max_32>;
+def ATOMIC_LOAD_MAX_64  : AtomicLoadBinaryReg64<atomic_load_max_64>;
+// Atomic unsigned minimum (atomic_load_umin_* and its W form).
+def ATOMIC_LOADW_UMIN   : AtomicLoadWBinaryReg<z_atomic_loadw_umin>;
+def ATOMIC_LOAD_UMIN_32 : AtomicLoadBinaryReg32<atomic_load_umin_32>;
+def ATOMIC_LOAD_UMIN_64 : AtomicLoadBinaryReg64<atomic_load_umin_64>;
+// Atomic unsigned maximum (atomic_load_umax_* and its W form).
+def ATOMIC_LOADW_UMAX   : AtomicLoadWBinaryReg<z_atomic_loadw_umax>;
+def ATOMIC_LOAD_UMAX_32 : AtomicLoadBinaryReg32<atomic_load_umax_32>;
+def ATOMIC_LOAD_UMAX_64 : AtomicLoadBinaryReg64<atomic_load_umax_64>;
+
+def ATOMIC_CMP_SWAPW
+  : Pseudo<(outs GR32:$dst), (ins bdaddr20only:$addr, GR32:$cmp, GR32:$swap,
+                                  ADDR32:$bitshift, ADDR32:$negbitshift,
+                                  uimm32:$bitsize),
+           [(set GR32:$dst,
+                 (z_atomic_cmp_swapw bdaddr20only:$addr, GR32:$cmp, GR32:$swap,
+                                     ADDR32:$bitshift, ADDR32:$negbitshift,
+                                     uimm32:$bitsize))]> {
+  let Defs = [PSW];
+  let mayLoad = 1;             // Flagged as both loading from
+  let mayStore = 1;            // and storing to memory.
+  let usesCustomInserter = 1;  // Pseudo: expanded by a custom inserter.
+}
+
+let Defs = [PSW] in {  // Real 32-bit and 64-bit compare-and-swap instructions.
+  defm CS  : CmpSwapRSPair<"cs", 0xBA, 0xEB14, atomic_cmp_swap_32, GR32>;
+  def  CSG : CmpSwapRSY<"csg", 0xEB30, atomic_cmp_swap_64, GR64>;
+}
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions.
+//===----------------------------------------------------------------------===//
+
+// Read a 32-bit access register into a GR32.  As with all GR32 operations,
+// the upper 32 bits of the enclosing GR64 remain unchanged, which is useful
+// when a 64-bit address is stored in a pair of access registers.
+def EAR : InstRRE<0xB24F, (outs GR32:$dst), (ins access_reg:$src),
+                  "ear\t$dst, $src",
+                  [(set GR32:$dst, (z_extract_access access_reg:$src))]>;
+
+// Find leftmost one, AKA count leading zeros.  The instruction actually
+// returns a pair of GR64s, the first giving the number of leading zeros
+// and the second giving a copy of the source with the leftmost one bit
+// cleared.  We only use the first result here, taken via subreg_high.
+let Defs = [PSW] in {
+  def FLOGR : UnaryRRE<"flogr", 0xB983, null_frag, GR128, GR64>;
+}
+def : Pat<(ctlz GR64:$src),
+          (EXTRACT_SUBREG (FLOGR GR64:$src), subreg_high)>;
+
+// Use subregs to populate the "don't care" bits in a 32-bit to 64-bit anyext.
+def : Pat<(i64 (anyext GR32:$src)),
+          (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_32bit)>;
+
+// There are no 32-bit equivalents of LLILL and LLILH, so use a full
+// 64-bit move followed by a subreg.  This preserves the invariant that
+// all GR32 operations only modify the low 32 bits.
+def : Pat<(i32 imm32ll16:$src),
+          (EXTRACT_SUBREG (LLILL (LL16 imm:$src)), subreg_32bit)>;
+def : Pat<(i32 imm32lh16:$src),
+          (EXTRACT_SUBREG (LLILH (LH16 imm:$src)), subreg_32bit)>;
+
+// Extend GR32s and GR64s to GR128s, via pattern-less custom-inserter pseudos.
+let usesCustomInserter = 1 in {
+  def AEXT128_64 : Pseudo<(outs GR128:$dst), (ins GR64:$src), []>;
+  def ZEXT128_32 : Pseudo<(outs GR128:$dst), (ins GR32:$src), []>;
+  def ZEXT128_64 : Pseudo<(outs GR128:$dst), (ins GR64:$src), []>;
+}
+
+//===----------------------------------------------------------------------===//
+// Peepholes.
+//===----------------------------------------------------------------------===//
+
+// Use AL* for GR64 additions of unsigned 32-bit values.
+defm : ZXB<add, GR64, ALGFR>;
+def  : Pat<(add GR64:$src1, imm64zx32:$src2),
+           (ALGFI GR64:$src1, imm64zx32:$src2)>;
+def  : Pat<(add GR64:$src1, (zextloadi32 bdxaddr20only:$addr)),
+           (ALGF GR64:$src1, bdxaddr20only:$addr)>;
+
+// Use SL* for GR64 subtractions (or imm64zx32n adds) of unsigned 32-bit values.
+defm : ZXB<sub, GR64, SLGFR>;
+def  : Pat<(add GR64:$src1, imm64zx32n:$src2),
+           (SLGFI GR64:$src1, imm64zx32n:$src2)>;
+def  : Pat<(sub GR64:$src1, (zextloadi32 bdxaddr20only:$addr)),
+           (SLGF GR64:$src1, bdxaddr20only:$addr)>;
diff --git a/lib/Target/SystemZ/SystemZMCInstLower.cpp b/lib/Target/SystemZ/SystemZMCInstLower.cpp
new file mode 100644
index 0000000..5d83321
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZMCInstLower.cpp
@@ -0,0 +1,116 @@
+//===-- SystemZMCInstLower.cpp - Lower MachineInstr to MCInst -------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZMCInstLower.h"
+#include "SystemZAsmPrinter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Target/Mangler.h"
+
+using namespace llvm;
+
+// Some reloc-generating instructions come as a relaxable short/long pair.
+// The long form tends to be the default, since it still assembles correctly
+// when no relaxation is performed.  Map the long form of each such pair to
+// its short form; every other opcode is returned unchanged.
+static unsigned getShortenedInstr(unsigned Opcode) {
+  if (Opcode == SystemZ::BRCL)
+    return SystemZ::BRC;
+  if (Opcode == SystemZ::JG)
+    return SystemZ::J;
+  if (Opcode == SystemZ::BRASL)
+    return SystemZ::BRAS;
+  return Opcode;
+}
+
+// Return the VK_* kind for the MO_SYMBOL_MODIFIER bits of target flags Flags.
+static MCSymbolRefExpr::VariantKind getVariantKind(unsigned Flags) {
+  switch (Flags & SystemZII::MO_SYMBOL_MODIFIER) {
+  case 0:
+    return MCSymbolRefExpr::VK_None;
+  case SystemZII::MO_GOT:
+    return MCSymbolRefExpr::VK_GOT;
+  }
+  llvm_unreachable("Unrecognised MO_SYMBOL_MODIFIER");
+}
+
+SystemZMCInstLower::SystemZMCInstLower(Mangler *mang, MCContext &ctx,
+                                       SystemZAsmPrinter &asmPrinter)
+  : Mang(mang), Ctx(ctx), AsmPrinter(asmPrinter) {}
+
+MCOperand SystemZMCInstLower::lowerSymbolOperand(const MachineOperand &MO,
+                                                 const MCSymbol *Symbol,
+                                                 int64_t Offset) const {
+  const MCExpr *Expr =
+    MCSymbolRefExpr::Create(Symbol, getVariantKind(MO.getTargetFlags()), Ctx);
+  // Only wrap the reference in an addition when there is something to add.
+  if (Offset != 0)
+    Expr = MCBinaryExpr::CreateAdd(Expr, MCConstantExpr::Create(Offset, Ctx),
+                                   Ctx);
+  return MCOperand::CreateExpr(Expr);
+}
+
+MCOperand SystemZMCInstLower::lowerOperand(const MachineOperand &MO) const {
+  // Symbolic operands only choose Symbol (and Offset, where MO has one) here.
+  const MCSymbol *Symbol = 0;
+  int64_t Offset = 0;
+  switch (MO.getType()) {
+  default:
+    llvm_unreachable("unknown operand type");
+
+  case MachineOperand::MO_Register:
+    // Implicit registers are dropped: hand back an empty (invalid) operand.
+    return MO.isImplicit() ? MCOperand() : MCOperand::CreateReg(MO.getReg());
+  case MachineOperand::MO_Immediate:
+    return MCOperand::CreateImm(MO.getImm());
+
+  case MachineOperand::MO_MachineBasicBlock:
+    Symbol = MO.getMBB()->getSymbol();
+    break;
+
+  case MachineOperand::MO_GlobalAddress:
+    Symbol = Mang->getSymbol(MO.getGlobal());
+    Offset = MO.getOffset();
+    break;
+
+  case MachineOperand::MO_ExternalSymbol:
+    Symbol = AsmPrinter.GetExternalSymbolSymbol(MO.getSymbolName());
+    Offset = MO.getOffset();
+    break;
+
+  case MachineOperand::MO_JumpTableIndex:
+    Symbol = AsmPrinter.GetJTISymbol(MO.getIndex());
+    break;
+
+  case MachineOperand::MO_ConstantPoolIndex:
+    Symbol = AsmPrinter.GetCPISymbol(MO.getIndex());
+    Offset = MO.getOffset();
+    break;
+
+  case MachineOperand::MO_BlockAddress:
+    Symbol = AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress());
+    Offset = MO.getOffset();
+    break;
+  }
+  return lowerSymbolOperand(MO, Symbol, Offset);
+}
+
+void SystemZMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
+  // Textual output keeps MI's own opcode.  Binary output instead starts from
+  // the shortest form of the instruction, to be relaxed where necessary.
+  bool EmitsText = AsmPrinter.OutStreamer.hasRawTextSupport();
+  unsigned Opcode = MI->getOpcode();
+  OutMI.setOpcode(EmitsText ? Opcode : getShortenedInstr(Opcode));
+  // Copy across every operand that lowers to a valid MCOperand.
+  for (unsigned Idx = 0, NumOps = MI->getNumOperands(); Idx != NumOps; ++Idx) {
+    MCOperand MCOp = lowerOperand(MI->getOperand(Idx));
+    if (!MCOp.isValid())
+      continue;
+    OutMI.addOperand(MCOp);
+  }
+}
diff --git a/lib/Target/SystemZ/SystemZMCInstLower.h b/lib/Target/SystemZ/SystemZMCInstLower.h
new file mode 100644
index 0000000..afa72f3
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZMCInstLower.h
@@ -0,0 +1,47 @@
+//===-- SystemZMCInstLower.h - Lower MachineInstr to MCInst ----*- C++ -*--===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SYSTEMZMCINSTLOWER_H
+#define LLVM_SYSTEMZMCINSTLOWER_H
+
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+class MCContext;
+class MCInst;
+class MCOperand;
+class MCSymbol;
+class MachineInstr;
+class MachineOperand;
+class Mangler;
+class SystemZAsmPrinter;
+
+class LLVM_LIBRARY_VISIBILITY SystemZMCInstLower {
+  Mangler *Mang;                  // Names the symbols of global addresses.
+  MCContext &Ctx;                 // Context in which MCExprs are created.
+  SystemZAsmPrinter &AsmPrinter;  // Source of other symbols and the streamer.
+
+public:
+  SystemZMCInstLower(Mangler *mang, MCContext &ctx,
+                     SystemZAsmPrinter &asmPrinter);
+
+  // Fill OutMI with the MCInst form of MI, skipping operands with none.
+  void lower(const MachineInstr *MI, MCInst &OutMI) const;
+
+  // Return an MCOperand for MO, or an empty one for an implicit register.
+  MCOperand lowerOperand(const MachineOperand &MO) const;
+
+  // Return an MCOperand for MO, which is known to equal Symbol + Offset.
+  MCOperand lowerSymbolOperand(const MachineOperand &MO,
+                               const MCSymbol *Symbol, int64_t Offset) const;
+};
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/SystemZ/SystemZMachineFunctionInfo.h b/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
new file mode 100644
index 0000000..1dc05a7e
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
@@ -0,0 +1,74 @@
+//=- SystemZMachineFunctionInfo.h - SystemZ machine function info -*- C++ -*-=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SYSTEMZMACHINEFUNCTIONINFO_H
+#define SYSTEMZMACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+class SystemZMachineFunctionInfo : public MachineFunctionInfo {
+  unsigned SavedGPRFrameSize;
+  unsigned LowSavedGPR;
+  unsigned HighSavedGPR;
+  unsigned VarArgsFirstGPR;
+  unsigned VarArgsFirstFPR;
+  unsigned VarArgsFrameIndex;
+  unsigned RegSaveFrameIndex;
+  bool ManipulatesSP;
+
+public:
+  explicit SystemZMachineFunctionInfo(MachineFunction &MF)
+    : SavedGPRFrameSize(0), LowSavedGPR(0), HighSavedGPR(0),
+      VarArgsFirstGPR(0), VarArgsFirstFPR(0), VarArgsFrameIndex(0),
+      RegSaveFrameIndex(0), ManipulatesSP(false) {}
+
+  // Number of bytes that generic code allocated for storing call-saved
+  // GPRs.
+  unsigned getSavedGPRFrameSize() const { return SavedGPRFrameSize; }
+  void setSavedGPRFrameSize(unsigned Bytes) { SavedGPRFrameSize = Bytes; }
+
+  // First call-saved GPR that this function should save and restore,
+  // or 0 if no GPRs need to be saved or restored.
+  unsigned getLowSavedGPR() const { return LowSavedGPR; }
+  void setLowSavedGPR(unsigned GPR) { LowSavedGPR = GPR; }
+
+  // Last call-saved GPR that this function should save and
+  // restore.
+  unsigned getHighSavedGPR() const { return HighSavedGPR; }
+  void setHighSavedGPR(unsigned GPR) { HighSavedGPR = GPR; }
+
+  // Number of fixed (as opposed to variable) arguments that are
+  // passed to this function in GPRs.
+  unsigned getVarArgsFirstGPR() const { return VarArgsFirstGPR; }
+  void setVarArgsFirstGPR(unsigned Count) { VarArgsFirstGPR = Count; }
+
+  // The same count for FPRs.
+  unsigned getVarArgsFirstFPR() const { return VarArgsFirstFPR; }
+  void setVarArgsFirstFPR(unsigned Count) { VarArgsFirstFPR = Count; }
+
+  // Frame index of the first stack vararg.
+  unsigned getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
+  void setVarArgsFrameIndex(unsigned Index) { VarArgsFrameIndex = Index; }
+
+  // Frame index of the register save area, i.e. of the incoming
+  // stack pointer.
+  unsigned getRegSaveFrameIndex() const { return RegSaveFrameIndex; }
+  void setRegSaveFrameIndex(unsigned Index) { RegSaveFrameIndex = Index; }
+
+  // Whether the function manipulates the stack pointer directly,
+  // for example through STACKSAVE or STACKRESTORE.
+  bool getManipulatesSP() const { return ManipulatesSP; }
+  void setManipulatesSP(bool Value) { ManipulatesSP = Value; }
+};
+
+} // end llvm namespace
+
+#endif
diff --git a/lib/Target/SystemZ/SystemZOperands.td b/lib/Target/SystemZ/SystemZOperands.td
new file mode 100644
index 0000000..0abc3f7
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZOperands.td
@@ -0,0 +1,435 @@
+//===-- SystemZOperands.td - SystemZ instruction operands ----*- tblgen-*--===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Class definitions
+//===----------------------------------------------------------------------===//
+
+class ImmediateAsmOperand<string name>
+  : AsmOperandClass {
+  let Name = name;
+  let RenderMethod = "addImmOperands";
+}
+
+// Constructs both a DAG pattern and instruction operand for an immediate
+// of type VT.  PRED returns true if a node is acceptable and XFORM returns
+// the operand value associated with the node.  ASMOP is the name of the
+// associated asm operand, and also forms the basis of the asm print method.
+class Immediate<ValueType vt, code pred, SDNodeXForm xform, string asmop>
+  : PatLeaf<(vt imm), pred, xform>, Operand<vt> {
+  let PrintMethod = "print"##asmop##"Operand";
+  let ParserMatchClass = !cast<AsmOperandClass>(asmop);
+}
+
+// Constructs both a DAG pattern and instruction operand for a PC-relative
+// address with address size VT.  SELF is the name of the operand.
+class PCRelAddress<ValueType vt, string self>
+  : ComplexPattern<vt, 1, "selectPCRelAddress", [z_pcrel_wrapper]>,
+    Operand<vt> {
+  let MIOperandInfo = (ops !cast<Operand>(self));
+}
+
+// Constructs an AsmOperandClass for addressing mode FORMAT, treating the
+// registers as having BITSIZE bits and displacements as having DISPSIZE bits.
+class AddressAsmOperand<string format, string bitsize, string dispsize>
+  : AsmOperandClass {
+  let Name = format##bitsize##"Disp"##dispsize;
+  let ParserMethod = "parse"##format##bitsize;
+  let RenderMethod = "add"##format##"Operands";
+}
+
+// Constructs both a DAG pattern and instruction operand for an addressing mode.
+// The mode is selected by custom code in selectTYPE...SUFFIX().  The address
+// registers have BITSIZE bits and displacements have DISPSIZE bits.  NUMOPS is
+// the number of operands that make up an address and OPERANDS lists the types
+// of those operands using (ops ...).  FORMAT is the type of addressing mode,
+// which needs to match the names used in AddressAsmOperand.
+class AddressingMode<string type, string bitsize, string dispsize,
+                     string suffix, int numops, string format, dag operands>
+  : ComplexPattern<!cast<ValueType>("i"##bitsize), numops,
+                   "select"##type##dispsize##suffix,
+                   [add, sub, or, frameindex, z_adjdynalloc]>,
+    Operand<!cast<ValueType>("i"##bitsize)> {
+  let PrintMethod = "print"##format##"Operand";
+  let MIOperandInfo = operands;
+  let ParserMatchClass =
+    !cast<AddressAsmOperand>(format##bitsize##"Disp"##dispsize);
+}
+
+// An addressing mode with a base and displacement but no index.
+class BDMode<string type, string bitsize, string dispsize, string suffix>
+  : AddressingMode<type, bitsize, dispsize, suffix, 2, "BDAddr",
+                   (ops !cast<RegisterOperand>("ADDR"##bitsize),
+                        !cast<Immediate>("disp"##dispsize##"imm"##bitsize))>;
+
+// An addressing mode with a base, displacement and index.
+class BDXMode<string type, string bitsize, string dispsize, string suffix>
+  : AddressingMode<type, bitsize, dispsize, suffix, 3, "BDXAddr",
+                   (ops !cast<RegisterOperand>("ADDR"##bitsize),
+                        !cast<Immediate>("disp"##dispsize##"imm"##bitsize),
+                        !cast<RegisterOperand>("ADDR"##bitsize))>;
+
+//===----------------------------------------------------------------------===//
+// Extracting immediate operands from nodes
+// These all create MVT::i64 nodes to ensure the value is not sign-extended
+// when converted from an SDNode to a MachineOperand later on.
+//===----------------------------------------------------------------------===//
+
+// Bits 0-15 (counting from the lsb).
+def LL16 : SDNodeXForm<imm, [{
+  uint64_t Value = N->getZExtValue() & 0x000000000000FFFFULL;
+  return CurDAG->getTargetConstant(Value, MVT::i64);
+}]>;
+
+// Bits 16-31 (counting from the lsb).
+def LH16 : SDNodeXForm<imm, [{
+  uint64_t Value = (N->getZExtValue() & 0x00000000FFFF0000ULL) >> 16;
+  return CurDAG->getTargetConstant(Value, MVT::i64);
+}]>;
+
+// Bits 32-47 (counting from the lsb).
+def HL16 : SDNodeXForm<imm, [{
+  uint64_t Value = (N->getZExtValue() & 0x0000FFFF00000000ULL) >> 32;
+  return CurDAG->getTargetConstant(Value, MVT::i64);
+}]>;
+
+// Bits 48-63 (counting from the lsb).
+def HH16 : SDNodeXForm<imm, [{
+  uint64_t Value = (N->getZExtValue() & 0xFFFF000000000000ULL) >> 48;
+  return CurDAG->getTargetConstant(Value, MVT::i64);
+}]>;
+
+// Low 32 bits.
+def LF32 : SDNodeXForm<imm, [{
+  uint64_t Value = N->getZExtValue() & 0x00000000FFFFFFFFULL;
+  return CurDAG->getTargetConstant(Value, MVT::i64);
+}]>;
+
+// High 32 bits.
+def HF32 : SDNodeXForm<imm, [{
+  uint64_t Value = N->getZExtValue() >> 32;
+  return CurDAG->getTargetConstant(Value, MVT::i64);
+}]>;
+
+// Truncate an immediate to an 8-bit signed quantity.
+def SIMM8 : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(int8_t(N->getZExtValue()), MVT::i64);
+}]>;
+
+// Truncate an immediate to an 8-bit unsigned quantity.
+def UIMM8 : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(uint8_t(N->getZExtValue()), MVT::i64);
+}]>;
+
+// Truncate an immediate to a 16-bit signed quantity.
+def SIMM16 : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(int16_t(N->getZExtValue()), MVT::i64);
+}]>;
+
+// Truncate an immediate to a 16-bit unsigned quantity.
+def UIMM16 : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(uint16_t(N->getZExtValue()), MVT::i64);
+}]>;
+
+// Truncate an immediate to a 32-bit signed quantity.
+def SIMM32 : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(int32_t(N->getZExtValue()), MVT::i64);
+}]>;
+
+// Truncate an immediate to a 32-bit unsigned quantity.
+def UIMM32 : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(uint32_t(N->getZExtValue()), MVT::i64);
+}]>;
+
+// Negate and then truncate an immediate to a 32-bit unsigned quantity.
+def NEGIMM32 : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(uint32_t(-N->getZExtValue()), MVT::i64);
+}]>;
+
+//===----------------------------------------------------------------------===//
+// Immediate asm operands.
+//===----------------------------------------------------------------------===//
+
+def U4Imm  : ImmediateAsmOperand<"U4Imm">;
+def U6Imm  : ImmediateAsmOperand<"U6Imm">;
+def S8Imm  : ImmediateAsmOperand<"S8Imm">;
+def U8Imm  : ImmediateAsmOperand<"U8Imm">;
+def S16Imm : ImmediateAsmOperand<"S16Imm">;
+def U16Imm : ImmediateAsmOperand<"U16Imm">;
+def S32Imm : ImmediateAsmOperand<"S32Imm">;
+def U32Imm : ImmediateAsmOperand<"U32Imm">;
+
+//===----------------------------------------------------------------------===//
+// 8-bit immediates
+//===----------------------------------------------------------------------===//
+
+def uimm8zx4 : Immediate<i8, [{
+  return isUInt<4>(N->getZExtValue());
+}], NOOP_SDNodeXForm, "U4Imm">;
+
+def uimm8zx6 : Immediate<i8, [{
+  return isUInt<6>(N->getZExtValue());
+}], NOOP_SDNodeXForm, "U6Imm">;
+
+def simm8    : Immediate<i8, [{}], SIMM8, "S8Imm">;
+def uimm8    : Immediate<i8, [{}], UIMM8, "U8Imm">;
+
+//===----------------------------------------------------------------------===//
+// i32 immediates
+//===----------------------------------------------------------------------===//
+
+// Immediates for the lower and upper 16 bits of an i32, with the other
+// bits of the i32 being zero.
+def imm32ll16 : Immediate<i32, [{
+  return SystemZ::isImmLL(N->getZExtValue());
+}], LL16, "U16Imm">;
+
+def imm32lh16 : Immediate<i32, [{
+  return SystemZ::isImmLH(N->getZExtValue());
+}], LH16, "U16Imm">;
+
+// Immediates for the lower and upper 16 bits of an i32, with the other
+// bits of the i32 being one.
+def imm32ll16c : Immediate<i32, [{
+  return SystemZ::isImmLL(uint32_t(~N->getZExtValue()));
+}], LL16, "U16Imm">;
+
+def imm32lh16c : Immediate<i32, [{
+  return SystemZ::isImmLH(uint32_t(~N->getZExtValue()));
+}], LH16, "U16Imm">;
+
+// Short immediates
+def imm32sx8 : Immediate<i32, [{
+  return isInt<8>(N->getSExtValue());
+}], SIMM8, "S8Imm">;
+
+def imm32zx8 : Immediate<i32, [{
+  return isUInt<8>(N->getZExtValue());
+}], UIMM8, "U8Imm">;
+
+def imm32zx8trunc : Immediate<i32, [{}], UIMM8, "U8Imm">;
+
+def imm32sx16 : Immediate<i32, [{
+  return isInt<16>(N->getSExtValue());
+}], SIMM16, "S16Imm">;
+
+def imm32zx16 : Immediate<i32, [{
+  return isUInt<16>(N->getZExtValue());
+}], UIMM16, "U16Imm">;
+
+def imm32sx16trunc : Immediate<i32, [{}], SIMM16, "S16Imm">;
+
+// Full 32-bit immediates.  We need both signed and unsigned versions
+// because the assembler is picky.  E.g. AFI requires signed operands
+// while NILF requires unsigned ones.
+def simm32 : Immediate<i32, [{}], SIMM32, "S32Imm">;
+def uimm32 : Immediate<i32, [{}], UIMM32, "U32Imm">;
+
+def imm32 : ImmLeaf<i32, [{}]>;
+
+//===----------------------------------------------------------------------===//
+// 64-bit immediates
+//===----------------------------------------------------------------------===//
+
+// Immediates for 16-bit chunks of an i64, with the other bits of the
+// i64 being zero.
+def imm64ll16 : Immediate<i64, [{
+  return SystemZ::isImmLL(N->getZExtValue());
+}], LL16, "U16Imm">;
+
+def imm64lh16 : Immediate<i64, [{
+  return SystemZ::isImmLH(N->getZExtValue());
+}], LH16, "U16Imm">;
+
+def imm64hl16 : Immediate<i64, [{
+  return SystemZ::isImmHL(N->getZExtValue());
+}], HL16, "U16Imm">;
+
+def imm64hh16 : Immediate<i64, [{
+  return SystemZ::isImmHH(N->getZExtValue());
+}], HH16, "U16Imm">;
+
+// Immediates for 16-bit chunks of an i64, with the other bits of the
+// i64 being one.
+def imm64ll16c : Immediate<i64, [{
+  return SystemZ::isImmLL(uint64_t(~N->getZExtValue()));
+}], LL16, "U16Imm">;
+
+def imm64lh16c : Immediate<i64, [{
+  return SystemZ::isImmLH(uint64_t(~N->getZExtValue()));
+}], LH16, "U16Imm">;
+
+def imm64hl16c : Immediate<i64, [{
+  return SystemZ::isImmHL(uint64_t(~N->getZExtValue()));
+}], HL16, "U16Imm">;
+
+def imm64hh16c : Immediate<i64, [{
+  return SystemZ::isImmHH(uint64_t(~N->getZExtValue()));
+}], HH16, "U16Imm">;
+
+// Immediates for the lower and upper 32 bits of an i64, with the other
+// bits of the i64 being zero.
+def imm64lf32 : Immediate<i64, [{
+  return SystemZ::isImmLF(N->getZExtValue());
+}], LF32, "U32Imm">;
+
+def imm64hf32 : Immediate<i64, [{
+  return SystemZ::isImmHF(N->getZExtValue());
+}], HF32, "U32Imm">;
+
+// Immediates for the lower and upper 32 bits of an i64, with the other
+// bits of the i64 being one.
+def imm64lf32c : Immediate<i64, [{
+  return SystemZ::isImmLF(uint64_t(~N->getZExtValue()));
+}], LF32, "U32Imm">;
+
+def imm64hf32c : Immediate<i64, [{
+  return SystemZ::isImmHF(uint64_t(~N->getZExtValue()));
+}], HF32, "U32Imm">;
+
+// Short immediates.
+def imm64sx8 : Immediate<i64, [{
+  return isInt<8>(N->getSExtValue());
+}], SIMM8, "S8Imm">;
+
+def imm64sx16 : Immediate<i64, [{
+  return isInt<16>(N->getSExtValue());
+}], SIMM16, "S16Imm">;
+
+def imm64zx16 : Immediate<i64, [{
+  return isUInt<16>(N->getZExtValue());
+}], UIMM16, "U16Imm">;
+
+def imm64sx32 : Immediate<i64, [{
+  return isInt<32>(N->getSExtValue());
+}], SIMM32, "S32Imm">;
+
+def imm64zx32 : Immediate<i64, [{
+  return isUInt<32>(N->getZExtValue());
+}], UIMM32, "U32Imm">;
+
+def imm64zx32n : Immediate<i64, [{
+  return isUInt<32>(-N->getZExtValue()); // unsigned negation: no overflow UB
+}], NEGIMM32, "U32Imm">;
+
+def imm64 : ImmLeaf<i64, [{}]>;
+
+//===----------------------------------------------------------------------===//
+// Floating-point immediates
+//===----------------------------------------------------------------------===//
+
+// Floating-point zero.
+def fpimm0 : PatLeaf<(fpimm), [{ return N->isExactlyValue(+0.0); }]>;
+
+// Floating-point negative zero.
+def fpimmneg0 : PatLeaf<(fpimm), [{ return N->isExactlyValue(-0.0); }]>;
+
+//===----------------------------------------------------------------------===//
+// Symbolic address operands
+//===----------------------------------------------------------------------===//
+
+// PC-relative offsets of a basic block.  The offset is sign-extended
+// and multiplied by 2.
+def brtarget16 : Operand<OtherVT> {
+  let EncoderMethod = "getPC16DBLEncoding";
+}
+def brtarget32 : Operand<OtherVT> {
+  let EncoderMethod = "getPC32DBLEncoding";
+}
+
+// A PC-relative offset of a global value.  The offset is sign-extended
+// and multiplied by 2.
+def pcrel32 : PCRelAddress<i64, "pcrel32"> {
+  let EncoderMethod = "getPC32DBLEncoding";
+}
+
+// A PC-relative offset of a global value when the value is used as a
+// call target.  The offset is sign-extended and multiplied by 2.
+def pcrel16call : PCRelAddress<i64, "pcrel16call"> {
+  let PrintMethod = "printCallOperand";
+  let EncoderMethod = "getPLT16DBLEncoding";
+}
+def pcrel32call : PCRelAddress<i64, "pcrel32call"> {
+  let PrintMethod = "printCallOperand";
+  let EncoderMethod = "getPLT32DBLEncoding";
+}
+
+//===----------------------------------------------------------------------===//
+// Addressing modes
+//===----------------------------------------------------------------------===//
+
+// 12-bit displacement operands.
+def disp12imm32 : Operand<i32>;
+def disp12imm64 : Operand<i64>;
+
+// 20-bit displacement operands.
+def disp20imm32 : Operand<i32>;
+def disp20imm64 : Operand<i64>;
+
+def BDAddr32Disp12  : AddressAsmOperand<"BDAddr",  "32", "12">;
+def BDAddr32Disp20  : AddressAsmOperand<"BDAddr",  "32", "20">;
+def BDAddr64Disp12  : AddressAsmOperand<"BDAddr",  "64", "12">;
+def BDAddr64Disp20  : AddressAsmOperand<"BDAddr",  "64", "20">;
+def BDXAddr64Disp12 : AddressAsmOperand<"BDXAddr", "64", "12">;
+def BDXAddr64Disp20 : AddressAsmOperand<"BDXAddr", "64", "20">;
+
+// DAG patterns and operands for addressing modes.  Each mode has
+// the form <type><range><group> where:
+//
+// <type> is one of:
+//   shift    : base + displacement (32-bit)
+//   bdaddr   : base + displacement
+//   bdxaddr  : base + displacement + index
+//   laaddr   : like bdxaddr, but used for Load Address operations
+//   dynalloc : base + displacement + index + ADJDYNALLOC
+//
+// <range> is one of:
+//   12       : the displacement is an unsigned 12-bit value
+//   20       : the displacement is a signed 20-bit value
+//
+// <group> is one of:
+//   pair     : used when there is an equivalent instruction with the opposite
+//              range value (12 or 20)
+//   only     : used when there is no equivalent instruction with the opposite
+//              range value
+def shift12only      : BDMode <"BDAddr",   "32", "12", "Only">;
+def shift20only      : BDMode <"BDAddr",   "32", "20", "Only">;
+def bdaddr12only     : BDMode <"BDAddr",   "64", "12", "Only">;
+def bdaddr12pair     : BDMode <"BDAddr",   "64", "12", "Pair">;
+def bdaddr20only     : BDMode <"BDAddr",   "64", "20", "Only">;
+def bdaddr20pair     : BDMode <"BDAddr",   "64", "20", "Pair">;
+def bdxaddr12only    : BDXMode<"BDXAddr",  "64", "12", "Only">;
+def bdxaddr12pair    : BDXMode<"BDXAddr",  "64", "12", "Pair">;
+def bdxaddr20only    : BDXMode<"BDXAddr",  "64", "20", "Only">;
+def bdxaddr20only128 : BDXMode<"BDXAddr",  "64", "20", "Only128">;
+def bdxaddr20pair    : BDXMode<"BDXAddr",  "64", "20", "Pair">;
+def dynalloc12only   : BDXMode<"DynAlloc", "64", "12", "Only">;
+def laaddr12pair     : BDXMode<"LAAddr",   "64", "12", "Pair">;
+def laaddr20pair     : BDXMode<"LAAddr",   "64", "20", "Pair">;
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous
+//===----------------------------------------------------------------------===//
+
+// Access registers.  At present we just use them for accessing the thread
+// pointer, so we don't expose them as registers to LLVM.
+def AccessReg : AsmOperandClass {
+  let Name = "AccessReg";
+  let ParserMethod = "parseAccessReg";
+}
+def access_reg : Immediate<i8, [{ return N->getZExtValue() < 16; }],
+                           NOOP_SDNodeXForm, "AccessReg"> {
+  let ParserMatchClass = AccessReg;
+}
+
+// A 4-bit condition-code mask.
+def cond4 : PatLeaf<(i8 imm), [{ return (N->getZExtValue() < 16); }]>,
+            Operand<i8> {
+  let PrintMethod = "printCond4Operand";
+}
diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td
new file mode 100644
index 0000000..8c4df56
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZOperators.td
@@ -0,0 +1,196 @@
+//===-- SystemZOperators.td - SystemZ-specific operators ------*- tblgen-*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Type profiles
+//===----------------------------------------------------------------------===//
+def SDT_CallSeqStart        : SDCallSeqStart<[SDTCisVT<0, i64>]>;
+def SDT_CallSeqEnd          : SDCallSeqEnd<[SDTCisVT<0, i64>,
+                                            SDTCisVT<1, i64>]>;
+def SDT_ZCall               : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
+def SDT_ZCmp                : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
+def SDT_ZBRCCMask           : SDTypeProfile<0, 2,
+                                            [SDTCisVT<0, i8>,
+                                             SDTCisVT<1, OtherVT>]>;
+def SDT_ZSelectCCMask       : SDTypeProfile<1, 3,
+                                            [SDTCisSameAs<0, 1>,
+                                             SDTCisSameAs<1, 2>,
+                                             SDTCisVT<3, i8>]>;
+def SDT_ZWrapPtr            : SDTypeProfile<1, 1,
+                                            [SDTCisSameAs<0, 1>,
+                                             SDTCisPtrTy<0>]>;
+def SDT_ZAdjDynAlloc        : SDTypeProfile<1, 0, [SDTCisVT<0, i64>]>;
+def SDT_ZExtractAccess      : SDTypeProfile<1, 1,
+                                            [SDTCisVT<0, i32>,
+                                             SDTCisVT<1, i8>]>;
+def SDT_ZGR128Binary32      : SDTypeProfile<1, 2,
+                                            [SDTCisVT<0, untyped>,
+                                             SDTCisVT<1, untyped>,
+                                             SDTCisVT<2, i32>]>;
+def SDT_ZGR128Binary64      : SDTypeProfile<1, 2,
+                                            [SDTCisVT<0, untyped>,
+                                             SDTCisVT<1, untyped>,
+                                             SDTCisVT<2, i64>]>;
+def SDT_ZAtomicLoadBinaryW  : SDTypeProfile<1, 5,
+                                            [SDTCisVT<0, i32>,
+                                             SDTCisPtrTy<1>,
+                                             SDTCisVT<2, i32>,
+                                             SDTCisVT<3, i32>,
+                                             SDTCisVT<4, i32>,
+                                             SDTCisVT<5, i32>]>;
+def SDT_ZAtomicCmpSwapW     : SDTypeProfile<1, 6,
+                                            [SDTCisVT<0, i32>,
+                                             SDTCisPtrTy<1>,
+                                             SDTCisVT<2, i32>,
+                                             SDTCisVT<3, i32>,
+                                             SDTCisVT<4, i32>,
+                                             SDTCisVT<5, i32>,
+                                             SDTCisVT<6, i32>]>;
+
+//===----------------------------------------------------------------------===//
+// Node definitions
+//===----------------------------------------------------------------------===//
+
+// These are target-independent nodes, but have target-specific formats.
+def callseq_start       : SDNode<"ISD::CALLSEQ_START", SDT_CallSeqStart,
+                                 [SDNPHasChain, SDNPSideEffect, SDNPOutGlue]>;
+def callseq_end         : SDNode<"ISD::CALLSEQ_END",   SDT_CallSeqEnd,
+                                 [SDNPHasChain, SDNPSideEffect, SDNPOptInGlue,
+                                  SDNPOutGlue]>;
+
+// Nodes for SystemZISD::*.  See SystemZISelLowering.h for more details.
+def z_retflag           : SDNode<"SystemZISD::RET_FLAG", SDTNone,
+                                 [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+def z_call              : SDNode<"SystemZISD::CALL", SDT_ZCall,
+                                 [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
+                                  SDNPVariadic]>;
+def z_pcrel_wrapper     : SDNode<"SystemZISD::PCREL_WRAPPER", SDT_ZWrapPtr, []>;
+def z_cmp               : SDNode<"SystemZISD::CMP", SDT_ZCmp, [SDNPOutGlue]>;
+def z_ucmp              : SDNode<"SystemZISD::UCMP", SDT_ZCmp, [SDNPOutGlue]>;
+def z_br_ccmask         : SDNode<"SystemZISD::BR_CCMASK", SDT_ZBRCCMask,
+                                 [SDNPHasChain, SDNPInGlue]>;
+def z_select_ccmask     : SDNode<"SystemZISD::SELECT_CCMASK", SDT_ZSelectCCMask,
+                                 [SDNPInGlue]>;
+def z_adjdynalloc       : SDNode<"SystemZISD::ADJDYNALLOC", SDT_ZAdjDynAlloc>;
+def z_extract_access    : SDNode<"SystemZISD::EXTRACT_ACCESS",
+                                 SDT_ZExtractAccess>;
+def z_umul_lohi64       : SDNode<"SystemZISD::UMUL_LOHI64", SDT_ZGR128Binary64>;
+def z_sdivrem64         : SDNode<"SystemZISD::SDIVREM64", SDT_ZGR128Binary64>;
+def z_udivrem32         : SDNode<"SystemZISD::UDIVREM32", SDT_ZGR128Binary32>;
+def z_udivrem64         : SDNode<"SystemZISD::UDIVREM64", SDT_ZGR128Binary64>;
+
+class AtomicWOp<string name, SDTypeProfile profile = SDT_ZAtomicLoadBinaryW>
+  : SDNode<"SystemZISD::"##name, profile,
+           [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
+
+def z_atomic_swapw      : AtomicWOp<"ATOMIC_SWAPW">;
+def z_atomic_loadw_add  : AtomicWOp<"ATOMIC_LOADW_ADD">;
+def z_atomic_loadw_sub  : AtomicWOp<"ATOMIC_LOADW_SUB">;
+def z_atomic_loadw_and  : AtomicWOp<"ATOMIC_LOADW_AND">;
+def z_atomic_loadw_or   : AtomicWOp<"ATOMIC_LOADW_OR">;
+def z_atomic_loadw_xor  : AtomicWOp<"ATOMIC_LOADW_XOR">;
+def z_atomic_loadw_nand : AtomicWOp<"ATOMIC_LOADW_NAND">;
+def z_atomic_loadw_min  : AtomicWOp<"ATOMIC_LOADW_MIN">;
+def z_atomic_loadw_max  : AtomicWOp<"ATOMIC_LOADW_MAX">;
+def z_atomic_loadw_umin : AtomicWOp<"ATOMIC_LOADW_UMIN">;
+def z_atomic_loadw_umax : AtomicWOp<"ATOMIC_LOADW_UMAX">;
+def z_atomic_cmp_swapw  : AtomicWOp<"ATOMIC_CMP_SWAPW", SDT_ZAtomicCmpSwapW>;
+
+//===----------------------------------------------------------------------===//
+// Pattern fragments
+//===----------------------------------------------------------------------===//
+
+// Register sign-extend operations.  Sub-32-bit values are represented as i32s.
+def sext8  : PatFrag<(ops node:$src), (sext_inreg node:$src, i8)>;
+def sext16 : PatFrag<(ops node:$src), (sext_inreg node:$src, i16)>;
+def sext32 : PatFrag<(ops node:$src), (sext (i32 node:$src))>;
+
+// Register zero-extend operations.  Sub-32-bit values are represented as i32s.
+def zext8  : PatFrag<(ops node:$src), (and node:$src, 0xff)>;
+def zext16 : PatFrag<(ops node:$src), (and node:$src, 0xffff)>;
+def zext32 : PatFrag<(ops node:$src), (zext (i32 node:$src))>;
+
+// Typed floating-point loads.
+def loadf32 : PatFrag<(ops node:$src), (f32 (load node:$src))>;
+def loadf64 : PatFrag<(ops node:$src), (f64 (load node:$src))>;
+
+// Loads whose alignment is at least the store size of the memory type.
+class AlignedLoad<SDPatternOperator load>
+  : PatFrag<(ops node:$addr), (load node:$addr), [{
+  LoadSDNode *Load = cast<LoadSDNode>(N);
+  return Load->getAlignment() >= Load->getMemoryVT().getStoreSize();
+}]>;
+def aligned_load        : AlignedLoad<load>;
+def aligned_sextloadi16 : AlignedLoad<sextloadi16>;
+def aligned_sextloadi32 : AlignedLoad<sextloadi32>;
+def aligned_zextloadi16 : AlignedLoad<zextloadi16>;
+def aligned_zextloadi32 : AlignedLoad<zextloadi32>;
+
+// Stores whose alignment is at least the store size of the memory type.
+class AlignedStore<SDPatternOperator store>
+  : PatFrag<(ops node:$src, node:$addr), (store node:$src, node:$addr), [{
+  StoreSDNode *Store = cast<StoreSDNode>(N);
+  return Store->getAlignment() >= Store->getMemoryVT().getStoreSize();
+}]>;
+def aligned_store         : AlignedStore<store>;
+def aligned_truncstorei16 : AlignedStore<truncstorei16>;
+def aligned_truncstorei32 : AlignedStore<truncstorei32>;
+
+// Insertions.
+def inserti8 : PatFrag<(ops node:$src1, node:$src2),
+                       (or (and node:$src1, -256), node:$src2)>;
+def insertll : PatFrag<(ops node:$src1, node:$src2),
+                       (or (and node:$src1, 0xffffffffffff0000), node:$src2)>;
+def insertlh : PatFrag<(ops node:$src1, node:$src2),
+                       (or (and node:$src1, 0xffffffff0000ffff), node:$src2)>;
+def inserthl : PatFrag<(ops node:$src1, node:$src2),
+                       (or (and node:$src1, 0xffff0000ffffffff), node:$src2)>;
+def inserthh : PatFrag<(ops node:$src1, node:$src2),
+                       (or (and node:$src1, 0x0000ffffffffffff), node:$src2)>;
+def insertlf : PatFrag<(ops node:$src1, node:$src2),
+                       (or (and node:$src1, 0xffffffff00000000), node:$src2)>;
+def inserthf : PatFrag<(ops node:$src1, node:$src2),
+                       (or (and node:$src1, 0x00000000ffffffff), node:$src2)>;
+
+// ORs treatable as insertions: the low 8 bits of operand 0 are known zero.
+def or_as_inserti8 : PatFrag<(ops node:$src1, node:$src2),
+                             (or node:$src1, node:$src2), [{
+  unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits();
+  return CurDAG->MaskedValueIsZero(N->getOperand(0),
+                                   APInt::getLowBitsSet(BitWidth, 8));
+}]>;
+
+// ORs treatable as reversed insertions: low 8 bits of operand 1 known zero.
+def or_as_revinserti8 : PatFrag<(ops node:$src1, node:$src2),
+                                (or node:$src1, node:$src2), [{
+  unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits();
+  return CurDAG->MaskedValueIsZero(N->getOperand(1),
+                                   APInt::getLowBitsSet(BitWidth, 8));
+}]>;
+
+// Fused multiply-add and multiply-subtract, but with the order of the
+// operands matching SystemZ's MA and MS instructions.
+def z_fma : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+                    (fma node:$src2, node:$src3, node:$src1)>;
+def z_fms : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+                    (fma node:$src2, node:$src3, (fneg node:$src1))>;
+
+// Negated floating-point absolute value, i.e. -|src|.
+def fnabs : PatFrag<(ops node:$src), (fneg (fabs node:$src))>;
+
+// Create a unary operator that loads from memory and then performs
+// the given operation on it.
+class loadu<SDPatternOperator operator>
+  : PatFrag<(ops node:$addr), (operator (load node:$addr))>;
+
+// Create a store operator that performs the given unary operation
+// on the value before storing it.
+class storeu<SDPatternOperator operator>
+  : PatFrag<(ops node:$value, node:$addr),
+            (store (operator node:$value), node:$addr)>;
diff --git a/lib/Target/SystemZ/SystemZPatterns.td b/lib/Target/SystemZ/SystemZPatterns.td
new file mode 100644
index 0000000..3689f74
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZPatterns.td
@@ -0,0 +1,71 @@
+//===-- SystemZPatterns.td - SystemZ-specific pattern rules ---*- tblgen-*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// Record that INSN performs a 64-bit version of unary operator OPERATOR
+// in which the operand is sign-extended from 32 to 64 bits.
+multiclass SXU<SDPatternOperator operator, Instruction insn> {
+  def : Pat<(operator (sext (i32 GR32:$src))),
+            (insn GR32:$src)>;
+  def : Pat<(operator (sext_inreg GR64:$src, i32)),
+            (insn (EXTRACT_SUBREG GR64:$src, subreg_32bit))>;
+}
+
+// Record that INSN performs a 64-bit version of binary operator OPERATOR
+// in which the first operand has class CLS and in which the second operand
+// is sign-extended from a 32-bit register.
+multiclass SXB<SDPatternOperator operator, RegisterOperand cls,
+               Instruction insn> {
+  def : Pat<(operator cls:$src1, (sext GR32:$src2)),
+            (insn cls:$src1, GR32:$src2)>;
+  def : Pat<(operator cls:$src1, (sext_inreg GR64:$src2, i32)),
+            (insn cls:$src1, (EXTRACT_SUBREG GR64:$src2, subreg_32bit))>;
+}
+
+// Like SXB, but for zero extension.
+multiclass ZXB<SDPatternOperator operator, RegisterOperand cls,
+               Instruction insn> {
+  def : Pat<(operator cls:$src1, (zext GR32:$src2)),
+            (insn cls:$src1, GR32:$src2)>;
+  def : Pat<(operator cls:$src1, (and GR64:$src2, 0xffffffff)),
+            (insn cls:$src1, (EXTRACT_SUBREG GR64:$src2, subreg_32bit))>;
+}
+
+// Record that INSN performs a binary read-modify-write operation,
+// with LOAD, OPERATOR and STORE being the read, modify and write
+// respectively.  MODE is the addressing mode and IMM is the type
+// of the second operand.
+class RMWI<SDPatternOperator load, SDPatternOperator operator,
+           SDPatternOperator store, AddressingMode mode,
+           PatFrag imm, Instruction insn>
+  : Pat<(store (operator (load mode:$addr), imm:$src), mode:$addr),
+        (insn mode:$addr, (UIMM8 imm:$src))>;
+
+// Record that INSN performs binary operation OPERATOR on a byte in memory
+// addressed by MODE.  The second operand is any i32 or i64 immediate.
+multiclass RMWIByte<SDPatternOperator operator, AddressingMode mode,
+                    Instruction insn> {
+  def : RMWI<zextloadi8, operator, truncstorei8, mode, imm32, insn>;
+  def : RMWI<zextloadi8, operator, truncstorei8, mode, imm64, insn>;
+  def : RMWI<sextloadi8, operator, truncstorei8, mode, imm32, insn>;
+  def : RMWI<sextloadi8, operator, truncstorei8, mode, imm64, insn>;
+  def : RMWI<extloadi8, operator, truncstorei8, mode, imm32, insn>;
+  def : RMWI<extloadi8, operator, truncstorei8, mode, imm64, insn>;
+}
+
+// Record that INSN performs insertion TYPE into a register of class CLS.
+// The inserted operand is loaded using LOAD from an address of mode MODE.
+multiclass InsertMem<string type, Instruction insn, RegisterOperand cls,
+                     SDPatternOperator load, AddressingMode mode> {
+  def : Pat<(!cast<SDPatternOperator>("or_as_"##type)
+              cls:$src1, (load mode:$src2)),
+            (insn cls:$src1, mode:$src2)>;
+  def : Pat<(!cast<SDPatternOperator>("or_as_rev"##type)
+              (load mode:$src2), cls:$src1),
+            (insn cls:$src1, mode:$src2)>;
+}
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
new file mode 100644
index 0000000..a0ae7ed
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -0,0 +1,162 @@
+//===-- SystemZRegisterInfo.cpp - SystemZ register information ------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZRegisterInfo.h"
+#include "SystemZTargetMachine.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "SystemZGenRegisterInfo.inc"
+
+using namespace llvm;
+
+SystemZRegisterInfo::SystemZRegisterInfo(SystemZTargetMachine &tm,
+                                         const SystemZInstrInfo &tii)
+  : SystemZGenRegisterInfo(SystemZ::R14D), TM(tm), TII(tii) {}
+
+const uint16_t*
+SystemZRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+  // Zero-terminated list: GPRs R6D-R15D first, then FPRs F8D-F15D.
+  static const uint16_t CSRList[] = {
+    SystemZ::R6D,  SystemZ::R7D,  SystemZ::R8D,  SystemZ::R9D,  SystemZ::R10D,
+    SystemZ::R11D, SystemZ::R12D, SystemZ::R13D, SystemZ::R14D, SystemZ::R15D,
+    SystemZ::F8D,  SystemZ::F9D,  SystemZ::F10D, SystemZ::F11D,
+    SystemZ::F12D, SystemZ::F13D, SystemZ::F14D, SystemZ::F15D,
+    0
+  };
+
+  return CSRList;
+}
+
+BitVector
+SystemZRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+  BitVector Reserved(getNumRegs());
+
+  // R15D is the stack pointer; always reserve it and all of its aliases.
+  Reserved.set(SystemZ::R15D);
+  Reserved.set(SystemZ::R15W);
+  Reserved.set(SystemZ::R14Q);
+
+  // R11D is the frame pointer; reserve it and its aliases only when the
+  // function actually uses one.
+  if (MF.getTarget().getFrameLowering()->hasFP(MF)) {
+    Reserved.set(SystemZ::R11D);
+    Reserved.set(SystemZ::R11W);
+    Reserved.set(SystemZ::R10Q);
+  }
+  return Reserved;
+}
+
+bool
+SystemZRegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB,
+                                           MachineBasicBlock::iterator SaveMBBI,
+                                           MachineBasicBlock::iterator &UseMBBI,
+                                           const TargetRegisterClass *RC,
+                                           unsigned Reg) const {
+  // Spill Reg before SaveMBBI and reload it before UseMBBI (emergency slot).
+  MachineFunction &MF = *MBB.getParent();
+  const SystemZFrameLowering *FL =
+    static_cast<const SystemZFrameLowering *>(TM.getFrameLowering());
+  unsigned FrameReg = getFrameRegister(MF);
+  uint64_t SlotOffset = FL->getEmergencySpillSlotOffset(MF);
+
+  unsigned ReloadOpc, SpillOpc;
+  TII.getLoadStoreOpcodes(RC, ReloadOpc, SpillOpc);
+
+  // The offset must always be in range of a 12-bit unsigned displacement.
+  BuildMI(MBB, SaveMBBI, DebugLoc(), TII.get(SpillOpc))
+    .addReg(Reg, RegState::Kill).addReg(FrameReg).addImm(SlotOffset).addReg(0);
+  BuildMI(MBB, UseMBBI, DebugLoc(), TII.get(ReloadOpc), Reg)
+    .addReg(FrameReg).addImm(SlotOffset).addReg(0);
+  return true;
+}
+
+void
+SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
+                                         int SPAdj, unsigned FIOperandNum,
+                                         RegScavenger *RS) const {
+  assert(SPAdj == 0 && "Outgoing arguments should be part of the frame");
+
+  MachineBasicBlock &MBB = *MI->getParent();
+  MachineFunction &MF = *MBB.getParent();
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+  DebugLoc DL = MI->getDebugLoc();
+
+  // Decompose the frame index into a base register and a total offset.
+  int FrameIndex = MI->getOperand(FIOperandNum).getIndex();
+  unsigned BasePtr = getFrameRegister(MF);
+  int64_t Offset = (TFI->getFrameIndexOffset(MF, FrameIndex) +
+                    MI->getOperand(FIOperandNum + 1).getImm());
+
+  // dbg_value instructions just get the base and offset; no range check.
+  if (MI->isDebugValue()) {
+    MI->getOperand(FIOperandNum).ChangeToRegister(BasePtr, /*isDef*/ false);
+    MI->getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
+    return;
+  }
+
+  // See if the offset is in range, or if an equivalent instruction that
+  // accepts the offset exists.
+  unsigned Opcode = MI->getOpcode();
+  unsigned OpcodeForOffset = TII.getOpcodeForOffset(Opcode, Offset);
+  if (OpcodeForOffset)
+    MI->getOperand(FIOperandNum).ChangeToRegister(BasePtr, false);
+  else {
+    // Create an anchor point that is in range.  Start at 0xffff so that
+    // we can use LLILH to load the immediate.
+    int64_t OldOffset = Offset;
+    int64_t Mask = 0xffff;
+    do {
+      Offset = OldOffset & Mask;
+      OpcodeForOffset = TII.getOpcodeForOffset(Opcode, Offset);
+      Mask >>= 1;
+      assert(Mask && "One offset must be OK");
+    } while (!OpcodeForOffset);
+
+    unsigned ScratchReg =
+      MF.getRegInfo().createVirtualRegister(&SystemZ::ADDR64BitRegClass);
+    int64_t HighOffset = OldOffset - Offset; // anchor's offset from BasePtr
+
+    if (MI->getDesc().TSFlags & SystemZII::HasIndex
+        && MI->getOperand(FIOperandNum + 2).getReg() == 0) {
+      // Load the offset into the scratch register and use it as an index.
+      // The scratch register then dies here.
+      TII.loadImmediate(MBB, MI, ScratchReg, HighOffset);
+      MI->getOperand(FIOperandNum).ChangeToRegister(BasePtr, false);
+      MI->getOperand(FIOperandNum + 2).ChangeToRegister(ScratchReg,
+                                                        false, false, true);
+    } else {
+      // Load the anchor address into a scratch register.
+      unsigned LAOpcode = TII.getOpcodeForOffset(SystemZ::LA, HighOffset);
+      if (LAOpcode)
+        BuildMI(MBB, MI, DL, TII.get(LAOpcode),ScratchReg)
+          .addReg(BasePtr).addImm(HighOffset).addReg(0);
+      else {
+        // Load the high offset into the scratch register and add the
+        // base register to it to form the anchor address.
+        TII.loadImmediate(MBB, MI, ScratchReg, HighOffset);
+        BuildMI(MBB, MI, DL, TII.get(SystemZ::AGR),ScratchReg)
+          .addReg(ScratchReg, RegState::Kill).addReg(BasePtr);
+      }
+
+      // Use the scratch register as the base.  It then dies here.
+      MI->getOperand(FIOperandNum).ChangeToRegister(ScratchReg,
+                                                    false, false, true);
+    }
+  }
+  MI->setDesc(TII.get(OpcodeForOffset));
+  MI->getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
+}
+
+unsigned
+SystemZRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+  bool HasFP = MF.getTarget().getFrameLowering()->hasFP(MF);
+  return HasFP ? SystemZ::R11D : SystemZ::R15D; // frame vs. stack pointer
+}
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h
new file mode 100644
index 0000000..91a70de
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.h
@@ -0,0 +1,70 @@
+//===-- SystemZRegisterInfo.h - SystemZ register information ----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SystemZREGISTERINFO_H
+#define SystemZREGISTERINFO_H
+
+#include "SystemZ.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#include "SystemZGenRegisterInfo.inc"
+
+namespace llvm {
+
+namespace SystemZ {
+  // Return the subreg to use for referring to the even and odd registers
+  // in a GR128 pair.  Is32bit says whether we want a GR32 or GR64.
+  inline unsigned even128(bool Is32bit) {
+    return Is32bit ? subreg_32bit : subreg_high;
+  }
+  inline unsigned odd128(bool Is32bit) {
+    return Is32bit ? subreg_low32 : subreg_low;
+  }
+}
+
+class SystemZSubtarget;
+class SystemZInstrInfo;
+
+struct SystemZRegisterInfo : public SystemZGenRegisterInfo {
+private:
+  SystemZTargetMachine &TM;
+  const SystemZInstrInfo &TII;
+
+public:
+  SystemZRegisterInfo(SystemZTargetMachine &tm, const SystemZInstrInfo &tii);
+
+  // Overrides of TargetRegisterInfo virtual methods.
+  virtual bool requiresRegisterScavenging(const MachineFunction &MF) const
+    LLVM_OVERRIDE {
+    return true;
+  }
+  virtual bool requiresFrameIndexScavenging(const MachineFunction &MF) const
+    LLVM_OVERRIDE {
+    return true; // eliminateFrameIndex may create virtual scratch registers
+  }
+  virtual const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0)
+    const LLVM_OVERRIDE;
+  virtual BitVector getReservedRegs(const MachineFunction &MF)
+    const LLVM_OVERRIDE;
+  virtual bool saveScavengerRegister(MachineBasicBlock &MBB,
+                                     MachineBasicBlock::iterator SaveMBBI,
+                                     MachineBasicBlock::iterator &UseMBBI,
+                                     const TargetRegisterClass *RC,
+                                     unsigned Reg) const LLVM_OVERRIDE;
+  virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI,
+                                   int SPAdj, unsigned FIOperandNum,
+                                   RegScavenger *RS) const LLVM_OVERRIDE;
+  virtual unsigned getFrameRegister(const MachineFunction &MF) const
+    LLVM_OVERRIDE;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.td b/lib/Target/SystemZ/SystemZRegisterInfo.td
new file mode 100644
index 0000000..bd1b563
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.td
@@ -0,0 +1,150 @@
+//==- SystemZRegisterInfo.td - SystemZ register definitions -*- tablegen -*-==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Class definitions.
+//===----------------------------------------------------------------------===//
+
+class SystemZReg<string n> : Register<n> {
+  let Namespace = "SystemZ";
+}
+
+class SystemZRegWithSubregs<string n, list<Register> subregs>
+  : RegisterWithSubRegs<n, subregs> {
+  let Namespace = "SystemZ";
+}
+
+let Namespace = "SystemZ" in {
+def subreg_32bit  : SubRegIndex; // could also be known as "subreg_high32"
+def subreg_high   : SubRegIndex;
+def subreg_low    : SubRegIndex;
+def subreg_low32  : SubRegIndex<[subreg_low, subreg_32bit]>;
+}
+
+// Define a register class that contains values of type TYPE and an
+// associated operand called NAME.  SIZE is the size and alignment
+// of the registers and REGLIST is the list of individual registers.
+multiclass SystemZRegClass<string name, ValueType type, int size, dag regList> {
+  def AsmOperand : AsmOperandClass {
+    let Name = name;
+    let ParserMethod = "parse"##name;
+    let RenderMethod = "addRegOperands";
+  }
+  def Bit : RegisterClass<"SystemZ", [type], size, regList> {
+    let Size = size;
+  }
+  def "" : RegisterOperand<!cast<RegisterClass>(name##"Bit")> {
+    let ParserMatchClass = !cast<AsmOperandClass>(name##"AsmOperand");
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// General-purpose registers
+//===----------------------------------------------------------------------===//
+
+// Lower 32 bits of one of the 16 64-bit general-purpose registers
+class GPR32<bits<16> num, string n> : SystemZReg<n> {
+  let HWEncoding = num;
+}
+
+// One of the 16 64-bit general-purpose registers.
+class GPR64<bits<16> num, string n, GPR32 low>
+ : SystemZRegWithSubregs<n, [low]> {
+  let HWEncoding = num;
+  let SubRegIndices = [subreg_32bit];
+}
+
+// 8 even-odd pairs of GPR64s.
+class GPR128<bits<16> num, string n, GPR64 high, GPR64 low>
+ : SystemZRegWithSubregs<n, [high, low]> {
+  let HWEncoding = num;
+  let SubRegIndices = [subreg_high, subreg_low];
+}
+
+// General-purpose registers
+foreach I = 0-15 in {
+  def R#I#W : GPR32<I, "r"#I>;
+  def R#I#D : GPR64<I, "r"#I, !cast<GPR32>("R"#I#"W")>, DwarfRegNum<[I]>;
+}
+
+foreach I = [0, 2, 4, 6, 8, 10, 12, 14] in {
+  def R#I#Q : GPR128<I, "r"#I, !cast<GPR64>("R"#I#"D"),
+                     !cast<GPR64>("R"#!add(I, 1)#"D")>;
+}
+
+/// Allocate the callee-saved R6-R13 backwards. That way they can be saved
+/// together with R14 and R15 in one prolog instruction.
+defm GR32 : SystemZRegClass<"GR32", i32, 32, (add (sequence "R%uW",  0, 5),
+                                                  (sequence "R%uW", 15, 6))>;
+defm GR64 : SystemZRegClass<"GR64", i64, 64, (add (sequence "R%uD",  0, 5),
+                                                  (sequence "R%uD", 15, 6))>;
+
+// The architecture doesn't really have any i128 support, so model the
+// register pairs as untyped instead.
+defm GR128 : SystemZRegClass<"GR128", untyped, 128, (add R0Q, R2Q, R4Q,
+                                                         R12Q, R10Q, R8Q, R6Q,
+                                                         R14Q)>;
+
+// Base and index registers.  Everything except R0, which in an address
+// context evaluates as 0.
+defm ADDR32 : SystemZRegClass<"ADDR32", i32, 32, (sub GR32Bit, R0W)>;
+defm ADDR64 : SystemZRegClass<"ADDR64", i64, 64, (sub GR64Bit, R0D)>;
+
+// Not used directly, but needs to exist for ADDR32 and ADDR64 subregs
+// of a GR128.
+defm ADDR128 : SystemZRegClass<"ADDR128", untyped, 128, (sub GR128Bit, R0Q)>;
+
+//===----------------------------------------------------------------------===//
+// Floating-point registers
+//===----------------------------------------------------------------------===//
+
+// Lower 32 bits of one of the 16 64-bit floating-point registers
+class FPR32<bits<16> num, string n> : SystemZReg<n> {
+  let HWEncoding = num;
+}
+
+// One of the 16 64-bit floating-point registers
+class FPR64<bits<16> num, string n, FPR32 low>
+ : SystemZRegWithSubregs<n, [low]> {
+  let HWEncoding = num;
+  let SubRegIndices = [subreg_32bit];
+}
+
+// 8 pairs of FPR64s, with a one-register gap in between.
+class FPR128<bits<16> num, string n, FPR64 high, FPR64 low>
+ : SystemZRegWithSubregs<n, [high, low]> {
+  let HWEncoding = num;
+  let SubRegIndices = [subreg_high, subreg_low];
+}
+
+// Floating-point registers
+foreach I = 0-15 in {
+  def F#I#S : FPR32<I, "f"#I>;
+  def F#I#D : FPR64<I, "f"#I, !cast<FPR32>("F"#I#"S")>,
+              DwarfRegNum<[!add(I, 16)]>;
+}
+
+foreach I = [0, 1, 4, 5, 8, 9, 12, 13] in {
+  def F#I#Q  : FPR128<I, "f"#I, !cast<FPR64>("F"#I#"D"),
+                     !cast<FPR64>("F"#!add(I, 2)#"D")>;
+}
+
+// There's no store-multiple instruction for FPRs, so we're not fussy
+// about the order in which call-saved registers are allocated.
+defm FP32  : SystemZRegClass<"FP32", f32, 32, (sequence "F%uS", 0, 15)>;
+defm FP64  : SystemZRegClass<"FP64", f64, 64, (sequence "F%uD", 0, 15)>;
+defm FP128 : SystemZRegClass<"FP128", f128, 128, (add F0Q, F1Q, F4Q, F5Q,
+                                                      F8Q, F9Q, F12Q, F13Q)>;
+
+//===----------------------------------------------------------------------===//
+// Other registers
+//===----------------------------------------------------------------------===//
+
+// Status register
+def PSW : SystemZReg<"psw">;
diff --git a/lib/Target/SystemZ/SystemZSubtarget.cpp b/lib/Target/SystemZ/SystemZSubtarget.cpp
new file mode 100644
index 0000000..cfd3324
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZSubtarget.cpp
@@ -0,0 +1,56 @@
+//===-- SystemZSubtarget.cpp - SystemZ subtarget information --------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZSubtarget.h"
+#include "llvm/IR/GlobalValue.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "SystemZGenSubtargetInfo.inc"
+
+using namespace llvm;
+
+SystemZSubtarget::SystemZSubtarget(const std::string &TT,
+                                   const std::string &CPU,
+                                   const std::string &FS)
+  : SystemZGenSubtargetInfo(TT, CPU, FS), TargetTriple(TT) {
+  // An empty CPU name selects the default processor, "z10".
+  const std::string DefaultCPU = "z10";
+  const std::string &CPUName = CPU.empty() ? DefaultCPU : CPU;
+
+  // Parse the feature string FS for the chosen CPU.
+  ParseSubtargetFeatures(CPUName, FS);
+}
+
+// Return true if GV binds locally under reloc model RM.  Every symbol
+// does so for non-PIC (static) code; otherwise GV must have local
+// linkage or non-default visibility.
+static bool bindsLocally(const GlobalValue *GV, Reloc::Model RM) {
+  if (RM != Reloc::Static && !GV->hasLocalLinkage())
+    return !GV->hasDefaultVisibility();
+  return true;
+}
+
+bool SystemZSubtarget::isPC32DBLSymbol(const GlobalValue *GV,
+                                       Reloc::Model RM,
+                                       CodeModel::Model CM) const {
+  // PC32DBL accesses require the low bit to be clear, so a symbol that is
+  // only byte-aligned is rejected.  A zero alignment selects the default
+  // alignment and is therefore OK.
+  const bool MayBeOdd = GV->getAlignment() == 1;
+
+  // Only the small model puts all locally-binding symbols in range.  For
+  // Medium and above, assume that the symbol is not within the 4GB range.
+  // Taking the address of locally-defined text would be OK, but that
+  // case isn't easy to detect.
+  if (MayBeOdd || CM != CodeModel::Small)
+    return false;
+
+  return bindsLocally(GV, RM);
+}
diff --git a/lib/Target/SystemZ/SystemZSubtarget.h b/lib/Target/SystemZ/SystemZSubtarget.h
new file mode 100644
index 0000000..8d4d450
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZSubtarget.h
@@ -0,0 +1,48 @@
+//===-- SystemZSubtarget.h - SystemZ subtarget information -----*- C++ -*--===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SystemZ specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SYSTEMZSUBTARGET_H
+#define SYSTEMZSUBTARGET_H
+
+#include "llvm/ADT/Triple.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <string>
+
+#define GET_SUBTARGETINFO_HEADER
+#include "SystemZGenSubtargetInfo.inc"
+
+namespace llvm {
+class GlobalValue;
+class StringRef;
+
+class SystemZSubtarget : public SystemZGenSubtargetInfo {
+private:
+  Triple TargetTriple; // built from the TT constructor argument
+
+public:
+  SystemZSubtarget(const std::string &TT, const std::string &CPU,
+                   const std::string &FS);
+
+  // The definition is automatically generated by tblgen.
+  void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+  // Return true if GV can be accessed using LARL for reloc model RM
+  // and code model CM.
+  bool isPC32DBLSymbol(const GlobalValue *GV, Reloc::Model RM,
+                       CodeModel::Model CM) const;
+
+  bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
+};
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp
new file mode 100644
index 0000000..8c4c456
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -0,0 +1,60 @@
+//===-- SystemZTargetMachine.cpp - Define TargetMachine for SystemZ -------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZTargetMachine.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+extern "C" void LLVMInitializeSystemZTarget() {
+  // Register the target.
+  RegisterTargetMachine<SystemZTargetMachine> X(TheSystemZTarget);
+}
+
+SystemZTargetMachine::SystemZTargetMachine(const Target &T, StringRef TT,
+                                           StringRef CPU, StringRef FS,
+                                           const TargetOptions &Options,
+                                           Reloc::Model RM,
+                                           CodeModel::Model CM,
+                                           CodeGenOpt::Level OL)
+  : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+    Subtarget(TT, CPU, FS),
+    // Make sure that global data has at least 16 bits of alignment by default,
+    // so that we can refer to it using LARL.  We don't have any special
+    // requirements for stack variables though.
+    DL("E-p:64:64:64-i1:8:16-i8:8:16-i16:16-i32:32-i64:64"
+       "-f32:32-f64:64-f128:64-a0:8:16-n32:64"),
+    InstrInfo(*this), TLInfo(*this), TSInfo(*this),
+    FrameLowering(*this, Subtarget) {
+}
+
+namespace {
+/// SystemZ Code Generator Pass Configuration Options.
+class SystemZPassConfig : public TargetPassConfig {
+public:
+  SystemZPassConfig(SystemZTargetMachine *TM, PassManagerBase &PM)
+    : TargetPassConfig(TM, PM) {}
+
+  SystemZTargetMachine &getSystemZTargetMachine() const {
+    return getTM<SystemZTargetMachine>();
+  }
+
+  virtual bool addInstSelector() LLVM_OVERRIDE; // overrides TargetPassConfig
+};
+} // end anonymous namespace
+
+bool SystemZPassConfig::addInstSelector() {
+  addPass(createSystemZISelDag(getSystemZTargetMachine(), getOptLevel()));
+  return false;
+}
+
+TargetPassConfig *SystemZTargetMachine::createPassConfig(PassManagerBase &PM) {
+  return new SystemZPassConfig(this, PM);
+}
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.h b/lib/Target/SystemZ/SystemZTargetMachine.h
new file mode 100644
index 0000000..98614e7
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZTargetMachine.h
@@ -0,0 +1,74 @@
+//==- SystemZTargetMachine.h - Define TargetMachine for SystemZ ---*- C++ -*-=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SystemZ specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef SYSTEMZTARGETMACHINE_H
+#define SYSTEMZTARGETMACHINE_H
+
+#include "SystemZFrameLowering.h"
+#include "SystemZISelLowering.h"
+#include "SystemZInstrInfo.h"
+#include "SystemZRegisterInfo.h"
+#include "SystemZSubtarget.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class SystemZTargetMachine : public LLVMTargetMachine {
+  SystemZSubtarget        Subtarget;
+  const DataLayout        DL;
+  SystemZInstrInfo        InstrInfo;
+  SystemZTargetLowering   TLInfo;
+  TargetSelectionDAGInfo  TSInfo;
+  SystemZFrameLowering    FrameLowering;
+
+public:
+  SystemZTargetMachine(const Target &T, StringRef TT, StringRef CPU,
+                       StringRef FS, const TargetOptions &Options,
+                       Reloc::Model RM, CodeModel::Model CM,
+                       CodeGenOpt::Level OL);
+
+  // Override TargetMachine.
+  virtual const TargetFrameLowering *getFrameLowering() const LLVM_OVERRIDE {
+    return &FrameLowering;
+  }
+  virtual const SystemZInstrInfo *getInstrInfo() const LLVM_OVERRIDE {
+    return &InstrInfo;
+  }
+  virtual const SystemZSubtarget *getSubtargetImpl() const LLVM_OVERRIDE {
+    return &Subtarget;
+  }
+  virtual const DataLayout *getDataLayout() const LLVM_OVERRIDE {
+    return &DL;
+  }
+  virtual const SystemZRegisterInfo *getRegisterInfo() const LLVM_OVERRIDE {
+    return &InstrInfo.getRegisterInfo();
+  }
+  virtual const SystemZTargetLowering *getTargetLowering() const LLVM_OVERRIDE {
+    return &TLInfo;
+  }
+  virtual const TargetSelectionDAGInfo *getSelectionDAGInfo() const
+    LLVM_OVERRIDE {
+    return &TSInfo;
+  }
+
+  // Override LLVMTargetMachine
+  virtual TargetPassConfig *createPassConfig(PassManagerBase &PM) LLVM_OVERRIDE;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/SystemZ/TargetInfo/CMakeLists.txt b/lib/Target/SystemZ/TargetInfo/CMakeLists.txt
new file mode 100644
index 0000000..b6051d3
--- /dev/null
+++ b/lib/Target/SystemZ/TargetInfo/CMakeLists.txt
@@ -0,0 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMSystemZInfo
+  SystemZTargetInfo.cpp
+  )
+
+add_dependencies(LLVMSystemZInfo SystemZCommonTableGen)
diff --git a/lib/Target/SystemZ/TargetInfo/LLVMBuild.txt b/lib/Target/SystemZ/TargetInfo/LLVMBuild.txt
new file mode 100644
index 0000000..ea43736
--- /dev/null
+++ b/lib/Target/SystemZ/TargetInfo/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/SystemZ/TargetInfo/LLVMBuild.txt ------------*- Conf -*--===;
+;
+;                     The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+;   http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = SystemZInfo
+parent = SystemZ
+required_libraries = MC Support Target
+add_to_library_groups = SystemZ
diff --git a/lib/Target/SystemZ/TargetInfo/Makefile b/lib/Target/SystemZ/TargetInfo/Makefile
new file mode 100644
index 0000000..0be80eb
--- /dev/null
+++ b/lib/Target/SystemZ/TargetInfo/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/SystemZ/TargetInfo/Makefile --------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMSystemZInfo
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp b/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp
new file mode 100644
index 0000000..8f9aa28
--- /dev/null
+++ b/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp
@@ -0,0 +1,20 @@
+//===-- SystemZTargetInfo.cpp - SystemZ target implementation -------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZ.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+Target llvm::TheSystemZTarget;
+
+extern "C" void LLVMInitializeSystemZTargetInfo() {
+  RegisterTarget<Triple::systemz, /*HasJIT=*/true>
+    X(TheSystemZTarget, "systemz", "SystemZ");
+}
diff --git a/lib/Target/Target.cpp b/lib/Target/Target.cpp
index 9a78ebc..3d92f29 100644
--- a/lib/Target/Target.cpp
+++ b/lib/Target/Target.cpp
@@ -16,6 +16,7 @@
 #include "llvm-c/Initialization.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Value.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/PassManager.h"
 #include "llvm/Target/TargetLibraryInfo.h"
@@ -23,6 +24,23 @@
 
 using namespace llvm;
 
+inline DataLayout *unwrap(LLVMTargetDataRef P) {
+  return reinterpret_cast<DataLayout*>(P);
+}
+
+inline LLVMTargetDataRef wrap(const DataLayout *P) {
+  return reinterpret_cast<LLVMTargetDataRef>(const_cast<DataLayout*>(P));
+}
+
+inline TargetLibraryInfo *unwrap(LLVMTargetLibraryInfoRef P) {
+  return reinterpret_cast<TargetLibraryInfo*>(P);
+}
+
+inline LLVMTargetLibraryInfoRef wrap(const TargetLibraryInfo *P) {
+  TargetLibraryInfo *X = const_cast<TargetLibraryInfo*>(P);
+  return reinterpret_cast<LLVMTargetLibraryInfoRef>(X);
+}
+
 void llvm::initializeTarget(PassRegistry &Registry) {
   initializeDataLayoutPass(Registry);
   initializeTargetLibraryInfoPass(Registry);
diff --git a/lib/Target/TargetMachineC.cpp b/lib/Target/TargetMachineC.cpp
index 79f74bd..01d12e8 100644
--- a/lib/Target/TargetMachineC.cpp
+++ b/lib/Target/TargetMachineC.cpp
@@ -28,7 +28,36 @@
 
 using namespace llvm;
 
+inline DataLayout *unwrap(LLVMTargetDataRef P) {
+  return reinterpret_cast<DataLayout*>(P);
+}
+
+inline LLVMTargetDataRef wrap(const DataLayout *P) {
+  return reinterpret_cast<LLVMTargetDataRef>(const_cast<DataLayout*>(P));
+}
+
+inline TargetLibraryInfo *unwrap(LLVMTargetLibraryInfoRef P) {
+  return reinterpret_cast<TargetLibraryInfo*>(P);
+}
+
+inline LLVMTargetLibraryInfoRef wrap(const TargetLibraryInfo *P) {
+  TargetLibraryInfo *X = const_cast<TargetLibraryInfo*>(P);
+  return reinterpret_cast<LLVMTargetLibraryInfoRef>(X);
+}
 
+inline TargetMachine *unwrap(LLVMTargetMachineRef P) {
+  return reinterpret_cast<TargetMachine*>(P);
+}
+inline Target *unwrap(LLVMTargetRef P) {
+  return reinterpret_cast<Target*>(P);
+}
+inline LLVMTargetMachineRef wrap(const TargetMachine *P) {
+  return
+    reinterpret_cast<LLVMTargetMachineRef>(const_cast<TargetMachine*>(P));
+}
+inline LLVMTargetRef wrap(const Target * P) {
+  return reinterpret_cast<LLVMTargetRef>(const_cast<Target*>(P));
+}
 
 LLVMTargetRef LLVMGetFirstTarget() {
    const Target* target = &*TargetRegistry::begin();
@@ -77,29 +106,9 @@ LLVMTargetMachineRef LLVMCreateTargetMachine(LLVMTargetRef T, char* Triple,
       break;
   }
 
-  CodeModel::Model CM;
-  switch (CodeModel) {
-    case LLVMCodeModelJITDefault:
-      CM = CodeModel::JITDefault;
-      break;
-    case LLVMCodeModelSmall:
-      CM = CodeModel::Small;
-      break;
-    case LLVMCodeModelKernel:
-      CM = CodeModel::Kernel;
-      break;
-    case LLVMCodeModelMedium:
-      CM = CodeModel::Medium;
-      break;
-    case LLVMCodeModelLarge:
-      CM = CodeModel::Large;
-      break;
-    default:
-      CM = CodeModel::Default;
-      break;
-  }
-  CodeGenOpt::Level OL;
+  CodeModel::Model CM = unwrap(CodeModel);
 
+  CodeGenOpt::Level OL;
   switch (Level) {
     case LLVMCodeGenLevelNone:
       OL = CodeGenOpt::None;
@@ -149,8 +158,8 @@ LLVMTargetDataRef LLVMGetTargetMachineData(LLVMTargetMachineRef T) {
   return wrap(unwrap(T)->getDataLayout());
 }
 
-LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M,
-  char* Filename, LLVMCodeGenFileType codegen, char** ErrorMessage) {
+static LLVMBool LLVMTargetMachineEmit(LLVMTargetMachineRef T, LLVMModuleRef M,
+  formatted_raw_ostream &OS, LLVMCodeGenFileType codegen, char **ErrorMessage) {
   TargetMachine* TM = unwrap(T);
   Module* Mod = unwrap(M);
 
@@ -176,14 +185,7 @@ LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M,
       ft = TargetMachine::CGFT_ObjectFile;
       break;
   }
-  raw_fd_ostream dest(Filename, error, raw_fd_ostream::F_Binary);
-  formatted_raw_ostream destf(dest);
-  if (!error.empty()) {
-    *ErrorMessage = strdup(error.c_str());
-    return true;
-  }
-
-  if (TM->addPassesToEmitFile(pass, destf, ft)) {
+  if (TM->addPassesToEmitFile(pass, OS, ft)) {
     error = "TargetMachine can't emit a file of this type";
     *ErrorMessage = strdup(error.c_str());
     return true;
@@ -191,7 +193,35 @@ LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M,
 
   pass.run(*Mod);
 
-  destf.flush();
-  dest.flush();
+  OS.flush();
   return false;
 }
+
+LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M,
+  char* Filename, LLVMCodeGenFileType codegen, char** ErrorMessage) {
+  std::string error;
+  raw_fd_ostream dest(Filename, error, raw_fd_ostream::F_Binary);
+  formatted_raw_ostream destf(dest);
+  if (!error.empty()) { // raw_fd_ostream reported a problem with Filename.
+    *ErrorMessage = strdup(error.c_str()); // strdup'd copy for the caller.
+    return true;
+  }
+  bool Result = LLVMTargetMachineEmit(T, M, destf, codegen, ErrorMessage);
+  dest.flush(); // The helper flushed destf; flush the underlying stream too.
+  return Result;
+}
+
+LLVMBool LLVMTargetMachineEmitToMemoryBuffer(LLVMTargetMachineRef T,
+  LLVMModuleRef M, LLVMCodeGenFileType codegen, char** ErrorMessage,
+  LLVMMemoryBufferRef *OutMemBuf) {
+  std::string CodeString; // Backing storage for the emitted output.
+  raw_string_ostream OStream(CodeString);
+  formatted_raw_ostream Out(OStream);
+  bool Result = LLVMTargetMachineEmit(T, M, Out, codegen, ErrorMessage);
+  OStream.flush();
+  // Note: *OutMemBuf is assigned below even when Result reports failure.
+  std::string &Data = OStream.str();
+  *OutMemBuf = LLVMCreateMemoryBufferWithMemoryRangeCopy(Data.c_str(),
+                                                     Data.length(), "");
+  return Result;
+}
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index e462322..68908ab 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -33,17 +33,451 @@ using namespace llvm;
 namespace {
 struct X86Operand;
 
+static const char OpPrecedence[] = { // Indexed by InfixCalculatorTok value.
+  0, // IC_PLUS
+  0, // IC_MINUS
+  1, // IC_MULTIPLY
+  1, // IC_DIVIDE
+  2, // IC_RPAREN
+  3, // IC_LPAREN
+  0, // IC_IMM
+  0  // IC_REGISTER
+};
+
 class X86AsmParser : public MCTargetAsmParser {
   MCSubtargetInfo &STI;
   MCAsmParser &Parser;
   ParseInstructionInfo *InstInfo;
 private:
+  enum InfixCalculatorTok { // Values index OpPrecedence; keep both in sync.
+    IC_PLUS = 0,
+    IC_MINUS,
+    IC_MULTIPLY,
+    IC_DIVIDE,
+    IC_RPAREN,
+    IC_LPAREN,
+    IC_IMM,
+    IC_REGISTER
+  };
+
+  class InfixCalculator { // Infix-to-postfix converter and evaluator.
+    typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
+    SmallVector<InfixCalculatorTok, 4> InfixOperatorStack; // Pending operators.
+    SmallVector<ICToken, 4> PostfixStack; // Tokens in postfix order.
+    
+  public:
+    int64_t popOperand() { // Pop the most recent operand and return its value.
+      assert (!PostfixStack.empty() && "Poped an empty stack!");
+      ICToken Op = PostfixStack.pop_back_val();
+      assert ((Op.first == IC_IMM || Op.first == IC_REGISTER)
+              && "Expected and immediate or register!");
+      return Op.second;
+    }
+    void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
+      assert ((Op == IC_IMM || Op == IC_REGISTER) &&
+              "Unexpected operand!");
+      PostfixStack.push_back(std::make_pair(Op, Val));
+    }
+    
+    void popOperator() { InfixOperatorStack.pop_back_val(); }
+    void pushOperator(InfixCalculatorTok Op) {
+      // Push the new operator if the stack is empty.
+      if (InfixOperatorStack.empty()) {
+        InfixOperatorStack.push_back(Op);
+        return;
+      }
+      
+      // Push the new operator if it has a higher precedence than the operator
+      // on the top of the stack or the operator on the top of the stack is a
+      // left parenthesis.
+      unsigned Idx = InfixOperatorStack.size() - 1;
+      InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
+      if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
+        InfixOperatorStack.push_back(Op);
+        return;
+      }
+      
+      // The operator on the top of the stack has equal or higher precedence
+      // than the new operator.
+      unsigned ParenCount = 0; // Unmatched ')' seen while unwinding.
+      while (1) {
+        // Nothing to process.
+        if (InfixOperatorStack.empty())
+          break;
+        
+        Idx = InfixOperatorStack.size() - 1;
+        StackOp = InfixOperatorStack[Idx];
+        if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
+          break;
+        
+        // If the parentheses are balanced (ParenCount is zero) and we see a
+        // left parenthesis, then stop processing.
+        if (!ParenCount && StackOp == IC_LPAREN)
+          break;
+        
+        if (StackOp == IC_RPAREN) {
+          ++ParenCount;
+          InfixOperatorStack.pop_back_val();
+        } else if (StackOp == IC_LPAREN) {
+          --ParenCount;
+          InfixOperatorStack.pop_back_val();
+        } else {
+          InfixOperatorStack.pop_back_val();
+          PostfixStack.push_back(std::make_pair(StackOp, 0));
+        }
+      }
+      // Push the new operator.
+      InfixOperatorStack.push_back(Op);
+    }
+    int64_t execute() { // Evaluate the expression built so far.
+      // Push any remaining operators onto the postfix stack.
+      while (!InfixOperatorStack.empty()) {
+        InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
+        if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
+          PostfixStack.push_back(std::make_pair(StackOp, 0));
+      }
+      
+      if (PostfixStack.empty())
+        return 0; // An empty expression evaluates to 0.
+      
+      SmallVector<ICToken, 16> OperandStack;
+      for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
+        ICToken Op = PostfixStack[i];
+        if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
+          OperandStack.push_back(Op);
+        } else {
+          assert (OperandStack.size() > 1 && "Too few operands.");
+          int64_t Val;
+          ICToken Op2 = OperandStack.pop_back_val();
+          ICToken Op1 = OperandStack.pop_back_val();
+          switch (Op.first) {
+          default:
+            report_fatal_error("Unexpected operator!");
+            break;
+          case IC_PLUS:
+            Val = Op1.second + Op2.second;
+            OperandStack.push_back(std::make_pair(IC_IMM, Val));
+            break;
+          case IC_MINUS:
+            Val = Op1.second - Op2.second;
+            OperandStack.push_back(std::make_pair(IC_IMM, Val));
+            break;
+          case IC_MULTIPLY:
+            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
+                    "Multiply operation with an immediate and a register!");
+            Val = Op1.second * Op2.second;
+            OperandStack.push_back(std::make_pair(IC_IMM, Val));
+            break;
+          case IC_DIVIDE:
+            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
+                    "Divide operation with an immediate and a register!");
+            assert (Op2.second != 0 && "Division by zero!");
+            Val = Op1.second / Op2.second; // Zero divisor is assert-only.
+            OperandStack.push_back(std::make_pair(IC_IMM, Val));
+            break;
+          }
+        }
+      }
+      assert (OperandStack.size() == 1 && "Expected a single result.");
+      return OperandStack.pop_back_val().second;
+    }
+  };
+
+  enum IntelExprState {
+    IES_PLUS,
+    IES_MINUS,
+    IES_MULTIPLY,
+    IES_DIVIDE,
+    IES_LBRAC,
+    IES_RBRAC,
+    IES_LPAREN,
+    IES_RPAREN,
+    IES_REGISTER,
+    IES_INTEGER,
+    IES_IDENTIFIER,
+    IES_ERROR // Set by the on*() handlers' default case.
+  };
+
+  class IntelExprStateMachine {
+    IntelExprState State, PrevState;
+    unsigned BaseReg, IndexReg, TmpReg, Scale;
+    int64_t Imm;
+    const MCExpr *Sym;
+    StringRef SymName;
+    bool StopOnLBrac, AddImmPrefix;
+    InfixCalculator IC;
+    InlineAsmIdentifierInfo Info;
+  public:
+    IntelExprStateMachine(int64_t imm, bool stoponlbrac, bool addimmprefix) :
+      State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0),
+      Scale(1), Imm(imm), Sym(0), StopOnLBrac(stoponlbrac),
+      AddImmPrefix(addimmprefix) { Info.clear(); }
+    
+    unsigned getBaseReg() { return BaseReg; }
+    unsigned getIndexReg() { return IndexReg; }
+    unsigned getScale() { return Scale; }
+    const MCExpr *getSym() { return Sym; }
+    StringRef getSymName() { return SymName; }
+    int64_t getImm() { return Imm + IC.execute(); }
+    bool isValidEndState() { return State == IES_RBRAC; }
+    bool getStopOnLBrac() { return StopOnLBrac; }
+    bool getAddImmPrefix() { return AddImmPrefix; }
+    bool hadError() { return State == IES_ERROR; }
+
+    InlineAsmIdentifierInfo &getIdentifierInfo() {
+      return Info;
+    }
+
+    void onPlus() {
+      IntelExprState CurrState = State;
+      switch (State) {
+      default:
+        State = IES_ERROR;
+        break;
+      case IES_INTEGER:
+      case IES_RPAREN:
+      case IES_REGISTER:
+        State = IES_PLUS;
+        IC.pushOperator(IC_PLUS);
+        if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
+          // If we already have a BaseReg, then assume this is the IndexReg with
+          // a scale of 1.
+          if (!BaseReg) {
+            BaseReg = TmpReg;
+          } else {
+            assert (!IndexReg && "BaseReg/IndexReg already set!");
+            IndexReg = TmpReg;
+            Scale = 1;
+          }
+        }
+        break;
+      }
+      PrevState = CurrState;
+    }
+    void onMinus() {
+      IntelExprState CurrState = State;
+      switch (State) {
+      default:
+        State = IES_ERROR;
+        break;
+      case IES_PLUS:
+      case IES_MULTIPLY:
+      case IES_DIVIDE:
+      case IES_LPAREN:
+      case IES_RPAREN:
+      case IES_LBRAC:
+      case IES_RBRAC:
+      case IES_INTEGER:
+      case IES_REGISTER:
+        State = IES_MINUS;
+        // Only push the minus operator if it is not a unary operator.
+        if (!(CurrState == IES_PLUS || CurrState == IES_MINUS ||
+              CurrState == IES_MULTIPLY || CurrState == IES_DIVIDE ||
+              CurrState == IES_LPAREN || CurrState == IES_LBRAC))
+          IC.pushOperator(IC_MINUS);
+        if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
+          // If we already have a BaseReg, then assume this is the IndexReg with
+          // a scale of 1.
+          if (!BaseReg) {
+            BaseReg = TmpReg;
+          } else {
+            assert (!IndexReg && "BaseReg/IndexReg already set!");
+            IndexReg = TmpReg;
+            Scale = 1;
+          }
+        }
+        break;
+      }
+      PrevState = CurrState;
+    }
+    void onRegister(unsigned Reg) {
+      IntelExprState CurrState = State;
+      switch (State) {
+      default:
+        State = IES_ERROR;
+        break;
+      case IES_PLUS:
+      case IES_LPAREN:
+        State = IES_REGISTER;
+        TmpReg = Reg;
+        IC.pushOperand(IC_REGISTER);
+        break;
+      case IES_MULTIPLY:
+        // Index Register - Scale * Register
+        if (PrevState == IES_INTEGER) {
+          assert (!IndexReg && "IndexReg already set!");
+          State = IES_REGISTER;
+          IndexReg = Reg;
+          // Get the scale and replace the 'Scale * Register' with '0'.
+          Scale = IC.popOperand();
+          IC.pushOperand(IC_IMM);
+          IC.popOperator();
+        } else {
+          State = IES_ERROR;
+        }
+        break;
+      }
+      PrevState = CurrState;
+    }
+    void onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName) {
+      PrevState = State;
+      switch (State) {
+      default:
+        State = IES_ERROR;
+        break;
+      case IES_PLUS:
+      case IES_MINUS:
+        State = IES_INTEGER;
+        Sym = SymRef;
+        SymName = SymRefName;
+        IC.pushOperand(IC_IMM);
+        break;
+      }
+    }
+    void onInteger(int64_t TmpInt) {
+      IntelExprState CurrState = State;
+      switch (State) {
+      default:
+        State = IES_ERROR;
+        break;
+      case IES_PLUS:
+      case IES_MINUS:
+      case IES_DIVIDE:
+      case IES_MULTIPLY:
+      case IES_LPAREN:
+        State = IES_INTEGER;
+        if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
+          // Index Register - Register * Scale
+          assert (!IndexReg && "IndexReg already set!");
+          IndexReg = TmpReg;
+          Scale = TmpInt;
+          // Get the scale and replace the 'Register * Scale' with '0'.
+          IC.popOperator();
+        } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
+                    PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
+                    PrevState == IES_LPAREN || PrevState == IES_LBRAC) &&
+                   CurrState == IES_MINUS) {
+          // Unary minus.  No need to pop the minus operator because it was never
+          // pushed.
+          IC.pushOperand(IC_IMM, -TmpInt); // Push -Imm.
+        } else {
+          IC.pushOperand(IC_IMM, TmpInt);
+        }
+        break;
+      }
+      PrevState = CurrState;
+    }
+    void onStar() {
+      PrevState = State;
+      switch (State) {
+      default:
+        State = IES_ERROR;
+        break;
+      case IES_INTEGER:
+      case IES_REGISTER:
+      case IES_RPAREN:
+        State = IES_MULTIPLY;
+        IC.pushOperator(IC_MULTIPLY);
+        break;
+      }
+    }
+    void onDivide() {
+      PrevState = State;
+      switch (State) {
+      default:
+        State = IES_ERROR;
+        break;
+      case IES_INTEGER:
+      case IES_RPAREN:
+        State = IES_DIVIDE;
+        IC.pushOperator(IC_DIVIDE);
+        break;
+      }
+    }
+    void onLBrac() {
+      PrevState = State;
+      switch (State) {
+      default:
+        State = IES_ERROR;
+        break;
+      case IES_RBRAC:
+        State = IES_PLUS;
+        IC.pushOperator(IC_PLUS);
+        break;
+      }
+    }
+    void onRBrac() {
+      IntelExprState CurrState = State;
+      switch (State) {
+      default:
+        State = IES_ERROR;
+        break;
+      case IES_INTEGER:
+      case IES_REGISTER:
+      case IES_RPAREN:
+        State = IES_RBRAC;
+        if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
+          // If we already have a BaseReg, then assume this is the IndexReg with
+          // a scale of 1.
+          if (!BaseReg) {
+            BaseReg = TmpReg;
+          } else {
+            assert (!IndexReg && "BaseReg/IndexReg already set!");
+            IndexReg = TmpReg;
+            Scale = 1;
+          }
+        }
+        break;
+      }
+      PrevState = CurrState;
+    }
+    void onLParen() {
+      IntelExprState CurrState = State;
+      switch (State) {
+      default:
+        State = IES_ERROR;
+        break;
+      case IES_PLUS:
+      case IES_MINUS:
+      case IES_MULTIPLY:
+      case IES_DIVIDE:
+      case IES_LPAREN:
+        // FIXME: We don't handle this type of unary minus, yet.
+        if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
+            PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
+            PrevState == IES_LPAREN || PrevState == IES_LBRAC) &&
+            CurrState == IES_MINUS) {
+          State = IES_ERROR;
+          break;
+        }
+        State = IES_LPAREN;
+        IC.pushOperator(IC_LPAREN);
+        break;
+      }
+      PrevState = CurrState;
+    }
+    void onRParen() {
+      PrevState = State;
+      switch (State) {
+      default:
+        State = IES_ERROR;
+        break;
+      case IES_INTEGER:
+      case IES_REGISTER:
+      case IES_RPAREN:
+        State = IES_RPAREN;
+        IC.pushOperator(IC_RPAREN);
+        break;
+      }
+    }
+  };
+
   MCAsmParser &getParser() const { return Parser; }
 
   MCAsmLexer &getLexer() const { return Parser.getLexer(); }
 
   bool Error(SMLoc L, const Twine &Msg,
-             ArrayRef<SMRange> Ranges = ArrayRef<SMRange>(),
+             ArrayRef<SMRange> Ranges = None,
              bool MatchingInlineAsm = false) {
     if (MatchingInlineAsm) return true;
     return Parser.Error(L, Msg, Ranges);
@@ -57,21 +491,25 @@ private:
   X86Operand *ParseOperand();
   X86Operand *ParseATTOperand();
   X86Operand *ParseIntelOperand();
-  X86Operand *ParseIntelOffsetOfOperator(SMLoc StartLoc);
-  X86Operand *ParseIntelOperator(SMLoc StartLoc, unsigned OpKind);
-  X86Operand *ParseIntelMemOperand(unsigned SegReg, uint64_t ImmDisp,
+  X86Operand *ParseIntelOffsetOfOperator();
+  X86Operand *ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp);
+  X86Operand *ParseIntelOperator(unsigned OpKind);
+  X86Operand *ParseIntelMemOperand(unsigned SegReg, int64_t ImmDisp,
                                    SMLoc StartLoc);
-  X86Operand *ParseIntelBracExpression(unsigned SegReg, uint64_t ImmDisp,
-                                       unsigned Size);
-  X86Operand *ParseIntelVarWithQualifier(const MCExpr *&Disp,
-                                         SMLoc &IdentStart);
-  X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
+  X86Operand *ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
+  X86Operand *ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
+                                       int64_t ImmDisp, unsigned Size);
+  X86Operand *ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier,
+                                   InlineAsmIdentifierInfo &Info,
+                                   bool IsUnevaluatedOperand, SMLoc &End);
 
-  X86Operand *CreateMemForInlineAsm(const MCExpr *Disp, SMLoc Start, SMLoc End,
-                                    SMLoc SizeDirLoc, unsigned Size);
+  X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
 
-  bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr **NewDisp,
-                             SmallString<64> &Err);
+  X86Operand *CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp,
+                                    unsigned BaseReg, unsigned IndexReg,
+                                    unsigned Scale, SMLoc Start, SMLoc End,
+                                    unsigned Size, StringRef Identifier,
+                                    InlineAsmIdentifierInfo &Info);
 
   bool ParseDirectiveWord(unsigned Size, SMLoc L);
   bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
@@ -101,6 +539,10 @@ private:
     setAvailableFeatures(FB);
   }
 
+  bool isParsingIntelSyntax() {
+    return getParser().getAssemblerDialect();
+  }
+
   /// @name Auto-generated Matcher Functions
   /// {
 
@@ -123,10 +565,6 @@ public:
                                 SmallVectorImpl<MCParsedAsmOperand*> &Operands);
 
   virtual bool ParseDirective(AsmToken DirectiveID);
-
-  bool isParsingIntelSyntax() {
-    return getParser().getAssemblerDialect();
-  }
 };
 } // end anonymous namespace
 
@@ -176,6 +614,8 @@ struct X86Operand : public MCParsedAsmOperand {
 
   SMLoc StartLoc, EndLoc;
   SMLoc OffsetOfLoc;
+  StringRef SymName;
+  void *OpDecl;
   bool AddressOf;
 
   struct TokOp {
@@ -210,6 +650,9 @@ struct X86Operand : public MCParsedAsmOperand {
   X86Operand(KindTy K, SMLoc Start, SMLoc End)
     : Kind(K), StartLoc(Start), EndLoc(End) {}
 
+  StringRef getSymName() { return SymName; }
+  void *getOpDecl() { return OpDecl; }
+
   /// getStartLoc - Get the location of the first token of this operand.
   SMLoc getStartLoc() const { return StartLoc; }
   /// getEndLoc - Get the location of the last token of this operand.
@@ -473,11 +916,15 @@ struct X86Operand : public MCParsedAsmOperand {
 
   static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc,
                                bool AddressOf = false,
-                               SMLoc OffsetOfLoc = SMLoc()) {
+                               SMLoc OffsetOfLoc = SMLoc(),
+                               StringRef SymName = StringRef(),
+                               void *OpDecl = 0) {
     X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc);
     Res->Reg.RegNo = RegNo;
     Res->AddressOf = AddressOf;
     Res->OffsetOfLoc = OffsetOfLoc;
+    Res->SymName = SymName;
+    Res->OpDecl = OpDecl;
     return Res;
   }
 
@@ -489,7 +936,8 @@ struct X86Operand : public MCParsedAsmOperand {
 
   /// Create an absolute memory operand.
   static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc,
-                               unsigned Size = 0) {
+                               unsigned Size = 0, StringRef SymName = StringRef(),
+                               void *OpDecl = 0) {
     X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
     Res->Mem.SegReg   = 0;
     Res->Mem.Disp     = Disp;
@@ -497,7 +945,9 @@ struct X86Operand : public MCParsedAsmOperand {
     Res->Mem.IndexReg = 0;
     Res->Mem.Scale    = 1;
     Res->Mem.Size     = Size;
-    Res->AddressOf = false;
+    Res->SymName      = SymName;
+    Res->OpDecl       = OpDecl;
+    Res->AddressOf    = false;
     return Res;
   }
 
@@ -505,7 +955,9 @@ struct X86Operand : public MCParsedAsmOperand {
   static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp,
                                unsigned BaseReg, unsigned IndexReg,
                                unsigned Scale, SMLoc StartLoc, SMLoc EndLoc,
-                               unsigned Size = 0) {
+                               unsigned Size = 0,
+                               StringRef SymName = StringRef(),
+                               void *OpDecl = 0) {
     // We should never just have a displacement, that should be parsed as an
     // absolute memory operand.
     assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!");
@@ -520,7 +972,9 @@ struct X86Operand : public MCParsedAsmOperand {
     Res->Mem.IndexReg = IndexReg;
     Res->Mem.Scale    = Scale;
     Res->Mem.Size     = Size;
-    Res->AddressOf = false;
+    Res->SymName      = SymName;
+    Res->OpDecl       = OpDecl;
+    Res->AddressOf    = false;
     return Res;
   }
 };
@@ -676,306 +1130,104 @@ static unsigned getIntelMemOperandSize(StringRef OpStr) {
   return Size;
 }
 
-enum IntelBracExprState {
-  IBES_START,
-  IBES_LBRAC,
-  IBES_RBRAC,
-  IBES_REGISTER,
-  IBES_REGISTER_STAR,
-  IBES_REGISTER_STAR_INTEGER,
-  IBES_INTEGER,
-  IBES_INTEGER_STAR,
-  IBES_INDEX_REGISTER,
-  IBES_IDENTIFIER,
-  IBES_DISP_EXPR,
-  IBES_MINUS,
-  IBES_ERROR
-};
-
-class IntelBracExprStateMachine {
-  IntelBracExprState State;
-  unsigned BaseReg, IndexReg, Scale;
-  int64_t Disp;
-
-  unsigned TmpReg;
-  int64_t TmpInteger;
-
-  bool isPlus;
-
-public:
-  IntelBracExprStateMachine(MCAsmParser &parser, int64_t disp) :
-    State(IBES_START), BaseReg(0), IndexReg(0), Scale(1), Disp(disp),
-    TmpReg(0), TmpInteger(0), isPlus(true) {}
-
-  unsigned getBaseReg() { return BaseReg; }
-  unsigned getIndexReg() { return IndexReg; }
-  unsigned getScale() { return Scale; }
-  int64_t getDisp() { return Disp; }
-  bool isValidEndState() { return State == IBES_RBRAC; }
-
-  void onPlus() {
-    switch (State) {
-    default:
-      State = IBES_ERROR;
-      break;
-    case IBES_INTEGER:
-      State = IBES_START;
-      if (isPlus)
-        Disp += TmpInteger;
-      else
-        Disp -= TmpInteger;
-      break;
-    case IBES_REGISTER:
-      State = IBES_START;
-      // If we already have a BaseReg, then assume this is the IndexReg with a
-      // scale of 1.
-      if (!BaseReg) {
-        BaseReg = TmpReg;
-      } else {
-        assert (!IndexReg && "BaseReg/IndexReg already set!");
-        IndexReg = TmpReg;
-        Scale = 1;
-      }
-      break;
-    case IBES_INDEX_REGISTER:
-      State = IBES_START;
-      break;
-    }
-    isPlus = true;
-  }
-  void onMinus() {
-    switch (State) {
-    default:
-      State = IBES_ERROR;
-      break;
-    case IBES_START:
-      State = IBES_MINUS;
-      break;
-    case IBES_INTEGER:
-      State = IBES_START;
-      if (isPlus)
-        Disp += TmpInteger;
-      else
-        Disp -= TmpInteger;
-      break;
-    case IBES_REGISTER:
-      State = IBES_START;
-      // If we already have a BaseReg, then assume this is the IndexReg with a
-      // scale of 1.
-      if (!BaseReg) {
-        BaseReg = TmpReg;
-      } else {
-        assert (!IndexReg && "BaseReg/IndexReg already set!");
-        IndexReg = TmpReg;
-        Scale = 1;
-      }
-      break;
-    case IBES_INDEX_REGISTER:
-      State = IBES_START;
-      break;
-    }
-    isPlus = false;
-  }
-  void onRegister(unsigned Reg) {
-    switch (State) {
-    default:
-      State = IBES_ERROR;
-      break;
-    case IBES_START:
-      State = IBES_REGISTER;
-      TmpReg = Reg;
-      break;
-    case IBES_INTEGER_STAR:
-      assert (!IndexReg && "IndexReg already set!");
-      State = IBES_INDEX_REGISTER;
-      IndexReg = Reg;
-      Scale = TmpInteger;
-      break;
-    }
-  }
-  void onDispExpr() {
-    switch (State) {
-    default:
-      State = IBES_ERROR;
-      break;
-    case IBES_START:
-      State = IBES_DISP_EXPR;
-      break;
+X86Operand *
+X86AsmParser::CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp,
+                                    unsigned BaseReg, unsigned IndexReg,
+                                    unsigned Scale, SMLoc Start, SMLoc End,
+                                    unsigned Size, StringRef Identifier,
+                                    InlineAsmIdentifierInfo &Info){
+  if (isa<MCSymbolRefExpr>(Disp)) {
+    // If this is not a VarDecl then assume it is a FuncDecl or some other label
+    // reference.  We need an 'r' constraint here, so we need to create a
+    // register operand to ensure proper matching.  Just pick a GPR based on
+    // the size of a pointer.
+    if (!Info.IsVarDecl) {
+      unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX;
+      return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true,
+                                   SMLoc(), Identifier, Info.OpDecl);
     }
-  }
-  void onInteger(int64_t TmpInt) {
-    switch (State) {
-    default:
-      State = IBES_ERROR;
-      break;
-    case IBES_START:
-      State = IBES_INTEGER;
-      TmpInteger = TmpInt;
-      break;
-    case IBES_MINUS:
-      State = IBES_INTEGER;
-      TmpInteger = TmpInt;
-      break;
-    case IBES_REGISTER_STAR:
-      assert (!IndexReg && "IndexReg already set!");
-      State = IBES_INDEX_REGISTER;
-      IndexReg = TmpReg;
-      Scale = TmpInt;
-      break;
-    }
-  }
-  void onStar() {
-    switch (State) {
-    default:
-      State = IBES_ERROR;
-      break;
-    case IBES_INTEGER:
-      State = IBES_INTEGER_STAR;
-      break;
-    case IBES_REGISTER:
-      State = IBES_REGISTER_STAR;
-      break;
-    }
-  }
-  void onLBrac() {
-    switch (State) {
-    default:
-      State = IBES_ERROR;
-      break;
-    case IBES_RBRAC:
-      State = IBES_START;
-      isPlus = true;
-      break;
-    }
-  }
-  void onRBrac() {
-    switch (State) {
-    default:
-      State = IBES_ERROR;
-      break;
-    case IBES_DISP_EXPR:
-      State = IBES_RBRAC;
-      break;
-    case IBES_INTEGER:
-      State = IBES_RBRAC;
-      if (isPlus)
-        Disp += TmpInteger;
-      else
-        Disp -= TmpInteger;
-      break;
-    case IBES_REGISTER:
-      State = IBES_RBRAC;
-      // If we already have a BaseReg, then assume this is the IndexReg with a
-      // scale of 1.
-      if (!BaseReg) {
-        BaseReg = TmpReg;
-      } else {
-        assert (!IndexReg && "BaseReg/IndexReg already set!");
-        IndexReg = TmpReg;
-        Scale = 1;
-      }
-      break;
-    case IBES_INDEX_REGISTER:
-      State = IBES_RBRAC;
-      break;
-    }
-  }
-};
-
-X86Operand *X86AsmParser::CreateMemForInlineAsm(const MCExpr *Disp, SMLoc Start,
-                                                SMLoc End, SMLoc SizeDirLoc,
-                                                unsigned Size) {
-  bool NeedSizeDir = false;
-  bool IsVarDecl = false;
-  if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Disp)) {
-    const MCSymbol &Sym = SymRef->getSymbol();
-    // FIXME: The SemaLookup will fail if the name is anything other then an
-    // identifier.
-    // FIXME: Pass a valid SMLoc.
-    unsigned tLength, tSize, tType;
-    SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, tLength,
-                                            tSize, tType, IsVarDecl);
     if (!Size) {
-      Size = tType * 8; // Size is in terms of bits in this context.
-      NeedSizeDir = Size > 0;
+      Size = Info.Type * 8; // Size is in terms of bits in this context.
+      if (Size)
+        InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start,
+                                                    /*Len=*/0, Size));
     }
   }
 
-  // If this is not a VarDecl then assume it is a FuncDecl or some other label
-  // reference.  We need an 'r' constraint here, so we need to create register
-  // operand to ensure proper matching.  Just pick a GPR based on the size of
-  // a pointer.
-  if (!IsVarDecl) {
-    unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX;
-    return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true);
-  }
-
-  if (NeedSizeDir)
-    InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, SizeDirLoc,
-                                                /*Len*/0, Size));  
-
   // When parsing inline assembly we set the base register to a non-zero value
-  // as we don't know the actual value at this time.  This is necessary to
+  // if we don't know the actual value at this time.  This is necessary to
   // get the matching correct in some cases.
-  return X86Operand::CreateMem(/*SegReg*/0, Disp, /*BaseReg*/1, /*IndexReg*/0,
-                               /*Scale*/1, Start, End, Size);
+  BaseReg = BaseReg ? BaseReg : 1;
+  return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
+                               End, Size, Identifier, Info.OpDecl);
 }
 
-X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg,
-                                                   uint64_t ImmDisp,
-                                                   unsigned Size) {
-  const AsmToken &Tok = Parser.getTok();
-  SMLoc Start = Tok.getLoc(), End = Tok.getEndLoc();
-
-  // Eat '['
-  if (getLexer().isNot(AsmToken::LBrac))
-    return ErrorOperand(Start, "Expected '[' token!");
-  Parser.Lex();
-
-  unsigned TmpReg = 0;
-
-  // Try to handle '[' 'Symbol' ']'
-  if (getLexer().is(AsmToken::Identifier)) {
-    if (ParseRegister(TmpReg, Start, End)) {
-      const MCExpr *Disp;
-      SMLoc IdentStart = Tok.getLoc();
-      if (getParser().parseExpression(Disp, End))
-        return 0;
-
-      if (X86Operand *Err = ParseIntelVarWithQualifier(Disp, IdentStart))
-        return Err;
-
-      if (getLexer().isNot(AsmToken::RBrac))
-        return ErrorOperand(Parser.getTok().getLoc(), "Expected ']' token!");
-
-      // FIXME: We don't handle 'ImmDisp' '[' 'Symbol' ']'.
-      if (ImmDisp)
-        return ErrorOperand(Start, "Unsupported immediate displacement!");
-
-      // Adjust the EndLoc due to the ']'.
-      End = SMLoc::getFromPointer(Parser.getTok().getEndLoc().getPointer()-1);
-      Parser.Lex();
-      if (!isParsingInlineAsm())
-        return X86Operand::CreateMem(Disp, Start, End, Size);
-
-      // We want the size directive before the '['.
-      SMLoc SizeDirLoc = SMLoc::getFromPointer(Start.getPointer()-1);
-      return CreateMemForInlineAsm(Disp, Start, End, SizeDirLoc, Size);
+// Queue rewrites that strip a bracketed Intel expression down to its symbol,
+// moving the combined immediate displacement (FinalImmDisp) before the '['.
+static void
+RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> *AsmRewrites,
+                           StringRef SymName, int64_t ImmDisp,
+                           int64_t FinalImmDisp, SMLoc &BracLoc,
+                           SMLoc &StartInBrac, SMLoc &End) {
+  // Remove the '[' and ']' from the IR string.
+  AsmRewrites->push_back(AsmRewrite(AOK_Skip, BracLoc, 1));
+  AsmRewrites->push_back(AsmRewrite(AOK_Skip, End, 1));
+
+  // A non-zero ImmDisp was parsed before the bracketed expression, i.e.
+  // ImmDisp [ BaseReg + Scale*IndexReg + Disp ].  If it differs from the
+  // state machine's FinalImmDisp, the brackets added a displacement.
+  if (ImmDisp != FinalImmDisp) {
+    if (ImmDisp) {
+      // We have an immediate displacement before the bracketed expression.
+      // Adjust this to match the final immediate displacement.
+      bool Found = false;
+      for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
+             E = AsmRewrites->end(); I != E; ++I) {
+        if ((*I).Loc.getPointer() > BracLoc.getPointer())
+          continue;
+        if ((*I).Kind == AOK_ImmPrefix || (*I).Kind == AOK_Imm) {
+          assert (!Found && "ImmDisp already rewritten.");
+          (*I).Kind = AOK_Imm;
+          (*I).Len = BracLoc.getPointer() - (*I).Loc.getPointer();
+          (*I).Val = FinalImmDisp;
+          Found = true;
+          break;
+        }
+      }
+      assert (Found && "Unable to rewrite ImmDisp.");
+    } else {
+      // Symbolic plus immediate displacement, but none before the brackets:
+      // emit the immediate displacement just before the '['.
+      AsmRewrites->push_back(AsmRewrite(AOK_Imm, BracLoc, 0, FinalImmDisp));
     }
   }
+  // Remove all the ImmPrefix rewrites within the brackets.
+  for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
+         E = AsmRewrites->end(); I != E; ++I) {
+    if ((*I).Loc.getPointer() < StartInBrac.getPointer())
+      continue;
+    if ((*I).Kind == AOK_ImmPrefix)
+      (*I).Kind = AOK_Delete;
+  }
+  const char *SymLocPtr = SymName.data();
+  // Skip everything before the symbol.  Len is signed so the assert can fire.
+  if (int64_t Len = SymLocPtr - StartInBrac.getPointer()) {
+    assert(Len > 0 && "Expected a non-negative length.");
+    AsmRewrites->push_back(AsmRewrite(AOK_Skip, StartInBrac, unsigned(Len)));
+  }
+  // Skip everything after the symbol.
+  if (int64_t Len = End.getPointer() - (SymLocPtr + SymName.size())) {
+    SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size());
+    assert(Len > 0 && "Expected a non-negative length.");
+    AsmRewrites->push_back(AsmRewrite(AOK_Skip, Loc, unsigned(Len)));
+  }
+}
 
-  // Parse [ BaseReg + Scale*IndexReg + Disp ].  We may have already parsed an
-  // immediate displacement before the bracketed expression.
-  bool Done = false;
-  IntelBracExprStateMachine SM(Parser, ImmDisp);
-
-  // If we parsed a register, then the end loc has already been set and
-  // the identifier has already been lexed.  We also need to update the
-  // state.
-  if (TmpReg)
-    SM.onRegister(TmpReg);
+X86Operand *
+X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
+  const AsmToken &Tok = Parser.getTok();
 
-  const MCExpr *Disp = 0;
+  bool Done = false;
   while (!Done) {
     bool UpdateLocLex = true;
 
@@ -983,6 +1235,10 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg,
     // identifier.  Don't try an parse it as a register.
     if (Tok.getString().startswith("."))
       break;
+    
+    // If we're parsing an immediate expression, we don't expect a '['.
+    if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac)
+      break;
 
     switch (getLexer().getKind()) {
     default: {
@@ -992,139 +1248,185 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg,
       }
       return ErrorOperand(Tok.getLoc(), "Unexpected token!");
     }
+    case AsmToken::EndOfStatement: {
+      Done = true;
+      break;
+    }
     case AsmToken::Identifier: {
-      // This could be a register or a displacement expression.
-      if(!ParseRegister(TmpReg, Start, End)) {
+      // This could be a register or a symbolic displacement.
+      unsigned TmpReg;
+      const MCExpr *Val;
+      SMLoc IdentLoc = Tok.getLoc();
+      StringRef Identifier = Tok.getString();
+      if(!ParseRegister(TmpReg, IdentLoc, End)) {
         SM.onRegister(TmpReg);
         UpdateLocLex = false;
         break;
-      } else if (!getParser().parseExpression(Disp, End)) {
-        SM.onDispExpr();
+      } else {
+        if (!isParsingInlineAsm()) {
+          if (getParser().parsePrimaryExpr(Val, End))
+            return ErrorOperand(Tok.getLoc(), "Unexpected identifier!");
+        } else {
+          InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
+          if (X86Operand *Err = ParseIntelIdentifier(Val, Identifier, Info,
+                                                     /*Unevaluated*/ false, End))
+            return Err;
+        }
+        SM.onIdentifierExpr(Val, Identifier);
         UpdateLocLex = false;
         break;
       }
       return ErrorOperand(Tok.getLoc(), "Unexpected identifier!");
     }
-    case AsmToken::Integer: {
-      int64_t Val = Tok.getIntVal();
-      SM.onInteger(Val);
+    case AsmToken::Integer:
+      if (isParsingInlineAsm() && SM.getAddImmPrefix())
+        InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix,
+                                                    Tok.getLoc()));
+      SM.onInteger(Tok.getIntVal());
       break;
-    }
     case AsmToken::Plus:    SM.onPlus(); break;
     case AsmToken::Minus:   SM.onMinus(); break;
     case AsmToken::Star:    SM.onStar(); break;
+    case AsmToken::Slash:   SM.onDivide(); break;
     case AsmToken::LBrac:   SM.onLBrac(); break;
     case AsmToken::RBrac:   SM.onRBrac(); break;
+    case AsmToken::LParen:  SM.onLParen(); break;
+    case AsmToken::RParen:  SM.onRParen(); break;
     }
+    if (SM.hadError())
+      return ErrorOperand(Tok.getLoc(), "Unexpected token!");
+
     if (!Done && UpdateLocLex) {
       End = Tok.getLoc();
       Parser.Lex(); // Consume the token.
     }
   }
+  return 0;
+}
 
-  if (!Disp)
-    Disp = MCConstantExpr::Create(SM.getDisp(), getContext());
+X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
+                                                   int64_t ImmDisp,
+                                                   unsigned Size) {
+  const AsmToken &Tok = Parser.getTok();
+  SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc();
+  if (getLexer().isNot(AsmToken::LBrac))
+    return ErrorOperand(BracLoc, "Expected '[' token!");
+  Parser.Lex(); // Eat '['
+
+  SMLoc StartInBrac = Tok.getLoc();
+  // Parse [ Symbol + ImmDisp ] and [ BaseReg + Scale*IndexReg + ImmDisp ].  We
+  // may have already parsed an immediate displacement before the bracketed
+  // expression.
+  IntelExprStateMachine SM(ImmDisp, /*StopOnLBrac=*/false, /*AddImmPrefix=*/true);
+  if (X86Operand *Err = ParseIntelExpression(SM, End))
+    return Err;
+
+  const MCExpr *Disp;
+  if (const MCExpr *Sym = SM.getSym()) {
+    // A symbolic displacement.
+    Disp = Sym;
+    if (isParsingInlineAsm())
+      RewriteIntelBracExpression(InstInfo->AsmRewrites, SM.getSymName(),
+                                 ImmDisp, SM.getImm(), BracLoc, StartInBrac,
+                                 End);
+  } else {
+    // An immediate displacement only.   
+    Disp = MCConstantExpr::Create(SM.getImm(), getContext());
+  }
 
   // Parse the dot operator (e.g., [ebx].foo.bar).
   if (Tok.getString().startswith(".")) {
-    SmallString<64> Err;
     const MCExpr *NewDisp;
-    if (ParseIntelDotOperator(Disp, &NewDisp, Err))
-      return ErrorOperand(Tok.getLoc(), Err);
+    if (X86Operand *Err = ParseIntelDotOperator(Disp, NewDisp))
+      return Err;
     
-    End = Parser.getTok().getEndLoc();
+    End = Tok.getEndLoc();
     Parser.Lex();  // Eat the field.
     Disp = NewDisp;
   }
 
   int BaseReg = SM.getBaseReg();
   int IndexReg = SM.getIndexReg();
-
-  // handle [-42]
-  if (!BaseReg && !IndexReg) {
-    if (!SegReg)
-      return X86Operand::CreateMem(Disp, Start, End);
-    else
-      return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, Start, End, Size);
+  int Scale = SM.getScale();
+  if (!isParsingInlineAsm()) {
+    // handle [-42]
+    if (!BaseReg && !IndexReg) {
+      if (!SegReg)
+        return X86Operand::CreateMem(Disp, Start, End, Size);
+      else
+        return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, Start, End, Size);
+    }
+    return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
+                                 End, Size);
   }
 
-  int Scale = SM.getScale();
-  return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
-                               Start, End, Size);
+  InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
+  return CreateMemForInlineAsm(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
+                               End, Size, SM.getSymName(), Info);
 }
 
 // Inline assembly may use variable names with namespace alias qualifiers.
-X86Operand *X86AsmParser::ParseIntelVarWithQualifier(const MCExpr *&Disp,
-                                                     SMLoc &IdentStart) {
-  // We should only see Foo::Bar if we're parsing inline assembly.
-  if (!isParsingInlineAsm())
-    return 0;
-
-  // If we don't see a ':' then there can't be a qualifier.
-  if (getLexer().isNot(AsmToken::Colon))
-    return 0;
+X86Operand *X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
+                                               StringRef &Identifier,
+                                               InlineAsmIdentifierInfo &Info,
+                                               bool IsUnevaluatedOperand,
+                                               SMLoc &End) {
+  assert (isParsingInlineAsm() && "Expected to be parsing inline assembly.");
+  Val = 0;
 
+  StringRef LineBuf(Identifier.data());
+  SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
 
-  bool Done = false;
   const AsmToken &Tok = Parser.getTok();
-  SMLoc IdentEnd = Tok.getEndLoc();
-  while (!Done) {
-    switch (getLexer().getKind()) {
-    default:
-      Done = true; 
-      break;
-    case AsmToken::Colon:
-      getLexer().Lex(); // Consume ':'.
-      if (getLexer().isNot(AsmToken::Colon))
-        return ErrorOperand(Tok.getLoc(), "Expected ':' token!");
-      getLexer().Lex(); // Consume second ':'.
-      if (getLexer().isNot(AsmToken::Identifier))
-        return ErrorOperand(Tok.getLoc(), "Expected an identifier token!");
-      break;
-    case AsmToken::Identifier:
-      IdentEnd = Tok.getEndLoc();
-      getLexer().Lex(); // Consume the identifier.
-      break;
-    }
+
+  // Advance the token stream until the end of the most recently consumed
+  // token coincides with the end of what the frontend claimed.
+  const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
+  while (true) {
+    End = Tok.getEndLoc();
+    getLexer().Lex();
+
+    assert(End.getPointer() <= EndPtr && "frontend claimed part of a token?");
+    if (End.getPointer() == EndPtr) break;
   }
-  size_t Len = IdentEnd.getPointer() - IdentStart.getPointer();
-  StringRef Identifier(IdentStart.getPointer(), Len);
+
+  // Create the symbol reference.
+  Identifier = LineBuf;
   MCSymbol *Sym = getContext().GetOrCreateSymbol(Identifier);
   MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
-  Disp = MCSymbolRefExpr::Create(Sym, Variant, getParser().getContext());
+  Val = MCSymbolRefExpr::Create(Sym, Variant, getParser().getContext());
   return 0;
 }
 
 /// ParseIntelMemOperand - Parse intel style memory operand.
 X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg,
-                                               uint64_t ImmDisp,
+                                               int64_t ImmDisp,
                                                SMLoc Start) {
   const AsmToken &Tok = Parser.getTok();
   SMLoc End;
 
   unsigned Size = getIntelMemOperandSize(Tok.getString());
   if (Size) {
-    Parser.Lex();
-    assert ((Tok.getString() == "PTR" || Tok.getString() == "ptr") &&
-            "Unexpected token!");
-    Parser.Lex();
+    Parser.Lex(); // Eat operand size (e.g., byte, word).
+    if (Tok.getString() != "PTR" && Tok.getString() != "ptr")
+      return ErrorOperand(Start, "Expected 'PTR' or 'ptr' token!");
+    Parser.Lex(); // Eat ptr.
   }
 
   // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
   if (getLexer().is(AsmToken::Integer)) {
-    const AsmToken &IntTok = Parser.getTok();
     if (isParsingInlineAsm())
       InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix,
-                                                  IntTok.getLoc()));
-    uint64_t ImmDisp = IntTok.getIntVal();
+                                                  Tok.getLoc()));
+    int64_t ImmDisp = Tok.getIntVal();
     Parser.Lex(); // Eat the integer.
     if (getLexer().isNot(AsmToken::LBrac))
       return ErrorOperand(Start, "Expected '[' token!");
-    return ParseIntelBracExpression(SegReg, ImmDisp, Size);
+    return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
   }
 
   if (getLexer().is(AsmToken::LBrac))
-    return ParseIntelBracExpression(SegReg, ImmDisp, Size);
+    return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
 
   if (!ParseRegister(SegReg, Start, End)) {
     // Handel SegReg : [ ... ]
@@ -1133,37 +1435,37 @@ X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg,
     Parser.Lex(); // Eat :
     if (getLexer().isNot(AsmToken::LBrac))
       return ErrorOperand(Start, "Expected '[' token!");
-    return ParseIntelBracExpression(SegReg, ImmDisp, Size);
+    return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
   }
 
-  const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
-  SMLoc IdentStart = Tok.getLoc();
-  if (getParser().parseExpression(Disp, End))
-    return 0;
+  const MCExpr *Val;
+  if (!isParsingInlineAsm()) {
+    if (getParser().parsePrimaryExpr(Val, End))
+      return ErrorOperand(Tok.getLoc(), "Unexpected token!");
 
-  if (!isParsingInlineAsm())
-    return X86Operand::CreateMem(Disp, Start, End, Size);
+    return X86Operand::CreateMem(Val, Start, End, Size);
+  }
 
-  if (X86Operand *Err = ParseIntelVarWithQualifier(Disp, IdentStart))
+  InlineAsmIdentifierInfo Info;
+  StringRef Identifier = Tok.getString();
+  if (X86Operand *Err = ParseIntelIdentifier(Val, Identifier, Info,
+                                             /*Unevaluated*/ false, End))
     return Err;
-
-  return CreateMemForInlineAsm(Disp, Start, End, Start, Size);
+  return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0,
+                               /*Scale=*/1, Start, End, Size, Identifier, Info);
 }
 
 /// Parse the '.' operator.
-bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
-                                         const MCExpr **NewDisp,
-                                         SmallString<64> &Err) {
-  AsmToken Tok = *&Parser.getTok();
-  uint64_t OrigDispVal, DotDispVal;
+X86Operand *X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
+                                                const MCExpr *&NewDisp) {
+  const AsmToken &Tok = Parser.getTok();
+  int64_t OrigDispVal, DotDispVal;
 
   // FIXME: Handle non-constant expressions.
-  if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp)) {
+  if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp))
     OrigDispVal = OrigDisp->getValue();
-  } else {
-    Err = "Non-constant offsets are not supported!";
-    return true;
-  }
+  else
+    return ErrorOperand(Tok.getLoc(), "Non-constant offsets are not supported!");
 
   // Drop the '.'.
   StringRef DotDispStr = Tok.getString().drop_front(1);
@@ -1173,23 +1475,15 @@ bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
     APInt DotDisp;
     DotDispStr.getAsInteger(10, DotDisp);
     DotDispVal = DotDisp.getZExtValue();
-  } else if (Tok.is(AsmToken::Identifier)) {
-    // We should only see an identifier when parsing the original inline asm.
-    // The front-end should rewrite this in terms of immediates.
-    assert (isParsingInlineAsm() && "Unexpected field name!");
-
+  } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
     unsigned DotDisp;
     std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
     if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
-                                           DotDisp)) {
-      Err = "Unable to lookup field reference!";
-      return true;
-    }
+                                           DotDisp))
+      return ErrorOperand(Tok.getLoc(), "Unable to lookup field reference!");
     DotDispVal = DotDisp;
-  } else {
-    Err = "Unexpected token type!";
-    return true;
-  }
+  } else
+    return ErrorOperand(Tok.getLoc(), "Unexpected token type!");
 
   if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
     SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data());
@@ -1199,22 +1493,24 @@ bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
                                                 Val));
   }
 
-  *NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext());
-  return false;
+  NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext());
+  return 0;
 }
 
 /// Parse the 'offset' operator.  This operator is used to specify the
 /// location rather then the content of a variable.
-X86Operand *X86AsmParser::ParseIntelOffsetOfOperator(SMLoc Start) {
-  SMLoc OffsetOfLoc = Start;
+X86Operand *X86AsmParser::ParseIntelOffsetOfOperator() {
+  const AsmToken &Tok = Parser.getTok();
+  SMLoc OffsetOfLoc = Tok.getLoc();
   Parser.Lex(); // Eat offset.
-  Start = Parser.getTok().getLoc();
-  assert (Parser.getTok().is(AsmToken::Identifier) && "Expected an identifier");
 
-  SMLoc End;
   const MCExpr *Val;
-  if (getParser().parseExpression(Val, End))
-    return ErrorOperand(Start, "Unable to parse expression!");
+  InlineAsmIdentifierInfo Info;
+  SMLoc Start = Tok.getLoc(), End;
+  StringRef Identifier = Tok.getString();
+  if (X86Operand *Err = ParseIntelIdentifier(Val, Identifier, Info,
+                                             /*Unevaluated*/ false, End))
+    return Err;
 
   // Don't emit the offset operator.
   InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7));
@@ -1224,7 +1520,7 @@ X86Operand *X86AsmParser::ParseIntelOffsetOfOperator(SMLoc Start) {
   // the size of a pointer.
   unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX;
   return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
-                               OffsetOfLoc);
+                               OffsetOfLoc, Identifier, Info.OpDecl);
 }
 
 enum IntelOperatorKind {
@@ -1239,34 +1535,25 @@ enum IntelOperatorKind {
 /// variable.  A variable's size is the product of its LENGTH and TYPE.  The
 /// TYPE operator returns the size of a C or C++ type or variable. If the
 /// variable is an array, TYPE returns the size of a single element.
-X86Operand *X86AsmParser::ParseIntelOperator(SMLoc Start, unsigned OpKind) {
-  SMLoc TypeLoc = Start;
-  Parser.Lex(); // Eat offset.
-  Start = Parser.getTok().getLoc();
-  assert (Parser.getTok().is(AsmToken::Identifier) && "Expected an identifier");
-
-  SMLoc End;
-  const MCExpr *Val;
-  if (getParser().parseExpression(Val, End))
-    return 0;
+X86Operand *X86AsmParser::ParseIntelOperator(unsigned OpKind) {
+  const AsmToken &Tok = Parser.getTok();
+  SMLoc TypeLoc = Tok.getLoc();
+  Parser.Lex(); // Eat operator.
+
+  const MCExpr *Val = 0;
+  InlineAsmIdentifierInfo Info;
+  SMLoc Start = Tok.getLoc(), End;
+  StringRef Identifier = Tok.getString();
+  if (X86Operand *Err = ParseIntelIdentifier(Val, Identifier, Info,
+                                             /*Unevaluated*/ true, End))
+    return Err;
 
-  unsigned Length = 0, Size = 0, Type = 0;
-  if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Val)) {
-    const MCSymbol &Sym = SymRef->getSymbol();
-    // FIXME: The SemaLookup will fail if the name is anything other then an
-    // identifier.
-    // FIXME: Pass a valid SMLoc.
-    bool IsVarDecl;
-    if (!SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, Length,
-                                                 Size, Type, IsVarDecl))
-      return ErrorOperand(Start, "Unable to lookup expr!");
-  }
-  unsigned CVal;
+  unsigned CVal = 0;
   switch(OpKind) {
   default: llvm_unreachable("Unexpected operand kind!");
-  case IOK_LENGTH: CVal = Length; break;
-  case IOK_SIZE: CVal = Size; break;
-  case IOK_TYPE: CVal = Type; break;
+  case IOK_LENGTH: CVal = Info.Length; break;
+  case IOK_SIZE: CVal = Info.Size; break;
+  case IOK_TYPE: CVal = Info.Type; break;
   }
 
   // Rewrite the type operator and the C or C++ type or variable in terms of an
@@ -1279,44 +1566,54 @@ X86Operand *X86AsmParser::ParseIntelOperator(SMLoc Start, unsigned OpKind) {
 }
 
 X86Operand *X86AsmParser::ParseIntelOperand() {
-  SMLoc Start = Parser.getTok().getLoc(), End;
-  StringRef AsmTokStr = Parser.getTok().getString();
+  const AsmToken &Tok = Parser.getTok();
+  SMLoc Start = Tok.getLoc(), End;
 
   // Offset, length, type and size operators.
   if (isParsingInlineAsm()) {
+    StringRef AsmTokStr = Tok.getString();
     if (AsmTokStr == "offset" || AsmTokStr == "OFFSET")
-      return ParseIntelOffsetOfOperator(Start);
+      return ParseIntelOffsetOfOperator();
     if (AsmTokStr == "length" || AsmTokStr == "LENGTH")
-      return ParseIntelOperator(Start, IOK_LENGTH);
+      return ParseIntelOperator(IOK_LENGTH);
     if (AsmTokStr == "size" || AsmTokStr == "SIZE")
-      return ParseIntelOperator(Start, IOK_SIZE);
+      return ParseIntelOperator(IOK_SIZE);
     if (AsmTokStr == "type" || AsmTokStr == "TYPE")
-      return ParseIntelOperator(Start, IOK_TYPE);
+      return ParseIntelOperator(IOK_TYPE);
   }
 
   // Immediate.
-  if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Real) ||
-      getLexer().is(AsmToken::Minus)) {
-    const MCExpr *Val;
-    bool isInteger = getLexer().is(AsmToken::Integer);
-    if (!getParser().parseExpression(Val, End)) {
-      if (isParsingInlineAsm())
+  if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Minus) ||
+      getLexer().is(AsmToken::LParen)) {    
+    AsmToken StartTok = Tok;
+    IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true,
+                             /*AddImmPrefix=*/false);
+    if (X86Operand *Err = ParseIntelExpression(SM, End))
+      return Err;
+
+    int64_t Imm = SM.getImm();
+    if (isParsingInlineAsm()) {
+      unsigned Len = Tok.getLoc().getPointer() - Start.getPointer();
+      if (StartTok.getString().size() == Len)
+        // Just add a prefix if this wasn't a complex immediate expression.
         InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start));
-      // Immediate.
-      if (getLexer().isNot(AsmToken::LBrac))
-        return X86Operand::CreateImm(Val, Start, End);
-
-      // Only positive immediates are valid.
-      if (!isInteger) {
-        Error(Parser.getTok().getLoc(), "expected a positive immediate "
-              "displacement before bracketed expr.");
-        return 0;
-      }
+      else
+        // Otherwise, rewrite the complex expression as a single immediate.
+        InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, Start, Len, Imm));
+    }
 
-      // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
-      if (uint64_t ImmDisp = dyn_cast<MCConstantExpr>(Val)->getValue())
-        return ParseIntelMemOperand(/*SegReg=*/0, ImmDisp, Start);
+    if (getLexer().isNot(AsmToken::LBrac)) {
+      const MCExpr *ImmExpr = MCConstantExpr::Create(Imm, getContext());
+      return X86Operand::CreateImm(ImmExpr, Start, End);
     }
+
+    // Only positive immediates are valid.
+    if (Imm < 0)
+      return ErrorOperand(Start, "expected a positive immediate displacement "
+                          "before bracketed expr.");
+
+    // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
+    return ParseIntelMemOperand(/*SegReg=*/0, Imm, Start);
   }
 
   // Register.
@@ -1907,7 +2204,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
   assert(!Operands.empty() && "Unexpect empty operand list!");
   X86Operand *Op = static_cast<X86Operand*>(Operands[0]);
   assert(Op->isToken() && "Leading operand should always be a mnemonic!");
-  ArrayRef<SMRange> EmptyRanges = ArrayRef<SMRange>();
+  ArrayRef<SMRange> EmptyRanges = None;
 
   // First, handle aliases that expand to multiple instructions.
   // FIXME: This should be replaced with a real .td file alias mechanism.
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
index d14899d..7cb71f0 100644
--- a/lib/Target/X86/CMakeLists.txt
+++ b/lib/Target/X86/CMakeLists.txt
@@ -33,6 +33,7 @@ set(sources
   X86TargetObjectFile.cpp
   X86TargetTransformInfo.cpp
   X86VZeroUpper.cpp
+  X86FixupLEAs.cpp
   )
 
 if( CMAKE_CL_64 )
diff --git a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index 3669560..d8f7278 100644
--- a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -20,6 +20,7 @@
 #include "X86MCTargetDesc.h"
 #include "llvm/Support/DataTypes.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/MC/MCInstrInfo.h"
 
 namespace llvm {
 
@@ -41,7 +42,6 @@ namespace X86 {
     AddrNumOperands = 5
   };
 } // end namespace X86;
- 
 
 /// X86II - This namespace holds all of the target specific flags that
 /// instruction info tracks.
@@ -274,11 +274,12 @@ namespace X86II {
 
     //// MRM_XX - A mod/rm byte of exactly 0xXX.
     MRM_C1 = 33, MRM_C2 = 34, MRM_C3 = 35, MRM_C4 = 36,
-    MRM_C8 = 37, MRM_C9 = 38, MRM_E8 = 39, MRM_F0 = 40,
-    MRM_F8 = 41, MRM_F9 = 42, MRM_D0 = 45, MRM_D1 = 46,
-    MRM_D4 = 47, MRM_D5 = 48, MRM_D6 = 49, MRM_D8 = 50,
-    MRM_D9 = 51, MRM_DA = 52, MRM_DB = 53, MRM_DC = 54,
-    MRM_DD = 55, MRM_DE = 56, MRM_DF = 57,
+    MRM_C8 = 37, MRM_C9 = 38, MRM_CA = 39, MRM_CB = 40,
+    MRM_E8 = 41, MRM_F0 = 42, MRM_F8 = 45, MRM_F9 = 46,
+    MRM_D0 = 47, MRM_D1 = 48, MRM_D4 = 49, MRM_D5 = 50,
+    MRM_D6 = 51, MRM_D8 = 52, MRM_D9 = 53, MRM_DA = 54,
+    MRM_DB = 55, MRM_DC = 56, MRM_DD = 57, MRM_DE = 58,
+    MRM_DF = 59,
 
     /// RawFrmImm8 - This is used for the ENTER instruction, which has two
     /// immediates, the first of which is a 16-bit immediate (specified by
@@ -521,6 +522,26 @@ namespace X86II {
     }
   }
 
+  /// getOperandBias - compute any additional adjustment needed to
+  ///                  the offset to the start of the memory operand
+  ///                  in this instruction.
+  /// If this is a two-address instruction, skip one of the register operands.
+  /// FIXME: This should be handled during MCInst lowering.
+  inline int getOperandBias(const MCInstrDesc& Desc)
+  {
+    unsigned NumOps = Desc.getNumOperands();
+    unsigned CurOp = 0;
+    if (NumOps > 1 && Desc.getOperandConstraint(1, MCOI::TIED_TO) == 0)
+      ++CurOp;
+    else if (NumOps > 3 && Desc.getOperandConstraint(2, MCOI::TIED_TO) == 0) {
+      assert(Desc.getOperandConstraint(NumOps - 1, MCOI::TIED_TO) == 1);
+      // Special case for GATHER with 2 TIED_TO operands
+      // Skip the first 2 operands: dst, mask_wb
+      CurOp += 2;
+    }
+    return CurOp;
+  }
+
   /// getMemoryOperandNo - The function returns the MCInst operand # for the
   /// first field of the memory operand.  If the instruction doesn't have a
   /// memory operand, this returns -1.
@@ -576,12 +597,13 @@ namespace X86II {
     }
     case X86II::MRM_C1: case X86II::MRM_C2: case X86II::MRM_C3:
     case X86II::MRM_C4: case X86II::MRM_C8: case X86II::MRM_C9:
-    case X86II::MRM_E8: case X86II::MRM_F0: case X86II::MRM_F8:
-    case X86II::MRM_F9: case X86II::MRM_D0: case X86II::MRM_D1:
-    case X86II::MRM_D4: case X86II::MRM_D5: case X86II::MRM_D6:
-    case X86II::MRM_D8: case X86II::MRM_D9: case X86II::MRM_DA:
-    case X86II::MRM_DB: case X86II::MRM_DC: case X86II::MRM_DD:
-    case X86II::MRM_DE: case X86II::MRM_DF:
+    case X86II::MRM_CA: case X86II::MRM_CB: case X86II::MRM_E8:
+    case X86II::MRM_F0: case X86II::MRM_F8: case X86II::MRM_F9:
+    case X86II::MRM_D0: case X86II::MRM_D1: case X86II::MRM_D4:
+    case X86II::MRM_D5: case X86II::MRM_D6: case X86II::MRM_D8:
+    case X86II::MRM_D9: case X86II::MRM_DA: case X86II::MRM_DB:
+    case X86II::MRM_DC: case X86II::MRM_DD: case X86II::MRM_DE:
+    case X86II::MRM_DF:
       return -1;
     }
   }
diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 776cee1..016af71 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -237,6 +237,14 @@ StartsWithGlobalOffsetTable(const MCExpr *Expr) {
   return GOT_Normal;
 }
 
+static bool HasSecRelSymbolRef(const MCExpr *Expr) {
+  if (Expr->getKind() == MCExpr::SymbolRef) {
+    const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr*>(Expr);
+    return Ref->getKind() == MCSymbolRefExpr::VK_SECREL;
+  }
+  return false;
+}
+
 void X86MCCodeEmitter::
 EmitImmediate(const MCOperand &DispOp, SMLoc Loc, unsigned Size,
               MCFixupKind FixupKind, unsigned &CurByte, raw_ostream &OS,
@@ -268,8 +276,13 @@ EmitImmediate(const MCOperand &DispOp, SMLoc Loc, unsigned Size,
       if (Kind == GOT_Normal)
         ImmOffset = CurByte;
     } else if (Expr->getKind() == MCExpr::SymbolRef) {
-      const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr*>(Expr);
-      if (Ref->getKind() == MCSymbolRefExpr::VK_SECREL) {
+      if (HasSecRelSymbolRef(Expr)) {
+        FixupKind = MCFixupKind(FK_SecRel_4);
+      }
+    } else if (Expr->getKind() == MCExpr::Binary) {
+      const MCBinaryExpr *Bin = static_cast<const MCBinaryExpr*>(Expr);
+      if (HasSecRelSymbolRef(Bin->getLHS())
+          || HasSecRelSymbolRef(Bin->getRHS())) {
         FixupKind = MCFixupKind(FK_SecRel_4);
       }
     }
@@ -979,18 +992,8 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
   if ((TSFlags & X86II::FormMask) == X86II::Pseudo)
     return;
 
-  // If this is a two-address instruction, skip one of the register operands.
-  // FIXME: This should be handled during MCInst lowering.
   unsigned NumOps = Desc.getNumOperands();
-  unsigned CurOp = 0;
-  if (NumOps > 1 && Desc.getOperandConstraint(1, MCOI::TIED_TO) == 0)
-    ++CurOp;
-  else if (NumOps > 3 && Desc.getOperandConstraint(2, MCOI::TIED_TO) == 0) {
-    assert(Desc.getOperandConstraint(NumOps - 1, MCOI::TIED_TO) == 1);
-    // Special case for GATHER with 2 TIED_TO operands
-    // Skip the first 2 operands: dst, mask_wb
-    CurOp += 2;
-  }
+  unsigned CurOp = X86II::getOperandBias(Desc);
 
   // Keep track of the current byte being emitted.
   unsigned CurByte = 0;
@@ -1138,12 +1141,13 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
     break;
   case X86II::MRM_C1: case X86II::MRM_C2: case X86II::MRM_C3:
   case X86II::MRM_C4: case X86II::MRM_C8: case X86II::MRM_C9:
-  case X86II::MRM_D0: case X86II::MRM_D1: case X86II::MRM_D4:
-  case X86II::MRM_D5: case X86II::MRM_D6: case X86II::MRM_D8:
-  case X86II::MRM_D9: case X86II::MRM_DA: case X86II::MRM_DB:
-  case X86II::MRM_DC: case X86II::MRM_DD: case X86II::MRM_DE:
-  case X86II::MRM_DF: case X86II::MRM_E8: case X86II::MRM_F0:
-  case X86II::MRM_F8: case X86II::MRM_F9:
+  case X86II::MRM_CA: case X86II::MRM_CB: case X86II::MRM_D0:
+  case X86II::MRM_D1: case X86II::MRM_D4: case X86II::MRM_D5:
+  case X86II::MRM_D6: case X86II::MRM_D8: case X86II::MRM_D9:
+  case X86II::MRM_DA: case X86II::MRM_DB: case X86II::MRM_DC:
+  case X86II::MRM_DD: case X86II::MRM_DE: case X86II::MRM_DF:
+  case X86II::MRM_E8: case X86II::MRM_F0: case X86II::MRM_F8:
+  case X86II::MRM_F9:
     EmitByte(BaseOpcode, CurByte, OS);
 
     unsigned char MRM;
@@ -1155,6 +1159,8 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
     case X86II::MRM_C4: MRM = 0xC4; break;
     case X86II::MRM_C8: MRM = 0xC8; break;
     case X86II::MRM_C9: MRM = 0xC9; break;
+    case X86II::MRM_CA: MRM = 0xCA; break;
+    case X86II::MRM_CB: MRM = 0xCB; break;
     case X86II::MRM_D0: MRM = 0xD0; break;
     case X86II::MRM_D1: MRM = 0xD1; break;
     case X86II::MRM_D4: MRM = 0xD4; break;
diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
index bc272ef..ed64a32 100644
--- a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
@@ -9,6 +9,8 @@
 
 #include "MCTargetDesc/X86FixupKinds.h"
 #include "MCTargetDesc/X86MCTargetDesc.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCValue.h"
 #include "llvm/MC/MCWinCOFFObjectWriter.h"
 #include "llvm/Support/COFF.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -27,7 +29,9 @@ namespace {
     X86WinCOFFObjectWriter(bool Is64Bit_);
     ~X86WinCOFFObjectWriter();
 
-    virtual unsigned getRelocType(unsigned FixupKind) const;
+    virtual unsigned getRelocType(const MCValue &Target,
+                                  const MCFixup &Fixup,
+                                  bool IsCrossSection) const LLVM_OVERRIDE;
   };
 }
 
@@ -38,7 +42,14 @@ X86WinCOFFObjectWriter::X86WinCOFFObjectWriter(bool Is64Bit_)
 
 X86WinCOFFObjectWriter::~X86WinCOFFObjectWriter() {}
 
-unsigned X86WinCOFFObjectWriter::getRelocType(unsigned FixupKind) const {
+unsigned X86WinCOFFObjectWriter::getRelocType(const MCValue &Target,
+                                              const MCFixup &Fixup,
+                                              bool IsCrossSection) const {
+  unsigned FixupKind = IsCrossSection ? FK_PCRel_4 : Fixup.getKind();
+
+  MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ?
+    MCSymbolRefExpr::VK_None : Target.getSymA()->getKind();
+
   switch (FixupKind) {
   case FK_PCRel_4:
   case X86::reloc_riprel_4byte:
@@ -46,6 +57,9 @@ unsigned X86WinCOFFObjectWriter::getRelocType(unsigned FixupKind) const {
     return Is64Bit ? COFF::IMAGE_REL_AMD64_REL32 : COFF::IMAGE_REL_I386_REL32;
   case FK_Data_4:
   case X86::reloc_signed_4byte:
+    if (Modifier == MCSymbolRefExpr::VK_COFF_IMGREL32)
+      return Is64Bit ? COFF::IMAGE_REL_AMD64_ADDR32NB :
+                       COFF::IMAGE_REL_I386_DIR32NB;
     return Is64Bit ? COFF::IMAGE_REL_AMD64_ADDR32 : COFF::IMAGE_REL_I386_DIR32;
   case FK_Data_8:
     if (Is64Bit)
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index 1f9919f..947002f 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -69,6 +69,11 @@ ImmutablePass *createX86TargetTransformInfoPass(const X86TargetMachine *TM);
 /// createX86PadShortFunctions - Return a pass that pads short functions
 /// with NOOPs. This will prevent a stall when returning on the Atom.
 FunctionPass *createX86PadShortFunctions();
+/// createX86FixupLEAs - Return a pass that selectively replaces
+/// certain instructions (like add, sub, inc, dec, some shifts,
+/// and some multiplies) by equivalent LEA instructions, in order
+/// to eliminate execution delays in some Atom processors.
+FunctionPass *createX86FixupLEAs();
 
 } // End llvm namespace
 
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 1dcc344..c865500 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -139,6 +139,8 @@ def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions",
 def FeatureCallRegIndirect : SubtargetFeature<"call-reg-indirect",
                                      "CallRegIndirect", "true",
                                      "Call register indirect">;
+def FeatureLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true",
+                                   "LEA instruction needs inputs at AG stage">;
 
 //===----------------------------------------------------------------------===//
 // X86 processors supported.
@@ -188,6 +190,7 @@ def : ProcessorModel<"atom", AtomModel,
                       FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP,
                       FeatureSlowDivide,
                       FeatureCallRegIndirect,
+                      FeatureLEAUsesAG,
                       FeaturePadShortFunctions]>;
 
 // "Arrandale" along with corei3 and corei5
@@ -252,11 +255,16 @@ def : Proc<"amdfam10",        [FeatureSSE4A,
 // Bobcat
 def : Proc<"btver1",          [FeatureSSSE3, FeatureSSE4A, FeatureCMPXCHG16B,
                                FeatureLZCNT, FeaturePOPCNT]>;
+// Jaguar
+def : Proc<"btver2",          [FeatureAVX, FeatureSSE4A, FeatureCMPXCHG16B,
+                               FeatureAES, FeaturePCLMUL, FeatureBMI,
+                               FeatureF16C, FeatureMOVBE, FeatureLZCNT,
+                               FeaturePOPCNT]>;
 // Bulldozer
 def : Proc<"bdver1",          [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
                                FeatureAES, FeaturePCLMUL,
                                FeatureLZCNT, FeaturePOPCNT]>;
-// Enhanced Bulldozer
+// Piledriver
 def : Proc<"bdver2",          [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
                                FeatureAES, FeaturePCLMUL,
                                FeatureF16C, FeatureLZCNT,
@@ -300,6 +308,9 @@ def ATTAsmParser : AsmParser {
 def ATTAsmParserVariant : AsmParserVariant {
   int Variant = 0;
 
+  // Variant name.
+  string Name = "att";
+
   // Discard comments in assembly strings.
   string CommentDelimiter = "#";
 
@@ -310,6 +321,9 @@ def ATTAsmParserVariant : AsmParserVariant {
 def IntelAsmParserVariant : AsmParserVariant {
   int Variant = 1;
 
+  // Variant name.
+  string Name = "intel";
+
   // Discard comments in assembly strings.
   string CommentDelimiter = ";";
 
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
index 2518e02..8fea6ed 100644
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -1451,6 +1451,14 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
     MCE.emitByte(BaseOpcode);
     MCE.emitByte(0xC9);
     break;
+  case X86II::MRM_CA:
+    MCE.emitByte(BaseOpcode);
+    MCE.emitByte(0xCA);
+    break;
+  case X86II::MRM_CB:
+    MCE.emitByte(BaseOpcode);
+    MCE.emitByte(0xCB);
+    break;
   case X86II::MRM_E8:
     MCE.emitByte(BaseOpcode);
     MCE.emitByte(0xE8);
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index cadec68..cf44bd0 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -68,12 +68,12 @@ public:
 
   virtual bool TargetSelectInstruction(const Instruction *I);
 
-  /// TryToFoldLoad - The specified machine instr operand is a vreg, and that
+  /// \brief The specified machine instr operand is a vreg, and that
   /// vreg is being provided by the specified load instruction.  If possible,
   /// try to fold the load as an operand to the instruction, returning true if
   /// possible.
-  virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
-                             const LoadInst *LI);
+  virtual bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
+                                   const LoadInst *LI);
 
   virtual bool FastLowerArguments();
 
@@ -107,6 +107,8 @@ private:
 
   bool X86SelectShift(const Instruction *I);
 
+  bool X86SelectDivRem(const Instruction *I);
+
   bool X86SelectSelect(const Instruction *I);
 
   bool X86SelectTrunc(const Instruction *I);
@@ -691,11 +693,6 @@ bool X86FastISel::X86SelectStore(const Instruction *I) {
   if (S->isAtomic())
     return false;
 
-  unsigned SABIAlignment =
-    TD.getABITypeAlignment(S->getValueOperand()->getType());
-  if (S->getAlignment() != 0 && S->getAlignment() < SABIAlignment)
-    return false;
-
   MVT VT;
   if (!isTypeLegal(I->getOperand(0)->getType(), VT, /*AllowI1=*/true))
     return false;
@@ -1235,6 +1232,124 @@ bool X86FastISel::X86SelectShift(const Instruction *I) {
   return true;
 }
 
+bool X86FastISel::X86SelectDivRem(const Instruction *I) {
+  const static unsigned NumTypes = 4; // i8, i16, i32, i64
+  const static unsigned NumOps   = 4; // SDiv, SRem, UDiv, URem
+  const static bool S = true;  // IsSigned
+  const static bool U = false; // !IsSigned
+  const static unsigned Copy = TargetOpcode::COPY;
+  // For the X86 DIV/IDIV instruction, in most cases the dividend
+  // (numerator) must be in a specific register pair highreg:lowreg,
+  // producing the quotient in lowreg and the remainder in highreg.
+  // For most data types, to set up the instruction, the dividend is
+  // copied into lowreg, and lowreg is sign-extended or zero-extended
+  // into highreg.  The exception is i8, where the dividend is defined
+  // as a single register rather than a register pair, and we
+  // therefore directly sign-extend or zero-extend the dividend into
+  // lowreg, instead of copying, and ignore the highreg.
+  const static struct DivRemEntry {
+    // The following portion depends only on the data type.
+    const TargetRegisterClass *RC;
+    unsigned LowInReg;  // low part of the register pair
+    unsigned HighInReg; // high part of the register pair
+    // The following portion depends on both the data type and the operation.
+    struct DivRemResult {
+    unsigned OpDivRem;        // The specific DIV/IDIV opcode to use.
+    unsigned OpSignExtend;    // Opcode for sign-extending lowreg into
+                              // highreg, or copying a zero into highreg.
+    unsigned OpCopy;          // Opcode for copying dividend into lowreg, or
+                              // zero/sign-extending into lowreg for i8.
+    unsigned DivRemResultReg; // Register containing the desired result.
+    bool IsOpSigned;          // Whether to use signed or unsigned form.
+    } ResultTable[NumOps];
+  } OpTable[NumTypes] = {
+    { &X86::GR8RegClass,  X86::AX,  0, {
+        { X86::IDIV8r,  0,            X86::MOVSX16rr8, X86::AL,  S }, // SDiv
+        { X86::IDIV8r,  0,            X86::MOVSX16rr8, X86::AH,  S }, // SRem
+        { X86::DIV8r,   0,            X86::MOVZX16rr8, X86::AL,  U }, // UDiv
+        { X86::DIV8r,   0,            X86::MOVZX16rr8, X86::AH,  U }, // URem
+      }
+    }, // i8
+    { &X86::GR16RegClass, X86::AX,  X86::DX, {
+        { X86::IDIV16r, X86::CWD,     Copy,            X86::AX,  S }, // SDiv
+        { X86::IDIV16r, X86::CWD,     Copy,            X86::DX,  S }, // SRem
+        { X86::DIV16r,  X86::MOV16r0, Copy,            X86::AX,  U }, // UDiv
+        { X86::DIV16r,  X86::MOV16r0, Copy,            X86::DX,  U }, // URem
+      }
+    }, // i16
+    { &X86::GR32RegClass, X86::EAX, X86::EDX, {
+        { X86::IDIV32r, X86::CDQ,     Copy,            X86::EAX, S }, // SDiv
+        { X86::IDIV32r, X86::CDQ,     Copy,            X86::EDX, S }, // SRem
+        { X86::DIV32r,  X86::MOV32r0, Copy,            X86::EAX, U }, // UDiv
+        { X86::DIV32r,  X86::MOV32r0, Copy,            X86::EDX, U }, // URem
+      }
+    }, // i32
+    { &X86::GR64RegClass, X86::RAX, X86::RDX, {
+        { X86::IDIV64r, X86::CQO,     Copy,            X86::RAX, S }, // SDiv
+        { X86::IDIV64r, X86::CQO,     Copy,            X86::RDX, S }, // SRem
+        { X86::DIV64r,  X86::MOV64r0, Copy,            X86::RAX, U }, // UDiv
+        { X86::DIV64r,  X86::MOV64r0, Copy,            X86::RDX, U }, // URem
+      }
+    }, // i64
+  };
+
+  MVT VT;
+  if (!isTypeLegal(I->getType(), VT))
+    return false;
+
+  unsigned TypeIndex, OpIndex;
+  switch (VT.SimpleTy) {
+  default: return false;
+  case MVT::i8:  TypeIndex = 0; break;
+  case MVT::i16: TypeIndex = 1; break;
+  case MVT::i32: TypeIndex = 2; break;
+  case MVT::i64: TypeIndex = 3;
+    if (!Subtarget->is64Bit())
+      return false;
+    break;
+  }
+
+  switch (I->getOpcode()) {
+  default: llvm_unreachable("Unexpected div/rem opcode");
+  case Instruction::SDiv: OpIndex = 0; break;
+  case Instruction::SRem: OpIndex = 1; break;
+  case Instruction::UDiv: OpIndex = 2; break;
+  case Instruction::URem: OpIndex = 3; break;
+  }
+
+  const DivRemEntry &TypeEntry = OpTable[TypeIndex];
+  const DivRemEntry::DivRemResult &OpEntry = TypeEntry.ResultTable[OpIndex];
+  unsigned Op0Reg = getRegForValue(I->getOperand(0));
+  if (Op0Reg == 0)
+    return false;
+  unsigned Op1Reg = getRegForValue(I->getOperand(1));
+  if (Op1Reg == 0)
+    return false;
+
+  // Move op0 into low-order input register.
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+          TII.get(OpEntry.OpCopy), TypeEntry.LowInReg).addReg(Op0Reg);
+  // Zero-extend or sign-extend into high-order input register.
+  if (OpEntry.OpSignExtend) {
+    if (OpEntry.IsOpSigned)
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+              TII.get(OpEntry.OpSignExtend));
+    else
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+              TII.get(OpEntry.OpSignExtend), TypeEntry.HighInReg);
+  }
+  // Generate the DIV/IDIV instruction.
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+          TII.get(OpEntry.OpDivRem)).addReg(Op1Reg);
+  // Copy output register into result register.
+  unsigned ResultReg = createResultReg(TypeEntry.RC);
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+          TII.get(Copy), ResultReg).addReg(OpEntry.DivRemResultReg);
+  UpdateValueMap(I, ResultReg);
+
+  return true;
+}
+
 bool X86FastISel::X86SelectSelect(const Instruction *I) {
   MVT VT;
   if (!isTypeLegal(I->getType(), VT))
@@ -2084,6 +2199,11 @@ X86FastISel::TargetSelectInstruction(const Instruction *I)  {
   case Instruction::AShr:
   case Instruction::Shl:
     return X86SelectShift(I);
+  case Instruction::SDiv:
+  case Instruction::UDiv:
+  case Instruction::SRem:
+  case Instruction::URem:
+    return X86SelectDivRem(I);
   case Instruction::Select:
     return X86SelectSelect(I);
   case Instruction::Trunc:
@@ -2275,12 +2395,8 @@ unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) {
 }
 
 
-/// TryToFoldLoad - The specified machine instr operand is a vreg, and that
-/// vreg is being provided by the specified load instruction.  If possible,
-/// try to fold the load as an operand to the instruction, returning true if
-/// possible.
-bool X86FastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
-                                const LoadInst *LI) {
+bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
+                                      const LoadInst *LI) {
   X86AddressMode AM;
   if (!X86SelectAddress(LI->getOperand(0), AM))
     return false;
diff --git a/lib/Target/X86/X86FixupLEAs.cpp b/lib/Target/X86/X86FixupLEAs.cpp
new file mode 100644
index 0000000..0dd034c
--- /dev/null
+++ b/lib/Target/X86/X86FixupLEAs.cpp
@@ -0,0 +1,253 @@
+//===-- X86FixupLEAs.cpp - use or replace LEA instructions -----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the pass which will find instructions which
+// can be re-written as LEA instructions in order to reduce pipeline
+// delays for some models of the Intel Atom family.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "x86-fixup-LEAs"
+#include "X86.h"
+#include "X86InstrInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+using namespace llvm;
+
+STATISTIC(NumLEAs, "Number of LEA instructions created");
+
+namespace {
+  class FixupLEAPass : public MachineFunctionPass {
+    enum RegUsageState { RU_NotUsed, RU_Write, RU_Read };
+    static char ID;
+    /// \brief Loop over all of the instructions in the basic block
+    /// replacing applicable instructions with LEA instructions,
+    /// where appropriate.
+    bool processBasicBlock(MachineFunction &MF, MachineFunction::iterator MFI);
+
+    virtual const char *getPassName() const { return "X86 Atom LEA Fixup";}
+
+    /// \brief Given a machine register, look for the instruction
+    /// which writes it in the current basic block. If found,
+    /// try to replace it with an equivalent LEA instruction.
+    /// If replacement succeeds, then also process the newly created
+    /// instruction.
+    void  seekLEAFixup(MachineOperand& p, MachineBasicBlock::iterator& I,
+                      MachineFunction::iterator MFI);
+
+    /// \brief Given a memory access or LEA instruction
+    /// whose address mode uses a base and/or index register, look for
+    /// an opportunity to replace the instruction which sets the base or index
+    /// register with an equivalent LEA instruction.
+    void processInstruction(MachineBasicBlock::iterator& I,
+                            MachineFunction::iterator MFI);
+
+    /// \brief Determine if an instruction references a machine register
+    /// and, if so, whether it reads or writes the register.
+    RegUsageState usesRegister(MachineOperand& p,
+                               MachineBasicBlock::iterator I);
+
+    /// \brief Step backwards through a basic block, looking
+    /// for an instruction which writes a register within
+    /// a maximum of INSTR_DISTANCE_THRESHOLD instruction latency cycles.
+    MachineBasicBlock::iterator searchBackwards(MachineOperand& p,
+                                                MachineBasicBlock::iterator& I,
+                                                MachineFunction::iterator MFI);
+
+    /// \brief If an instruction can be converted to an
+    /// equivalent LEA, insert the new instruction into the basic block
+    /// and return a pointer to it. Otherwise, return zero.
+    MachineInstr* postRAConvertToLEA(MachineFunction::iterator &MFI,
+                                     MachineBasicBlock::iterator &MBBI) const;
+
+  public:
+    FixupLEAPass() : MachineFunctionPass(ID) {}
+
+    /// \brief Loop over all of the basic blocks,
+    /// replacing instructions by equivalent LEA instructions
+    /// if needed and when possible.
+    virtual bool runOnMachineFunction(MachineFunction &MF);
+
+  private:
+    MachineFunction *MF;
+    const TargetMachine *TM;
+    const TargetInstrInfo *TII; // Machine instruction info.
+
+  };
+  char FixupLEAPass::ID = 0;
+}
+
+MachineInstr *
+FixupLEAPass::postRAConvertToLEA(MachineFunction::iterator &MFI,
+                                 MachineBasicBlock::iterator &MBBI) const {
+  MachineInstr* MI = MBBI;
+  MachineInstr* NewMI;
+  switch (MI->getOpcode()) {
+  case X86::MOV32rr: 
+  case X86::MOV64rr: {
+    const MachineOperand& Src = MI->getOperand(1);
+    const MachineOperand& Dest = MI->getOperand(0);
+    NewMI = BuildMI(*MF, MI->getDebugLoc(),
+      TII->get( MI->getOpcode() == X86::MOV32rr ? X86::LEA32r : X86::LEA64r))
+      .addOperand(Dest)
+      .addOperand(Src).addImm(1).addReg(0).addImm(0).addReg(0);
+    MFI->insert(MBBI, NewMI);   // Insert the new inst
+    return NewMI;
+  }
+  case X86::ADD64ri32:
+  case X86::ADD64ri8:
+  case X86::ADD64ri32_DB:
+  case X86::ADD64ri8_DB:
+  case X86::ADD32ri:
+  case X86::ADD32ri8:
+  case X86::ADD32ri_DB:
+  case X86::ADD32ri8_DB:
+  case X86::ADD16ri:
+  case X86::ADD16ri8:
+  case X86::ADD16ri_DB:
+  case X86::ADD16ri8_DB:
+    if (!MI->getOperand(2).isImm()) {
+      // convertToThreeAddress will call getImm()
+      // which requires isImm() to be true
+      return 0;
+    }
+  }
+  return TII->convertToThreeAddress(MFI, MBBI, 0);
+}
+
+FunctionPass *llvm::createX86FixupLEAs() {
+  return new FixupLEAPass();
+}
+
+bool FixupLEAPass::runOnMachineFunction(MachineFunction &Func) {
+  MF = &Func;
+  TII = Func.getTarget().getInstrInfo();
+  TM = &MF->getTarget();
+
+  DEBUG(dbgs() << "Start X86FixupLEAs\n";);
+  // Process all basic blocks.
+  for (MachineFunction::iterator I = Func.begin(), E = Func.end(); I != E; ++I)
+    processBasicBlock(Func, I);
+  DEBUG(dbgs() << "End X86FixupLEAs\n";);
+
+  return true;
+}
+
+FixupLEAPass::RegUsageState FixupLEAPass::usesRegister(MachineOperand& p,
+                                MachineBasicBlock::iterator I) {
+  RegUsageState RegUsage = RU_NotUsed;
+  MachineInstr* MI = I;
+
+  for (unsigned int i = 0; i < MI->getNumOperands(); ++i) {
+    MachineOperand& opnd = MI->getOperand(i);
+    if (opnd.isReg() && opnd.getReg() == p.getReg()){
+      if (opnd.isDef())
+        return RU_Write;
+      RegUsage = RU_Read;
+    }
+  }
+  return RegUsage;
+}
+
+/// getPreviousInstr - Given a reference to an instruction in a basic
+/// block, step the iterator back to the previous instruction in the block,
+/// wrapping around to the last instruction of the block if the block
+/// branches to itself.  Returns false if there is no previous instruction.
+static inline bool getPreviousInstr(MachineBasicBlock::iterator& I,
+                                    MachineFunction::iterator MFI) {
+  if (I == MFI->begin()) {
+    if (MFI->isPredecessor(MFI)) {
+      I = --MFI->end();
+      return true;
+    }
+    else
+      return false;
+  }
+  --I;
+  return true;
+}
+
+MachineBasicBlock::iterator FixupLEAPass::searchBackwards(MachineOperand& p,
+                                   MachineBasicBlock::iterator& I,
+                                   MachineFunction::iterator MFI) {
+  int InstrDistance = 1;
+  MachineBasicBlock::iterator CurInst;
+  static const int INSTR_DISTANCE_THRESHOLD = 5;
+
+  CurInst = I;
+  bool Found;
+  Found = getPreviousInstr(CurInst, MFI);
+  while( Found && I != CurInst) {
+    if (CurInst->isCall() || CurInst->isInlineAsm())
+      break;
+    if (InstrDistance > INSTR_DISTANCE_THRESHOLD)
+      break; // too far back to make a difference
+    if (usesRegister(p, CurInst) == RU_Write){
+      return CurInst;
+    }
+    InstrDistance += TII->getInstrLatency(TM->getInstrItineraryData(), CurInst);
+    Found = getPreviousInstr(CurInst, MFI);
+  }
+  return 0;
+}
+
+void FixupLEAPass::processInstruction(MachineBasicBlock::iterator& I,
+                                      MachineFunction::iterator MFI) {
+  // Process a load, store, or LEA instruction.
+  MachineInstr *MI = I;
+  int opcode = MI->getOpcode();
+  const MCInstrDesc& Desc = MI->getDesc();
+  int AddrOffset = X86II::getMemoryOperandNo(Desc.TSFlags, opcode);
+  if (AddrOffset >= 0) {
+    AddrOffset += X86II::getOperandBias(Desc);
+    MachineOperand& p = MI->getOperand(AddrOffset + X86::AddrBaseReg);
+    if (p.isReg() && p.getReg() != X86::ESP) {
+      seekLEAFixup(p, I, MFI);
+    }
+    MachineOperand& q = MI->getOperand(AddrOffset + X86::AddrIndexReg);
+    if (q.isReg() && q.getReg() != X86::ESP) {
+      seekLEAFixup(q, I, MFI);
+    }
+  }
+}
+
+void FixupLEAPass::seekLEAFixup(MachineOperand& p,
+                                MachineBasicBlock::iterator& I,
+                                MachineFunction::iterator MFI) {
+  MachineBasicBlock::iterator MBI = searchBackwards(p, I, MFI);
+  if (MBI) {
+    MachineInstr* NewMI = postRAConvertToLEA(MFI, MBI);
+    if (NewMI) {
+      ++NumLEAs;
+      DEBUG(dbgs() << "Candidate to replace:"; MBI->dump(););
+      // now to replace with an equivalent LEA...
+      DEBUG(dbgs() << "Replaced by: "; NewMI->dump(););
+      MFI->erase(MBI);
+      MachineBasicBlock::iterator J =
+                             static_cast<MachineBasicBlock::iterator> (NewMI);
+      processInstruction(J, MFI);
+    }
+  }
+}
+
+bool FixupLEAPass::processBasicBlock(MachineFunction &MF,
+                                     MachineFunction::iterator MFI) {
+
+  for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I)
+    processInstruction(I, MFI);
+  return false;
+}
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index 54cbd40..42b4e73 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -369,7 +369,14 @@ void X86FrameLowering::emitCalleeSavedFrameMoves(MachineFunction &MF,
 /// getCompactUnwindRegNum - Get the compact unwind number for a given
 /// register. The number corresponds to the enum lists in
 /// compact_unwind_encoding.h.
-static int getCompactUnwindRegNum(const uint16_t *CURegs, unsigned Reg) {
+static int getCompactUnwindRegNum(unsigned Reg, bool is64Bit) {
+  static const uint16_t CU32BitRegs[] = {
+    X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
+  };
+  static const uint16_t CU64BitRegs[] = {
+    X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
+  };
+  const uint16_t *CURegs = is64Bit ? CU64BitRegs : CU32BitRegs;
   for (int Idx = 1; *CURegs; ++CURegs, ++Idx)
     if (*CURegs == Reg)
       return Idx;
@@ -398,16 +405,8 @@ encodeCompactUnwindRegistersWithoutFrame(unsigned SavedRegs[CU_NUM_SAVED_REGS],
   //     4       3
   //     5       3
   //
-  static const uint16_t CU32BitRegs[] = {
-    X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
-  };
-  static const uint16_t CU64BitRegs[] = {
-    X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
-  };
-  const uint16_t *CURegs = (Is64Bit ? CU64BitRegs : CU32BitRegs);
-
   for (unsigned i = 0; i != CU_NUM_SAVED_REGS; ++i) {
-    int CUReg = getCompactUnwindRegNum(CURegs, SavedRegs[i]);
+    int CUReg = getCompactUnwindRegNum(SavedRegs[i], Is64Bit);
     if (CUReg == -1) return ~0U;
     SavedRegs[i] = CUReg;
   }
@@ -466,14 +465,6 @@ encodeCompactUnwindRegistersWithoutFrame(unsigned SavedRegs[CU_NUM_SAVED_REGS],
 static uint32_t
 encodeCompactUnwindRegistersWithFrame(unsigned SavedRegs[CU_NUM_SAVED_REGS],
                                       bool Is64Bit) {
-  static const uint16_t CU32BitRegs[] = {
-    X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
-  };
-  static const uint16_t CU64BitRegs[] = {
-    X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
-  };
-  const uint16_t *CURegs = (Is64Bit ? CU64BitRegs : CU32BitRegs);
-
   // Encode the registers in the order they were saved, 3-bits per register. The
   // registers are numbered from 1 to CU_NUM_SAVED_REGS.
   uint32_t RegEnc = 0;
@@ -481,7 +472,7 @@ encodeCompactUnwindRegistersWithFrame(unsigned SavedRegs[CU_NUM_SAVED_REGS],
     unsigned Reg = SavedRegs[I];
     if (Reg == 0) continue;
 
-    int CURegNum = getCompactUnwindRegNum(CURegs, Reg);
+    int CURegNum = getCompactUnwindRegNum(Reg, Is64Bit);
     if (CURegNum == -1) return ~0U;
 
     // Encode the 3-bit register number in order, skipping over 3-bits for each
@@ -528,11 +519,17 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
     if (!MI.getFlag(MachineInstr::FrameSetup)) break;
 
     // We don't exect any more prolog instructions.
-    if (ExpectEnd) return 0;
+    if (ExpectEnd) return CU::UNWIND_MODE_DWARF;
 
     if (Opc == PushInstr) {
       // If there are too many saved registers, we cannot use compact encoding.
-      if (SavedRegIdx >= CU_NUM_SAVED_REGS) return 0;
+      if (SavedRegIdx >= CU_NUM_SAVED_REGS) return CU::UNWIND_MODE_DWARF;
+
+      unsigned Reg = MI.getOperand(0).getReg();
+      if (Reg == (Is64Bit ? X86::RAX : X86::EAX)) {
+        ExpectEnd = true;
+        continue;
+      }
 
       SavedRegs[SavedRegIdx++] = MI.getOperand(0).getReg();
       StackAdjust += OffsetSize;
@@ -542,7 +539,7 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
       unsigned DstReg = MI.getOperand(0).getReg();
 
       if (DstReg != FramePtr || SrcReg != StackPtr)
-        return 0;
+        return CU::UNWIND_MODE_DWARF;
 
       StackAdjust = 0;
       memset(SavedRegs, 0, sizeof(SavedRegs));
@@ -552,7 +549,7 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
                Opc == X86::SUB32ri || Opc == X86::SUB32ri8) {
       if (StackSize)
         // We already have a stack size.
-        return 0;
+        return CU::UNWIND_MODE_DWARF;
 
       if (!MI.getOperand(0).isReg() ||
           MI.getOperand(0).getReg() != MI.getOperand(1).getReg() ||
@@ -560,7 +557,7 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
         // We need this to be a stack adjustment pointer. Something like:
         //
         //   %RSP<def> = SUB64ri8 %RSP, 48
-        return 0;
+        return CU::UNWIND_MODE_DWARF;
 
       StackSize = MI.getOperand(2).getImm() / StackDivide;
       SubtractInstrIdx += InstrOffset;
@@ -574,31 +571,31 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
   if (HasFP) {
     if ((StackAdjust & 0xFF) != StackAdjust)
       // Offset was too big for compact encoding.
-      return 0;
+      return CU::UNWIND_MODE_DWARF;
 
     // Get the encoding of the saved registers when we have a frame pointer.
     uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame(SavedRegs, Is64Bit);
-    if (RegEnc == ~0U) return 0;
+    if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;
 
-    CompactUnwindEncoding |= 0x01000000;
+    CompactUnwindEncoding |= CU::UNWIND_MODE_BP_FRAME;
     CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16;
-    CompactUnwindEncoding |= RegEnc & 0x7FFF;
+    CompactUnwindEncoding |= RegEnc & CU::UNWIND_BP_FRAME_REGISTERS;
   } else {
     ++StackAdjust;
     uint32_t TotalStackSize = StackAdjust + StackSize;
     if ((TotalStackSize & 0xFF) == TotalStackSize) {
       // Frameless stack with a small stack size.
-      CompactUnwindEncoding |= 0x02000000;
+      CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IMMD;
 
       // Encode the stack size.
       CompactUnwindEncoding |= (TotalStackSize & 0xFF) << 16;
     } else {
       if ((StackAdjust & 0x7) != StackAdjust)
         // The extra stack adjustments are too big for us to handle.
-        return 0;
+        return CU::UNWIND_MODE_DWARF;
 
       // Frameless stack with an offset too large for us to encode compactly.
-      CompactUnwindEncoding |= 0x03000000;
+      CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IND;
 
       // Encode the offset to the nnnnnn value in the 'subl $nnnnnn, ESP'
       // instruction.
@@ -616,10 +613,11 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
     uint32_t RegEnc =
       encodeCompactUnwindRegistersWithoutFrame(SavedRegs, SavedRegIdx,
                                                Is64Bit);
-    if (RegEnc == ~0U) return 0;
+    if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;
 
     // Encode the register encoding.
-    CompactUnwindEncoding |= RegEnc & 0x3FF;
+    CompactUnwindEncoding |=
+      RegEnc & CU::UNWIND_FRAMELESS_STACK_REG_PERMUTATION;
   }
 
   return CompactUnwindEncoding;
diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h
index 3f08b9a..6e309d8 100644
--- a/lib/Target/X86/X86FrameLowering.h
+++ b/lib/Target/X86/X86FrameLowering.h
@@ -19,8 +19,35 @@
 #include "llvm/Target/TargetFrameLowering.h"
 
 namespace llvm {
-  class MCSymbol;
-  class X86TargetMachine;
+
+namespace CU {
+
+  /// Compact unwind encoding values.
+  enum CompactUnwindEncodings {
+    /// [RE]BP based frame where [RE]BP is pushed on the stack immediately after
+    /// the return address, then [RE]SP is moved to [RE]BP.
+    UNWIND_MODE_BP_FRAME                   = 0x01000000,
+
+    /// A frameless function with a small constant stack size.
+    UNWIND_MODE_STACK_IMMD                 = 0x02000000,
+
+    /// A frameless function with a large constant stack size.
+    UNWIND_MODE_STACK_IND                  = 0x03000000,
+
+    /// No compact unwind encoding is available.
+    UNWIND_MODE_DWARF                      = 0x04000000,
+
+    /// Mask for encoding the frame registers.
+    UNWIND_BP_FRAME_REGISTERS              = 0x00007FFF,
+
+    /// Mask for encoding the frameless registers.
+    UNWIND_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF
+  };
+
+} // end CU namespace
+
+class MCSymbol;
+class X86TargetMachine;
 
 class X86FrameLowering : public TargetFrameLowering {
   const X86TargetMachine &TM;
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 6041669..968b358 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -1503,8 +1503,7 @@ SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
   MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
   const SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, In2L, In2H, Chain};
   SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(),
-                                           MVT::i32, MVT::i32, MVT::Other, Ops,
-                                           array_lengthof(Ops));
+                                           MVT::i32, MVT::i32, MVT::Other, Ops);
   cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1);
   return ResNode;
 }
@@ -1720,7 +1719,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) {
       Op = ADD;
       break;
   }
-  
+
   Val = getAtomicLoadArithTargetConstant(CurDAG, dl, Op, NVT, Val);
   bool isUnOp = !Val.getNode();
   bool isCN = Val.getNode() && (Val.getOpcode() == ISD::TargetConstant);
@@ -1772,12 +1771,10 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) {
   MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
   if (isUnOp) {
     SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain };
-    Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops,
-                                         array_lengthof(Ops)), 0);
+    Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops), 0);
   } else {
     SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, Chain };
-    Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops,
-                                         array_lengthof(Ops)), 0);
+    Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops), 0);
   }
   cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
   SDValue RetVals[] = { Undef, Ret };
@@ -1971,8 +1968,7 @@ SDNode *X86DAGToDAGISel::SelectGather(SDNode *Node, unsigned Opc) {
   SDValue Segment = CurDAG->getRegister(0, MVT::i32);
   const SDValue Ops[] = { VSrc, Base, getI8Imm(Scale->getSExtValue()), VIdx,
                           Disp, Segment, VMask, Chain};
-  SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(),
-                                           VTs, Ops, array_lengthof(Ops));
+  SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(), VTs, Ops);
   // Node has 2 outputs: VDst and MVT::Other.
   // ResNode has 3 outputs: VDst, VMask_wb, and MVT::Other.
   // We replace VDst of Node with VDst of ResNode, and Other of Node with Other
@@ -2186,7 +2182,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
 
     SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
     SDValue Ops[] = {N1, InFlag};
-    SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops, 2);
+    SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
 
     ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
     ReplaceUses(SDValue(Node, 1), SDValue(CNode, 1));
@@ -2267,16 +2263,14 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
                         InFlag };
       if (MOpc == X86::MULX32rm || MOpc == X86::MULX64rm) {
         SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Other, MVT::Glue);
-        SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops,
-                                               array_lengthof(Ops));
+        SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
         ResHi = SDValue(CNode, 0);
         ResLo = SDValue(CNode, 1);
         Chain = SDValue(CNode, 2);
         InFlag = SDValue(CNode, 3);
       } else {
         SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
-        SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops,
-                                               array_lengthof(Ops));
+        SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
         Chain = SDValue(CNode, 0);
         InFlag = SDValue(CNode, 1);
       }
@@ -2287,15 +2281,13 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
       SDValue Ops[] = { N1, InFlag };
       if (Opc == X86::MULX32rr || Opc == X86::MULX64rr) {
         SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Glue);
-        SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops,
-                                               array_lengthof(Ops));
+        SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
         ResHi = SDValue(CNode, 0);
         ResLo = SDValue(CNode, 1);
         InFlag = SDValue(CNode, 2);
       } else {
         SDVTList VTs = CurDAG->getVTList(MVT::Glue);
-        SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops,
-                                               array_lengthof(Ops));
+        SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
         InFlag = SDValue(CNode, 0);
       }
     }
@@ -2343,6 +2335,9 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
       DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG); dbgs() << '\n');
     }
 
+    // Propagate ordering to the last node, for now.
+    CurDAG->AssignOrdering(InFlag.getNode(), CurDAG->GetOrdering(Node));
+
     return NULL;
   }
 
@@ -2409,8 +2404,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
         SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
         Move =
           SDValue(CurDAG->getMachineNode(X86::MOVZX32rm8, dl, MVT::i32,
-                                         MVT::Other, Ops,
-                                         array_lengthof(Ops)), 0);
+                                         MVT::Other, Ops), 0);
         Chain = Move.getValue(1);
         ReplaceUses(N0.getValue(1), Chain);
       } else {
@@ -2441,8 +2435,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
       SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
                         InFlag };
       SDNode *CNode =
-        CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops,
-                               array_lengthof(Ops));
+        CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops);
       InFlag = SDValue(CNode, 1);
       // Update the chain.
       ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
@@ -2674,8 +2667,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
     unsigned newOpc = getFusedLdStOpcode(LdVT, Opc);
     MachineSDNode *Result = CurDAG->getMachineNode(newOpc,
                                                    Node->getDebugLoc(),
-                                                   MVT::i32, MVT::Other, Ops,
-                                                   array_lengthof(Ops));
+                                                   MVT::i32, MVT::Other, Ops);
     Result->setMemRefs(MemOp, MemOp + 2);
 
     ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1));
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 6934186..f69f5d8 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -163,10 +163,28 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
   Subtarget = &TM.getSubtarget<X86Subtarget>();
   X86ScalarSSEf64 = Subtarget->hasSSE2();
   X86ScalarSSEf32 = Subtarget->hasSSE1();
-
   RegInfo = TM.getRegisterInfo();
   TD = getDataLayout();
 
+  resetOperationActions();
+}
+
+void X86TargetLowering::resetOperationActions() {
+  const TargetMachine &TM = getTargetMachine();
+  static bool FirstTimeThrough = true;
+
+  // If none of the target options have changed, then we don't need to reset the
+  // operation actions.
+  if (!FirstTimeThrough && TO == TM.Options) return;
+
+  if (!FirstTimeThrough) {
+    // Reinitialize the actions.
+    initActions();
+    FirstTimeThrough = false;
+  }
+
+  TO = TM.Options;
+
   // Set up the TargetLowering object.
   static const MVT IntVTs[] = { MVT::i8, MVT::i16, MVT::i32, MVT::i64 };
 
@@ -508,16 +526,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
   if (Subtarget->hasSSE1())
     setOperationAction(ISD::PREFETCH      , MVT::Other, Legal);
 
-  setOperationAction(ISD::MEMBARRIER    , MVT::Other, Custom);
   setOperationAction(ISD::ATOMIC_FENCE  , MVT::Other, Custom);
 
-  // On X86 and X86-64, atomic operations are lowered to locked instructions.
-  // Locked instructions, in turn, have implicit fence semantics (all memory
-  // operations are flushed before issuing the locked instruction, and they
-  // are not buffered), so we can fold away the common pattern of
-  // fence-atomic-fence.
-  setShouldFoldAtomicFences(true);
-
   // Expand certain atomics
   for (unsigned i = 0; i != array_lengthof(IntVTs); ++i) {
     MVT VT = IntVTs[i];
@@ -1785,7 +1795,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
       if (isScalarFPTypeInSSEReg(VA.getValVT())) CopyVT = MVT::f80;
       SDValue Ops[] = { Chain, InFlag };
       Chain = SDValue(DAG.getMachineNode(X86::FpPOP_RETVAL, dl, CopyVT,
-                                         MVT::Other, MVT::Glue, Ops, 2), 1);
+                                         MVT::Other, MVT::Glue, Ops), 1);
       Val = Chain.getValue(0);
 
       // Round the f80 to the right size, which also moves it to the appropriate
@@ -4404,13 +4414,15 @@ static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget,
     if (Subtarget->hasInt256()) { // AVX2
       SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
       SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
-      Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, 8);
+      Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops,
+                        array_lengthof(Ops));
     } else {
       // 256-bit logic and arithmetic instructions in AVX are all
       // floating-point, no support for integer ops. Emit fp zeroed vectors.
       SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32);
       SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
-      Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops, 8);
+      Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops,
+                        array_lengthof(Ops));
     }
   } else
     llvm_unreachable("Unexpected vector type");
@@ -4431,7 +4443,8 @@ static SDValue getOnesVector(MVT VT, bool HasInt256, SelectionDAG &DAG,
   if (VT.is256BitVector()) {
     if (HasInt256) { // AVX2
       SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
-      Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, 8);
+      Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops,
+                        array_lengthof(Ops));
     } else { // AVX
       Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
       Vec = Concat128BitVectors(Vec, Vec, MVT::v8i32, 8, DAG, dl);
@@ -5101,7 +5114,8 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
     SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
     SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() };
     SDValue ResNode =
-        DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys, Ops, 2, MVT::i64,
+        DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys, Ops,
+                                array_lengthof(Ops), MVT::i64,
                                 LDBase->getPointerInfo(),
                                 LDBase->getAlignment(),
                                 false/*isVolatile*/, true/*ReadMem*/,
@@ -7624,10 +7638,10 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
 
   if (InFlag) {
     SDValue Ops[] = { Chain,  TGA, *InFlag };
-    Chain = DAG.getNode(CallType, dl, NodeTys, Ops, 3);
+    Chain = DAG.getNode(CallType, dl, NodeTys, Ops, array_lengthof(Ops));
   } else {
     SDValue Ops[]  = { Chain, TGA };
-    Chain = DAG.getNode(CallType, dl, NodeTys, Ops, 2);
+    Chain = DAG.getNode(CallType, dl, NodeTys, Ops, array_lengthof(Ops));
   }
 
   // TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
@@ -7937,7 +7951,7 @@ SDValue X86TargetLowering::LowerShiftParts(SDValue Op, SelectionDAG &DAG) const{
   }
 
   SDValue Ops[2] = { Lo, Hi };
-  return DAG.getMergeValues(Ops, 2, dl);
+  return DAG.getMergeValues(Ops, array_lengthof(Ops), dl);
 }
 
 SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
@@ -8220,8 +8234,8 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
 
   SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other);
   SDValue Ops[] = { Store, StackSlot, DAG.getValueType(MVT::i64) };
-  SDValue Fild = DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, Ops, 3,
-                                         MVT::i64, MMO);
+  SDValue Fild = DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, Ops,
+                                         array_lengthof(Ops), MVT::i64, MMO);
 
   APInt FF(32, 0x5F800000ULL);
 
@@ -8313,8 +8327,8 @@ X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
     MachineMemOperand *MMO =
       MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
                               MachineMemOperand::MOLoad, MemSize, MemSize);
-    Value = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops, 3,
-                                    DstTy, MMO);
+    Value = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops,
+                                    array_lengthof(Ops), DstTy, MMO);
     Chain = Value.getValue(1);
     SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false);
     StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
@@ -8328,7 +8342,8 @@ X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
     // Build the FP_TO_INT*_IN_MEM
     SDValue Ops[] = { Chain, Value, StackSlot };
     SDValue FIST = DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::Other),
-                                           Ops, 3, DstTy, MMO);
+                                           Ops, array_lengthof(Ops), DstTy,
+                                           MMO);
     return std::make_pair(FIST, StackSlot);
   } else {
     SDValue ftol = DAG.getNode(X86ISD::WIN_FTOL, DL,
@@ -8340,8 +8355,8 @@ X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
       MVT::i32, eax.getValue(2));
     SDValue Ops[] = { eax, edx };
     SDValue pair = IsReplace
-      ? DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops, 2)
-      : DAG.getMergeValues(Ops, 2, DL);
+      ? DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops, array_lengthof(Ops))
+      : DAG.getMergeValues(Ops, array_lengthof(Ops), DL);
     return std::make_pair(pair, SDValue());
   }
 }
@@ -9165,14 +9180,6 @@ SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC,
   }
 
   if (LHS.getNode()) {
-    // If the LHS is of the form (x ^ -1) then replace the LHS with x and flip
-    // the condition code later.
-    bool Invert = false;
-    if (LHS.getOpcode() == ISD::XOR && isAllOnes(LHS.getOperand(1))) {
-      Invert = true;
-      LHS = LHS.getOperand(0);
-    }
-
     // If LHS is i8, promote it to i32 with any_extend.  There is no i8 BT
     // instruction.  Since the shift amount is in-range-or-undefined, we know
     // that doing a bittest on the i32 value is ok.  We extend to i32 because
@@ -9189,9 +9196,6 @@ SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC,
 
     SDValue BT = DAG.getNode(X86ISD::BT, dl, MVT::i32, LHS, RHS);
     X86::CondCode Cond = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B;
-    // Flip the condition if the LHS was a not instruction
-    if (Invert)
-      Cond = X86::GetOppositeBranchCondition(Cond);
     return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
                        DAG.getConstant(Cond, MVT::i8), BT);
   }
@@ -9335,14 +9339,54 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
   // Check that the operation in question is available (most are plain SSE2,
   // but PCMPGTQ and PCMPEQQ have different requirements).
   if (VT == MVT::v2i64) {
-    if (Opc == X86ISD::PCMPGT && !Subtarget->hasSSE42())
-      return SDValue();
+    if (Opc == X86ISD::PCMPGT && !Subtarget->hasSSE42()) {
+      assert(Subtarget->hasSSE2() && "Don't know how to lower!");
+
+      // First cast everything to the right type.
+      Op0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op0);
+      Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op1);
+
+      // Since SSE has no unsigned integer comparisons, we need to flip the sign
+      // bits of the inputs before performing those operations. The lower
+      // compare is always unsigned.
+      SDValue SB;
+      if (FlipSigns) {
+        SB = DAG.getConstant(0x80000000U, MVT::v4i32);
+      } else {
+        SDValue Sign = DAG.getConstant(0x80000000U, MVT::i32);
+        SDValue Zero = DAG.getConstant(0x00000000U, MVT::i32);
+        SB = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+                         Sign, Zero, Sign, Zero);
+      }
+      Op0 = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Op0, SB);
+      Op1 = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Op1, SB);
+
+      // Emulate PCMPGTQ with (hi1 > hi2) | ((hi1 == hi2) & (lo1 > lo2))
+      SDValue GT = DAG.getNode(X86ISD::PCMPGT, dl, MVT::v4i32, Op0, Op1);
+      SDValue EQ = DAG.getNode(X86ISD::PCMPEQ, dl, MVT::v4i32, Op0, Op1);
+
+      // Create masks for only the low parts/high parts of the 64 bit integers.
+      const int MaskHi[] = { 1, 1, 3, 3 };
+      const int MaskLo[] = { 0, 0, 2, 2 };
+      SDValue EQHi = DAG.getVectorShuffle(MVT::v4i32, dl, EQ, EQ, MaskHi);
+      SDValue GTLo = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskLo);
+      SDValue GTHi = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskHi);
+
+      SDValue Result = DAG.getNode(ISD::AND, dl, MVT::v4i32, EQHi, GTLo);
+      Result = DAG.getNode(ISD::OR, dl, MVT::v4i32, Result, GTHi);
+
+      if (Invert)
+        Result = DAG.getNOT(dl, Result, MVT::v4i32);
+
+      return DAG.getNode(ISD::BITCAST, dl, VT, Result);
+    }
+
     if (Opc == X86ISD::PCMPEQ && !Subtarget->hasSSE41()) {
       // If pcmpeqq is missing but pcmpeqd is available synthesize pcmpeqq with
       // pcmpeqd + pshufd + pand.
       assert(Subtarget->hasSSE2() && !FlipSigns && "Don't know how to lower!");
 
-      // First cast everything to the right type,
+      // First cast everything to the right type.
       Op0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op0);
       Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op1);
 
@@ -9361,17 +9405,13 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
     }
   }
 
-  // Since SSE has no unsigned integer comparisons, we need to flip  the sign
+  // Since SSE has no unsigned integer comparisons, we need to flip the sign
   // bits of the inputs before performing those operations.
   if (FlipSigns) {
     EVT EltVT = VT.getVectorElementType();
-    SDValue SignBit = DAG.getConstant(APInt::getSignBit(EltVT.getSizeInBits()),
-                                      EltVT);
-    std::vector<SDValue> SignBits(VT.getVectorNumElements(), SignBit);
-    SDValue SignVec = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &SignBits[0],
-                                    SignBits.size());
-    Op0 = DAG.getNode(ISD::XOR, dl, VT, Op0, SignVec);
-    Op1 = DAG.getNode(ISD::XOR, dl, VT, Op1, SignVec);
+    SDValue SB = DAG.getConstant(APInt::getSignBit(EltVT.getSizeInBits()), VT);
+    Op0 = DAG.getNode(ISD::XOR, dl, VT, Op0, SB);
+    Op1 = DAG.getNode(ISD::XOR, dl, VT, Op1, SB);
   }
 
   SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
@@ -10937,7 +10977,7 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) {
                       SDValue(Result.getNode(), 1) };
     SDValue isValid = DAG.getNode(X86ISD::CMOV, dl,
                                   DAG.getVTList(Op->getValueType(1), MVT::Glue),
-                                  Ops, 4);
+                                  Ops, array_lengthof(Ops));
 
     // Return { result, isValid, chain }.
     return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result, isValid,
@@ -10990,7 +11030,10 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
   EVT VT = Op.getValueType();
   DebugLoc dl = Op.getDebugLoc();  // FIXME probably not meaningful
   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
-  unsigned FrameReg = Subtarget->is64Bit() ? X86::RBP : X86::EBP;
+  unsigned FrameReg = RegInfo->getFrameRegister(DAG.getMachineFunction());
+  assert(((FrameReg == X86::RBP && VT == MVT::i64) ||
+          (FrameReg == X86::EBP && VT == MVT::i32)) &&
+         "Invalid Frame Register!");
   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
   while (Depth--)
     FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
@@ -11010,21 +11053,23 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
   SDValue Handler   = Op.getOperand(2);
   DebugLoc dl       = Op.getDebugLoc();
 
-  SDValue Frame = DAG.getCopyFromReg(DAG.getEntryNode(), dl,
-                                     Subtarget->is64Bit() ? X86::RBP : X86::EBP,
-                                     getPointerTy());
-  unsigned StoreAddrReg = (Subtarget->is64Bit() ? X86::RCX : X86::ECX);
-
-  SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), Frame,
-                                  DAG.getIntPtrConstant(RegInfo->getSlotSize()));
-  StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StoreAddr, Offset);
+  EVT PtrVT = getPointerTy();
+  unsigned FrameReg = RegInfo->getFrameRegister(DAG.getMachineFunction());
+  assert(((FrameReg == X86::RBP && PtrVT == MVT::i64) ||
+          (FrameReg == X86::EBP && PtrVT == MVT::i32)) &&
+         "Invalid Frame Register!");
+  SDValue Frame = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, PtrVT);
+  unsigned StoreAddrReg = (PtrVT == MVT::i64) ? X86::RCX : X86::ECX;
+
+  SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, PtrVT, Frame,
+                                 DAG.getIntPtrConstant(RegInfo->getSlotSize()));
+  StoreAddr = DAG.getNode(ISD::ADD, dl, PtrVT, StoreAddr, Offset);
   Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo(),
                        false, false, 0);
   Chain = DAG.getCopyToReg(Chain, dl, StoreAddrReg, StoreAddr);
 
-  return DAG.getNode(X86ISD::EH_RETURN, dl,
-                     MVT::Other,
-                     Chain, DAG.getRegister(StoreAddrReg, getPointerTy()));
+  return DAG.getNode(X86ISD::EH_RETURN, dl, MVT::Other, Chain,
+                     DAG.getRegister(StoreAddrReg, PtrVT));
 }
 
 SDValue X86TargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
@@ -11235,7 +11280,8 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
   SDValue Ops[] = { DAG.getEntryNode(), StackSlot };
   SDValue Chain = DAG.getMemIntrinsicNode(X86ISD::FNSTCW16m, DL,
                                           DAG.getVTList(MVT::Other),
-                                          Ops, 2, MVT::i16, MMO);
+                                          Ops, array_lengthof(Ops), MVT::i16,
+                                          MMO);
 
   // Load FP Control Word from stack slot
   SDValue CWD = DAG.getLoad(MVT::i16, DL, Chain, StackSlot,
@@ -12075,52 +12121,6 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
   }
 }
 
-static SDValue LowerMEMBARRIER(SDValue Op, const X86Subtarget *Subtarget,
-                              SelectionDAG &DAG) {
-  DebugLoc dl = Op.getDebugLoc();
-
-  // Go ahead and emit the fence on x86-64 even if we asked for no-sse2.
-  // There isn't any reason to disable it if the target processor supports it.
-  if (!Subtarget->hasSSE2() && !Subtarget->is64Bit()) {
-    SDValue Chain = Op.getOperand(0);
-    SDValue Zero = DAG.getConstant(0, MVT::i32);
-    SDValue Ops[] = {
-      DAG.getRegister(X86::ESP, MVT::i32), // Base
-      DAG.getTargetConstant(1, MVT::i8),   // Scale
-      DAG.getRegister(0, MVT::i32),        // Index
-      DAG.getTargetConstant(0, MVT::i32),  // Disp
-      DAG.getRegister(0, MVT::i32),        // Segment.
-      Zero,
-      Chain
-    };
-    SDNode *Res =
-      DAG.getMachineNode(X86::OR32mrLocked, dl, MVT::Other, Ops,
-                          array_lengthof(Ops));
-    return SDValue(Res, 0);
-  }
-
-  unsigned isDev = cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue();
-  if (!isDev)
-    return DAG.getNode(X86ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
-
-  unsigned Op1 = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
-  unsigned Op2 = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
-  unsigned Op3 = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
-  unsigned Op4 = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
-
-  // def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>;
-  if (!Op1 && !Op2 && !Op3 && Op4)
-    return DAG.getNode(X86ISD::SFENCE, dl, MVT::Other, Op.getOperand(0));
-
-  // def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>;
-  if (Op1 && !Op2 && !Op3 && !Op4)
-    return DAG.getNode(X86ISD::LFENCE, dl, MVT::Other, Op.getOperand(0));
-
-  // def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm), (i8 1)),
-  //           (MFENCE)>;
-  return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0));
-}
-
 static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget *Subtarget,
                                  SelectionDAG &DAG) {
   DebugLoc dl = Op.getDebugLoc();
@@ -12149,9 +12149,7 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget *Subtarget,
       Zero,
       Chain
     };
-    SDNode *Res =
-      DAG.getMachineNode(X86::OR32mrLocked, dl, MVT::Other, Ops,
-                         array_lengthof(Ops));
+    SDNode *Res = DAG.getMachineNode(X86::OR32mrLocked, dl, MVT::Other, Ops);
     return SDValue(Res, 0);
   }
 
@@ -12185,7 +12183,7 @@ static SDValue LowerCMP_SWAP(SDValue Op, const X86Subtarget *Subtarget,
   SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
   MachineMemOperand *MMO = cast<AtomicSDNode>(Op)->getMemOperand();
   SDValue Result = DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG_DAG, DL, Tys,
-                                           Ops, 5, T, MMO);
+                                           Ops, array_lengthof(Ops), T, MMO);
   SDValue cpOut =
     DAG.getCopyFromReg(Result.getValue(0), DL, Reg, T, Result.getValue(1));
   return cpOut;
@@ -12207,7 +12205,7 @@ static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget *Subtarget,
     DAG.getNode(ISD::OR, dl, MVT::i64, rax, Tmp),
     rdx.getValue(1)
   };
-  return DAG.getMergeValues(Ops, 2, dl);
+  return DAG.getMergeValues(Ops, array_lengthof(Ops), dl);
 }
 
 SDValue X86TargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
@@ -12301,7 +12299,8 @@ SDValue X86TargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
   assert(Subtarget->isTargetDarwin() && Subtarget->is64Bit());
 
   // For MacOSX, we want to call an alternative entry point: __sincos_stret,
-  // which returns the values in two XMM registers.
+  // which returns the values as { float, float } (in XMM0) or
+  // { double, double } (which is returned in XMM0, XMM1).
   DebugLoc dl = Op.getDebugLoc();
   SDValue Arg = Op.getOperand(0);
   EVT ArgVT = Arg.getValueType();
@@ -12316,14 +12315,16 @@ SDValue X86TargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
   Entry.isZExt = false;
   Args.push_back(Entry);
 
+  bool isF64 = ArgVT == MVT::f64;
   // Only optimize x86_64 for now. i386 is a bit messy. For f32,
   // the small struct {f32, f32} is returned in (eax, edx). For f64,
   // the results are returned via SRet in memory.
-  const char *LibcallName = (ArgVT == MVT::f64)
-    ? "__sincos_stret" : "__sincosf_stret";
+  const char *LibcallName =  isF64 ? "__sincos_stret" : "__sincosf_stret";
   SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy());
 
-  StructType *RetTy = StructType::get(ArgTy, ArgTy, NULL);
+  Type *RetTy = isF64
+    ? (Type*)StructType::get(ArgTy, ArgTy, NULL)
+    : (Type*)VectorType::get(ArgTy, 4);
   TargetLowering::
     CallLoweringInfo CLI(DAG.getEntryNode(), RetTy,
                          false, false, false, false, 0,
@@ -12331,7 +12332,18 @@ SDValue X86TargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
                          /*doesNotRet=*/false, /*isReturnValueUsed*/true,
                          Callee, Args, DAG, dl);
   std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
-  return CallResult.first;
+
+  if (isF64)
+    // Returned in xmm0 and xmm1.
+    return CallResult.first;
+
+  // Returned in bits 0:31 and 32:63 of xmm0.
+  SDValue SinVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ArgVT,
+                               CallResult.first, DAG.getIntPtrConstant(0));
+  SDValue CosVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ArgVT,
+                               CallResult.first, DAG.getIntPtrConstant(1));
+  SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
+  return DAG.getNode(ISD::MERGE_VALUES, dl, Tys, SinVal, CosVal);
 }
 
 /// LowerOperation - Provide custom lowering hooks for some operations.
@@ -12340,7 +12352,6 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   switch (Op.getOpcode()) {
   default: llvm_unreachable("Should not custom lower this!");
   case ISD::SIGN_EXTEND_INREG:  return LowerSIGN_EXTEND_INREG(Op,DAG);
-  case ISD::MEMBARRIER:         return LowerMEMBARRIER(Op, Subtarget, DAG);
   case ISD::ATOMIC_FENCE:       return LowerATOMIC_FENCE(Op, Subtarget, DAG);
   case ISD::ATOMIC_CMP_SWAP:    return LowerCMP_SWAP(Op, Subtarget, DAG);
   case ISD::ATOMIC_LOAD_SUB:    return LowerLOAD_SUB(Op,DAG);
@@ -12457,7 +12468,7 @@ ReplaceATOMIC_BINARY_64(SDNode *Node, SmallVectorImpl<SDValue>&Results,
   SDValue Ops[] = { Chain, In1, In2L, In2H };
   SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
   SDValue Result =
-    DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops, 4, MVT::i64,
+    DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops, array_lengthof(Ops), MVT::i64,
                             cast<MemSDNode>(Node)->getMemOperand());
   SDValue OpsF[] = { Result.getValue(0), Result.getValue(1)};
   Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2));
@@ -12537,7 +12548,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
                                      eax.getValue(2));
     // Use a buildpair to merge the two 32-bit values into a 64-bit one.
     SDValue Ops[] = { eax, edx };
-    Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Ops, 2));
+    Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Ops,
+                                  array_lengthof(Ops)));
     Results.push_back(edx.getValue(1));
     return;
   }
@@ -12576,7 +12588,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
     unsigned Opcode = Regs64bit ? X86ISD::LCMPXCHG16_DAG :
                                   X86ISD::LCMPXCHG8_DAG;
     SDValue Result = DAG.getMemIntrinsicNode(Opcode, dl, Tys,
-                                             Ops, 3, T, MMO);
+                                             Ops, array_lengthof(Ops), T, MMO);
     SDValue cpOutL = DAG.getCopyFromReg(Result.getValue(0), dl,
                                         Regs64bit ? X86::RAX : X86::EAX,
                                         HalfT, Result.getValue(1));
@@ -15063,7 +15075,8 @@ static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
         SDVTList Tys = DAG.getVTList(MVT::v4i64, MVT::Other);
         SDValue Ops[] = { Ld->getChain(), Ld->getBasePtr() };
         SDValue ResNode =
-          DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2,
+          DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops,
+                                  array_lengthof(Ops),
                                   Ld->getMemoryVT(),
                                   Ld->getPointerInfo(),
                                   Ld->getAlignment(),
@@ -15755,6 +15768,51 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
     if (unsigned Op = matchIntegerMINMAX(Cond, VT, LHS, RHS, DAG, Subtarget))
       return DAG.getNode(Op, DL, N->getValueType(0), LHS, RHS);
 
+  // Simplify vector selection if the selector will be produced by CMPP*/PCMP*.
+  if (!DCI.isBeforeLegalize() && N->getOpcode() == ISD::VSELECT &&
+      Cond.getOpcode() == ISD::SETCC) {
+
+    assert(Cond.getValueType().isVector() &&
+           "vector select expects a vector selector!");
+
+    EVT IntVT = Cond.getValueType();
+    bool TValIsAllOnes = ISD::isBuildVectorAllOnes(LHS.getNode());
+    bool FValIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
+
+    if (!TValIsAllOnes && !FValIsAllZeros) {
+      // Try inverting the condition if the true value is not all 1s and the
+      // false value is not all 0s.
+      bool TValIsAllZeros = ISD::isBuildVectorAllZeros(LHS.getNode());
+      bool FValIsAllOnes = ISD::isBuildVectorAllOnes(RHS.getNode());
+
+      if (TValIsAllZeros || FValIsAllOnes) {
+        SDValue CC = Cond.getOperand(2);
+        ISD::CondCode NewCC =
+          ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
+                               Cond.getOperand(0).getValueType().isInteger());
+        Cond = DAG.getSetCC(DL, IntVT, Cond.getOperand(0), Cond.getOperand(1), NewCC);
+        std::swap(LHS, RHS);
+        TValIsAllOnes = FValIsAllOnes;
+        FValIsAllZeros = TValIsAllZeros;
+      }
+    }
+
+    if (TValIsAllOnes || FValIsAllZeros) {
+      SDValue Ret;
+
+      if (TValIsAllOnes && FValIsAllZeros)
+        Ret = Cond;
+      else if (TValIsAllOnes)
+        Ret = DAG.getNode(ISD::OR, DL, IntVT, Cond,
+                          DAG.getNode(ISD::BITCAST, DL, IntVT, RHS));
+      else if (FValIsAllZeros)
+        Ret = DAG.getNode(ISD::AND, DL, IntVT, Cond,
+                          DAG.getNode(ISD::BITCAST, DL, IntVT, LHS));
+
+      return DAG.getNode(ISD::BITCAST, DL, VT, Ret);
+    }
+  }
+
   // If we know that this node is legal then we know that it is going to be
   // matched by one of the SSE/AVX BLEND instructions. These instructions only
   // depend on the highest bit in each word. Try to use SimplifyDemandedBits
@@ -15815,6 +15873,7 @@ static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) {
   SDValue SetCC;
   const ConstantSDNode* C = 0;
   bool needOppositeCond = (CC == X86::COND_E);
+  bool checkAgainstTrue = false; // Is it a comparison against 1?
 
   if ((C = dyn_cast<ConstantSDNode>(Op1)))
     SetCC = Op2;
@@ -15823,18 +15882,46 @@ static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) {
   else // Quit if all operands are not constants.
     return SDValue();
 
-  if (C->getZExtValue() == 1)
+  if (C->getZExtValue() == 1) {
     needOppositeCond = !needOppositeCond;
-  else if (C->getZExtValue() != 0)
+    checkAgainstTrue = true;
+  } else if (C->getZExtValue() != 0)
     // Quit if the constant is neither 0 or 1.
     return SDValue();
 
-  // Skip 'zext' or 'trunc' node.
-  if (SetCC.getOpcode() == ISD::ZERO_EXTEND ||
-      SetCC.getOpcode() == ISD::TRUNCATE)
-    SetCC = SetCC.getOperand(0);
+  bool truncatedToBoolWithAnd = false;
+  // Skip (zext $x), (trunc $x), or (and $x, 1) node.
+  while (SetCC.getOpcode() == ISD::ZERO_EXTEND ||
+         SetCC.getOpcode() == ISD::TRUNCATE ||
+         SetCC.getOpcode() == ISD::AND) {
+    if (SetCC.getOpcode() == ISD::AND) {
+      int OpIdx = -1;
+      ConstantSDNode *CS;
+      if ((CS = dyn_cast<ConstantSDNode>(SetCC.getOperand(0))) &&
+          CS->getZExtValue() == 1)
+        OpIdx = 1;
+      if ((CS = dyn_cast<ConstantSDNode>(SetCC.getOperand(1))) &&
+          CS->getZExtValue() == 1)
+        OpIdx = 0;
+      if (OpIdx == -1)
+        break;
+      SetCC = SetCC.getOperand(OpIdx);
+      truncatedToBoolWithAnd = true;
+    } else
+      SetCC = SetCC.getOperand(0);
+  }
 
   switch (SetCC.getOpcode()) {
+  case X86ISD::SETCC_CARRY:
+    // Since SETCC_CARRY gives output based on R = CF ? ~0 : 0, it's unsafe to
+    // simplify it if the result of SETCC_CARRY is not canonicalized to 0 or 1,
+    // i.e. it's a comparison against true but the result of SETCC_CARRY is not
+    // truncated to i1 using 'and'.
+    if (checkAgainstTrue && !truncatedToBoolWithAnd)
+      break;
+    assert(X86::CondCode(SetCC.getConstantOperandVal(0)) == X86::COND_B &&
+           "Invalid use of SETCC_CARRY!");
+    // FALL THROUGH
   case X86ISD::SETCC:
     // Set the condition code or opposite one if necessary.
     CC = X86::CondCode(SetCC.getConstantOperandVal(0));
@@ -16165,8 +16252,7 @@ static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) {
   return SDValue();
 }
 
-/// PerformShiftCombine - Transforms vector shift nodes to use vector shifts
-///                       when possible.
+/// PerformShiftCombine - Combine shifts.
 static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
                                    TargetLowering::DAGCombinerInfo &DCI,
                                    const X86Subtarget *Subtarget) {
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 5725f7a..2727e22 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -723,6 +723,9 @@ namespace llvm {
     SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot,
                       SelectionDAG &DAG) const;
 
+    /// \brief Reset the operation actions based on target options.
+    virtual void resetOperationActions();
+
   protected:
     std::pair<const TargetRegisterClass*, uint8_t>
     findRepresentativeClass(MVT VT) const;
@@ -734,6 +737,10 @@ namespace llvm {
     const X86RegisterInfo *RegInfo;
     const DataLayout *TD;
 
+    /// Used to store the TargetOptions so that we don't waste time resetting
+    /// the operation actions unless we have to.
+    TargetOptions TO;
+
     /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
     /// floating point ops.
     /// When SSE is available, use it for f32 operations.
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index 0ef9491..a71e024 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -35,25 +35,27 @@ def MRM_C3 : Format<35>;
 def MRM_C4 : Format<36>;
 def MRM_C8 : Format<37>;
 def MRM_C9 : Format<38>;
-def MRM_E8 : Format<39>;
-def MRM_F0 : Format<40>;
-def MRM_F8 : Format<41>;
-def MRM_F9 : Format<42>;
+def MRM_CA : Format<39>;
+def MRM_CB : Format<40>;
+def MRM_E8 : Format<41>;
+def MRM_F0 : Format<42>;
 def RawFrmImm8 : Format<43>;
 def RawFrmImm16 : Format<44>;
-def MRM_D0 : Format<45>;
-def MRM_D1 : Format<46>;
-def MRM_D4 : Format<47>;
-def MRM_D5 : Format<48>;
-def MRM_D6 : Format<49>;
-def MRM_D8 : Format<50>;
-def MRM_D9 : Format<51>;
-def MRM_DA : Format<52>;
-def MRM_DB : Format<53>;
-def MRM_DC : Format<54>;
-def MRM_DD : Format<55>;
-def MRM_DE : Format<56>;
-def MRM_DF : Format<57>;
+def MRM_F8 : Format<45>;
+def MRM_F9 : Format<46>;
+def MRM_D0 : Format<47>;
+def MRM_D1 : Format<48>;
+def MRM_D4 : Format<49>;
+def MRM_D5 : Format<50>;
+def MRM_D6 : Format<51>;
+def MRM_D8 : Format<52>;
+def MRM_D9 : Format<53>;
+def MRM_DA : Format<54>;
+def MRM_DB : Format<55>;
+def MRM_DC : Format<56>;
+def MRM_DD : Format<57>;
+def MRM_DE : Format<58>;
+def MRM_DF : Format<59>;
 
 // ImmType - This specifies the immediate type used by an instruction. This is
 // part of the ad-hoc solution used to emit machine instruction encodings by our
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 7ba542c..7c0423f 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -4281,7 +4281,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
     bool isAligned = (*MMOs.first) &&
                      (*MMOs.first)->getAlignment() >= Alignment;
     Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, TM), dl,
-                              VT, MVT::Other, &AddrOps[0], AddrOps.size());
+                              VT, MVT::Other, AddrOps);
     NewNodes.push_back(Load);
 
     // Preserve memory reference information.
@@ -4303,8 +4303,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
   if (Load)
     BeforeOps.push_back(SDValue(Load, 0));
   std::copy(AfterOps.begin(), AfterOps.end(), std::back_inserter(BeforeOps));
-  SDNode *NewNode= DAG.getMachineNode(Opc, dl, VTs, &BeforeOps[0],
-                                      BeforeOps.size());
+  SDNode *NewNode= DAG.getMachineNode(Opc, dl, VTs, BeforeOps);
   NewNodes.push_back(NewNode);
 
   // Emit the store instruction.
@@ -4326,8 +4325,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
                      (*MMOs.first)->getAlignment() >= Alignment;
     SDNode *Store = DAG.getMachineNode(getStoreRegOpcode(0, DstRC,
                                                          isAligned, TM),
-                                       dl, MVT::Other,
-                                       &AddrOps[0], AddrOps.size());
+                                       dl, MVT::Other, AddrOps);
     NewNodes.push_back(Store);
 
     // Preserve memory reference information.
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index ccc1aa2..3380d8c 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -1833,90 +1833,90 @@ include "X86InstrCompiler.td"
 // Assembler Mnemonic Aliases
 //===----------------------------------------------------------------------===//
 
-def : MnemonicAlias<"call", "calll">, Requires<[In32BitMode]>;
-def : MnemonicAlias<"call", "callq">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"call", "calll", "att">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"call", "callq", "att">, Requires<[In64BitMode]>;
 
-def : MnemonicAlias<"cbw",  "cbtw">;
-def : MnemonicAlias<"cwde", "cwtl">;
-def : MnemonicAlias<"cwd",  "cwtd">;
-def : MnemonicAlias<"cdq", "cltd">;
-def : MnemonicAlias<"cdqe", "cltq">;
-def : MnemonicAlias<"cqo", "cqto">;
+def : MnemonicAlias<"cbw",  "cbtw", "att">;
+def : MnemonicAlias<"cwde", "cwtl", "att">;
+def : MnemonicAlias<"cwd",  "cwtd", "att">;
+def : MnemonicAlias<"cdq",  "cltd", "att">;
+def : MnemonicAlias<"cdqe", "cltq", "att">;
+def : MnemonicAlias<"cqo",  "cqto", "att">;
 
 // lret maps to lretl, it is not ambiguous with lretq.
-def : MnemonicAlias<"lret", "lretl">;
+def : MnemonicAlias<"lret", "lretl", "att">;
 
-def : MnemonicAlias<"leavel", "leave">, Requires<[In32BitMode]>;
-def : MnemonicAlias<"leaveq", "leave">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"leavel", "leave", "att">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"leaveq", "leave", "att">, Requires<[In64BitMode]>;
 
-def : MnemonicAlias<"loopz", "loope">;
-def : MnemonicAlias<"loopnz", "loopne">;
+def : MnemonicAlias<"loopz",  "loope",  "att">;
+def : MnemonicAlias<"loopnz", "loopne", "att">;
 
-def : MnemonicAlias<"pop", "popl">, Requires<[In32BitMode]>;
-def : MnemonicAlias<"pop", "popq">, Requires<[In64BitMode]>;
-def : MnemonicAlias<"popf", "popfl">, Requires<[In32BitMode]>;
-def : MnemonicAlias<"popf", "popfq">, Requires<[In64BitMode]>;
-def : MnemonicAlias<"popfd",  "popfl">;
+def : MnemonicAlias<"pop",   "popl",  "att">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"pop",   "popq",  "att">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"popf",  "popfl", "att">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"popf",  "popfq", "att">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"popfd", "popfl", "att">;
 
 // FIXME: This is wrong for "push reg".  "push %bx" should turn into pushw in
 // all modes.  However: "push (addr)" and "push $42" should default to
 // pushl/pushq depending on the current mode.  Similar for "pop %bx"
-def : MnemonicAlias<"push", "pushl">, Requires<[In32BitMode]>;
-def : MnemonicAlias<"push", "pushq">, Requires<[In64BitMode]>;
-def : MnemonicAlias<"pushf", "pushfl">, Requires<[In32BitMode]>;
-def : MnemonicAlias<"pushf", "pushfq">, Requires<[In64BitMode]>;
-def : MnemonicAlias<"pushfd", "pushfl">;
+def : MnemonicAlias<"push",   "pushl",  "att">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"push",   "pushq",  "att">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"pushf",  "pushfl", "att">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"pushf",  "pushfq", "att">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"pushfd", "pushfl", "att">;
 
-def : MnemonicAlias<"repe", "rep">;
-def : MnemonicAlias<"repz", "rep">;
-def : MnemonicAlias<"repnz", "repne">;
+def : MnemonicAlias<"repe",  "rep",   "att">;
+def : MnemonicAlias<"repz",  "rep",   "att">;
+def : MnemonicAlias<"repnz", "repne", "att">;
 
-def : MnemonicAlias<"retl", "ret">, Requires<[In32BitMode]>;
-def : MnemonicAlias<"retq", "ret">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"retl", "ret", "att">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"retq", "ret", "att">, Requires<[In64BitMode]>;
 
-def : MnemonicAlias<"salb", "shlb">;
-def : MnemonicAlias<"salw", "shlw">;
-def : MnemonicAlias<"sall", "shll">;
-def : MnemonicAlias<"salq", "shlq">;
+def : MnemonicAlias<"salb", "shlb", "att">;
+def : MnemonicAlias<"salw", "shlw", "att">;
+def : MnemonicAlias<"sall", "shll", "att">;
+def : MnemonicAlias<"salq", "shlq", "att">;
 
-def : MnemonicAlias<"smovb", "movsb">;
-def : MnemonicAlias<"smovw", "movsw">;
-def : MnemonicAlias<"smovl", "movsl">;
-def : MnemonicAlias<"smovq", "movsq">;
+def : MnemonicAlias<"smovb", "movsb", "att">;
+def : MnemonicAlias<"smovw", "movsw", "att">;
+def : MnemonicAlias<"smovl", "movsl", "att">;
+def : MnemonicAlias<"smovq", "movsq", "att">;
 
-def : MnemonicAlias<"ud2a", "ud2">;
-def : MnemonicAlias<"verrw", "verr">;
+def : MnemonicAlias<"ud2a",  "ud2",  "att">;
+def : MnemonicAlias<"verrw", "verr", "att">;
 
 // System instruction aliases.
-def : MnemonicAlias<"iret", "iretl">;
-def : MnemonicAlias<"sysret", "sysretl">;
-def : MnemonicAlias<"sysexit", "sysexitl">;
+def : MnemonicAlias<"iret",    "iretl",    "att">;
+def : MnemonicAlias<"sysret",  "sysretl",  "att">;
+def : MnemonicAlias<"sysexit", "sysexitl", "att">;
 
-def : MnemonicAlias<"lgdtl", "lgdt">, Requires<[In32BitMode]>;
-def : MnemonicAlias<"lgdtq", "lgdt">, Requires<[In64BitMode]>;
-def : MnemonicAlias<"lidtl", "lidt">, Requires<[In32BitMode]>;
-def : MnemonicAlias<"lidtq", "lidt">, Requires<[In64BitMode]>;
-def : MnemonicAlias<"sgdtl", "sgdt">, Requires<[In32BitMode]>;
-def : MnemonicAlias<"sgdtq", "sgdt">, Requires<[In64BitMode]>;
-def : MnemonicAlias<"sidtl", "sidt">, Requires<[In32BitMode]>;
-def : MnemonicAlias<"sidtq", "sidt">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"lgdtl", "lgdt", "att">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"lgdtq", "lgdt", "att">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"lidtl", "lidt", "att">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"lidtq", "lidt", "att">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"sgdtl", "sgdt", "att">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"sgdtq", "sgdt", "att">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"sidtl", "sidt", "att">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"sidtq", "sidt", "att">, Requires<[In64BitMode]>;
 
 
 // Floating point stack aliases.
-def : MnemonicAlias<"fcmovz",   "fcmove">;
-def : MnemonicAlias<"fcmova",   "fcmovnbe">;
-def : MnemonicAlias<"fcmovnae", "fcmovb">;
-def : MnemonicAlias<"fcmovna",  "fcmovbe">;
-def : MnemonicAlias<"fcmovae",  "fcmovnb">;
-def : MnemonicAlias<"fcomip",   "fcompi">;
-def : MnemonicAlias<"fildq",    "fildll">;
-def : MnemonicAlias<"fistpq",   "fistpll">;
-def : MnemonicAlias<"fisttpq",  "fisttpll">;
-def : MnemonicAlias<"fldcww",   "fldcw">;
-def : MnemonicAlias<"fnstcww", "fnstcw">;
-def : MnemonicAlias<"fnstsww", "fnstsw">;
-def : MnemonicAlias<"fucomip",  "fucompi">;
-def : MnemonicAlias<"fwait",    "wait">;
+def : MnemonicAlias<"fcmovz",   "fcmove",   "att">;
+def : MnemonicAlias<"fcmova",   "fcmovnbe", "att">;
+def : MnemonicAlias<"fcmovnae", "fcmovb",   "att">;
+def : MnemonicAlias<"fcmovna",  "fcmovbe",  "att">;
+def : MnemonicAlias<"fcmovae",  "fcmovnb",  "att">;
+def : MnemonicAlias<"fcomip",   "fcompi",   "att">;
+def : MnemonicAlias<"fildq",    "fildll",   "att">;
+def : MnemonicAlias<"fistpq",   "fistpll",  "att">;
+def : MnemonicAlias<"fisttpq",  "fisttpll", "att">;
+def : MnemonicAlias<"fldcww",   "fldcw",    "att">;
+def : MnemonicAlias<"fnstcww",  "fnstcw",   "att">;
+def : MnemonicAlias<"fnstsww",  "fnstsw",   "att">;
+def : MnemonicAlias<"fucomip",  "fucompi",  "att">;
+def : MnemonicAlias<"fwait",    "wait",     "att">;
 
 
 class CondCodeAlias<string Prefix,string Suffix, string OldCond, string NewCond>
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 3842387..cce938b 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -4462,12 +4462,12 @@ def MOVPDI2DImr  : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src),
 // Move Packed Doubleword Int first element to Doubleword Int
 //
 let SchedRW = [WriteMove] in {
-def VMOVPQIto64rr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
-                          "vmov{d|q}\t{$src, $dst|$dst, $src}",
+def VMOVPQIto64rr : VRPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
+                          "mov{d|q}\t{$src, $dst|$dst, $src}",
                           [(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
                                                            (iPTR 0)))],
                                                            IIC_SSE_MOVD_ToGP>,
-                      TB, OpSize, VEX, VEX_W, Requires<[HasAVX, In64BitMode]>;
+                      VEX;
 
 def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
                         "mov{d|q}\t{$src, $dst|$dst, $src}",
@@ -5094,6 +5094,16 @@ multiclass SS3I_unop_rm_int_y<bits<8> opc, string OpcodeStr,
                     Sched<[WriteVecALULd]>;
 }
 
+// Helper fragments to match sext vXi1 to vXiY.
+def v16i1sextv16i8 : PatLeaf<(v16i8 (X86pcmpgt (bc_v16i8 (v4i32 immAllZerosV)),
+                                               VR128:$src))>;
+def v8i1sextv8i16  : PatLeaf<(v8i16 (X86vsrai VR128:$src, (i32 15)))>;
+def v4i1sextv4i32  : PatLeaf<(v4i32 (X86vsrai VR128:$src, (i32 31)))>;
+def v32i1sextv32i8 : PatLeaf<(v32i8 (X86pcmpgt (bc_v32i8 (v8i32 immAllZerosV)),
+                                               VR256:$src))>;
+def v16i1sextv16i16: PatLeaf<(v16i16 (X86vsrai VR256:$src, (i32 15)))>;
+def v8i1sextv8i32  : PatLeaf<(v8i32 (X86vsrai VR256:$src, (i32 31)))>;
+
 let Predicates = [HasAVX] in {
   defm VPABSB  : SS3I_unop_rm_int<0x1C, "vpabsb",
                                   int_x86_ssse3_pabs_b_128>, VEX;
@@ -5101,6 +5111,19 @@ let Predicates = [HasAVX] in {
                                   int_x86_ssse3_pabs_w_128>, VEX;
   defm VPABSD  : SS3I_unop_rm_int<0x1E, "vpabsd",
                                   int_x86_ssse3_pabs_d_128>, VEX;
+
+  def : Pat<(xor
+            (bc_v2i64 (v16i1sextv16i8)),
+            (bc_v2i64 (add (v16i8 VR128:$src), (v16i1sextv16i8)))),
+            (VPABSBrr128 VR128:$src)>;
+  def : Pat<(xor
+            (bc_v2i64 (v8i1sextv8i16)),
+            (bc_v2i64 (add (v8i16 VR128:$src), (v8i1sextv8i16)))),
+            (VPABSWrr128 VR128:$src)>;
+  def : Pat<(xor
+            (bc_v2i64 (v4i1sextv4i32)),
+            (bc_v2i64 (add (v4i32 VR128:$src), (v4i1sextv4i32)))),
+            (VPABSDrr128 VR128:$src)>;
 }
 
 let Predicates = [HasAVX2] in {
@@ -5110,6 +5133,19 @@ let Predicates = [HasAVX2] in {
                                     int_x86_avx2_pabs_w>, VEX, VEX_L;
   defm VPABSD  : SS3I_unop_rm_int_y<0x1E, "vpabsd",
                                     int_x86_avx2_pabs_d>, VEX, VEX_L;
+
+  def : Pat<(xor
+            (bc_v4i64 (v32i1sextv32i8)),
+            (bc_v4i64 (add (v32i8 VR256:$src), (v32i1sextv32i8)))),
+            (VPABSBrr256 VR256:$src)>;
+  def : Pat<(xor
+            (bc_v4i64 (v16i1sextv16i16)),
+            (bc_v4i64 (add (v16i16 VR256:$src), (v16i1sextv16i16)))),
+            (VPABSWrr256 VR256:$src)>;
+  def : Pat<(xor
+            (bc_v4i64 (v8i1sextv8i32)),
+            (bc_v4i64 (add (v8i32 VR256:$src), (v8i1sextv8i32)))),
+            (VPABSDrr256 VR256:$src)>;
 }
 
 defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb",
@@ -5119,6 +5155,21 @@ defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw",
 defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd",
                               int_x86_ssse3_pabs_d_128>;
 
+let Predicates = [HasSSSE3] in { // abs idiom: (x + s) ^ s, s = sign mask.
+  def : Pat<(xor
+            (bc_v2i64 (v16i1sextv16i8)),
+            (bc_v2i64 (add (v16i8 VR128:$src), (v16i1sextv16i8)))),
+            (PABSBrr128 VR128:$src)>;
+  def : Pat<(xor
+            (bc_v2i64 (v8i1sextv8i16)),
+            (bc_v2i64 (add (v8i16 VR128:$src), (v8i1sextv8i16)))),
+            (PABSWrr128 VR128:$src)>;
+  def : Pat<(xor
+            (bc_v2i64 (v4i1sextv4i32)),
+            (bc_v2i64 (add (v4i32 VR128:$src), (v4i1sextv4i32)))),
+            (PABSDrr128 VR128:$src)>;
+} // Predicates = [HasSSSE3]
+
 //===---------------------------------------------------------------------===//
 // SSSE3 - Packed Binary Operator Instructions
 //===---------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86InstrShiftRotate.td b/lib/Target/X86/X86InstrShiftRotate.td
index 5b6298b..89c1a68 100644
--- a/lib/Target/X86/X86InstrShiftRotate.td
+++ b/lib/Target/X86/X86InstrShiftRotate.td
@@ -34,7 +34,7 @@ def SHL64rCL : RI<0xD3, MRM4r, (outs GR64:$dst), (ins GR64:$src1),
 def SHL8ri   : Ii8<0xC0, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
                    "shl{b}\t{$src2, $dst|$dst, $src2}",
                    [(set GR8:$dst, (shl GR8:$src1, (i8 imm:$src2)))], IIC_SR>;
-                   
+
 let isConvertibleToThreeAddress = 1 in {   // Can transform into LEA.
 def SHL16ri  : Ii8<0xC1, MRM4r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
                    "shl{w}\t{$src2, $dst|$dst, $src2}",
@@ -43,7 +43,7 @@ def SHL16ri  : Ii8<0xC1, MRM4r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
 def SHL32ri  : Ii8<0xC1, MRM4r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
                    "shl{l}\t{$src2, $dst|$dst, $src2}",
                    [(set GR32:$dst, (shl GR32:$src1, (i8 imm:$src2)))], IIC_SR>;
-def SHL64ri  : RIi8<0xC1, MRM4r, (outs GR64:$dst), 
+def SHL64ri  : RIi8<0xC1, MRM4r, (outs GR64:$dst),
                     (ins GR64:$src1, i8imm:$src2),
                     "shl{q}\t{$src2, $dst|$dst, $src2}",
                     [(set GR64:$dst, (shl GR64:$src1, (i8 imm:$src2)))],
diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td
index 053417c..bab3cdd 100644
--- a/lib/Target/X86/X86InstrSystem.td
+++ b/lib/Target/X86/X86InstrSystem.td
@@ -449,15 +449,15 @@ let Uses = [RDX, RAX] in {
   def XSAVE : I<0xAE, MRM4m, (outs opaque512mem:$dst), (ins),
                "xsave\t$dst", []>, TB;
   def XSAVE64 : I<0xAE, MRM4m, (outs opaque512mem:$dst), (ins),
-                 "xsaveq\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>;
+                 "xsave{q|64}\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>;
   def XRSTOR : I<0xAE, MRM5m, (outs), (ins opaque512mem:$dst),
                "xrstor\t$dst", []>, TB;
   def XRSTOR64 : I<0xAE, MRM5m, (outs), (ins opaque512mem:$dst),
-                 "xrstorq\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>;
+                 "xrstor{q|64}\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>;
   def XSAVEOPT : I<0xAE, MRM6m, (outs opaque512mem:$dst), (ins),
                   "xsaveopt\t$dst", []>, TB;
   def XSAVEOPT64 : I<0xAE, MRM6m, (outs opaque512mem:$dst), (ins),
-                    "xsaveoptq\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>;
+                    "xsaveopt{q|64}\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>;
 }
 } // SchedRW
 
@@ -515,8 +515,15 @@ let Predicates = [HasFSGSBase, In64BitMode] in {
 //===----------------------------------------------------------------------===//
 // INVPCID Instruction
 def INVPCID32 : I<0x82, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
-                "invpcid {$src2, $src1|$src1, $src2}", []>, OpSize, T8,
+                "invpcid\t{$src2, $src1|$src1, $src2}", []>, OpSize, T8,
                 Requires<[In32BitMode]>;
 def INVPCID64 : I<0x82, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
-                "invpcid {$src2, $src1|$src1, $src2}", []>, OpSize, T8,
+                "invpcid\t{$src2, $src1|$src1, $src2}", []>, OpSize, T8,
                 Requires<[In64BitMode]>;
+
+//===----------------------------------------------------------------------===//
+// SMAP Instruction
+let Defs = [EFLAGS], Uses = [EFLAGS] in {
+  def CLAC : I<0x01, MRM_CA, (outs), (ins), "clac", []>, TB;
+  def STAC : I<0x01, MRM_CB, (outs), (ins), "stac", []>, TB;
+}
diff --git a/lib/Target/X86/X86SchedHaswell.td b/lib/Target/X86/X86SchedHaswell.td
index 7de6791..84c9203 100644
--- a/lib/Target/X86/X86SchedHaswell.td
+++ b/lib/Target/X86/X86SchedHaswell.td
@@ -18,7 +18,7 @@ def HaswellModel : SchedMachineModel {
   let IssueWidth = 4;
   let MinLatency = 0; // 0 = Out-of-order execution.
   let LoadLatency = 4;
-  let ILPWindow = 40;
+  let ILPWindow = 30;
   let MispredictPenalty = 16;
 }
 
diff --git a/lib/Target/X86/X86SchedSandyBridge.td b/lib/Target/X86/X86SchedSandyBridge.td
index 74d5f1b..b36b3ad 100644
--- a/lib/Target/X86/X86SchedSandyBridge.td
+++ b/lib/Target/X86/X86SchedSandyBridge.td
@@ -19,7 +19,7 @@ def SandyBridgeModel : SchedMachineModel {
   let IssueWidth = 4;
   let MinLatency = 0; // 0 = Out-of-order execution.
   let LoadLatency = 4;
-  let ILPWindow = 30;
+  let ILPWindow = 20;
   let MispredictPenalty = 16;
 }
 
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 14619b6..74da2a9 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -170,6 +170,26 @@ bool X86Subtarget::IsLegalToCallImmediateAddr(const TargetMachine &TM) const {
   return isTargetELF() || TM.getRelocationModel() == Reloc::Static;
 }
 
+static bool OSHasAVXSupport() { // True if xgetbv reports AVX state enabled.
+#if defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)\
+    || defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64)
+#if defined(__GNUC__)
+  // Check xgetbv; this uses a .byte sequence instead of the instruction
+  // directly because older assemblers do not include support for xgetbv and
+  // there is no easy way to conditionally compile based on the assembler used.
+  int rEAX, rEDX; // rEDX is an asm output only; its value is never read.
+  __asm__ (".byte 0x0f, 0x01, 0xd0" : "=a" (rEAX), "=d" (rEDX) : "c" (0));
+#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
+  unsigned long long rEAX = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
+#else
+  int rEAX = 0; // Ensures we return false
+#endif
+  return (rEAX & 6) == 6; // Bits 1 (SSE state) and 2 (AVX state) both set.
+#else // Not an x86 build: no xgetbv to query.
+  return false;
+#endif // x86
+}
+
 void X86Subtarget::AutoDetectSubtargetFeatures() {
   unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
   unsigned MaxLevel;
@@ -192,7 +212,9 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
   if ((ECX >> 9)  & 1) { X86SSELevel = SSSE3; ToggleFeature(X86::FeatureSSSE3);}
   if ((ECX >> 19) & 1) { X86SSELevel = SSE41; ToggleFeature(X86::FeatureSSE41);}
   if ((ECX >> 20) & 1) { X86SSELevel = SSE42; ToggleFeature(X86::FeatureSSE42);}
-  if ((ECX >> 28) & 1) { X86SSELevel = AVX;   ToggleFeature(X86::FeatureAVX); }
+  if (((ECX >> 27) & 1) && ((ECX >> 28) & 1) && OSHasAVXSupport()) {
+    X86SSELevel = AVX;   ToggleFeature(X86::FeatureAVX);
+  }
 
   bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0;
   bool IsAMD   = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0;
@@ -467,6 +489,7 @@ void X86Subtarget::initializeEnvironment() {
   PostRAScheduler = false;
   PadShortFunctions = false;
   CallRegIndirect = false;
+  LEAUsesAG = false;
   stackAlignment = 4;
   // FIXME: this is a known good value for Yonah. How about others?
   MaxInlineSizeThreshold = 128;
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 6fbdb1d..66832b9 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -165,6 +165,9 @@ protected:
   /// CallRegIndirect - True if the Calls with memory reference should be converted
   /// to a register-based indirect call.
   bool CallRegIndirect;
+  /// LEAUsesAG - True if the LEA instruction inputs have to be ready at
+  ///             address generation (AG) time.
+  bool LEAUsesAG;
 
   /// stackAlignment - The minimum alignment known to hold of the stack frame on
   /// entry to the function and which must be maintained by every function.
@@ -278,6 +281,7 @@ public:
   bool hasSlowDivide() const { return HasSlowDivide; }
   bool padShortFunctions() const { return PadShortFunctions; }
   bool callRegIndirect() const { return CallRegIndirect; }
+  bool LEAusesAG() const { return LEAUsesAG; }
 
   bool isAtom() const { return X86ProcFamily == IntelAtom; }
 
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 8aa58a2..00fa47f 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -215,6 +215,11 @@ bool X86PassConfig::addPreEmitPass() {
     addPass(createX86PadShortFunctions());
     ShouldPrint = true;
   }
+  if (getOptLevel() != CodeGenOpt::None &&
+      getX86Subtarget().LEAusesAG()){
+    addPass(createX86FixupLEAs());
+    ShouldPrint = true;
+  }
 
   return ShouldPrint;
 }
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
index a98c699..eba9d78 100644
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -334,9 +334,44 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
   int ISD = TLI->InstructionOpcodeToISD(Opcode);
   assert(ISD && "Invalid opcode");
 
+  std::pair<unsigned, MVT> LTSrc = TLI->getTypeLegalizationCost(Src);
+  std::pair<unsigned, MVT> LTDest = TLI->getTypeLegalizationCost(Dst);
+
+  static const TypeConversionCostTblEntry<MVT> SSE2ConvTbl[] = {
+    // These are somewhat magic numbers justified by looking at the output of
+    // Intel's IACA, running some kernels and making sure when we take
+    // legalization into account the throughput will be overestimated.
+    { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 },
+    { ISD::UINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 },
+    { ISD::UINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 },
+    { ISD::UINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 },
+    { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 },
+    { ISD::SINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 },
+    { ISD::SINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 },
+    { ISD::SINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 },
+    // There are faster sequences for float conversions.
+    { ISD::UINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 },
+    { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 15 },
+    { ISD::UINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 },
+    { ISD::UINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 },
+    { ISD::SINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 },
+    { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 15 },
+    { ISD::SINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 },
+    { ISD::SINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 },
+  };
+
+  if (ST->hasSSE2() && !ST->hasAVX()) {
+    int Idx = ConvertCostTableLookup<MVT>(SSE2ConvTbl,
+                                          array_lengthof(SSE2ConvTbl),
+                                          ISD, LTDest.second, LTSrc.second);
+    if (Idx != -1)
+      return LTSrc.first * SSE2ConvTbl[Idx].Cost;
+  }
+
   EVT SrcTy = TLI->getValueType(Src);
   EVT DstTy = TLI->getValueType(Dst);
 
+  // The function getSimpleVT only handles simple value types.
   if (!SrcTy.isSimple() || !DstTy.isSimple())
     return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
 
diff --git a/lib/Target/XCore/CMakeLists.txt b/lib/Target/XCore/CMakeLists.txt
index 099ad39..d5bfddc 100644
--- a/lib/Target/XCore/CMakeLists.txt
+++ b/lib/Target/XCore/CMakeLists.txt
@@ -15,6 +15,7 @@ add_llvm_target(XCoreCodeGen
   XCoreInstrInfo.cpp
   XCoreISelDAGToDAG.cpp
   XCoreISelLowering.cpp
+  XCoreLowerThreadLocal.cpp
   XCoreMachineFunctionInfo.cpp
   XCoreMCInstLower.cpp
   XCoreRegisterInfo.cpp
diff --git a/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
index 7b99967..a2ae40c 100644
--- a/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
+++ b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
@@ -97,8 +97,8 @@ static DecodeStatus DecodeRRegsRegisterClass(MCInst &Inst,
 static DecodeStatus DecodeBitpOperand(MCInst &Inst, unsigned Val,
                                       uint64_t Address, const void *Decoder);
 
-static DecodeStatus DecodeMEMiiOperand(MCInst &Inst, unsigned Val,
-                                       uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeNegImmOperand(MCInst &Inst, unsigned Val,
+                                        uint64_t Address, const void *Decoder);
 
 static DecodeStatus Decode2RInstruction(MCInst &Inst,
                                         unsigned Insn,
@@ -242,10 +242,9 @@ static DecodeStatus DecodeBitpOperand(MCInst &Inst, unsigned Val,
   return MCDisassembler::Success;
 }
 
-static DecodeStatus DecodeMEMiiOperand(MCInst &Inst, unsigned Val,
-                                       uint64_t Address, const void *Decoder) {
-  Inst.addOperand(MCOperand::CreateImm(Val));
-  Inst.addOperand(MCOperand::CreateImm(0));
+static DecodeStatus DecodeNegImmOperand(MCInst &Inst, unsigned Val,
+                                        uint64_t Address, const void *Decoder) {
+  Inst.addOperand(MCOperand::CreateImm(-(int64_t)Val));
   return MCDisassembler::Success;
 }
 
diff --git a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp
index 1592351..9ae8c0d 100644
--- a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp
+++ b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp
@@ -84,14 +84,3 @@ printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
   assert(Op.isExpr() && "unknown operand kind in printOperand");
   printExpr(Op.getExpr(), O);
 }
-
-void XCoreInstPrinter::
-printMemOperand(const MCInst *MI, int opNum, raw_ostream &O) {
-  printOperand(MI, opNum, O);
-
-  if (MI->getOperand(opNum+1).isImm() && MI->getOperand(opNum+1).getImm() == 0)
-    return;
-
-  O << "+";
-  printOperand(MI, opNum+1, O);
-}
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
index b5b072d..c177365 100644
--- a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
@@ -66,6 +66,9 @@ static MCCodeGenInfo *createXCoreMCCodeGenInfo(StringRef TT, Reloc::Model RM,
                                                CodeModel::Model CM,
                                                CodeGenOpt::Level OL) {
   MCCodeGenInfo *X = new MCCodeGenInfo();
+  if (RM == Reloc::Default) {
+    RM = Reloc::Static;
+  }
   X->InitMCCodeGenInfo(RM, CM, OL);
   return X;
 }
diff --git a/lib/Target/XCore/XCore.h b/lib/Target/XCore/XCore.h
index 08f091e..2f375fc 100644
--- a/lib/Target/XCore/XCore.h
+++ b/lib/Target/XCore/XCore.h
@@ -20,12 +20,16 @@
 
 namespace llvm {
   class FunctionPass;
+  class ModulePass;
   class TargetMachine;
   class XCoreTargetMachine;
   class formatted_raw_ostream;
 
+  void initializeXCoreLowerThreadLocalPass(PassRegistry &p);
+
   FunctionPass *createXCoreISelDag(XCoreTargetMachine &TM,
                                    CodeGenOpt::Level OptLevel);
+  ModulePass *createXCoreLowerThreadLocalPass();
 
 } // end namespace llvm;
 
diff --git a/lib/Target/XCore/XCoreAsmPrinter.cpp b/lib/Target/XCore/XCoreAsmPrinter.cpp
index 0d146ba..e177ad3 100644
--- a/lib/Target/XCore/XCoreAsmPrinter.cpp
+++ b/lib/Target/XCore/XCoreAsmPrinter.cpp
@@ -36,7 +36,6 @@
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/raw_ostream.h"
@@ -46,12 +45,6 @@
 #include <cctype>
 using namespace llvm;
 
-static cl::opt<unsigned> MaxThreads("xcore-max-threads", cl::Optional,
-  cl::desc("Maximum number of threads (for emulation thread-local storage)"),
-  cl::Hidden,
-  cl::value_desc("number"),
-  cl::init(8));
-
 namespace {
   class XCoreAsmPrinter : public AsmPrinter {
     const XCoreSubtarget &Subtarget;
@@ -152,10 +145,10 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
 
   EmitAlignment(Align > 2 ? Align : 2, GV);
   
-  unsigned Size = TD->getTypeAllocSize(C->getType());
   if (GV->isThreadLocal()) {
-    Size *= MaxThreads;
+    report_fatal_error("TLS is not supported by this target!");
   }
+  unsigned Size = TD->getTypeAllocSize(C->getType());
   if (MAI->hasDotTypeDotSizeDirective()) {
     OutStreamer.EmitSymbolAttribute(GVSym, MCSA_ELF_TypeObject);
     OutStreamer.EmitRawText("\t.size " + Twine(GVSym->getName()) + "," +
@@ -164,10 +157,6 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
   OutStreamer.EmitLabel(GVSym);
   
   EmitGlobalConstant(C);
-  if (GV->isThreadLocal()) {
-    for (unsigned i = 1; i < MaxThreads; ++i)
-      EmitGlobalConstant(C);
-  }
   // The ABI requires that unsigned scalar types smaller than 32 bits
   // are padded to 32 bits.
   if (Size < 4)
diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
index fbf86c5..eb29b50 100644
--- a/lib/Target/XCore/XCoreISelDAGToDAG.cpp
+++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
@@ -68,8 +68,6 @@ namespace {
 
     // Complex Pattern Selectors.
     bool SelectADDRspii(SDValue Addr, SDValue &Base, SDValue &Offset);
-    bool SelectADDRdpii(SDValue Addr, SDValue &Base, SDValue &Offset);
-    bool SelectADDRcpii(SDValue Addr, SDValue &Base, SDValue &Offset);
     
     virtual const char *getPassName() const {
       return "XCore DAG->DAG Pattern Instruction Selection";
@@ -110,48 +108,6 @@ bool XCoreDAGToDAGISel::SelectADDRspii(SDValue Addr, SDValue &Base,
   return false;
 }
 
-bool XCoreDAGToDAGISel::SelectADDRdpii(SDValue Addr, SDValue &Base,
-                                       SDValue &Offset) {
-  if (Addr.getOpcode() == XCoreISD::DPRelativeWrapper) {
-    Base = Addr.getOperand(0);
-    Offset = CurDAG->getTargetConstant(0, MVT::i32);
-    return true;
-  }
-  if (Addr.getOpcode() == ISD::ADD) {
-    ConstantSDNode *CN = 0;
-    if ((Addr.getOperand(0).getOpcode() == XCoreISD::DPRelativeWrapper)
-      && (CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
-      && (CN->getSExtValue() % 4 == 0 && CN->getSExtValue() >= 0)) {
-      // Constant word offset from a object in the data region
-      Base = Addr.getOperand(0).getOperand(0);
-      Offset = CurDAG->getTargetConstant(CN->getSExtValue(), MVT::i32);
-      return true;
-    }
-  }
-  return false;
-}
-
-bool XCoreDAGToDAGISel::SelectADDRcpii(SDValue Addr, SDValue &Base,
-                                       SDValue &Offset) {
-  if (Addr.getOpcode() == XCoreISD::CPRelativeWrapper) {
-    Base = Addr.getOperand(0);
-    Offset = CurDAG->getTargetConstant(0, MVT::i32);
-    return true;
-  }
-  if (Addr.getOpcode() == ISD::ADD) {
-    ConstantSDNode *CN = 0;
-    if ((Addr.getOperand(0).getOpcode() == XCoreISD::CPRelativeWrapper)
-      && (CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
-      && (CN->getSExtValue() % 4 == 0 && CN->getSExtValue() >= 0)) {
-      // Constant word offset from a object in the data region
-      Base = Addr.getOperand(0).getOperand(0);
-      Offset = CurDAG->getTargetConstant(CN->getSExtValue(), MVT::i32);
-      return true;
-    }
-  }
-  return false;
-}
-
 SDNode *XCoreDAGToDAGISel::Select(SDNode *N) {
   DebugLoc dl = N->getDebugLoc();
   switch (N->getOpcode()) {
@@ -185,36 +141,36 @@ SDNode *XCoreDAGToDAGISel::Select(SDNode *N) {
     SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
                         N->getOperand(2) };
     return CurDAG->getMachineNode(XCore::LADD_l5r, dl, MVT::i32, MVT::i32,
-                                  Ops, 3);
+                                  Ops);
   }
   case XCoreISD::LSUB: {
     SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
                         N->getOperand(2) };
     return CurDAG->getMachineNode(XCore::LSUB_l5r, dl, MVT::i32, MVT::i32,
-                                  Ops, 3);
+                                  Ops);
   }
   case XCoreISD::MACCU: {
     SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
                       N->getOperand(2), N->getOperand(3) };
     return CurDAG->getMachineNode(XCore::MACCU_l4r, dl, MVT::i32, MVT::i32,
-                                  Ops, 4);
+                                  Ops);
   }
   case XCoreISD::MACCS: {
     SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
                       N->getOperand(2), N->getOperand(3) };
     return CurDAG->getMachineNode(XCore::MACCS_l4r, dl, MVT::i32, MVT::i32,
-                                  Ops, 4);
+                                  Ops);
   }
   case XCoreISD::LMUL: {
     SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
                       N->getOperand(2), N->getOperand(3) };
     return CurDAG->getMachineNode(XCore::LMUL_l6r, dl, MVT::i32, MVT::i32,
-                                  Ops, 4);
+                                  Ops);
   }
   case XCoreISD::CRC8: {
     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
     return CurDAG->getMachineNode(XCore::CRC8_l4r, dl, MVT::i32, MVT::i32,
-                                  Ops, 3);
+                                  Ops);
   }
   case ISD::BRIND:
     if (SDNode *ResNode = SelectBRIND(N))
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index a5d2be8..2d27f1a 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -36,6 +36,8 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+
 using namespace llvm;
 
 const char *XCoreTargetLowering::
@@ -120,9 +122,6 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
   setOperationAction(ISD::GlobalAddress, MVT::i32,   Custom);
   setOperationAction(ISD::BlockAddress, MVT::i32 , Custom);
 
-  // Thread Local Storage
-  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
-
   // Conversion of i64 -> double produces constantpool nodes
   setOperationAction(ISD::ConstantPool, MVT::i32,   Custom);
 
@@ -172,7 +171,6 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   switch (Op.getOpcode())
   {
   case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
-  case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
   case ISD::BlockAddress:       return LowerBlockAddress(Op, DAG);
   case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
   case ISD::BR_JT:              return LowerBR_JT(Op, DAG);
@@ -245,9 +243,20 @@ getGlobalAddressWrapper(SDValue GA, const GlobalValue *GV,
 SDValue XCoreTargetLowering::
 LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const
 {
-  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
-  SDValue GA = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(), MVT::i32);
-  return getGlobalAddressWrapper(GA, GV, DAG);
+  // Split the offset into a part that is folded into the target global
+  // address node and a remainder that is added explicitly. Only non-negative
+  // offsets that are a multiple of the word size can be folded.
+  const GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
+  const GlobalValue *GV = GN->getGlobal();
+  DebugLoc DL = Op.getDebugLoc();
+  int64_t Offset = GN->getOffset();
+  int64_t FoldedOffset = std::max(Offset & ~3, (int64_t)0);
+  SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32, FoldedOffset);
+  GA = getGlobalAddressWrapper(GA, GV, DAG);
+  if (Offset == FoldedOffset)
+    return GA;
+  SDValue Remaining = DAG.getConstant(Offset - FoldedOffset, MVT::i32);
+  return DAG.getNode(ISD::ADD, DL, MVT::i32, GA, Remaining);
 }
 
 static inline SDValue BuildGetId(SelectionDAG &DAG, DebugLoc dl) {
@@ -255,44 +264,6 @@ static inline SDValue BuildGetId(SelectionDAG &DAG, DebugLoc dl) {
                      DAG.getConstant(Intrinsic::xcore_getid, MVT::i32));
 }
 
-static inline bool isZeroLengthArray(Type *Ty) {
-  ArrayType *AT = dyn_cast_or_null<ArrayType>(Ty);
-  return AT && (AT->getNumElements() == 0);
-}
-
-SDValue XCoreTargetLowering::
-LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
-{
-  // FIXME there isn't really debug info here
-  DebugLoc dl = Op.getDebugLoc();
-  // transform to label + getid() * size
-  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
-  SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32);
-  const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
-  if (!GVar) {
-    // If GV is an alias then use the aliasee to determine size
-    if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
-      GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal());
-  }
-  if (!GVar) {
-    llvm_unreachable("Thread local object not a GlobalVariable?");
-  }
-  Type *Ty = cast<PointerType>(GV->getType())->getElementType();
-  if (!Ty->isSized() || isZeroLengthArray(Ty)) {
-#ifndef NDEBUG
-    errs() << "Size of thread local object " << GVar->getName()
-           << " is unknown\n";
-#endif
-    llvm_unreachable(0);
-  }
-  SDValue base = getGlobalAddressWrapper(GA, GV, DAG);
-  const DataLayout *TD = TM.getDataLayout();
-  unsigned Size = TD->getTypeAllocSize(Ty);
-  SDValue offset = DAG.getNode(ISD::MUL, dl, MVT::i32, BuildGetId(DAG, dl),
-                       DAG.getConstant(Size, MVT::i32));
-  return DAG.getNode(ISD::ADD, dl, MVT::i32, base, offset);
-}
-
 SDValue XCoreTargetLowering::
 LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const
 {
@@ -350,55 +321,68 @@ LowerBR_JT(SDValue Op, SelectionDAG &DAG) const
                      ScaledIndex);
 }
 
-static bool
-IsWordAlignedBasePlusConstantOffset(SDValue Addr, SDValue &AlignedBase,
-                                    int64_t &Offset)
+/// Lower a word load from \p Base + \p Offset, where \p Base is known to be
+/// word aligned, using only word aligned loads. A word aligned \p Offset takes
+/// a single load; otherwise the two words that the access straddles are loaded
+/// and combined with shifts.
+SDValue XCoreTargetLowering::
+lowerLoadWordFromAlignedBasePlusOffset(DebugLoc DL, SDValue Chain, SDValue Base,
+                                       int64_t Offset, SelectionDAG &DAG) const
 {
-  if (Addr.getOpcode() != ISD::ADD) {
-    return false;
+  if ((Offset & 0x3) == 0) {
+    // The access is already aligned, but the offset still has to be applied:
+    // the callers pass the base with the constant offset stripped off.
+    SDValue Addr = Base;
+    if (Offset != 0)
+      Addr = DAG.getNode(ISD::ADD, DL, MVT::i32, Base,
+                         DAG.getConstant(Offset, MVT::i32));
+    return DAG.getLoad(getPointerTy(), DL, Chain, Addr, MachinePointerInfo(),
+                       false, false, false, 0);
   }
-  ConstantSDNode *CN = 0;
-  if (!(CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
-    return false;
-  }
-  int64_t off = CN->getSExtValue();
-  const SDValue &Base = Addr.getOperand(0);
-  const SDValue *Root = &Base;
-  if (Base.getOpcode() == ISD::ADD &&
-      Base.getOperand(1).getOpcode() == ISD::SHL) {
-    ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Base.getOperand(1)
-                                                      .getOperand(1));
-    if (CN && (CN->getSExtValue() >= 2)) {
-      Root = &Base.getOperand(0);
-    }
-  }
-  if (isa<FrameIndexSDNode>(*Root)) {
-    // All frame indicies are word aligned
-    AlignedBase = Base;
-    Offset = off;
-    return true;
-  }
-  if (Root->getOpcode() == XCoreISD::DPRelativeWrapper ||
-      Root->getOpcode() == XCoreISD::CPRelativeWrapper) {
-    // All dp / cp relative addresses are word aligned
-    AlignedBase = Base;
-    Offset = off;
-    return true;
-  }
-  // Check for an aligned global variable.
-  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(*Root)) {
-    const GlobalValue *GV = GA->getGlobal();
-    if (GA->getOffset() == 0 && GV->getAlignment() >= 4) {
-      AlignedBase = Base;
-      Offset = off;
-      return true;
-    }
+  // Lower to pair of consecutive word aligned loads plus some bit shifting.
+  int32_t HighOffset = RoundUpToAlignment(Offset, 4);
+  int32_t LowOffset = HighOffset - 4;
+  SDValue LowAddr, HighAddr;
+  if (GlobalAddressSDNode *GASD =
+        dyn_cast<GlobalAddressSDNode>(Base.getNode())) {
+    LowAddr = DAG.getGlobalAddress(GASD->getGlobal(), DL, Base.getValueType(),
+                                   LowOffset);
+    HighAddr = DAG.getGlobalAddress(GASD->getGlobal(), DL, Base.getValueType(),
+                                    HighOffset);
+  } else {
+    LowAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, Base,
+                          DAG.getConstant(LowOffset, MVT::i32));
+    HighAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, Base,
+                           DAG.getConstant(HighOffset, MVT::i32));
   }
-  return false;
+  SDValue LowShift = DAG.getConstant((Offset - LowOffset) * 8, MVT::i32);
+  SDValue HighShift = DAG.getConstant((HighOffset - Offset) * 8, MVT::i32);
+
+  SDValue Low = DAG.getLoad(getPointerTy(), DL, Chain,
+                            LowAddr, MachinePointerInfo(),
+                            false, false, false, 0);
+  SDValue High = DAG.getLoad(getPointerTy(), DL, Chain,
+                             HighAddr, MachinePointerInfo(),
+                             false, false, false, 0);
+  SDValue LowShifted = DAG.getNode(ISD::SRL, DL, MVT::i32, Low, LowShift);
+  SDValue HighShifted = DAG.getNode(ISD::SHL, DL, MVT::i32, High, HighShift);
+  SDValue Result = DAG.getNode(ISD::OR, DL, MVT::i32, LowShifted, HighShifted);
+  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Low.getValue(1),
+                      High.getValue(1));
+  SDValue Ops[] = { Result, Chain };
+  return DAG.getMergeValues(Ops, 2, DL);
+}
+
+/// Return true if the two low bits of \p Value are known to be zero.
+static bool isWordAligned(SDValue Value, SelectionDAG &DAG) {
+  APInt Zeros, Ones;
+  DAG.ComputeMaskedBits(Value, Zeros, Ones);
+  return Zeros.countTrailingOnes() >= 2;
 }
 
 SDValue XCoreTargetLowering::
 LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   LoadSDNode *LD = cast<LoadSDNode>(Op);
   assert(LD->getExtensionType() == ISD::NON_EXTLOAD &&
          "Unexpected extension type");
@@ -416,45 +390,23 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
   SDValue BasePtr = LD->getBasePtr();
   DebugLoc DL = Op.getDebugLoc();
 
-  SDValue Base;
-  int64_t Offset;
-  if (!LD->isVolatile() &&
-      IsWordAlignedBasePlusConstantOffset(BasePtr, Base, Offset)) {
-    if (Offset % 4 == 0) {
-      // We've managed to infer better alignment information than the load
-      // already has. Use an aligned load.
-      //
-      return DAG.getLoad(getPointerTy(), DL, Chain, BasePtr,
-                         MachinePointerInfo(),
-                         false, false, false, 0);
+  if (!LD->isVolatile()) {
+    const GlobalValue *GV;
+    int64_t Offset = 0;
+    if (DAG.isBaseWithConstantOffset(BasePtr) &&
+        isWordAligned(BasePtr->getOperand(0), DAG)) {
+      SDValue NewBasePtr = BasePtr->getOperand(0);
+      Offset = cast<ConstantSDNode>(BasePtr->getOperand(1))->getSExtValue();
+      return lowerLoadWordFromAlignedBasePlusOffset(DL, Chain, NewBasePtr,
+                                                    Offset, DAG);
+    }
+    if (TLI.isGAPlusOffset(BasePtr.getNode(), GV, Offset) &&
+        MinAlign(GV->getAlignment(), 4) == 4) {
+      SDValue NewBasePtr = DAG.getGlobalAddress(GV, DL,
+                                                BasePtr->getValueType(0));
+      return lowerLoadWordFromAlignedBasePlusOffset(DL, Chain, NewBasePtr,
+                                                    Offset, DAG);
     }
-    // Lower to
-    // ldw low, base[offset >> 2]
-    // ldw high, base[(offset >> 2) + 1]
-    // shr low_shifted, low, (offset & 0x3) * 8
-    // shl high_shifted, high, 32 - (offset & 0x3) * 8
-    // or result, low_shifted, high_shifted
-    SDValue LowOffset = DAG.getConstant(Offset & ~0x3, MVT::i32);
-    SDValue HighOffset = DAG.getConstant((Offset & ~0x3) + 4, MVT::i32);
-    SDValue LowShift = DAG.getConstant((Offset & 0x3) * 8, MVT::i32);
-    SDValue HighShift = DAG.getConstant(32 - (Offset & 0x3) * 8, MVT::i32);
-
-    SDValue LowAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, Base, LowOffset);
-    SDValue HighAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, Base, HighOffset);
-
-    SDValue Low = DAG.getLoad(getPointerTy(), DL, Chain,
-                              LowAddr, MachinePointerInfo(),
-                              false, false, false, 0);
-    SDValue High = DAG.getLoad(getPointerTy(), DL, Chain,
-                               HighAddr, MachinePointerInfo(),
-                               false, false, false, 0);
-    SDValue LowShifted = DAG.getNode(ISD::SRL, DL, MVT::i32, Low, LowShift);
-    SDValue HighShifted = DAG.getNode(ISD::SHL, DL, MVT::i32, High, HighShift);
-    SDValue Result = DAG.getNode(ISD::OR, DL, MVT::i32, LowShifted, HighShifted);
-    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Low.getValue(1),
-                             High.getValue(1));
-    SDValue Ops[] = { Result, Chain };
-    return DAG.getMergeValues(Ops, 2, DL);
   }
 
   if (LD->getAlignment() == 2) {
diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h
index 8d258f5..c7dfa26 100644
--- a/lib/Target/XCore/XCoreISelLowering.h
+++ b/lib/Target/XCore/XCoreISelLowering.h
@@ -133,6 +133,9 @@ namespace llvm {
     SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
     SDValue getGlobalAddressWrapper(SDValue GA, const GlobalValue *GV,
                                     SelectionDAG &DAG) const;
+    SDValue lowerLoadWordFromAlignedBasePlusOffset(DebugLoc DL, SDValue Chain,
+                                                   SDValue Base, int64_t Offset,
+                                                   SelectionDAG &DAG) const;
 
     // Lower Operand specifics
     SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td
index 03653cb..587166c 100644
--- a/lib/Target/XCore/XCoreInstrInfo.td
+++ b/lib/Target/XCore/XCoreInstrInfo.td
@@ -168,21 +168,20 @@ def ldawb : PatFrag<(ops node:$addr, node:$offset),
                      (sub node:$addr, (shl node:$offset, 2))>;
 
 // Instruction operand types
-def calltarget  : Operand<i32>;
+def pcrel_imm  : Operand<i32>;
+def pcrel_imm_neg  : Operand<i32> {
+  let DecoderMethod = "DecodeNegImmOperand";
+}
 def brtarget : Operand<OtherVT>;
-def pclabel : Operand<i32>;
+def brtarget_neg : Operand<OtherVT> {
+  let DecoderMethod = "DecodeNegImmOperand";
+}
 
 // Addressing modes
 def ADDRspii : ComplexPattern<i32, 2, "SelectADDRspii", [add, frameindex], []>;
-def ADDRdpii : ComplexPattern<i32, 2, "SelectADDRdpii", [add, dprelwrapper],
-                 []>;
-def ADDRcpii : ComplexPattern<i32, 2, "SelectADDRcpii", [add, cprelwrapper],
-                 []>;
 
 // Address operands
 def MEMii : Operand<i32> {
-  let PrintMethod = "printMemOperand";
-  let DecoderMethod = "DecodeMEMiiOperand";
   let MIOperandInfo = (ops i32imm, i32imm);
 }
 
@@ -274,10 +273,10 @@ multiclass FRU6_LRU6_branch<bits<6> opc, string OpcStr> {
 }
 
 multiclass FRU6_LRU6_backwards_branch<bits<6> opc, string OpcStr> {
-  def _ru6: _FRU6<opc, (outs), (ins GRRegs:$a, brtarget:$b),
-                  !strconcat(OpcStr, " $a, -$b"), []>;
-  def _lru6: _FLRU6<opc, (outs), (ins GRRegs:$a, brtarget:$b),
-                    !strconcat(OpcStr, " $a, -$b"), []>;
+  def _ru6: _FRU6<opc, (outs), (ins GRRegs:$a, brtarget_neg:$b),
+                  !strconcat(OpcStr, " $a, $b"), []>;
+  def _lru6: _FLRU6<opc, (outs), (ins GRRegs:$a, brtarget_neg:$b),
+                    !strconcat(OpcStr, " $a, $b"), []>;
 }
 
 multiclass FRU6_LRU6_cp<bits<6> opc, string OpcStr> {
@@ -515,29 +514,29 @@ def LMUL_l6r : _FL6R<
 
 //let Uses = [DP] in ...
 let neverHasSideEffects = 1, isReMaterializable = 1 in
-def LDAWDP_ru6: _FRU6<0b011000, (outs RRegs:$a), (ins MEMii:$b),
+def LDAWDP_ru6: _FRU6<0b011000, (outs RRegs:$a), (ins i32imm:$b),
                       "ldaw $a, dp[$b]", []>;
 
 let isReMaterializable = 1 in                    
-def LDAWDP_lru6: _FLRU6<0b011000, (outs RRegs:$a), (ins MEMii:$b),
+def LDAWDP_lru6: _FLRU6<0b011000, (outs RRegs:$a), (ins i32imm:$b),
                         "ldaw $a, dp[$b]",
-                        [(set RRegs:$a, ADDRdpii:$b)]>;
+                        [(set RRegs:$a, (dprelwrapper tglobaladdr:$b))]>;
 
 let mayLoad=1 in
-def LDWDP_ru6: _FRU6<0b010110, (outs RRegs:$a), (ins MEMii:$b),
+def LDWDP_ru6: _FRU6<0b010110, (outs RRegs:$a), (ins i32imm:$b),
                      "ldw $a, dp[$b]", []>;
 
-def LDWDP_lru6: _FLRU6<0b010110, (outs RRegs:$a), (ins MEMii:$b),
+def LDWDP_lru6: _FLRU6<0b010110, (outs RRegs:$a), (ins i32imm:$b),
                        "ldw $a, dp[$b]",
-                       [(set RRegs:$a, (load ADDRdpii:$b))]>;
+                       [(set RRegs:$a, (load (dprelwrapper tglobaladdr:$b)))]>;
 
 let mayStore=1 in
-def STWDP_ru6 : _FRU6<0b010100, (outs), (ins RRegs:$a, MEMii:$b),
+def STWDP_ru6 : _FRU6<0b010100, (outs), (ins RRegs:$a, i32imm:$b),
                       "stw $a, dp[$b]", []>;
 
-def STWDP_lru6 : _FLRU6<0b010100, (outs), (ins RRegs:$a, MEMii:$b),
+def STWDP_lru6 : _FLRU6<0b010100, (outs), (ins RRegs:$a, i32imm:$b),
                         "stw $a, dp[$b]",
-                        [(store RRegs:$a, ADDRdpii:$b)]>;
+                        [(store RRegs:$a, (dprelwrapper tglobaladdr:$b))]>;
 
 //let Uses = [CP] in ..
 let mayLoad = 1, isReMaterializable = 1, neverHasSideEffects = 1 in
@@ -615,9 +614,9 @@ let Uses = [R11], isCall=1 in
 defm BLAT : FU6_LU6_np<0b0111001101, "blat">;
 
 let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
-def BRBU_u6 : _FU6<0b0111011100, (outs), (ins brtarget:$a), "bu -$a", []>;
+def BRBU_u6 : _FU6<0b0111011100, (outs), (ins brtarget_neg:$a), "bu $a", []>;
 
-def BRBU_lu6 : _FLU6<0b0111011100, (outs), (ins brtarget:$a), "bu -$a", []>;
+def BRBU_lu6 : _FLU6<0b0111011100, (outs), (ins brtarget_neg:$a), "bu $a", []>;
 
 def BRFU_u6 : _FU6<0b0111001100, (outs), (ins brtarget:$a), "bu $a", []>;
 
@@ -626,12 +625,12 @@ def BRFU_lu6 : _FLU6<0b0111001100, (outs), (ins brtarget:$a), "bu $a", []>;
 
 //let Uses = [CP] in ...
 let Defs = [R11], neverHasSideEffects = 1, isReMaterializable = 1 in
-def LDAWCP_u6: _FU6<0b0111111101, (outs), (ins MEMii:$a), "ldaw r11, cp[$a]",
+def LDAWCP_u6: _FU6<0b0111111101, (outs), (ins i32imm:$a), "ldaw r11, cp[$a]",
                     []>;
 
 let Defs = [R11], isReMaterializable = 1 in
-def LDAWCP_lu6: _FLU6<0b0111111101, (outs), (ins MEMii:$a), "ldaw r11, cp[$a]",
-                      [(set R11, ADDRcpii:$a)]>;
+def LDAWCP_lu6: _FLU6<0b0111111101, (outs), (ins i32imm:$a), "ldaw r11, cp[$a]",
+                      [(set R11, (cprelwrapper tglobaladdr:$a))]>;
 
 let Defs = [R11] in
 defm GETSR : FU6_LU6_np<0b0111111100, "getsr r11,">;
@@ -658,16 +657,26 @@ defm KRESTSP : FU6_LU6_np<0b0111101111, "krestsp">;
 
 // U10
 
-let Defs = [R11], isReMaterializable = 1, neverHasSideEffects = 1 in
-def LDAPF_u10 : _FU10<0b110110, (outs), (ins i32imm:$a), "ldap r11, $a", []>;
+let Defs = [R11], isReMaterializable = 1 in {
+let neverHasSideEffects = 1 in
+def LDAPF_u10 : _FU10<0b110110, (outs), (ins pcrel_imm:$a), "ldap r11, $a", []>;
+
+def LDAPF_lu10 : _FLU10<0b110110, (outs), (ins pcrel_imm:$a), "ldap r11, $a",
+                        [(set R11, (pcrelwrapper tglobaladdr:$a))]>;
 
-let Defs = [R11], isReMaterializable = 1 in
-def LDAPF_lu10 : _FLU10<0b110110, (outs), (ins i32imm:$a), "ldap r11, $a",
+let neverHasSideEffects = 1 in
+def LDAPB_u10 : _FU10<0b110111, (outs), (ins pcrel_imm_neg:$a), "ldap r11, $a",
+                      []>;
+
+let neverHasSideEffects = 1 in
+def LDAPB_lu10 : _FLU10<0b110111, (outs), (ins pcrel_imm_neg:$a),
+                        "ldap r11, $a",
                         [(set R11, (pcrelwrapper tglobaladdr:$a))]>;
 
-let Defs = [R11], isReMaterializable = 1, isCodeGenOnly = 1 in
-def LDAPF_lu10_ba : _FLU10<0b110110, (outs), (ins i32imm:$a), "ldap r11, $a",
+let isCodeGenOnly = 1 in
+def LDAPF_lu10_ba : _FLU10<0b110110, (outs), (ins pcrel_imm:$a), "ldap r11, $a",
                            [(set R11, (pcrelwrapper tblockaddress:$a))]>;
+}
 
 let isCall=1,
 // All calls clobber the link register and the non-callee-saved registers:
@@ -676,11 +685,15 @@ def BLACP_u10 : _FU10<0b111000, (outs), (ins i32imm:$a), "bla cp[$a]", []>;
 
 def BLACP_lu10 : _FLU10<0b111000, (outs), (ins i32imm:$a), "bla cp[$a]", []>;
 
-def BLRF_u10 : _FU10<0b110100, (outs), (ins calltarget:$a), "bl $a",
+def BLRF_u10 : _FU10<0b110100, (outs), (ins pcrel_imm:$a), "bl $a",
                      [(XCoreBranchLink immU10:$a)]>;
 
-def BLRF_lu10 : _FLU10<0b110100, (outs), (ins calltarget:$a), "bl $a",
+def BLRF_lu10 : _FLU10<0b110100, (outs), (ins pcrel_imm:$a), "bl $a",
                        [(XCoreBranchLink immU20:$a)]>;
+
+def BLRB_u10 : _FU10<0b110101, (outs), (ins pcrel_imm_neg:$a), "bl $a", []>;
+
+def BLRB_lu10 : _FLU10<0b110101, (outs), (ins pcrel_imm_neg:$a), "bl $a", []>;
 }
 
 let Defs = [R11], mayLoad = 1, isReMaterializable = 1,
diff --git a/lib/Target/XCore/XCoreLowerThreadLocal.cpp b/lib/Target/XCore/XCoreLowerThreadLocal.cpp
new file mode 100644
index 0000000..2e328b4
--- /dev/null
+++ b/lib/Target/XCore/XCoreLowerThreadLocal.cpp
@@ -0,0 +1,145 @@
+//===-- XCoreLowerThreadLocal - Lower thread local variables --------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file contains a pass that lowers thread local variables on the
+///        XCore.
+///
+//===----------------------------------------------------------------------===//
+
+#include "XCore.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+
+#define DEBUG_TYPE "xcore-lower-thread-local"
+
+using namespace llvm;
+
+static cl::opt<unsigned> MaxThreads(
+  "xcore-max-threads", cl::Optional,
+  cl::desc("Maximum number of threads (for emulation thread-local storage)"),
+  cl::Hidden, cl::value_desc("number"), cl::init(8));
+
+namespace {
+  /// Lowers thread local variables on the XCore. Each thread local variable is
+  /// expanded to an array of n elements indexed by the thread ID where n is the
+  /// fixed number of hardware threads supported by the device.
+  struct XCoreLowerThreadLocal : public ModulePass {
+    static char ID;
+
+    XCoreLowerThreadLocal() : ModulePass(ID) {
+      initializeXCoreLowerThreadLocalPass(*PassRegistry::getPassRegistry());
+    }
+
+    bool lowerGlobal(GlobalVariable *GV);
+
+    bool runOnModule(Module &M);
+  };
+}
+
+char XCoreLowerThreadLocal::ID = 0;
+
+INITIALIZE_PASS(XCoreLowerThreadLocal, "xcore-lower-thread-local",
+                "Lower thread local variables", false, false)
+
+ModulePass *llvm::createXCoreLowerThreadLocalPass() {
+  return new XCoreLowerThreadLocal();
+}
+
+static ArrayType *createLoweredType(Type *OriginalType) {
+  return ArrayType::get(OriginalType, MaxThreads);
+}
+
+/// Build the initializer for the lowered global: an array constant of type
+/// \p NewType in which each of the MaxThreads elements is
+/// \p OriginalInitializer.
+static Constant *
+createLoweredInitializer(ArrayType *NewType, Constant *OriginalInitializer) {
+  SmallVector<Constant *, 8> Elements(MaxThreads, OriginalInitializer);
+  return ConstantArray::get(NewType, Elements);
+}
+
+/// Return true if any user of \p GV is something other than an instruction.
+static bool hasNonInstructionUse(GlobalVariable *GV) {
+  Value::use_iterator UI = GV->use_begin(), E = GV->use_end();
+  while (UI != E && isa<Instruction>(*UI))
+    ++UI;
+  // Stopping before the end means a non-instruction user was found.
+  return UI != E;
+}
+
+/// Return true if \p Ty is an array type with no elements.
+static bool isZeroLengthArray(Type *Ty) {
+  return isa<ArrayType>(Ty) && cast<ArrayType>(Ty)->getNumElements() == 0;
+}
+
+/// Replace the thread local \p GV with an array holding one copy per thread,
+/// indexed at each use by the xcore_getid intrinsic. Returns true on change.
+bool XCoreLowerThreadLocal::lowerGlobal(GlobalVariable *GV) {
+  if (!GV->isThreadLocal())
+    return false;
+
+  // Skip globals that we can't lower and leave it for the backend to error.
+  // The size checks apply to the value type: GV->getType() is the pointer to
+  // the global, which is always sized and is never an array.
+  Type *ValueTy = GV->getType()->getElementType();
+  if (hasNonInstructionUse(GV) || !ValueTy->isSized() ||
+      isZeroLengthArray(ValueTy))
+    return false;
+
+  // Create replacement global. A declaration has no initializer to replicate.
+  Module *M = GV->getParent();
+  ArrayType *NewType = createLoweredType(ValueTy);
+  Constant *NewInitializer = 0;
+  if (GV->hasInitializer())
+    NewInitializer = createLoweredInitializer(NewType, GV->getInitializer());
+  GlobalVariable *NewGV =
+    new GlobalVariable(*M, NewType, GV->isConstant(), GV->getLinkage(),
+                       NewInitializer, "", 0, GlobalVariable::NotThreadLocal,
+                       GV->getType()->getAddressSpace(),
+                       GV->isExternallyInitialized());
+
+  // Update uses: every user addresses the slot selected by its thread ID.
+  // NOTE(review): a PHI user would get the call inserted ahead of it - confirm.
+  Value *Zero = Constant::getNullValue(Type::getInt64Ty(M->getContext()));
+  SmallVector<User *, 16> Users(GV->use_begin(), GV->use_end());
+  for (unsigned I = 0, E = Users.size(); I != E; ++I) {
+    IRBuilder<> Builder(cast<Instruction>(Users[I]));
+    Function *GetID = Intrinsic::getDeclaration(M, Intrinsic::xcore_getid);
+    Value *Indices[] = { Zero, Builder.CreateCall(GetID) };
+    Users[I]->replaceUsesOfWith(GV, Builder.CreateInBoundsGEP(NewGV, Indices));
+  }
+
+  // Remove old global.
+  NewGV->takeName(GV);
+  GV->eraseFromParent();
+  return true;
+}
+
+/// Lower every thread local global in \p M; returns true on any change.
+bool XCoreLowerThreadLocal::runOnModule(Module &M) {
+  // Collect the candidates first: lowerGlobal() adds and erases globals, so
+  // the module's global list must not be walked while lowering.
+  SmallVector<GlobalVariable *, 16> ThreadLocalGlobals;
+  for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
+       GVI != E; ++GVI)
+    if (GVI->isThreadLocal())
+      ThreadLocalGlobals.push_back(&*GVI);
+
+  bool MadeChange = false;
+  for (unsigned I = 0, E = ThreadLocalGlobals.size(); I != E; ++I)
+    MadeChange |= lowerGlobal(ThreadLocalGlobals[I]);
+  return MadeChange;
+}
diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp
index 28c3d12..07e5fff 100644
--- a/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -46,6 +46,7 @@ public:
     return getTM<XCoreTargetMachine>();
   }
 
+  virtual bool addPreISel();
   virtual bool addInstSelector();
 };
 } // namespace
@@ -54,6 +55,11 @@ TargetPassConfig *XCoreTargetMachine::createPassConfig(PassManagerBase &PM) {
   return new XCorePassConfig(this, PM);
 }
 
+bool XCorePassConfig::addPreISel() {
+  addPass(createXCoreLowerThreadLocalPass());
+  return false;
+}
+
 bool XCorePassConfig::addInstSelector() {
   addPass(createXCoreISelDag(getXCoreTargetMachine(), getOptLevel()));
   return false;
diff --git a/lib/Target/XCore/XCoreTargetObjectFile.cpp b/lib/Target/XCore/XCoreTargetObjectFile.cpp
index 8203899..88e3bfd 100644
--- a/lib/Target/XCore/XCoreTargetObjectFile.cpp
+++ b/lib/Target/XCore/XCoreTargetObjectFile.cpp
@@ -57,9 +57,4 @@ void XCoreTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){
                       ELF::SHF_ALLOC |
                       ELF::XCORE_SHF_CP_SECTION,
                       SectionKind::getReadOnlyWithRel());
-
-  // Dynamic linking is not supported. Data with relocations is placed in the
-  // same section as data without relocations.
-  DataRelSection = DataRelLocalSection = DataSection;
-  DataRelROSection = DataRelROLocalSection = ReadOnlySection;
 }
diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp
index 8336d3a..a7bf188 100644
--- a/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/lib/Transforms/IPO/ConstantMerge.cpp
@@ -27,6 +27,7 @@
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
 #include "llvm/Pass.h"
 using namespace llvm;
 
@@ -66,13 +67,13 @@ ModulePass *llvm::createConstantMergePass() { return new ConstantMerge(); }
 static void FindUsedValues(GlobalVariable *LLVMUsed,
                            SmallPtrSet<const GlobalValue*, 8> &UsedValues) {
   if (LLVMUsed == 0) return;
-  ConstantArray *Inits = dyn_cast<ConstantArray>(LLVMUsed->getInitializer());
-  if (Inits == 0) return;
-  
-  for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i)
-    if (GlobalValue *GV = 
-        dyn_cast<GlobalValue>(Inits->getOperand(i)->stripPointerCasts()))
-      UsedValues.insert(GV);
+  ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer());
+
+  for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i) {
+    Value *Operand = Inits->getOperand(i)->stripPointerCastsNoFollowAliases();
+    GlobalValue *GV = cast<GlobalValue>(Operand);
+    UsedValues.insert(GV);
+  }
 }
 
 // True if A is better than B.
diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp
index dc99492..201f320 100644
--- a/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/lib/Transforms/IPO/GlobalDCE.cpp
@@ -42,6 +42,7 @@ namespace {
 
   private:
     SmallPtrSet<GlobalValue*, 32> AliveGlobals;
+    SmallPtrSet<Constant *, 8> SeenConstants;
 
     /// GlobalIsNeeded - mark the specific global value as needed, and
     /// recursively mark anything that it uses as also needed.
@@ -151,6 +152,7 @@ bool GlobalDCE::runOnModule(Module &M) {
 
   // Make sure that all memory is released
   AliveGlobals.clear();
+  SeenConstants.clear();
 
   return Changed;
 }
@@ -190,12 +192,15 @@ void GlobalDCE::GlobalIsNeeded(GlobalValue *G) {
 void GlobalDCE::MarkUsedGlobalsAsNeeded(Constant *C) {
   if (GlobalValue *GV = dyn_cast<GlobalValue>(C))
     return GlobalIsNeeded(GV);
-  
+
   // Loop over all of the operands of the constant, adding any globals they
   // use to the list of needed globals.
-  for (User::op_iterator I = C->op_begin(), E = C->op_end(); I != E; ++I)
-    if (Constant *OpC = dyn_cast<Constant>(*I))
-      MarkUsedGlobalsAsNeeded(OpC);
+  for (User::op_iterator I = C->op_begin(), E = C->op_end(); I != E; ++I) {
+    // If we've already processed this constant there's no need to do it again.
+    Constant *Op = dyn_cast<Constant>(*I);
+    if (Op && SeenConstants.insert(Op))
+      MarkUsedGlobalsAsNeeded(Op);
+  }
 }
 
 // RemoveUnusedGlobalValue - Loop over all of the uses of the specified
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index b035a82..0ef900e 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -3041,6 +3041,105 @@ bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) {
   return true;
 }
 
+/// Return the first untried use of \p V whose user is not the llvm.used or
+/// llvm.compiler_used global, recording it in \p Tried; use_end() if none.
+static Value::use_iterator getFirst(Value *V, SmallPtrSet<Use*, 8> &Tried) {
+  Value::use_iterator UI = V->use_begin(), UE = V->use_end();
+  for (; UI != UE; ++UI) {
+    Use *TheUse = &UI.getUse();
+    if (Tried.count(TheUse))
+      continue;
+
+    // Uses held by the special "used" lists are skipped and left unmarked.
+    GlobalVariable *UserGV = dyn_cast<GlobalVariable>(*UI);
+    bool IsUsedList = UserGV && UserGV->hasName() &&
+                      (UserGV->getName() == "llvm.used" ||
+                       UserGV->getName() == "llvm.compiler_used");
+    if (IsUsedList)
+      continue;
+    Tried.insert(TheUse);
+    return UI;
+  }
+  return UE;
+}
+
+static bool replaceAllNonLLVMUsedUsesWith(Constant *Old, Constant *New);
+
+/// Move the uses of \p Old that replaceAllNonLLVMUsedUsesWith handles over to
+/// \p Replacement, then destroy whichever constant is left without uses.
+static bool replaceAndCleanUp(Constant *Old, Constant *Replacement) {
+  bool Ret = replaceAllNonLLVMUsedUsesWith(Old, Replacement);
+  if (Replacement->use_empty())
+    Replacement->destroyConstant();
+  if (Old->use_empty())
+    Old->destroyConstant();
+  return Ret;
+}
+
+/// Rebuild the array \p CA with every \p From operand replaced by \p ToV.
+static bool replaceUsesOfWithOnConstant(ConstantArray *CA, Value *From,
+                                        Value *ToV, Use *U) {
+  Constant *To = cast<Constant>(ToV);
+  SmallVector<Constant*, 8> Operands;
+  for (unsigned Idx = 0, N = CA->getNumOperands(); Idx != N; ++Idx) {
+    Constant *Cur = CA->getOperand(Idx);
+    Operands.push_back(Cur == From ? To : Cur);
+  }
+
+  Constant *Replacement = ConstantArray::get(CA->getType(), Operands);
+  assert(Replacement != CA && "CA didn't contain From!");
+  return replaceAndCleanUp(CA, Replacement);
+}
+
+/// Rebuild the expression \p CE with each \p From operand replaced by \p ToV.
+static bool replaceUsesOfWithOnConstant(ConstantExpr *CE, Value *From,
+                                        Value *ToV, Use *U) {
+  Constant *To = cast<Constant>(ToV);
+  SmallVector<Constant*, 8> Operands;
+  for (unsigned Idx = 0, N = CE->getNumOperands(); Idx != N; ++Idx) {
+    Constant *Cur = CE->getOperand(Idx);
+    Operands.push_back(Cur == From ? To : Cur);
+  }
+
+  Constant *Replacement = CE->getWithOperands(Operands);
+  assert(Replacement != CE && "CE didn't contain From!");
+  return replaceAndCleanUp(CE, Replacement);
+}
+
+static bool replaceUsesOfWithOnConstant(Constant *C, Value *From, Value *To,
+                                        Use *U) {
+  if (ConstantArray *CA = dyn_cast<ConstantArray>(C))
+    return replaceUsesOfWithOnConstant(CA, From, To, U);
+  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+    return replaceUsesOfWithOnConstant(CE, From, To, U);
+  C->replaceUsesOfWithOnConstant(From, To, U);
+  return true;
+}
+
+/// Point every use of \p Old that getFirst() hands back at \p New instead.
+/// Returns true if any rewrite was performed.
+static bool replaceAllNonLLVMUsedUsesWith(Constant *Old, Constant *New) {
+  SmallPtrSet<Use*, 8> Visited;
+  bool Changed = false;
+  while (true) {
+    Value::use_iterator UI = getFirst(Old, Visited);
+    if (UI == Old->use_end())
+      return Changed;
+    Use &TheUse = UI.getUse();
+
+    // Constants other than GlobalValues are uniqued, so they cannot be edited
+    // in place; they are rebuilt through replaceUsesOfWithOnConstant instead.
+    Constant *UserC = dyn_cast<Constant>(TheUse.getUser());
+    if (UserC && !isa<GlobalValue>(UserC)) {
+      Changed |= replaceUsesOfWithOnConstant(UserC, Old, New, &TheUse);
+      continue;
+    }
+
+    TheUse.set(New);
+    Changed = true;
+  }
+}
+
 bool GlobalOpt::OptimizeGlobalAliases(Module &M) {
   bool Changed = false;
 
@@ -3060,11 +3159,12 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) {
     bool hasOneUse = Target->hasOneUse() && Aliasee->hasOneUse();
 
     // Make all users of the alias use the aliasee instead.
-    if (!J->use_empty()) {
-      J->replaceAllUsesWith(Aliasee);
+    if (replaceAllNonLLVMUsedUsesWith(J, Aliasee)) {
       ++NumAliasesResolved;
       Changed = true;
     }
+    if (!J->use_empty())
+      continue;
 
     // If the alias is externally visible, we may still be able to simplify it.
     if (!J->hasLocalLinkage()) {
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
index 892100f..4ce749c 100644
--- a/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -72,6 +72,15 @@ STATISTIC(NumThunksWritten, "Number of thunks generated");
 STATISTIC(NumAliasesWritten, "Number of aliases generated");
 STATISTIC(NumDoubleWeak, "Number of new functions created");
 
+/// Returns the type id to feed into the function hash.
+/// Pointer types are reported as IntegerTyID because the comparison logic
+/// below treats pointers and integers of the same size as equal, so both
+/// must contribute the same value to the hash.
+static Type::TypeID getTypeIDForHash(Type *Ty) {
+  // Every non-pointer type hashes under its own type id.
+  return Ty->isPointerTy() ? Type::IntegerTyID : Ty->getTypeID();
+}
+
 /// Creates a hash-code for the function which is the same for any two
 /// functions that will compare equal, without looking at the instructions
 /// inside the function.
@@ -83,9 +92,9 @@ static unsigned profileFunction(const Function *F) {
   ID.AddInteger(F->getCallingConv());
   ID.AddBoolean(F->hasGC());
   ID.AddBoolean(FTy->isVarArg());
-  ID.AddInteger(FTy->getReturnType()->getTypeID());
+  ID.AddInteger(getTypeIDForHash(FTy->getReturnType()));
   for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
-    ID.AddInteger(FTy->getParamType(i)->getTypeID());
+    ID.AddInteger(getTypeIDForHash(FTy->getParamType(i)));
   return ID.ComputeHash();
 }
 
@@ -200,8 +209,7 @@ private:
 
 // Any two pointers in the same address space are equivalent, intptr_t and
 // pointers are equivalent. Otherwise, standard type equivalence rules apply.
-bool FunctionComparator::isEquivalentType(Type *Ty1,
-                                          Type *Ty2) const {
+bool FunctionComparator::isEquivalentType(Type *Ty1, Type *Ty2) const {
   if (Ty1 == Ty2)
     return true;
   if (Ty1->getTypeID() != Ty2->getTypeID()) {
@@ -740,7 +748,13 @@ void MergeFunctions::writeThunk(Function *F, Function *G) {
   if (NewG->getReturnType()->isVoidTy()) {
     Builder.CreateRetVoid();
   } else {
-    Builder.CreateRet(Builder.CreateBitCast(CI, NewG->getReturnType()));
+    Type *RetTy = NewG->getReturnType();
+    if (CI->getType()->isIntegerTy() && RetTy->isPointerTy())
+      Builder.CreateRet(Builder.CreateIntToPtr(CI, RetTy));
+    else if (CI->getType()->isPointerTy() && RetTy->isIntegerTy())
+      Builder.CreateRet(Builder.CreatePtrToInt(CI, RetTy));
+    else
+      Builder.CreateRet(Builder.CreateBitCast(CI, RetTy));
   }
 
   NewG->copyAttributesFrom(G);
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 027a9f2..986c0b8 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -33,7 +33,12 @@ RunLoopVectorization("vectorize-loops",
                      cl::desc("Run the Loop vectorization passes"));
 
 static cl::opt<bool>
-RunBBVectorization("vectorize", cl::desc("Run the BB vectorization passes"));
+RunSLPVectorization("vectorize-slp",
+                    cl::desc("Run the SLP vectorization passes"));
+
+static cl::opt<bool>
+RunBBVectorization("vectorize-slp-aggressive",
+                    cl::desc("Run the BB vectorization passes"));
 
 static cl::opt<bool>
 UseGVNAfterVectorization("use-gvn-after-vectorization",
@@ -52,7 +57,8 @@ PassManagerBuilder::PassManagerBuilder() {
     DisableSimplifyLibCalls = false;
     DisableUnitAtATime = false;
     DisableUnrollLoops = false;
-    Vectorize = RunBBVectorization;
+    BBVectorize = RunBBVectorization;
+    SLPVectorize = RunSLPVectorization;
     LoopVectorize = RunLoopVectorization;
 }
 
@@ -207,7 +213,10 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
 
   addExtensionsToPM(EP_ScalarOptimizerLate, MPM);
 
-  if (Vectorize) {
+  if (SLPVectorize)
+    MPM.add(createSLPVectorizerPass());     // Vectorize parallel scalar chains.
+
+  if (BBVectorize) {
     MPM.add(createBBVectorizePass());
     MPM.add(createInstructionCombiningPass());
     if (OptLevel > 1 && UseGVNAfterVectorization)
@@ -321,6 +330,14 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
   PM.add(createGlobalDCEPass());
 }
 
+inline PassManagerBuilder *unwrap(LLVMPassManagerBuilderRef P) {
+    return reinterpret_cast<PassManagerBuilder*>(P);
+}
+
+inline LLVMPassManagerBuilderRef wrap(PassManagerBuilder *P) {
+  return reinterpret_cast<LLVMPassManagerBuilderRef>(P);
+}
+
 LLVMPassManagerBuilderRef LLVMPassManagerBuilderCreate() {
   PassManagerBuilder *PMB = new PassManagerBuilder();
   return wrap(PMB);
diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp
index 5f8681f..3396f79 100644
--- a/lib/Transforms/IPO/StripSymbols.cpp
+++ b/lib/Transforms/IPO/StripSymbols.cpp
@@ -195,10 +195,9 @@ static void findUsedValues(GlobalVariable *LLVMUsed,
                            SmallPtrSet<const GlobalValue*, 8> &UsedValues) {
   if (LLVMUsed == 0) return;
   UsedValues.insert(LLVMUsed);
-  
-  ConstantArray *Inits = dyn_cast<ConstantArray>(LLVMUsed->getInitializer());
-  if (Inits == 0) return;
-  
+
+  ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer());
+
   for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i)
     if (GlobalValue *GV = 
           dyn_cast<GlobalValue>(Inits->getOperand(i)->stripPointerCasts()))
diff --git a/lib/Transforms/InstCombine/CMakeLists.txt b/lib/Transforms/InstCombine/CMakeLists.txt
index 72cfe2c..a25696e 100644
--- a/lib/Transforms/InstCombine/CMakeLists.txt
+++ b/lib/Transforms/InstCombine/CMakeLists.txt
@@ -9,7 +9,7 @@ add_llvm_library(LLVMInstCombine
   InstCombineMulDivRem.cpp
   InstCombinePHI.cpp
   InstCombineSelect.cpp
-  InstCombineShifts.cpp 
+  InstCombineShifts.cpp
   InstCombineSimplifyDemanded.cpp
   InstCombineVectorOps.cpp
   )
diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h
index 1f6a3a5..2a36074 100644
--- a/lib/Transforms/InstCombine/InstCombine.h
+++ b/lib/Transforms/InstCombine/InstCombine.h
@@ -233,6 +233,7 @@ private:
   Instruction *transformSExtICmp(ICmpInst *ICI, Instruction &CI);
   bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS);
   Value *EmitGEPOffset(User *GEP);
+  Instruction *scalarizePHI(ExtractElementInst &EI, PHINode *PN);
 
 public:
   // InsertNewInstBefore - insert an instruction New before instruction Old
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 7595da0..166f8df 100644
--- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -24,9 +24,9 @@ namespace {
   /// Class representing coefficient of floating-point addend.
   /// This class needs to be highly efficient, which is especially true for
   /// the constructor. As of I write this comment, the cost of the default
-  /// constructor is merely 4-byte-store-zero (Assuming compiler is able to 
+  /// constructor is merely 4-byte-store-zero (Assuming compiler is able to
   /// perform write-merging).
-  /// 
+  ///
   class FAddendCoef {
   public:
     // The constructor has to initialize a APFloat, which is uncessary for
@@ -37,31 +37,31 @@ namespace {
     //
     FAddendCoef() : IsFp(false), BufHasFpVal(false), IntVal(0) {}
     ~FAddendCoef();
-  
+
     void set(short C) {
       assert(!insaneIntVal(C) && "Insane coefficient");
       IsFp = false; IntVal = C;
     }
-  
+
     void set(const APFloat& C);
 
     void negate();
-  
+
     bool isZero() const { return isInt() ? !IntVal : getFpVal().isZero(); }
     Value *getValue(Type *) const;
-  
+
     // If possible, don't define operator+/operator- etc because these
     // operators inevitably call FAddendCoef's constructor which is not cheap.
     void operator=(const FAddendCoef &A);
     void operator+=(const FAddendCoef &A);
     void operator-=(const FAddendCoef &A);
     void operator*=(const FAddendCoef &S);
-  
+
     bool isOne() const { return isInt() && IntVal == 1; }
     bool isTwo() const { return isInt() && IntVal == 2; }
     bool isMinusOne() const { return isInt() && IntVal == -1; }
     bool isMinusTwo() const { return isInt() && IntVal == -2; }
-  
+
   private:
     bool insaneIntVal(int V) { return V > 4 || V < -4; }
     APFloat *getFpValPtr(void)
@@ -74,26 +74,28 @@ namespace {
       return *getFpValPtr();
     }
 
-    APFloat &getFpVal(void)
-      { assert(IsFp && BufHasFpVal && "Incorret state"); return *getFpValPtr(); }
-  
+    APFloat &getFpVal(void) {
+      assert(IsFp && BufHasFpVal && "Incorret state");
+      return *getFpValPtr();
+    }
+
     bool isInt() const { return !IsFp; }
 
     // If the coefficient is represented by an integer, promote it to a
-    // floating point. 
+    // floating point.
     void convertToFpType(const fltSemantics &Sem);
 
     // Construct an APFloat from a signed integer.
     // TODO: We should get rid of this function when APFloat can be constructed
-    //       from an *SIGNED* integer. 
+    //       from an *SIGNED* integer.
     APFloat createAPFloatFromInt(const fltSemantics &Sem, int Val);
   private:
 
     bool IsFp;
-  
+
     // True iff FpValBuf contains an instance of APFloat.
     bool BufHasFpVal;
-  
+
     // The integer coefficient of an individual addend is either 1 or -1,
     // and we try to simplify at most 4 addends from neighboring at most
     // two instructions. So the range of <IntVal> falls in [-4, 4]. APInt
@@ -102,7 +104,7 @@ namespace {
 
     AlignedCharArrayUnion<APFloat> FpValBuf;
   };
-  
+
   /// FAddend is used to represent floating-point addend. An addend is
   /// represented as <C, V>, where the V is a symbolic value, and C is a
   /// constant coefficient. A constant addend is represented as <C, 0>.
@@ -110,10 +112,10 @@ namespace {
   class FAddend {
   public:
     FAddend() { Val = 0; }
-  
+
     Value *getSymVal (void) const { return Val; }
     const FAddendCoef &getCoef(void) const { return Coeff; }
-  
+
     bool isConstant() const { return Val == 0; }
     bool isZero() const { return Coeff.isZero(); }
 
@@ -122,17 +124,17 @@ namespace {
       { Coeff.set(Coefficient); Val = V; }
     void set(const ConstantFP* Coefficient, Value *V)
       { Coeff.set(Coefficient->getValueAPF()); Val = V; }
-  
+
     void negate() { Coeff.negate(); }
-  
+
     /// Drill down the U-D chain one step to find the definition of V, and
     /// try to break the definition into one or two addends.
     static unsigned drillValueDownOneStep(Value* V, FAddend &A0, FAddend &A1);
-  
+
     /// Similar to FAddend::drillDownOneStep() except that the value being
     /// splitted is the addend itself.
     unsigned drillAddendDownOneStep(FAddend &Addend0, FAddend &Addend1) const;
-  
+
     void operator+=(const FAddend &T) {
       assert((Val == T.Val) && "Symbolic-values disagree");
       Coeff += T.Coeff;
@@ -140,12 +142,12 @@ namespace {
 
   private:
     void Scale(const FAddendCoef& ScaleAmt) { Coeff *= ScaleAmt; }
-  
+
     // This addend has the value of "Coeff * Val".
     Value *Val;
     FAddendCoef Coeff;
   };
-  
+
   /// FAddCombine is the class for optimizing an unsafe fadd/fsub along
   /// with its neighboring at most two instructions.
   ///
@@ -153,17 +155,17 @@ namespace {
   public:
     FAddCombine(InstCombiner::BuilderTy *B) : Builder(B), Instr(0) {}
     Value *simplify(Instruction *FAdd);
-  
+
   private:
     typedef SmallVector<const FAddend*, 4> AddendVect;
-  
+
     Value *simplifyFAdd(AddendVect& V, unsigned InstrQuota);
 
     Value *performFactorization(Instruction *I);
 
     /// Convert given addend to a Value
     Value *createAddendVal(const FAddend &A, bool& NeedNeg);
-    
+
     /// Return the number of instructions needed to emit the N-ary addition.
     unsigned calcInstrNumber(const AddendVect& Vect);
     Value *createFSub(Value *Opnd0, Value *Opnd1);
@@ -173,10 +175,10 @@ namespace {
     Value *createFNeg(Value *V);
     Value *createNaryFAdd(const AddendVect& Opnds, unsigned InstrQuota);
     void createInstPostProc(Instruction *NewInst);
-  
+
     InstCombiner::BuilderTy *Builder;
     Instruction *Instr;
-  
+
   private:
      // Debugging stuff are clustered here.
     #ifndef NDEBUG
@@ -188,7 +190,7 @@ namespace {
       void incCreateInstNum() {}
     #endif
   };
-} 
+}
 
 //===----------------------------------------------------------------------===//
 //
@@ -211,7 +213,7 @@ void FAddendCoef::set(const APFloat& C) {
   } else
     *P = C;
 
-  IsFp = BufHasFpVal = true; 
+  IsFp = BufHasFpVal = true;
 }
 
 void FAddendCoef::convertToFpType(const fltSemantics &Sem) {
@@ -225,7 +227,7 @@ void FAddendCoef::convertToFpType(const fltSemantics &Sem) {
     new(P) APFloat(Sem, 0 - IntVal);
     P->changeSign();
   }
-  IsFp = BufHasFpVal = true; 
+  IsFp = BufHasFpVal = true;
 }
 
 APFloat FAddendCoef::createAPFloatFromInt(const fltSemantics &Sem, int Val) {
@@ -254,14 +256,14 @@ void FAddendCoef::operator+=(const FAddendCoef &That) {
       getFpVal().add(That.getFpVal(), RndMode);
     return;
   }
-  
+
   if (isInt()) {
     const APFloat &T = That.getFpVal();
     convertToFpType(T.getSemantics());
     getFpVal().add(T, RndMode);
     return;
   }
-  
+
   APFloat &T = getFpVal();
   T.add(createAPFloatFromInt(T.getSemantics(), That.IntVal), RndMode);
 }
@@ -275,7 +277,7 @@ void FAddendCoef::operator-=(const FAddendCoef &That) {
       getFpVal().subtract(That.getFpVal(), RndMode);
     return;
   }
-  
+
   if (isInt()) {
     const APFloat &T = That.getFpVal();
     convertToFpType(T.getSemantics());
@@ -303,7 +305,7 @@ void FAddendCoef::operator*=(const FAddendCoef &That) {
     return;
   }
 
-  const fltSemantics &Semantic = 
+  const fltSemantics &Semantic =
     isInt() ? That.getFpVal().getSemantics() : getFpVal().getSemantics();
 
   if (isInt())
@@ -338,11 +340,11 @@ Value *FAddendCoef::getValue(Type *Ty) const {
 //  A - B                     <1, A>, <1,B>
 //  0 - B                     <-1, B>
 //  C * A,                    <C, A>
-//  A + C                     <1, A> <C, NULL> 
+//  A + C                     <1, A> <C, NULL>
 //  0 +/- 0                   <0, NULL> (corner case)
 //
 // Legend: A and B are not constant, C is constant
-// 
+//
 unsigned FAddend::drillValueDownOneStep
   (Value *Val, FAddend &Addend0, FAddend &Addend1) {
   Instruction *I = 0;
@@ -413,7 +415,7 @@ unsigned FAddend::drillAddendDownOneStep
     return 0;
 
   unsigned BreakNum = FAddend::drillValueDownOneStep(Val, Addend0, Addend1);
-  if (!BreakNum || Coeff.isOne()) 
+  if (!BreakNum || Coeff.isOne())
     return BreakNum;
 
   Addend0.Scale(Coeff);
@@ -435,10 +437,10 @@ unsigned FAddend::drillAddendDownOneStep
 Value *FAddCombine::performFactorization(Instruction *I) {
   assert((I->getOpcode() == Instruction::FAdd ||
           I->getOpcode() == Instruction::FSub) && "Expect add/sub");
-  
+
   Instruction *I0 = dyn_cast<Instruction>(I->getOperand(0));
   Instruction *I1 = dyn_cast<Instruction>(I->getOperand(1));
-  
+
   if (!I0 || !I1 || I0->getOpcode() != I1->getOpcode())
     return 0;
 
@@ -453,14 +455,14 @@ Value *FAddCombine::performFactorization(Instruction *I) {
   Value *Opnd1_0 = I1->getOperand(0);
   Value *Opnd1_1 = I1->getOperand(1);
 
-  //  Input Instr I       Factor   AddSub0  AddSub1 
+  //  Input Instr I       Factor   AddSub0  AddSub1
   //  ----------------------------------------------
   // (x*y) +/- (x*z)        x        y         z
   // (y/x) +/- (z/x)        x        y         z
   //
   Value *Factor = 0;
   Value *AddSub0 = 0, *AddSub1 = 0;
-  
+
   if (isMpy) {
     if (Opnd0_0 == Opnd1_0 || Opnd0_0 == Opnd1_1)
       Factor = Opnd0_0;
@@ -492,7 +494,7 @@ Value *FAddCombine::performFactorization(Instruction *I) {
 
   if (isMpy)
     return createFMul(Factor, NewAddSub);
- 
+
   return createFDiv(NewAddSub, Factor);
 }
 
@@ -506,7 +508,7 @@ Value *FAddCombine::simplify(Instruction *I) {
   assert((I->getOpcode() == Instruction::FAdd ||
           I->getOpcode() == Instruction::FSub) && "Expect add/sub");
 
-  // Save the instruction before calling other member-functions. 
+  // Save the instruction before calling other member-functions.
   Instr = I;
 
   FAddend Opnd0, Opnd1, Opnd0_0, Opnd0_1, Opnd1_0, Opnd1_1;
@@ -517,7 +519,7 @@ Value *FAddCombine::simplify(Instruction *I) {
   unsigned Opnd0_ExpNum = 0;
   unsigned Opnd1_ExpNum = 0;
 
-  if (!Opnd0.isConstant()) 
+  if (!Opnd0.isConstant())
     Opnd0_ExpNum = Opnd0.drillAddendDownOneStep(Opnd0_0, Opnd0_1);
 
   // Step 2: Expand the 2nd addend into Opnd1_0 and Opnd1_1.
@@ -539,7 +541,7 @@ Value *FAddCombine::simplify(Instruction *I) {
 
     Value *V0 = I->getOperand(0);
     Value *V1 = I->getOperand(1);
-    InstQuota = ((!isa<Constant>(V0) && V0->hasOneUse()) &&  
+    InstQuota = ((!isa<Constant>(V0) && V0->hasOneUse()) &&
                  (!isa<Constant>(V1) && V1->hasOneUse())) ? 2 : 1;
 
     if (Value *R = simplifyFAdd(AllOpnds, InstQuota))
@@ -579,7 +581,7 @@ Value *FAddCombine::simplify(Instruction *I) {
       return R;
   }
 
-  // step 6: Try factorization as the last resort, 
+  // step 6: Try factorization as the last resort,
   return performFactorization(I);
 }
 
@@ -588,7 +590,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
   unsigned AddendNum = Addends.size();
   assert(AddendNum <= 4 && "Too many addends");
 
-  // For saving intermediate results; 
+  // For saving intermediate results;
   unsigned NextTmpIdx = 0;
   FAddend TmpResult[3];
 
@@ -604,7 +606,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
   AddendVect SimpVect;
 
   // The outer loop works on one symbolic-value at a time. Suppose the input
-  // addends are : <a1, x>, <b1, y>, <a2, x>, <c1, z>, <b2, y>, ... 
+  // addends are : <a1, x>, <b1, y>, <a2, x>, <c1, z>, <b2, y>, ...
   // The symbolic-values will be processed in this order: x, y, z.
   //
   for (unsigned SymIdx = 0; SymIdx < AddendNum; SymIdx++) {
@@ -631,7 +633,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
       if (T && T->getSymVal() == Val) {
         // Set null such that next iteration of the outer loop will not process
         // this addend again.
-        Addends[SameSymIdx] = 0; 
+        Addends[SameSymIdx] = 0;
         SimpVect.push_back(T);
       }
     }
@@ -644,7 +646,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
         R += *SimpVect[Idx];
 
       // Pop all addends being folded and push the resulting folded addend.
-      SimpVect.resize(StartIdx); 
+      SimpVect.resize(StartIdx);
       if (Val != 0) {
         if (!R.isZero()) {
           SimpVect.push_back(&R);
@@ -657,7 +659,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
     }
   }
 
-  assert((NextTmpIdx <= sizeof(TmpResult)/sizeof(TmpResult[0]) + 1) && 
+  assert((NextTmpIdx <= sizeof(TmpResult)/sizeof(TmpResult[0]) + 1) &&
          "out-of-bound access");
 
   if (ConstAdd)
@@ -679,7 +681,7 @@ Value *FAddCombine::createNaryFAdd
   assert(!Opnds.empty() && "Expect at least one addend");
 
   // Step 1: Check if the # of instructions needed exceeds the quota.
-  // 
+  //
   unsigned InstrNeeded = calcInstrNumber(Opnds);
   if (InstrNeeded > InstrQuota)
     return 0;
@@ -700,7 +702,7 @@ Value *FAddCombine::createNaryFAdd
   // Iterate the addends, creating fadd/fsub using adjacent two addends.
   for (AddendVect::const_iterator I = Opnds.begin(), E = Opnds.end();
        I != E; I++) {
-    bool NeedNeg; 
+    bool NeedNeg;
     Value *V = createAddendVal(**I, NeedNeg);
     if (!LastVal) {
       LastVal = V;
@@ -726,7 +728,7 @@ Value *FAddCombine::createNaryFAdd
   }
 
   #ifndef NDEBUG
-    assert(CreateInstrNum == InstrNeeded && 
+    assert(CreateInstrNum == InstrNeeded &&
            "Inconsistent in instruction numbers");
   #endif
 
@@ -784,8 +786,8 @@ unsigned FAddCombine::calcInstrNumber(const AddendVect &Opnds) {
   unsigned OpndNum = Opnds.size();
   unsigned InstrNeeded = OpndNum - 1;
 
-  // The number of addends in the form of "(-1)*x". 
-  unsigned NegOpndNum = 0; 
+  // The number of addends in the form of "(-1)*x".
+  unsigned NegOpndNum = 0;
 
   // Adjust the number of instructions needed to emit the N-ary add.
   for (AddendVect::const_iterator I = Opnds.begin(), E = Opnds.end();
@@ -972,6 +974,11 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
           return BinaryOperator::CreateSub(ConstantExpr::getAdd(XorRHS, CI),
                                            XorLHS);
       }
+      // (X + signbit) + C could have gotten canonicalized to (X ^ signbit) + C,
+      // transform them into (X + (signbit ^ C))
+      if (XorRHS->getValue().isSignBit())
+          return BinaryOperator::CreateAdd(XorLHS,
+                                           ConstantExpr::getXor(XorRHS, CI));
     }
   }
 
@@ -1230,6 +1237,31 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
     }
   }
 
+  // select C, 0, B + select C, A, 0 -> select C, A, B
+  {
+    Value *A1, *B1, *C1, *A2, *B2, *C2;
+    if (match(LHS, m_Select(m_Value(C1), m_Value(A1), m_Value(B1))) &&
+        match(RHS, m_Select(m_Value(C2), m_Value(A2), m_Value(B2)))) {
+      if (C1 == C2) {
+        Constant *Z1 = 0, *Z2 = 0;
+        Value *A = 0, *B = 0, *C = C1;
+        if (match(A1, m_AnyZero()) && match(B2, m_AnyZero())) {
+          Z1 = dyn_cast<Constant>(A1); A = A2;
+          Z2 = dyn_cast<Constant>(B2); B = B1;
+        } else if (match(B1, m_AnyZero()) && match(A2, m_AnyZero())) {
+          Z1 = dyn_cast<Constant>(B1); B = B2;
+          Z2 = dyn_cast<Constant>(A2); A = A1;
+        }
+        // x + +0.0 != x when x is -0.0, so require nsz or two negative zeros.
+        if (Z1 && Z2 &&
+            (I.hasNoSignedZeros() ||
+             (Z1->isNegativeZeroValue() && Z2->isNegativeZeroValue()))) {
+          return SelectInst::Create(C, A, B);
+        }
+      }
+    }
+  }
+
   if (I.hasUnsafeAlgebra()) {
     if (Value *V = FAddCombine(Builder).simplify(&I))
       return ReplaceInstUsesWith(I, V);
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 990cbc3..ec75dd2 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -266,9 +266,8 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
   return 0;
 }
 
-
-/// InsertRangeTest - Emit a computation of: (V >= Lo && V < Hi) if Inside is
-/// true, otherwise (V < Lo || V >= Hi).  In practice, we emit the more efficient
+/// Emit a computation of: (V >= Lo && V < Hi) if Inside is true, otherwise
+/// (V < Lo || V >= Hi).  In practice, we emit the more efficient
 /// (V-Lo) \<u Hi-Lo.  This method expects that Lo <= Hi. isSigned indicates
 /// whether to treat the V, Lo and HI as signed or not. IB is the location to
 /// insert new instructions.
@@ -935,6 +934,9 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
 Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
   if (LHS->getPredicate() == FCmpInst::FCMP_ORD &&
       RHS->getPredicate() == FCmpInst::FCMP_ORD) {
+    if (LHS->getOperand(0)->getType() != RHS->getOperand(0)->getType())
+      return 0;
+
     // (fcmp ord x, c) & (fcmp ord y, c)  -> (fcmp ord x, y)
     if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1)))
       if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) {
@@ -1545,14 +1547,6 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
     switch (RHSCC) {
     default: llvm_unreachable("Unknown integer condition code!");
     case ICmpInst::ICMP_EQ:
-      if (LHSCst == SubOne(RHSCst)) {
-        // (X == 13 | X == 14) -> X-13 <u 2
-        Constant *AddCST = ConstantExpr::getNeg(LHSCst);
-        Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off");
-        AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst);
-        return Builder->CreateICmpULT(Add, AddCST);
-      }
-
       if (LHS->getOperand(0) == RHS->getOperand(0)) {
         // if LHSCst and RHSCst differ only by one bit:
         // (A == C1 || A == C2) -> (A & ~(C1 ^ C2)) == C1
@@ -1566,6 +1560,14 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
         }
       }
 
+      if (LHSCst == SubOne(RHSCst)) {
+        // (X == 13 | X == 14) -> X-13 <u 2
+        Constant *AddCST = ConstantExpr::getNeg(LHSCst);
+        Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off");
+        AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst);
+        return Builder->CreateICmpULT(Add, AddCST);
+      }
+
       break;                         // (X == 13 | X == 15) -> no change
     case ICmpInst::ICMP_UGT:         // (X == 13 | X u> 14) -> no change
     case ICmpInst::ICMP_SGT:         // (X == 13 | X s> 14) -> no change
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 64cd1bd..78b4a2c 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1372,7 +1372,8 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
         NestF->getType() == PointerType::getUnqual(NewFTy) ?
         NestF : ConstantExpr::getBitCast(NestF,
                                          PointerType::getUnqual(NewFTy));
-      const AttributeSet &NewPAL = AttributeSet::get(FTy->getContext(), NewAttrs);
+      const AttributeSet &NewPAL =
+          AttributeSet::get(FTy->getContext(), NewAttrs);
 
       Instruction *NewCaller;
       if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index a96e754..4c252c0 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -232,7 +232,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
   Constant *Init = GV->getInitializer();
   if (!isa<ConstantArray>(Init) && !isa<ConstantDataArray>(Init))
     return 0;
-  
+
   uint64_t ArrayElementCount = Init->getType()->getArrayNumElements();
   if (ArrayElementCount > 1024) return 0;  // Don't blow up on huge arrays.
 
@@ -2487,6 +2487,55 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
       return new ICmpInst(Pred, Y, Z);
     }
 
+    // icmp slt (X + -1), Y -> icmp sle X, Y
+    if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLT &&
+        match(B, m_AllOnes()))
+      return new ICmpInst(CmpInst::ICMP_SLE, A, Op1);
+
+    // icmp sge (X + -1), Y -> icmp sgt X, Y
+    if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGE &&
+        match(B, m_AllOnes()))
+      return new ICmpInst(CmpInst::ICMP_SGT, A, Op1);
+
+    // icmp sle (X + 1), Y -> icmp slt X, Y
+    if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLE &&
+        match(B, m_One()))
+      return new ICmpInst(CmpInst::ICMP_SLT, A, Op1);
+
+    // icmp sgt (X + 1), Y -> icmp sge X, Y
+    if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGT &&
+        match(B, m_One()))
+      return new ICmpInst(CmpInst::ICMP_SGE, A, Op1);
+
+    // if C1 and C2 have the same sign and |C1| >= |C2|:
+    //  icmp (X + C1), (Y + C2) -> icmp (X + C3), Y
+    //  s.t. C3 = C1 - C2
+    //
+    // if C1 and C2 have the same sign and |C2| > |C1|:
+    //  icmp (X + C1), (Y + C2) -> icmp X, (Y + C3)
+    //  s.t. C3 = C2 - C1
+    if (A && C && NoOp0WrapProblem && NoOp1WrapProblem &&
+        (BO0->hasOneUse() || BO1->hasOneUse()) && !I.isUnsigned())
+      if (ConstantInt *C1 = dyn_cast<ConstantInt>(B))
+        if (ConstantInt *C2 = dyn_cast<ConstantInt>(D)) {
+          const APInt &AP1 = C1->getValue();
+          const APInt &AP2 = C2->getValue();
+          if (AP1.isNegative() == AP2.isNegative()) {
+            APInt AP1Abs = C1->getValue().abs();
+            APInt AP2Abs = C2->getValue().abs();
+            if (AP1Abs.uge(AP2Abs)) {
+              ConstantInt *C3 = Builder->getInt(AP1 - AP2);
+              Value *NewAdd = Builder->CreateNSWAdd(A, C3);
+              return new ICmpInst(Pred, NewAdd, C);
+            } else {
+              ConstantInt *C3 = Builder->getInt(AP2 - AP1);
+              Value *NewAdd = Builder->CreateNSWAdd(C, C3);
+              return new ICmpInst(Pred, A, NewAdd);
+            }
+          }
+        }
+
+
     // Analyze the case when either Op0 or Op1 is a sub instruction.
     // Op0 = A - B (or A and B are null); Op1 = C - D (or C and D are null).
     A = 0; B = 0; C = 0; D = 0;
@@ -2620,6 +2669,15 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
   }
 
   { Value *A, *B;
+    // Transform (A & ~B) == 0 --> (A & B) != 0
+    // and       (A & ~B) != 0 --> (A & B) == 0
+    // if A is a power of 2.
+    if (match(Op0, m_And(m_Value(A), m_Not(m_Value(B)))) &&
+        match(Op1, m_Zero()) && isKnownToBeAPowerOfTwo(A) && I.isEquality())
+      return new ICmpInst(I.getInversePredicate(),
+                          Builder->CreateAnd(A, B),
+                          Op1);
+
     // ~x < ~y --> y < x
     // ~x < cst --> ~cst < x
     if (match(Op0, m_Not(m_Value(A)))) {
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 337cfe3..e2d7966 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -69,8 +69,8 @@ isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
     if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
       // If the GEP has all zero indices, it doesn't offset the pointer.  If it
       // doesn't, it does.
-      if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy, ToDelete,
-                                          IsOffset || !GEP->hasAllZeroIndices()))
+      if (!isOnlyCopiedFromConstantGlobal(
+              GEP, TheCopy, ToDelete, IsOffset || !GEP->hasAllZeroIndices()))
         return false;
       continue;
     }
@@ -166,7 +166,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
   // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
   if (AI.isArrayAllocation()) {  // Check C != 1
     if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
-      Type *NewTy = 
+      Type *NewTy =
         ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
       AllocaInst *New = Builder->CreateAlloca(NewTy, 0, AI.getName());
       New->setAlignment(AI.getAlignment());
@@ -294,7 +294,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
 
     Type *SrcPTy = SrcTy->getElementType();
 
-    if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() || 
+    if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() ||
          DestPTy->isVectorTy()) {
       // If the source is an array, the code below will not succeed.  Check to
       // see if a trivial 'gep P, 0, 0' will help matters.  Only do this for
@@ -311,7 +311,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
           }
 
       if (IC.getDataLayout() &&
-          (SrcPTy->isIntegerTy() || SrcPTy->isPointerTy() || 
+          (SrcPTy->isIntegerTy() || SrcPTy->isPointerTy() ||
             SrcPTy->isVectorTy()) &&
           // Do not allow turning this into a load of an integer, which is then
           // casted to a pointer, this pessimizes pointer analysis a lot.
@@ -322,7 +322,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
         // Okay, we are casting from one integer or pointer type to another of
         // the same size.  Instead of casting the pointer before the load, cast
         // the result of the loaded value.
-        LoadInst *NewLoad = 
+        LoadInst *NewLoad =
           IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName());
         NewLoad->setAlignment(LI.getAlignment());
         NewLoad->setAtomic(LI.getOrdering(), LI.getSynchScope());
@@ -359,7 +359,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
   // None of the following transforms are legal for volatile/atomic loads.
   // FIXME: Some of it is okay for atomic loads; needs refactoring.
   if (!LI.isSimple()) return 0;
-  
+
   // Do really simple store-to-load forwarding and load CSE, to catch cases
   // where there are several consecutive memory accesses to the same location,
   // separated by a few arithmetic operations.
@@ -380,7 +380,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
                     Constant::getNullValue(Op->getType()), &LI);
       return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
     }
-  } 
+  }
 
   // load null/undef -> unreachable
   // TODO: Consider a target hook for valid address spaces for this xform.
@@ -399,7 +399,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
     if (CE->isCast())
       if (Instruction *Res = InstCombineLoadCast(*this, LI, TD))
         return Res;
-  
+
   if (Op->hasOneUse()) {
     // Change select and PHI nodes to select values instead of addresses: this
     // helps alias analysis out a lot, allows many others simplifications, and
@@ -453,18 +453,18 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
   Type *DestPTy = cast<PointerType>(CI->getType())->getElementType();
   PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType());
   if (SrcTy == 0) return 0;
-  
+
   Type *SrcPTy = SrcTy->getElementType();
 
   if (!DestPTy->isIntegerTy() && !DestPTy->isPointerTy())
     return 0;
-  
+
   /// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep"
   /// to its first element.  This allows us to handle things like:
   ///   store i32 xxx, (bitcast {foo*, float}* %P to i32*)
   /// on 32-bit hosts.
   SmallVector<Value*, 4> NewGEPIndices;
-  
+
   // If the source is an array, the code below will not succeed.  Check to
   // see if a trivial 'gep P, 0, 0' will help matters.  Only do this for
   // constants.
@@ -472,7 +472,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
     // Index through pointer.
     Constant *Zero = Constant::getNullValue(Type::getInt32Ty(SI.getContext()));
     NewGEPIndices.push_back(Zero);
-    
+
     while (1) {
       if (StructType *STy = dyn_cast<StructType>(SrcPTy)) {
         if (!STy->getNumElements()) /* Struct can be empty {} */
@@ -486,24 +486,24 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
         break;
       }
     }
-    
+
     SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace());
   }
 
   if (!SrcPTy->isIntegerTy() && !SrcPTy->isPointerTy())
     return 0;
-  
+
   // If the pointers point into different address spaces or if they point to
   // values with different sizes, we can't do the transformation.
   if (!IC.getDataLayout() ||
-      SrcTy->getAddressSpace() != 
+      SrcTy->getAddressSpace() !=
         cast<PointerType>(CI->getType())->getAddressSpace() ||
       IC.getDataLayout()->getTypeSizeInBits(SrcPTy) !=
       IC.getDataLayout()->getTypeSizeInBits(DestPTy))
     return 0;
 
   // Okay, we are casting from one integer or pointer type to another of
-  // the same size.  Instead of casting the pointer before 
+  // the same size.  Instead of casting the pointer before
   // the store, cast the value to be stored.
   Value *NewCast;
   Value *SIOp0 = SI.getOperand(0);
@@ -517,12 +517,12 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
     if (SIOp0->getType()->isPointerTy())
       opcode = Instruction::PtrToInt;
   }
-  
+
   // SIOp0 is a pointer to aggregate and this is a store to the first field,
   // emit a GEP to index into its first field.
   if (!NewGEPIndices.empty())
     CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices);
-  
+
   NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy,
                                    SIOp0->getName()+".c");
   SI.setOperand(0, NewCast);
@@ -541,7 +541,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
 static bool equivalentAddressValues(Value *A, Value *B) {
   // Test if the values are trivially equivalent.
   if (A == B) return true;
-  
+
   // Test if the values come form identical arithmetic instructions.
   // This uses isIdenticalToWhenDefined instead of isIdenticalTo because
   // its only used to compare two uses within the same basic block, which
@@ -554,7 +554,7 @@ static bool equivalentAddressValues(Value *A, Value *B) {
     if (Instruction *BI = dyn_cast<Instruction>(B))
       if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
         return true;
-  
+
   // Otherwise they may not be equivalent.
   return false;
 }
@@ -585,7 +585,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
   // If the RHS is an alloca with a single use, zapify the store, making the
   // alloca dead.
   if (Ptr->hasOneUse()) {
-    if (isa<AllocaInst>(Ptr)) 
+    if (isa<AllocaInst>(Ptr))
       return EraseInstFromFunction(SI);
     if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
       if (isa<AllocaInst>(GEP->getOperand(0))) {
@@ -608,8 +608,8 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
         (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
       ScanInsts++;
       continue;
-    }    
-    
+    }
+
     if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
       // Prev store isn't volatile, and stores to the same location?
       if (PrevSI->isSimple() && equivalentAddressValues(PrevSI->getOperand(1),
@@ -621,7 +621,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
       }
       break;
     }
-    
+
     // If this is a load, we have to stop.  However, if the loaded value is from
     // the pointer we're loading and is producing the pointer we're storing,
     // then *this* store is dead (X = load P; store X -> P).
@@ -629,12 +629,12 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
       if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) &&
           LI->isSimple())
         return EraseInstFromFunction(SI);
-      
+
       // Otherwise, this is a load from some other location.  Stores before it
       // may not be dead.
       break;
     }
-    
+
     // Don't skip over loads or things that can modify memory.
     if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory())
       break;
@@ -664,11 +664,11 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
       if (Instruction *Res = InstCombineStoreToCast(*this, SI))
         return Res;
 
-  
+
   // If this store is the last instruction in the basic block (possibly
   // excepting debug info instructions), and if the block ends with an
   // unconditional branch, try to move it to the successor block.
-  BBI = &SI; 
+  BBI = &SI;
   do {
     ++BBI;
   } while (isa<DbgInfoIntrinsic>(BBI) ||
@@ -677,7 +677,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
     if (BI->isUnconditional())
       if (SimplifyStoreAtEndOfBlock(SI))
         return 0;  // xform done!
-  
+
   return 0;
 }
 
@@ -691,12 +691,12 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
 ///
 bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
   BasicBlock *StoreBB = SI.getParent();
-  
+
   // Check to see if the successor block has exactly two incoming edges.  If
   // so, see if the other predecessor contains a store to the same location.
   // if so, insert a PHI node (if needed) and move the stores down.
   BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);
-  
+
   // Determine whether Dest has exactly two predecessors and, if so, compute
   // the other predecessor.
   pred_iterator PI = pred_begin(DestBB);
@@ -708,7 +708,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
 
   if (++PI == pred_end(DestBB))
     return false;
-  
+
   P = *PI;
   if (P != StoreBB) {
     if (OtherBB)
@@ -728,7 +728,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
   BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
   if (!OtherBr || BBI == OtherBB->begin())
     return false;
-  
+
   // If the other block ends in an unconditional branch, check for the 'if then
   // else' case.  there is an instruction before the branch.
   StoreInst *OtherStore = 0;
@@ -750,10 +750,10 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
   } else {
     // Otherwise, the other block ended with a conditional branch. If one of the
     // destinations is StoreBB, then we have the if/then case.
-    if (OtherBr->getSuccessor(0) != StoreBB && 
+    if (OtherBr->getSuccessor(0) != StoreBB &&
         OtherBr->getSuccessor(1) != StoreBB)
       return false;
-    
+
     // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
     // if/then triangle.  See if there is a store to the same ptr as SI that
     // lives in OtherBB.
@@ -771,7 +771,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
           BBI == OtherBB->begin())
         return false;
     }
-    
+
     // In order to eliminate the store in OtherBr, we have to
     // make sure nothing reads or overwrites the stored value in
     // StoreBB.
@@ -781,7 +781,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
         return false;
     }
   }
-  
+
   // Insert a PHI node now if we need it.
   Value *MergedVal = OtherStore->getOperand(0);
   if (MergedVal != SI.getOperand(0)) {
@@ -790,7 +790,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
     PN->addIncoming(OtherStore->getOperand(0), OtherBB);
     MergedVal = InsertNewInstBefore(PN, DestBB->front());
   }
-  
+
   // Advance to a place where it is safe to insert the new store and
   // insert it.
   BBI = DestBB->getFirstInsertionPt();
@@ -800,7 +800,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
                                    SI.getOrdering(),
                                    SI.getSynchScope());
   InsertNewInstBefore(NewSI, *BBI);
-  NewSI->setDebugLoc(OtherStore->getDebugLoc()); 
+  NewSI->setDebugLoc(OtherStore->getDebugLoc());
 
   // If the two stores had the same TBAA tag, preserve it.
   if (MDNode *TBAATag = SI.getMetadata(LLVMContext::MD_tbaa))
@@ -808,7 +808,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
                                OtherStore->getMetadata(LLVMContext::MD_tbaa))))
       NewSI->setMetadata(LLVMContext::MD_tbaa, TBAATag);
 
-  
+
   // Nuke the old stores.
   EraseInstFromFunction(SI);
   EraseInstFromFunction(*OtherStore);
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 173f2bf..ecc9fc3 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -28,7 +28,7 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) {
   // if this is safe.  For example, the use could be in dynamically unreached
   // code.
   if (!V->hasOneUse()) return 0;
-  
+
   bool MadeChange = false;
 
   // ((1 << A) >>u B) --> (1 << (A-B))
@@ -41,7 +41,7 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) {
     A = IC.Builder->CreateSub(A, B);
     return IC.Builder->CreateShl(PowerOf2, A);
   }
-  
+
   // (PowerOfTwo >>u B) --> isExact since shifting out the result would make it
   // inexact.  Similarly for <<.
   if (BinaryOperator *I = dyn_cast<BinaryOperator>(V))
@@ -52,12 +52,12 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) {
         I->setOperand(0, V2);
         MadeChange = true;
       }
-      
+
       if (I->getOpcode() == Instruction::LShr && !I->isExact()) {
         I->setIsExact();
         MadeChange = true;
       }
-      
+
       if (I->getOpcode() == Instruction::Shl && !I->hasNoUnsignedWrap()) {
         I->setHasNoUnsignedWrap();
         MadeChange = true;
@@ -67,7 +67,7 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) {
   // TODO: Lots more we could do here:
   //    If V is a phi node, we can call this on each of its operands.
   //    "select cond, X, 0" can simplify to "X".
-  
+
   return MadeChange ? V : 0;
 }
 
@@ -84,12 +84,12 @@ static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign) {
     LHSExt = LHSExt.zext(W * 2);
     RHSExt = RHSExt.zext(W * 2);
   }
-  
+
   APInt MulExt = LHSExt * RHSExt;
-  
+
   if (!sign)
     return MulExt.ugt(APInt::getLowBitsSet(W * 2, W));
-  
+
   APInt Min = APInt::getSignedMinValue(W).sext(W * 2);
   APInt Max = APInt::getSignedMaxValue(W).sext(W * 2);
   return MulExt.slt(Min) || MulExt.sgt(Max);
@@ -107,16 +107,16 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
 
   if (match(Op1, m_AllOnes()))  // X * -1 == 0 - X
     return BinaryOperator::CreateNeg(Op0, I.getName());
-  
+
   if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
-    
+
     // ((X << C1)*C2) == (X * (C2 << C1))
     if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op0))
       if (SI->getOpcode() == Instruction::Shl)
         if (Constant *ShOp = dyn_cast<Constant>(SI->getOperand(1)))
           return BinaryOperator::CreateMul(SI->getOperand(0),
                                            ConstantExpr::getShl(CI, ShOp));
-    
+
     const APInt &Val = CI->getValue();
     if (Val.isPowerOf2()) {          // Replace X*(2^C) with X << C
       Constant *NewCst = ConstantInt::get(Op0->getType(), Val.logBase2());
@@ -125,7 +125,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
       if (I.hasNoUnsignedWrap()) Shl->setHasNoUnsignedWrap();
       return Shl;
     }
-    
+
     // Canonicalize (X+C1)*CI -> X*CI+C1*CI.
     { Value *X; ConstantInt *C1;
       if (Op0->hasOneUse() &&
@@ -158,9 +158,9 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
       }
     }
   }
-  
+
   // Simplify mul instructions with a constant RHS.
-  if (isa<Constant>(Op1)) {    
+  if (isa<Constant>(Op1)) {
     // Try to fold constant mul into select arguments.
     if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
       if (Instruction *R = FoldOpIntoSelect(I, SI))
@@ -181,7 +181,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
     Value *Op1C = Op1;
     BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0);
     if (!BO ||
-        (BO->getOpcode() != Instruction::UDiv && 
+        (BO->getOpcode() != Instruction::UDiv &&
          BO->getOpcode() != Instruction::SDiv)) {
       Op1C = Op0;
       BO = dyn_cast<BinaryOperator>(Op1);
@@ -227,14 +227,14 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
     if (match(Op1, m_Shl(m_One(), m_Value(Y))))
       return BinaryOperator::CreateShl(Op0, Y);
   }
-  
+
   // If one of the operands of the multiply is a cast from a boolean value, then
   // we know the bool is either zero or one, so this is a 'masking' multiply.
   //   X * Y (where Y is 0 or 1) -> X & (0-Y)
   if (!I.getType()->isVectorTy()) {
     // -2 is "-1 << 1" so it is all bits set except the low one.
     APInt Negative2(I.getType()->getPrimitiveSizeInBits(), (uint64_t)-2, true);
-    
+
     Value *BoolCast = 0, *OtherOp = 0;
     if (MaskedValueIsZero(Op0, Negative2))
       BoolCast = Op0, OtherOp = Op1;
@@ -280,7 +280,7 @@ static void detectLog2OfHalf(Value *&Op, Value *&Y, IntrinsicInst *&Log2) {
      return;
    if (I->getOpcode() != Instruction::FMul || !I->hasUnsafeAlgebra())
      return;
-              
+
    ConstantFP *CFP = dyn_cast<ConstantFP>(I->getOperand(0));
    if (CFP && CFP->isExactlyValue(0.5)) {
      Y = I->getOperand(1);
@@ -289,14 +289,14 @@ static void detectLog2OfHalf(Value *&Op, Value *&Y, IntrinsicInst *&Log2) {
    CFP = dyn_cast<ConstantFP>(I->getOperand(1));
    if (CFP && CFP->isExactlyValue(0.5))
      Y = I->getOperand(0);
-} 
+}
 
 /// Helper function of InstCombiner::visitFMul(BinaryOperator(). It returns
 /// true iff the given value is FMul or FDiv with one and only one operand
 /// being a normal constant (i.e. not Zero/NaN/Infinity).
 static bool isFMulOrFDivWithConstant(Value *V) {
   Instruction *I = dyn_cast<Instruction>(V);
-  if (!I || (I->getOpcode() != Instruction::FMul && 
+  if (!I || (I->getOpcode() != Instruction::FMul &&
              I->getOpcode() != Instruction::FDiv))
     return false;
 
@@ -318,10 +318,10 @@ static bool isNormalFp(const ConstantFP *C) {
 /// foldFMulConst() is a helper routine of InstCombiner::visitFMul().
 /// The input \p FMulOrDiv is a FMul/FDiv with one and only one operand
 /// being a constant (i.e. isFMulOrFDivWithConstant(FMulOrDiv) == true).
-/// This function is to simplify "FMulOrDiv * C" and returns the 
+/// This function is to simplify "FMulOrDiv * C" and returns the
 /// resulting expression. Note that this function could return NULL in
 /// case the constants cannot be folded into a normal floating-point.
-/// 
+///
 Value *InstCombiner::foldFMulConst(Instruction *FMulOrDiv, ConstantFP *C,
                                    Instruction *InsertBefore) {
   assert(isFMulOrFDivWithConstant(FMulOrDiv) && "V is invalid");
@@ -351,7 +351,7 @@ Value *InstCombiner::foldFMulConst(Instruction *FMulOrDiv, ConstantFP *C,
       if (isNormalFp(F)) {
         R = BinaryOperator::CreateFMul(Opnd0, F);
       } else {
-        // (X / C1) * C => X / (C1/C) 
+        // (X / C1) * C => X / (C1/C)
         Constant *F = ConstantExpr::getFDiv(C1, C);
         if (isNormalFp(cast<ConstantFP>(F)))
           R = BinaryOperator::CreateFDiv(Opnd0, F);
@@ -415,13 +415,13 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
         if (C0) {
           std::swap(C0, C1);
           std::swap(Opnd0, Opnd1);
-          Swap = true; 
+          Swap = true;
         }
 
         if (C1 && C1->getValueAPF().isNormal() &&
             isFMulOrFDivWithConstant(Opnd0)) {
           Value *M1 = ConstantExpr::getFMul(C1, C);
-          Value *M0 = isNormalFp(cast<ConstantFP>(M1)) ? 
+          Value *M0 = isNormalFp(cast<ConstantFP>(M1)) ?
                       foldFMulConst(cast<Instruction>(Opnd0), C, &I) :
                       0;
           if (M0 && M1) {
@@ -495,7 +495,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
     }
 
     // (X*Y) * X => (X*X) * Y where Y != X
-    //  The purpose is two-fold: 
+    //  The purpose is two-fold:
     //   1) to form a power expression (of X).
     //   2) potentially shorten the critical path: After transformation, the
     //  latency of the instruction Y is amortized by the expression of X*X,
@@ -524,6 +524,35 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
       }
     }
 
+    // B * (uitofp i1 C) -> select C, B, 0
+    if (I.hasNoNaNs() && I.hasNoInfs() && I.hasNoSignedZeros()) {
+      Value *LHS = Op0, *RHS = Op1;
+      Value *B, *C;
+      if (!match(RHS, m_UIToFp(m_Value(C))))
+        std::swap(LHS, RHS);
+
+      if (match(RHS, m_UIToFp(m_Value(C))) && C->getType()->isIntegerTy(1)) {
+        B = LHS;
+        Value *Zero = ConstantFP::getNegativeZero(B->getType());
+        return SelectInst::Create(C, B, Zero);
+      }
+    }
+
+    // A * (1 - uitofp i1 C) -> select C, 0, A
+    if (I.hasNoNaNs() && I.hasNoInfs() && I.hasNoSignedZeros()) {
+      Value *LHS = Op0, *RHS = Op1;
+      Value *A, *C;
+      if (!match(RHS, m_FSub(m_FPOne(), m_UIToFp(m_Value(C)))))
+        std::swap(LHS, RHS);
+
+      if (match(RHS, m_FSub(m_FPOne(), m_UIToFp(m_Value(C)))) &&
+          C->getType()->isIntegerTy(1)) {
+        A = LHS;
+        Value *Zero = ConstantFP::getNegativeZero(A->getType());
+        return SelectInst::Create(C, Zero, A);
+      }
+    }
+
     if (!isa<Constant>(Op1))
       std::swap(Opnd0, Opnd1);
     else
@@ -537,7 +566,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
 /// instruction.
 bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) {
   SelectInst *SI = cast<SelectInst>(I.getOperand(1));
-  
+
   // div/rem X, (Cond ? 0 : Y) -> div/rem X, Y
   int NonNullOperand = -1;
   if (Constant *ST = dyn_cast<Constant>(SI->getOperand(1)))
@@ -547,36 +576,36 @@ bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) {
   if (Constant *ST = dyn_cast<Constant>(SI->getOperand(2)))
     if (ST->isNullValue())
       NonNullOperand = 1;
-  
+
   if (NonNullOperand == -1)
     return false;
-  
+
   Value *SelectCond = SI->getOperand(0);
-  
+
   // Change the div/rem to use 'Y' instead of the select.
   I.setOperand(1, SI->getOperand(NonNullOperand));
-  
+
   // Okay, we know we replace the operand of the div/rem with 'Y' with no
   // problem.  However, the select, or the condition of the select may have
   // multiple uses.  Based on our knowledge that the operand must be non-zero,
   // propagate the known value for the select into other uses of it, and
   // propagate a known value of the condition into its other users.
-  
+
   // If the select and condition only have a single use, don't bother with this,
   // early exit.
   if (SI->use_empty() && SelectCond->hasOneUse())
     return true;
-  
+
   // Scan the current block backward, looking for other uses of SI.
   BasicBlock::iterator BBI = &I, BBFront = I.getParent()->begin();
-  
+
   while (BBI != BBFront) {
     --BBI;
     // If we found a call to a function, we can't assume it will return, so
     // information from below it cannot be propagated above it.
     if (isa<CallInst>(BBI) && !isa<IntrinsicInst>(BBI))
       break;
-    
+
     // Replace uses of the select or its condition with the known values.
     for (Instruction::op_iterator I = BBI->op_begin(), E = BBI->op_end();
          I != E; ++I) {
@@ -589,17 +618,17 @@ bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) {
         Worklist.Add(BBI);
       }
     }
-    
+
     // If we past the instruction, quit looking for it.
     if (&*BBI == SI)
       SI = 0;
     if (&*BBI == SelectCond)
       SelectCond = 0;
-    
+
     // If we ran out of things to eliminate, break out of the loop.
     if (SelectCond == 0 && SI == 0)
       break;
-    
+
   }
   return true;
 }
@@ -617,7 +646,7 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
     I.setOperand(1, V);
     return &I;
   }
-  
+
   // Handle cases involving: [su]div X, (select Cond, Y, Z)
   // This does not apply for fdiv.
   if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I))
@@ -683,16 +712,16 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
   // Handle the integer div common cases
   if (Instruction *Common = commonIDivTransforms(I))
     return Common;
-  
-  { 
+
+  {
     // X udiv 2^C -> X >> C
     // Check to see if this is an unsigned division with an exact power of 2,
     // if so, convert to a right shift.
     const APInt *C;
     if (match(Op1, m_Power2(C))) {
       BinaryOperator *LShr =
-      BinaryOperator::CreateLShr(Op0, 
-                                 ConstantInt::get(Op0->getType(), 
+      BinaryOperator::CreateLShr(Op0,
+                                 ConstantInt::get(Op0->getType(),
                                                   C->logBase2()));
       if (I.isExact()) LShr->setIsExact();
       return LShr;
@@ -732,7 +761,7 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
       return BinaryOperator::CreateLShr(Op0, N);
     }
   }
-  
+
   // udiv X, (Select Cond, C1, C2) --> Select Cond, (shr X, C1), (shr X, C2)
   // where C1&C2 are powers of two.
   { Value *Cond; const APInt *C1, *C2;
@@ -740,11 +769,11 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
       // Construct the "on true" case of the select
       Value *TSI = Builder->CreateLShr(Op0, C1->logBase2(), Op1->getName()+".t",
                                        I.isExact());
-  
+
       // Construct the "on false" case of the select
       Value *FSI = Builder->CreateLShr(Op0, C2->logBase2(), Op1->getName()+".f",
                                        I.isExact());
-      
+
       // construct the select instruction and return it.
       return SelectInst::Create(Cond, TSI, FSI);
     }
@@ -799,7 +828,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
         // X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set
         return BinaryOperator::CreateUDiv(Op0, Op1, I.getName());
       }
-      
+
       if (match(Op1, m_Shl(m_Power2(), m_Value()))) {
         // X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y)
         // Safe because the only negative value (1 << Y) can take on is
@@ -809,13 +838,13 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
       }
     }
   }
-  
+
   return 0;
 }
 
 /// CvtFDivConstToReciprocal tries to convert X/C into X*1/C if C not a special
 /// FP value and:
-///    1) 1/C is exact, or 
+///    1) 1/C is exact, or
 ///    2) reciprocal is allowed.
 /// If the convertion was successful, the simplified expression "X * 1/C" is
 /// returned; otherwise, NULL is returned.
@@ -826,7 +855,7 @@ static Instruction *CvtFDivConstToReciprocal(Value *Dividend,
   const APFloat &FpVal = Divisor->getValueAPF();
   APFloat Reciprocal(FpVal.getSemantics());
   bool Cvt = FpVal.getExactInverse(&Reciprocal);
-    
+
   if (!Cvt && AllowReciprocal && FpVal.isNormal()) {
     Reciprocal = APFloat(FpVal.getSemantics(), 1.0f);
     (void)Reciprocal.divide(FpVal, APFloat::rmNearestTiesToEven);
@@ -870,10 +899,10 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
         Constant *C = ConstantExpr::getFMul(C1, C2);
         const APFloat &F = cast<ConstantFP>(C)->getValueAPF();
         if (F.isNormal() && !F.isDenormal()) {
-          Res = CvtFDivConstToReciprocal(X, cast<ConstantFP>(C), 
+          Res = CvtFDivConstToReciprocal(X, cast<ConstantFP>(C),
                                          AllowReciprocal);
           if (!Res)
-            Res = BinaryOperator::CreateFDiv(X, C); 
+            Res = BinaryOperator::CreateFDiv(X, C);
         }
       }
 
@@ -911,7 +940,7 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
     if (Fold) {
       const APFloat &FoldC = cast<ConstantFP>(Fold)->getValueAPF();
       if (FoldC.isNormal() && !FoldC.isDenormal()) {
-        Instruction *R = CreateDiv ? 
+        Instruction *R = CreateDiv ?
                          BinaryOperator::CreateFDiv(Fold, X) :
                          BinaryOperator::CreateFMul(X, Fold);
         R->setFastMathFlags(I.getFastMathFlags());
@@ -997,7 +1026,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {
 
   if (Instruction *common = commonIRemTransforms(I))
     return common;
-  
+
   // X urem C^2 -> X and C-1
   { const APInt *C;
     if (match(Op1, m_Power2(C)))
@@ -1005,7 +1034,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {
                                        ConstantInt::get(I.getType(), *C-1));
   }
 
-  // Turn A % (C << N), where C is 2^k, into A & ((C << N)-1)  
+  // Turn A % (C << N), where C is 2^k, into A & ((C << N)-1)
   if (match(Op1, m_Shl(m_Power2(), m_Value()))) {
     Constant *N1 = Constant::getAllOnesValue(I.getType());
     Value *Add = Builder->CreateAdd(Op1, N1);
@@ -1041,7 +1070,7 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) {
   // Handle the integer rem common cases
   if (Instruction *Common = commonIRemTransforms(I))
     return Common;
-  
+
   if (Value *RHSNeg = dyn_castNegVal(Op1))
     if (!isa<Constant>(RHSNeg) ||
         (isa<ConstantInt>(RHSNeg) &&
diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp
index b0a998c..bd14e81 100644
--- a/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -27,10 +27,10 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
   unsigned Opc = FirstInst->getOpcode();
   Value *LHSVal = FirstInst->getOperand(0);
   Value *RHSVal = FirstInst->getOperand(1);
-    
+
   Type *LHSType = LHSVal->getType();
   Type *RHSType = RHSVal->getType();
-  
+
   bool isNUW = false, isNSW = false, isExact = false;
   if (OverflowingBinaryOperator *BO =
         dyn_cast<OverflowingBinaryOperator>(FirstInst)) {
@@ -39,7 +39,7 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
   } else if (PossiblyExactOperator *PEO =
                dyn_cast<PossiblyExactOperator>(FirstInst))
     isExact = PEO->isExact();
-  
+
   // Scan to see if all operands are the same opcode, and all have one use.
   for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
     Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i));
@@ -54,14 +54,14 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
     if (CmpInst *CI = dyn_cast<CmpInst>(I))
       if (CI->getPredicate() != cast<CmpInst>(FirstInst)->getPredicate())
         return 0;
-    
+
     if (isNUW)
       isNUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap();
     if (isNSW)
       isNSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
     if (isExact)
       isExact = cast<PossiblyExactOperator>(I)->isExact();
-    
+
     // Keep track of which operand needs a phi node.
     if (I->getOperand(0) != LHSVal) LHSVal = 0;
     if (I->getOperand(1) != RHSVal) RHSVal = 0;
@@ -73,9 +73,9 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
   // bad when the PHIs are in the header of a loop.
   if (!LHSVal && !RHSVal)
     return 0;
-  
+
   // Otherwise, this is safe to transform!
-  
+
   Value *InLHS = FirstInst->getOperand(0);
   Value *InRHS = FirstInst->getOperand(1);
   PHINode *NewLHS = 0, *NewRHS = 0;
@@ -86,7 +86,7 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
     InsertNewInstBefore(NewLHS, PN);
     LHSVal = NewLHS;
   }
-  
+
   if (RHSVal == 0) {
     NewRHS = PHINode::Create(RHSType, PN.getNumIncomingValues(),
                              FirstInst->getOperand(1)->getName() + ".pn");
@@ -94,7 +94,7 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
     InsertNewInstBefore(NewRHS, PN);
     RHSVal = NewRHS;
   }
-  
+
   // Add all operands to the new PHIs.
   if (NewLHS || NewRHS) {
     for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
@@ -109,7 +109,7 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
       }
     }
   }
-    
+
   if (CmpInst *CIOp = dyn_cast<CmpInst>(FirstInst)) {
     CmpInst *NewCI = CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(),
                                      LHSVal, RHSVal);
@@ -129,8 +129,8 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
 
 Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
   GetElementPtrInst *FirstInst =cast<GetElementPtrInst>(PN.getIncomingValue(0));
-  
-  SmallVector<Value*, 16> FixedOperands(FirstInst->op_begin(), 
+
+  SmallVector<Value*, 16> FixedOperands(FirstInst->op_begin(),
                                         FirstInst->op_end());
   // This is true if all GEP bases are allocas and if all indices into them are
   // constants.
@@ -140,9 +140,9 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
   // more than one phi, which leads to higher register pressure. This is
   // especially bad when the PHIs are in the header of a loop.
   bool NeededPhi = false;
-  
+
   bool AllInBounds = true;
-  
+
   // Scan to see if all operands are the same opcode, and all have one use.
   for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
     GetElementPtrInst *GEP= dyn_cast<GetElementPtrInst>(PN.getIncomingValue(i));
@@ -151,18 +151,18 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
       return 0;
 
     AllInBounds &= GEP->isInBounds();
-    
+
     // Keep track of whether or not all GEPs are of alloca pointers.
     if (AllBasePointersAreAllocas &&
         (!isa<AllocaInst>(GEP->getOperand(0)) ||
          !GEP->hasAllConstantIndices()))
       AllBasePointersAreAllocas = false;
-    
+
     // Compare the operand lists.
     for (unsigned op = 0, e = FirstInst->getNumOperands(); op != e; ++op) {
       if (FirstInst->getOperand(op) == GEP->getOperand(op))
         continue;
-      
+
       // Don't merge two GEPs when two operands differ (introducing phi nodes)
       // if one of the PHIs has a constant for the index.  The index may be
       // substantially cheaper to compute for the constants, so making it a
@@ -171,7 +171,7 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
       if (isa<ConstantInt>(FirstInst->getOperand(op)) ||
           isa<ConstantInt>(GEP->getOperand(op)))
         return 0;
-      
+
       if (FirstInst->getOperand(op)->getType() !=GEP->getOperand(op)->getType())
         return 0;
 
@@ -186,7 +186,7 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
       NeededPhi = true;
     }
   }
-  
+
   // If all of the base pointers of the PHI'd GEPs are from allocas, don't
   // bother doing this transformation.  At best, this will just save a bit of
   // offset calculation, but all the predecessors will have to materialize the
@@ -195,11 +195,11 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
   // which can usually all be folded into the load.
   if (AllBasePointersAreAllocas)
     return 0;
-  
+
   // Otherwise, this is safe to transform.  Insert PHI nodes for each operand
   // that is variable.
   SmallVector<PHINode*, 16> OperandPhis(FixedOperands.size());
-  
+
   bool HasAnyPHIs = false;
   for (unsigned i = 0, e = FixedOperands.size(); i != e; ++i) {
     if (FixedOperands[i]) continue;  // operand doesn't need a phi.
@@ -207,28 +207,28 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
     PHINode *NewPN = PHINode::Create(FirstOp->getType(), e,
                                      FirstOp->getName()+".pn");
     InsertNewInstBefore(NewPN, PN);
-    
+
     NewPN->addIncoming(FirstOp, PN.getIncomingBlock(0));
     OperandPhis[i] = NewPN;
     FixedOperands[i] = NewPN;
     HasAnyPHIs = true;
   }
 
-  
+
   // Add all operands to the new PHIs.
   if (HasAnyPHIs) {
     for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
       GetElementPtrInst *InGEP =cast<GetElementPtrInst>(PN.getIncomingValue(i));
       BasicBlock *InBB = PN.getIncomingBlock(i);
-      
+
       for (unsigned op = 0, e = OperandPhis.size(); op != e; ++op)
         if (PHINode *OpPhi = OperandPhis[op])
           OpPhi->addIncoming(InGEP->getOperand(op), InBB);
     }
   }
-  
+
   Value *Base = FixedOperands[0];
-  GetElementPtrInst *NewGEP = 
+  GetElementPtrInst *NewGEP =
     GetElementPtrInst::Create(Base, makeArrayRef(FixedOperands).slice(1));
   if (AllInBounds) NewGEP->setIsInBounds();
   NewGEP->setDebugLoc(FirstInst->getDebugLoc());
@@ -246,11 +246,11 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
 /// to a register.
 static bool isSafeAndProfitableToSinkLoad(LoadInst *L) {
   BasicBlock::iterator BBI = L, E = L->getParent()->end();
-  
+
   for (++BBI; BBI != E; ++BBI)
     if (BBI->mayWriteToMemory())
       return false;
-  
+
   // Check for non-address taken alloca.  If not address-taken already, it isn't
   // profitable to do this xform.
   if (AllocaInst *AI = dyn_cast<AllocaInst>(L->getOperand(0))) {
@@ -266,11 +266,11 @@ static bool isSafeAndProfitableToSinkLoad(LoadInst *L) {
       isAddressTaken = true;
       break;
     }
-    
+
     if (!isAddressTaken && AI->isStaticAlloca())
       return false;
   }
-  
+
   // If this load is a load from a GEP with a constant offset from an alloca,
   // then we don't want to sink it.  In its present form, it will be
   // load [constant stack offset].  Sinking it will cause us to have to
@@ -280,7 +280,7 @@ static bool isSafeAndProfitableToSinkLoad(LoadInst *L) {
     if (AllocaInst *AI = dyn_cast<AllocaInst>(GEP->getOperand(0)))
       if (AI->isStaticAlloca() && GEP->hasAllConstantIndices())
         return false;
-  
+
   return true;
 }
 
@@ -300,41 +300,41 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
   bool isVolatile = FirstLI->isVolatile();
   unsigned LoadAlignment = FirstLI->getAlignment();
   unsigned LoadAddrSpace = FirstLI->getPointerAddressSpace();
-  
+
   // We can't sink the load if the loaded value could be modified between the
   // load and the PHI.
   if (FirstLI->getParent() != PN.getIncomingBlock(0) ||
       !isSafeAndProfitableToSinkLoad(FirstLI))
     return 0;
-  
+
   // If the PHI is of volatile loads and the load block has multiple
   // successors, sinking it would remove a load of the volatile value from
   // the path through the other successor.
-  if (isVolatile && 
+  if (isVolatile &&
       FirstLI->getParent()->getTerminator()->getNumSuccessors() != 1)
     return 0;
-  
+
   // Check to see if all arguments are the same operation.
   for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
     LoadInst *LI = dyn_cast<LoadInst>(PN.getIncomingValue(i));
     if (!LI || !LI->hasOneUse())
       return 0;
-    
-    // We can't sink the load if the loaded value could be modified between 
+
+    // We can't sink the load if the loaded value could be modified between
     // the load and the PHI.
     if (LI->isVolatile() != isVolatile ||
         LI->getParent() != PN.getIncomingBlock(i) ||
         LI->getPointerAddressSpace() != LoadAddrSpace ||
         !isSafeAndProfitableToSinkLoad(LI))
       return 0;
-      
+
     // If some of the loads have an alignment specified but not all of them,
     // we can't do the transformation.
     if ((LoadAlignment != 0) != (LI->getAlignment() != 0))
       return 0;
-    
+
     LoadAlignment = std::min(LoadAlignment, LI->getAlignment());
-    
+
     // If the PHI is of volatile loads and the load block has multiple
     // successors, sinking it would remove a load of the volatile value from
     // the path through the other successor.
@@ -342,16 +342,16 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
         LI->getParent()->getTerminator()->getNumSuccessors() != 1)
       return 0;
   }
-  
+
   // Okay, they are all the same operation.  Create a new PHI node of the
   // correct type, and PHI together all of the LHS's of the instructions.
   PHINode *NewPN = PHINode::Create(FirstLI->getOperand(0)->getType(),
                                    PN.getNumIncomingValues(),
                                    PN.getName()+".in");
-  
+
   Value *InVal = FirstLI->getOperand(0);
   NewPN->addIncoming(InVal, PN.getIncomingBlock(0));
-  
+
   // Add all operands to the new PHI.
   for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
     Value *NewInVal = cast<LoadInst>(PN.getIncomingValue(i))->getOperand(0);
@@ -359,7 +359,7 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
       InVal = 0;
     NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i));
   }
-  
+
   Value *PhiVal;
   if (InVal) {
     // The new PHI unions all of the same values together.  This is really
@@ -370,14 +370,14 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
     InsertNewInstBefore(NewPN, PN);
     PhiVal = NewPN;
   }
-  
+
   // If this was a volatile load that we are merging, make sure to loop through
   // and mark all the input loads as non-volatile.  If we don't do this, we will
   // insert a new volatile load and the old ones will not be deletable.
   if (isVolatile)
     for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
       cast<LoadInst>(PN.getIncomingValue(i))->setVolatile(false);
-  
+
   LoadInst *NewLI = new LoadInst(PhiVal, "", isVolatile, LoadAlignment);
   NewLI->setDebugLoc(FirstLI->getDebugLoc());
   return NewLI;
@@ -395,7 +395,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
     return FoldPHIArgGEPIntoPHI(PN);
   if (isa<LoadInst>(FirstInst))
     return FoldPHIArgLoadIntoPHI(PN);
-  
+
   // Scan the instruction, looking for input operations that can be folded away.
   // If all input operands to the phi are the same instruction (e.g. a cast from
   // the same type or "+42") we can pull the operation through the PHI, reducing
@@ -403,7 +403,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
   Constant *ConstantOp = 0;
   Type *CastSrcTy = 0;
   bool isNUW = false, isNSW = false, isExact = false;
-  
+
   if (isa<CastInst>(FirstInst)) {
     CastSrcTy = FirstInst->getOperand(0)->getType();
 
@@ -414,12 +414,12 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
         return 0;
     }
   } else if (isa<BinaryOperator>(FirstInst) || isa<CmpInst>(FirstInst)) {
-    // Can fold binop, compare or shift here if the RHS is a constant, 
+    // Can fold binop, compare or shift here if the RHS is a constant,
     // otherwise call FoldPHIArgBinOpIntoPHI.
     ConstantOp = dyn_cast<Constant>(FirstInst->getOperand(1));
     if (ConstantOp == 0)
       return FoldPHIArgBinOpIntoPHI(PN);
-    
+
     if (OverflowingBinaryOperator *BO =
         dyn_cast<OverflowingBinaryOperator>(FirstInst)) {
       isNUW = BO->hasNoUnsignedWrap();
@@ -442,7 +442,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
     } else if (I->getOperand(1) != ConstantOp) {
       return 0;
     }
-    
+
     if (isNUW)
       isNUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap();
     if (isNSW)
@@ -486,7 +486,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
     NewCI->setDebugLoc(FirstInst->getDebugLoc());
     return NewCI;
   }
-  
+
   if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst)) {
     BinOp = BinaryOperator::Create(BinOp->getOpcode(), PhiVal, ConstantOp);
     if (isNUW) BinOp->setHasNoUnsignedWrap();
@@ -495,7 +495,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
     BinOp->setDebugLoc(FirstInst->getDebugLoc());
     return BinOp;
   }
-  
+
   CmpInst *CIOp = cast<CmpInst>(FirstInst);
   CmpInst *NewCI = CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(),
                                    PhiVal, ConstantOp);
@@ -513,7 +513,7 @@ static bool DeadPHICycle(PHINode *PN,
   // Remember this node, and if we find the cycle, return.
   if (!PotentiallyDeadPHIs.insert(PN))
     return true;
-  
+
   // Don't scan crazily complex things.
   if (PotentiallyDeadPHIs.size() == 16)
     return false;
@@ -527,16 +527,16 @@ static bool DeadPHICycle(PHINode *PN,
 /// PHIsEqualValue - Return true if this phi node is always equal to
 /// NonPhiInVal.  This happens with mutually cyclic phi nodes like:
 ///   z = some value; x = phi (y, z); y = phi (x, z)
-static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal, 
+static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal,
                            SmallPtrSet<PHINode*, 16> &ValueEqualPHIs) {
   // See if we already saw this PHI node.
   if (!ValueEqualPHIs.insert(PN))
     return true;
-  
+
   // Don't scan crazily complex things.
   if (ValueEqualPHIs.size() == 16)
     return false;
- 
+
   // Scan the operands to see if they are either phi nodes or are equal to
   // the value.
   for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
@@ -547,7 +547,7 @@ static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal,
     } else if (Op != NonPhiInVal)
       return false;
   }
-  
+
   return true;
 }
 
@@ -557,10 +557,10 @@ struct PHIUsageRecord {
   unsigned PHIId;     // The ID # of the PHI (something determinstic to sort on)
   unsigned Shift;     // The amount shifted.
   Instruction *Inst;  // The trunc instruction.
-  
+
   PHIUsageRecord(unsigned pn, unsigned Sh, Instruction *User)
     : PHIId(pn), Shift(Sh), Inst(User) {}
-  
+
   bool operator<(const PHIUsageRecord &RHS) const {
     if (PHIId < RHS.PHIId) return true;
     if (PHIId > RHS.PHIId) return false;
@@ -570,15 +570,15 @@ struct PHIUsageRecord {
            RHS.Inst->getType()->getPrimitiveSizeInBits();
   }
 };
-  
+
 struct LoweredPHIRecord {
   PHINode *PN;        // The PHI that was lowered.
   unsigned Shift;     // The amount shifted.
   unsigned Width;     // The width extracted.
-  
+
   LoweredPHIRecord(PHINode *pn, unsigned Sh, Type *Ty)
     : PN(pn), Shift(Sh), Width(Ty->getPrimitiveSizeInBits()) {}
-  
+
   // Ctor form used by DenseMap.
   LoweredPHIRecord(PHINode *pn, unsigned Sh)
     : PN(pn), Shift(Sh), Width(0) {}
@@ -621,20 +621,20 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
   // PHIUsers - Keep track of all of the truncated values extracted from a set
   // of PHIs, along with their offset.  These are the things we want to rewrite.
   SmallVector<PHIUsageRecord, 16> PHIUsers;
-  
+
   // PHIs are often mutually cyclic, so we keep track of a whole set of PHI
   // nodes which are extracted from. PHIsToSlice is a set we use to avoid
   // revisiting PHIs, PHIsInspected is a ordered list of PHIs that we need to
   // check the uses of (to ensure they are all extracts).
   SmallVector<PHINode*, 8> PHIsToSlice;
   SmallPtrSet<PHINode*, 8> PHIsInspected;
-  
+
   PHIsToSlice.push_back(&FirstPhi);
   PHIsInspected.insert(&FirstPhi);
-  
+
   for (unsigned PHIId = 0; PHIId != PHIsToSlice.size(); ++PHIId) {
     PHINode *PN = PHIsToSlice[PHIId];
-    
+
     // Scan the input list of the PHI.  If any input is an invoke, and if the
     // input is defined in the predecessor, then we won't be split the critical
     // edge which is required to insert a truncate.  Because of this, we have to
@@ -644,85 +644,85 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
       if (II == 0) continue;
       if (II->getParent() != PN->getIncomingBlock(i))
         continue;
-     
+
       // If we have a phi, and if it's directly in the predecessor, then we have
       // a critical edge where we need to put the truncate.  Since we can't
       // split the edge in instcombine, we have to bail out.
       return 0;
     }
-      
-    
+
+
     for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end();
          UI != E; ++UI) {
       Instruction *User = cast<Instruction>(*UI);
-      
+
       // If the user is a PHI, inspect its uses recursively.
       if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
         if (PHIsInspected.insert(UserPN))
           PHIsToSlice.push_back(UserPN);
         continue;
       }
-      
+
       // Truncates are always ok.
       if (isa<TruncInst>(User)) {
         PHIUsers.push_back(PHIUsageRecord(PHIId, 0, User));
         continue;
       }
-      
+
       // Otherwise it must be a lshr which can only be used by one trunc.
       if (User->getOpcode() != Instruction::LShr ||
           !User->hasOneUse() || !isa<TruncInst>(User->use_back()) ||
           !isa<ConstantInt>(User->getOperand(1)))
         return 0;
-      
+
       unsigned Shift = cast<ConstantInt>(User->getOperand(1))->getZExtValue();
       PHIUsers.push_back(PHIUsageRecord(PHIId, Shift, User->use_back()));
     }
   }
-  
+
   // If we have no users, they must be all self uses, just nuke the PHI.
   if (PHIUsers.empty())
     return ReplaceInstUsesWith(FirstPhi, UndefValue::get(FirstPhi.getType()));
-  
+
   // If this phi node is transformable, create new PHIs for all the pieces
   // extracted out of it.  First, sort the users by their offset and size.
   array_pod_sort(PHIUsers.begin(), PHIUsers.end());
-  
+
   DEBUG(errs() << "SLICING UP PHI: " << FirstPhi << '\n';
             for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i)
               errs() << "AND USER PHI #" << i << ": " << *PHIsToSlice[i] <<'\n';
         );
-  
+
   // PredValues - This is a temporary used when rewriting PHI nodes.  It is
   // hoisted out here to avoid construction/destruction thrashing.
   DenseMap<BasicBlock*, Value*> PredValues;
-  
+
   // ExtractedVals - Each new PHI we introduce is saved here so we don't
   // introduce redundant PHIs.
   DenseMap<LoweredPHIRecord, PHINode*> ExtractedVals;
-  
+
   for (unsigned UserI = 0, UserE = PHIUsers.size(); UserI != UserE; ++UserI) {
     unsigned PHIId = PHIUsers[UserI].PHIId;
     PHINode *PN = PHIsToSlice[PHIId];
     unsigned Offset = PHIUsers[UserI].Shift;
     Type *Ty = PHIUsers[UserI].Inst->getType();
-    
+
     PHINode *EltPHI;
-    
+
     // If we've already lowered a user like this, reuse the previously lowered
     // value.
     if ((EltPHI = ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)]) == 0) {
-      
+
       // Otherwise, Create the new PHI node for this user.
       EltPHI = PHINode::Create(Ty, PN->getNumIncomingValues(),
                                PN->getName()+".off"+Twine(Offset), PN);
       assert(EltPHI->getType() != PN->getType() &&
              "Truncate didn't shrink phi?");
-    
+
       for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
         BasicBlock *Pred = PN->getIncomingBlock(i);
         Value *&PredVal = PredValues[Pred];
-        
+
         // If we already have a value for this predecessor, reuse it.
         if (PredVal) {
           EltPHI->addIncoming(PredVal, Pred);
@@ -736,7 +736,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
           EltPHI->addIncoming(PredVal, Pred);
           continue;
         }
-        
+
         if (PHINode *InPHI = dyn_cast<PHINode>(PN)) {
           // If the incoming value was a PHI, and if it was one of the PHIs we
           // already rewrote it, just use the lowered value.
@@ -746,7 +746,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
             continue;
           }
         }
-        
+
         // Otherwise, do an extract in the predecessor.
         Builder->SetInsertPoint(Pred, Pred->getTerminator());
         Value *Res = InVal;
@@ -756,7 +756,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
         Res = Builder->CreateTrunc(Res, Ty, "extract.t");
         PredVal = Res;
         EltPHI->addIncoming(Res, Pred);
-        
+
         // If the incoming value was a PHI, and if it was one of the PHIs we are
         // rewriting, we will ultimately delete the code we inserted.  This
         // means we need to revisit that PHI to make sure we extract out the
@@ -765,22 +765,22 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
           if (PHIsInspected.count(OldInVal)) {
             unsigned RefPHIId = std::find(PHIsToSlice.begin(),PHIsToSlice.end(),
                                           OldInVal)-PHIsToSlice.begin();
-            PHIUsers.push_back(PHIUsageRecord(RefPHIId, Offset, 
+            PHIUsers.push_back(PHIUsageRecord(RefPHIId, Offset,
                                               cast<Instruction>(Res)));
             ++UserE;
           }
       }
       PredValues.clear();
-      
+
       DEBUG(errs() << "  Made element PHI for offset " << Offset << ": "
                    << *EltPHI << '\n');
       ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)] = EltPHI;
     }
-    
+
     // Replace the use of this piece with the PHI node.
     ReplaceInstUsesWith(*PHIUsers[UserI].Inst, EltPHI);
   }
-  
+
   // Replace all the remaining uses of the PHI nodes (self uses and the lshrs)
   // with undefs.
   Value *Undef = UndefValue::get(FirstPhi.getType());
@@ -818,7 +818,7 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
       if (DeadPHICycle(PU, PotentiallyDeadPHIs))
         return ReplaceInstUsesWith(PN, UndefValue::get(PN.getType()));
     }
-   
+
     // If this phi has a single use, and if that use just computes a value for
     // the next iteration of a loop, delete the phi.  This occurs with unused
     // induction variables, e.g. "for (int j = 0; ; ++j);".  Detecting this
@@ -847,7 +847,7 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
 
     if (InValNo != NumIncomingVals) {
       Value *NonPhiInVal = PN.getIncomingValue(InValNo);
-      
+
       // Scan the rest of the operands to see if there are any conflicts, if so
       // there is no need to recursively scan other phis.
       for (++InValNo; InValNo != NumIncomingVals; ++InValNo) {
@@ -855,7 +855,7 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
         if (OpVal != NonPhiInVal && !isa<PHINode>(OpVal))
           break;
       }
-      
+
       // If we scanned over all operands, then we have one unique value plus
       // phi values.  Scan PHI nodes to see if they all merge in each other or
       // the value.
@@ -899,6 +899,6 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
       !TD->isLegalInteger(PN.getType()->getPrimitiveSizeInBits()))
     if (Instruction *Res = SliceUpIllegalIntegerPHI(PN))
       return Res;
-  
+
   return 0;
 }
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 121aa1f..59502fb 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -350,6 +350,68 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
   return 0;
 }
 
+/// foldSelectICmpAndOr - We want to turn:
+///   (select (icmp eq (and X, C1), 0), Y, (or Y, C2))
+/// into:
+///   (or (shl (and X, C1), C3), Y)
+/// iff:
+///   C1 and C2 are both powers of 2
+/// where:
+///   C3 = Log(C2) - Log(C1)
+///
+/// Also handles an inverted icmp predicate, swapped select operands, and
+/// C1 > C2 (an lshr replaces the shl).  Only scalar integer selects are
+/// folded: a scalar condition may select between vectors, and the masked
+/// scalar cannot be zext'd/trunc'd to a vector type.  Returns 0 on no match.
+static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal,
+                                  Value *FalseVal,
+                                  InstCombiner::BuilderTy *Builder) {
+  const ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition());
+  if (!IC || !IC->isEquality() || !SI.getType()->isIntegerTy())
+    return 0;
+
+  Value *CmpLHS = IC->getOperand(0);
+  Value *CmpRHS = IC->getOperand(1);
+
+  if (!match(CmpRHS, m_Zero()))
+    return 0;
+
+  Value *X;
+  const APInt *C1;
+  if (!match(CmpLHS, m_And(m_Value(X), m_Power2(C1))))
+    return 0;
+
+  const APInt *C2;
+  bool OrOnTrueVal = false;
+  bool OrOnFalseVal = match(FalseVal, m_Or(m_Specific(TrueVal), m_Power2(C2)));
+  if (!OrOnFalseVal)
+    OrOnTrueVal = match(TrueVal, m_Or(m_Specific(FalseVal), m_Power2(C2)));
+
+  if (!OrOnFalseVal && !OrOnTrueVal)
+    return 0;
+
+  Value *V = CmpLHS;
+  Value *Y = OrOnFalseVal ? TrueVal : FalseVal;
+  // Move the tested bit from C1's position to C2's, converting to Y's type.
+  unsigned C1Log = C1->logBase2();
+  unsigned C2Log = C2->logBase2();
+  if (C2Log > C1Log) {
+    V = Builder->CreateZExtOrTrunc(V, Y->getType());
+    V = Builder->CreateShl(V, C2Log - C1Log);
+  } else if (C1Log > C2Log) {
+    V = Builder->CreateLShr(V, C1Log - C2Log);
+    V = Builder->CreateZExtOrTrunc(V, Y->getType());
+  } else
+    V = Builder->CreateZExtOrTrunc(V, Y->getType());
+  // If the 'or' arm is chosen when the tested bit is clear, flip the bit.
+  ICmpInst::Predicate Pred = IC->getPredicate();
+  if ((Pred == ICmpInst::ICMP_NE && OrOnFalseVal) ||
+      (Pred == ICmpInst::ICMP_EQ && OrOnTrueVal))
+    V = Builder->CreateXor(V, *C2);
+
+  return Builder->CreateOr(V, Y);
+}
+
 /// visitSelectInstWithICmp - Visit a SelectInst that has an
 /// ICmpInst as its first operand.
 ///
@@ -521,6 +583,9 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
     }
   }
 
+  if (Value *V = foldSelectICmpAndOr(SI, TrueVal, FalseVal, Builder))
+    return ReplaceInstUsesWith(SI, V);
+
   return Changed ? &SI : 0;
 }
 
@@ -676,7 +741,8 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
       // Change: A = select B, false, C --> A = and !B, C
       Value *NotCond = Builder->CreateNot(CondVal, "not."+CondVal->getName());
       return BinaryOperator::CreateAnd(NotCond, FalseVal);
-    } else if (ConstantInt *C = dyn_cast<ConstantInt>(FalseVal)) {
+    }
+    if (ConstantInt *C = dyn_cast<ConstantInt>(FalseVal)) {
       if (C->getZExtValue() == false) {
         // Change: A = select B, C, false --> A = and B, C
         return BinaryOperator::CreateAnd(CondVal, TrueVal);
@@ -690,14 +756,14 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
     // select a, a, b  -> a|b
     if (CondVal == TrueVal)
       return BinaryOperator::CreateOr(CondVal, FalseVal);
-    else if (CondVal == FalseVal)
+    if (CondVal == FalseVal)
       return BinaryOperator::CreateAnd(CondVal, TrueVal);
 
     // select a, ~a, b -> (~a)&b
     // select a, b, ~a -> (~a)|b
     if (match(TrueVal, m_Not(m_Specific(CondVal))))
       return BinaryOperator::CreateAnd(TrueVal, FalseVal);
-    else if (match(FalseVal, m_Not(m_Specific(CondVal))))
+    if (match(FalseVal, m_Not(m_Specific(CondVal))))
       return BinaryOperator::CreateOr(TrueVal, FalseVal);
   }
 
@@ -838,7 +904,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
             Value *NewFalseOp = NegVal;
             if (AddOp != TI)
               std::swap(NewTrueOp, NewFalseOp);
-            Value *NewSel = 
+            Value *NewSel =
               Builder->CreateSelect(CondVal, NewTrueOp,
                                     NewFalseOp, SI.getName() + ".p");
 
@@ -862,7 +928,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
     Value *LHS, *RHS, *LHS2, *RHS2;
     if (SelectPatternFlavor SPF = MatchSelectPattern(&SI, LHS, RHS)) {
       if (SelectPatternFlavor SPF2 = MatchSelectPattern(LHS, LHS2, RHS2))
-        if (Instruction *R = FoldSPFofSPF(cast<Instruction>(LHS),SPF2,LHS2,RHS2, 
+        if (Instruction *R = FoldSPFofSPF(cast<Instruction>(LHS),SPF2,LHS2,RHS2,
                                           SI, SPF, RHS))
           return R;
       if (SelectPatternFlavor SPF2 = MatchSelectPattern(RHS, LHS2, RHS2))
@@ -908,7 +974,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
     return &SI;
   }
 
-  if (VectorType *VecTy = dyn_cast<VectorType>(SI.getType())) {
+  if (VectorType* VecTy = dyn_cast<VectorType>(SI.getType())) {
     unsigned VWidth = VecTy->getNumElements();
     APInt UndefElts(VWidth, 0);
     APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
@@ -918,24 +984,6 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
       return &SI;
     }
 
-    if (ConstantVector *CV = dyn_cast<ConstantVector>(CondVal)) {
-      // Form a shufflevector instruction.
-      SmallVector<Constant *, 8> Mask(VWidth);
-      Type *Int32Ty = Type::getInt32Ty(CV->getContext());
-      for (unsigned i = 0; i != VWidth; ++i) {
-        Constant *Elem = cast<Constant>(CV->getOperand(i));
-        if (ConstantInt *E = dyn_cast<ConstantInt>(Elem))
-          Mask[i] = ConstantInt::get(Int32Ty, i + (E->isZero() ? VWidth : 0));
-        else if (isa<UndefValue>(Elem))
-          Mask[i] = UndefValue::get(Int32Ty);
-        else
-          return 0;
-      }
-      Constant *MaskVal = ConstantVector::get(Mask);
-      Value *V = Builder->CreateShuffleVector(TrueVal, FalseVal, MaskVal);
-      return ReplaceInstUsesWith(SI, V);
-    }
-
     if (isa<ConstantAggregateZero>(CondVal)) {
       return ReplaceInstUsesWith(SI, FalseVal);
     }
diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 4f71db1..4301ddb 100644
--- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -105,6 +105,75 @@ static Value *FindScalarElement(Value *V, unsigned EltNo) {
   return 0;
 }
 
+// Try to replace a vector PHI by a scalar PHI. Applies when PN has exactly two
+// uses: the extractelement EI, and a cheap-to-scalarize binary operator whose
+// only use is PN itself. Returns the result of ReplaceInstUsesWith(EI, new
+// scalar PHI) on success and NULL when the pattern does not match.
+Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
+  // Verify that the PHI node has exactly 2 uses. Otherwise return NULL.
+  if (!PN->hasNUses(2))
+    return NULL;
+
+  // EI is one of the two users; find the other. It is dereferenced
+  // unconditionally below, so use cast<> instead of an unchecked dyn_cast<>.
+  Value::use_iterator iu = PN->use_begin();
+  Instruction *PHIUser = cast<Instruction>(*iu);
+  if (PHIUser == cast<Instruction>(&EI))
+    PHIUser = cast<Instruction>(*(++iu));
+
+  // Verify that this PHI user has one use, which is the PHI itself,
+  // and that it is a binary operation which is cheap to scalarize.
+  // Otherwise return NULL.
+  if (!PHIUser->hasOneUse() || !(PHIUser->use_back() == PN) ||
+    !(isa<BinaryOperator>(PHIUser)) || !CheapToScalarize(PHIUser, true))
+    return NULL;
+
+  // Create a scalar PHI node that will replace the vector PHI node
+  // just before the current PHI node.
+  PHINode * scalarPHI = cast<PHINode>(
+    InsertNewInstWith(PHINode::Create(EI.getType(),
+    PN->getNumIncomingValues(), ""), *PN));
+  // Scalarize each PHI operand.
+  for (unsigned i=0; i < PN->getNumIncomingValues(); i++) {
+    Value *PHIInVal = PN->getIncomingValue(i);
+    BasicBlock *inBB = PN->getIncomingBlock(i);
+    Value *Elt = EI.getIndexOperand();
+    // If the operand is the PHI induction variable:
+    if (PHIInVal == PHIUser) {
+      // Scalarize the binary operation: one operand becomes the scalar PHI,
+      // the other is extracted from the remaining vector operand right before
+      // B0, so that it dominates the scalarized operation.
+      BinaryOperator *B0 = cast<BinaryOperator>(PHIUser);
+      bool PNIsLHS = B0->getOperand(0) == PN;
+      Value *Other = B0->getOperand(PNIsLHS ? 1 : 0);
+      Value *Op = InsertNewInstWith(
+        ExtractElementInst::Create(Other, Elt, Other->getName()+".Elt"), *B0);
+      // Keep the operand order: B0 may be non-commutative (e.g. sub, shl).
+      Value *newPHIUser = InsertNewInstWith(
+        BinaryOperator::Create(B0->getOpcode(), PNIsLHS ? scalarPHI : Op,
+                               PNIsLHS ? Op : scalarPHI), *B0);
+      scalarPHI->addIncoming(newPHIUser, inBB);
+    } else {
+      // Scalarize PHI input:
+      Instruction *newEI = ExtractElementInst::Create(PHIInVal, Elt, "");
+      // Insert the new instruction into the predecessor basic block.
+      Instruction *pos = dyn_cast<Instruction>(PHIInVal);
+      BasicBlock::iterator InsertPos;
+      if (pos && !isa<PHINode>(pos)) {
+        InsertPos = pos;
+        ++InsertPos;
+      } else {
+        InsertPos = inBB->getFirstInsertionPt();
+      }
+
+      InsertNewInstWith(newEI, *InsertPos);
+
+      scalarPHI->addIncoming(newEI, inBB);
+    }
+  }
+  return ReplaceInstUsesWith(EI, scalarPHI);
+}
+
 Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
   // If vector val is constant with all elements the same, replace EI with
   // that element.  We handle a known element # below.
@@ -149,6 +218,14 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
           if (Value *Elt = FindScalarElement(BCI->getOperand(0), IndexVal))
             return new BitCastInst(Elt, EI.getType());
     }
+
+    // If there's a vector PHI feeding a scalar use through this extractelement
+    // instruction, try to scalarize the PHI.
+    if (PHINode *PN = dyn_cast<PHINode>(EI.getOperand(0))) {
+      Instruction *scalarPHI = scalarizePHI(EI, PN);
+      if (scalarPHI)
+        return (scalarPHI);
+    }
   }
 
   if (Instruction *I = dyn_cast<Instruction>(EI.getOperand(0))) {
@@ -201,10 +278,10 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
     } else if (CastInst *CI = dyn_cast<CastInst>(I)) {
       // Canonicalize extractelement(cast) -> cast(extractelement)
       // bitcasts can change the number of vector elements and they cost nothing
-      if (CI->hasOneUse() && EI.hasOneUse() &&
-          (CI->getOpcode() != Instruction::BitCast)) {
+      if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) {
         Value *EE = Builder->CreateExtractElement(CI->getOperand(0),
                                                   EI.getIndexOperand());
+        Worklist.AddValue(EE);
         return CastInst::Create(CI->getOpcode(), EE, EI.getType());
       }
     }
@@ -336,6 +413,10 @@ static Value *CollectShuffleElements(Value *V, SmallVectorImpl<Constant*> &Mask,
 
         if (VecOp == RHS) {
           Value *V = CollectShuffleElements(EI->getOperand(0), Mask, RHS);
+          // Update Mask to reflect that `ScalarOp' has been inserted at
+          // position `InsertedIdx' within the vector returned by IEI.
+          Mask[InsertedIdx % NumElts] = Mask[ExtractedIdx];
+
           // Everything but the extracted element is replaced with the RHS.
           for (unsigned i = 0; i != NumElts; ++i) {
             if (i != InsertedIdx)
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index c6115e3..ec10751 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1483,7 +1483,7 @@ Instruction *InstCombiner::visitAllocSite(Instruction &MI) {
       Module *M = II->getParent()->getParent()->getParent();
       Function *F = Intrinsic::getDeclaration(M, Intrinsic::donothing);
       InvokeInst::Create(F, II->getNormalDest(), II->getUnwindDest(),
-                         ArrayRef<Value *>(), "", II->getParent());
+                         None, "", II->getParent());
     }
     return EraseInstFromFunction(MI);
   }
diff --git a/lib/Transforms/Instrumentation/BlackList.cpp b/lib/Transforms/Instrumentation/BlackList.cpp
index 927982d..39de4b0 100644
--- a/lib/Transforms/Instrumentation/BlackList.cpp
+++ b/lib/Transforms/Instrumentation/BlackList.cpp
@@ -110,7 +110,8 @@ static StringRef GetGVTypeString(const GlobalVariable &G) {
 bool BlackList::isInInit(const GlobalVariable &G) const {
   return (isIn(*G.getParent()) ||
           inSection("global-init", G.getName()) ||
-          inSection("global-init-type", GetGVTypeString(G)));
+          inSection("global-init-type", GetGVTypeString(G)) ||
+          inSection("global-init-src", G.getParent()->getModuleIdentifier()));
 }
 
 bool BlackList::inSection(const StringRef Section,
diff --git a/lib/Transforms/Instrumentation/Instrumentation.cpp b/lib/Transforms/Instrumentation/Instrumentation.cpp
index 8ba1025..9f35396 100644
--- a/lib/Transforms/Instrumentation/Instrumentation.cpp
+++ b/lib/Transforms/Instrumentation/Instrumentation.cpp
@@ -13,6 +13,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/InitializePasses.h"
+#include "llvm/PassRegistry.h"
 #include "llvm-c/Initialization.h"
 
 using namespace llvm;
diff --git a/lib/Transforms/ObjCARC/ObjCARC.cpp b/lib/Transforms/ObjCARC/ObjCARC.cpp
index 53a31b0..373168e 100644
--- a/lib/Transforms/ObjCARC/ObjCARC.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARC.cpp
@@ -30,6 +30,7 @@ using namespace llvm::objcarc;
 bool llvm::objcarc::EnableARCOpts;
 static cl::opt<bool, true>
 EnableARCOptimizations("enable-objc-arc-opts",
+                       cl::desc("enable/disable all ARC Optimizations"),
                        cl::location(EnableARCOpts),
                        cl::init(true));
 
diff --git a/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/lib/Transforms/ObjCARC/ObjCARCContract.cpp
index b96c64f..c43f4f4 100644
--- a/lib/Transforms/ObjCARC/ObjCARCContract.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCContract.cpp
@@ -66,6 +66,8 @@ namespace {
     Constant *RetainAutoreleaseCallee;
     /// Declaration for objc_retainAutoreleaseReturnValue().
     Constant *RetainAutoreleaseRVCallee;
+    /// Declaration for objc_retainAutoreleasedReturnValue().
+    Constant *RetainRVCallee;
 
     /// The inline asm string to insert between calls and RetainRV calls to make
     /// the optimization work on targets which need it.
@@ -77,9 +79,12 @@ namespace {
     SmallPtrSet<CallInst *, 8> StoreStrongCalls;
 
     Constant *getStoreStrongCallee(Module *M);
+    Constant *getRetainRVCallee(Module *M);
     Constant *getRetainAutoreleaseCallee(Module *M);
     Constant *getRetainAutoreleaseRVCallee(Module *M);
 
+    bool OptimizeRetainCall(Function &F, Instruction *Retain);
+
     bool ContractAutorelease(Function &F, Instruction *Autorelease,
                              InstructionClass Class,
                              SmallPtrSet<Instruction *, 4>
@@ -172,6 +177,57 @@ Constant *ObjCARCContract::getRetainAutoreleaseRVCallee(Module *M) {
   return RetainAutoreleaseRVCallee;
 }
 
+Constant *ObjCARCContract::getRetainRVCallee(Module *M) {
+  // Reuse the declaration created by an earlier call, if any.
+  if (RetainRVCallee)
+    return RetainRVCallee;
+  // Build the type i8* (i8*) and declare the function as nounwind.
+  LLVMContext &Ctx = M->getContext();
+  Type *I8Ptr = PointerType::getUnqual(Type::getInt8Ty(Ctx));
+  Type *ArgTys[] = { I8Ptr };
+  AttributeSet Attrs = AttributeSet().addAttribute(
+      Ctx, AttributeSet::FunctionIndex, Attribute::NoUnwind);
+  RetainRVCallee = M->getOrInsertFunction(
+      "objc_retainAutoreleasedReturnValue",
+      FunctionType::get(I8Ptr, ArgTys, /*isVarArg=*/false), Attrs);
+  return RetainRVCallee;
+}
+
+/// Turn objc_retain into objc_retainAutoreleasedReturnValue if the operand is a
+/// return value; returns true if the call was rewritten. We do this late so we
+/// do not disrupt the dataflow analysis in ObjCARCOpt.
+bool
+ObjCARCContract::OptimizeRetainCall(Function &F, Instruction *Retain) {
+  ImmutableCallSite CS(GetObjCArg(Retain));
+  const Instruction *Call = CS.getInstruction();
+  if (!Call)
+    return false;
+  if (Call->getParent() != Retain->getParent())
+    return false;
+
+  // Check that the call is next to the retain, skipping no-op instructions.
+  BasicBlock::const_iterator I = Call;
+  ++I;
+  while (IsNoopInstruction(I)) ++I;
+  if (&*I != Retain)
+    return false;
+
+  // Turn it into a call to objc_retainAutoreleasedReturnValue.
+  Changed = true;
+  ++NumPeeps;
+
+  DEBUG(dbgs() << "Transforming objc_retain => "
+                  "objc_retainAutoreleasedReturnValue since the operand is a "
+                  "return value.\nOld: "<< *Retain << "\n");
+
+  // The tail call / does-not-throw markers need no adjustment since
+  // retain and retainRV have the same properties.
+  cast<CallInst>(Retain)->setCalledFunction(getRetainRVCallee(F.getParent()));
+
+  DEBUG(dbgs() << "New: " << *Retain << "\n");
+  return true;
+}
+
 /// Merge an autorelease with a retain into a fused call.
 bool
 ObjCARCContract::ContractAutorelease(Function &F, Instruction *Autorelease,
@@ -329,6 +385,7 @@ bool ObjCARCContract::doInitialization(Module &M) {
   StoreStrongCallee = 0;
   RetainAutoreleaseCallee = 0;
   RetainAutoreleaseRVCallee = 0;
+  RetainRVCallee = 0;
 
   // Initialize RetainRVMarker.
   RetainRVMarker = 0;
@@ -380,7 +437,6 @@ bool ObjCARCContract::runOnFunction(Function &F) {
     // objc_retainBlock does not necessarily return its argument.
     InstructionClass Class = GetBasicInstructionClass(Inst);
     switch (Class) {
-    case IC_Retain:
     case IC_FusedRetainAutorelease:
     case IC_FusedRetainAutoreleaseRV:
       break;
@@ -389,6 +445,13 @@ bool ObjCARCContract::runOnFunction(Function &F) {
       if (ContractAutorelease(F, Inst, Class, DependingInstructions, Visited))
         continue;
       break;
+    case IC_Retain:
+      // Attempt to convert retains to retainrvs if they are next to function
+      // calls.
+      if (!OptimizeRetainCall(F, Inst))
+        break;
+      // If we succeed in our optimization, fall through.
+      // FALLTHROUGH
     case IC_RetainRV: {
       // If we're compiling for a target which needs a special inline-asm
       // marker to do the retainAutoreleasedReturnValue optimization,
diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
index 92d6fc4..43e2e20 100644
--- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
@@ -191,13 +191,13 @@ static bool DoesRetainableObjPtrEscape(const User *Ptr) {
   do {
     const Value *V = Worklist.pop_back_val();
 
-    DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Visiting: " << *V << "\n");
+    DEBUG(dbgs() << "Visiting: " << *V << "\n");
 
     for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end();
          UI != UE; ++UI) {
       const User *UUser = *UI;
 
-      DEBUG(dbgs() << "DoesRetainableObjPtrEscape: User: " << *UUser << "\n");
+      DEBUG(dbgs() << "User: " << *UUser << "\n");
 
       // Special - Use by a call (callee or argument) is not considered
       // to be an escape.
@@ -207,8 +207,7 @@ static bool DoesRetainableObjPtrEscape(const User *Ptr) {
       case IC_StoreStrong:
       case IC_Autorelease:
       case IC_AutoreleaseRV: {
-        DEBUG(dbgs() << "DoesRetainableObjPtrEscape: User copies pointer "
-              "arguments. Pointer Escapes!\n");
+        DEBUG(dbgs() << "User copies pointer arguments. Pointer Escapes!\n");
         // These special functions make copies of their pointer arguments.
         return true;
       }
@@ -223,12 +222,11 @@ static bool DoesRetainableObjPtrEscape(const User *Ptr) {
             isa<PHINode>(UUser) || isa<SelectInst>(UUser)) {
 
           if (VisitedSet.insert(UUser)) {
-            DEBUG(dbgs() << "DoesRetainableObjPtrEscape: User copies value. "
-                  "Ptr escapes if result escapes. Adding to list.\n");
+            DEBUG(dbgs() << "User copies value. Ptr escapes if result escapes."
+                  " Adding to list.\n");
             Worklist.push_back(UUser);
           } else {
-            DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Already visited node."
-                  "\n");
+            DEBUG(dbgs() << "Already visited node.\n");
           }
           continue;
         }
@@ -245,13 +243,13 @@ static bool DoesRetainableObjPtrEscape(const User *Ptr) {
         continue;
       }
       // Otherwise, conservatively assume an escape.
-      DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Assuming ptr escapes.\n");
+      DEBUG(dbgs() << "Assuming ptr escapes.\n");
       return true;
     }
   } while (!Worklist.empty());
 
   // No escapes found.
-  DEBUG(dbgs() << "DoesRetainableObjPtrEscape: Ptr does not escape.\n");
+  DEBUG(dbgs() << "Ptr does not escape.\n");
   return false;
 }
 
@@ -305,6 +303,16 @@ STATISTIC(NumRets,        "Number of return value forwarding "
                           "retain+autoreleaes eliminated");
 STATISTIC(NumRRs,         "Number of retain+release paths eliminated");
 STATISTIC(NumPeeps,       "Number of calls peephole-optimized");
+STATISTIC(NumRetainsBeforeOpt,
+          "Number of retains before optimization.");
+STATISTIC(NumReleasesBeforeOpt,
+          "Number of releases before optimization.");
+#ifndef NDEBUG
+STATISTIC(NumRetainsAfterOpt,
+          "Number of retains after optimization.");
+STATISTIC(NumReleasesAfterOpt,
+          "Number of releases after optimization.");
+#endif
 
 namespace {
   /// \enum Sequence
@@ -375,7 +383,7 @@ static Sequence MergeSeqs(Sequence A, Sequence B, bool TopDown) {
 namespace {
   /// \brief Unidirectional information about either a
   /// retain-decrement-use-release sequence or release-use-decrement-retain
-  /// reverese sequence.
+  /// reverse sequence.
   struct RRInfo {
     /// After an objc_retain, the reference count of the referenced
     /// object is known to be positive. Similarly, before an objc_release, the
@@ -410,6 +418,10 @@ namespace {
       KnownSafe(false), IsTailCallRelease(false), ReleaseMetadata(0) {}
 
     void clear();
+
+    /// Returns true when ReleaseMetadata is non-null. Declared const because
+    /// it only reads the member and never modifies this RRInfo.
+    bool IsTrackingImpreciseReleases() const { return ReleaseMetadata != 0; }
   };
 }
 
@@ -428,7 +440,7 @@ namespace {
     /// True if the reference count is known to be incremented.
     bool KnownPositiveRefCount;
 
-    /// True of we've seen an opportunity for partial RR elimination, such as
+    /// True if we've seen an opportunity for partial RR elimination, such as
     /// pushing calls into a CFG triangle or into one side of a CFG diamond.
     bool Partial;
 
@@ -457,6 +469,7 @@ namespace {
     }
 
     void SetSeq(Sequence NewSeq) {
+      DEBUG(dbgs() << "Old: " << Seq << "; New: " << NewSeq << "\n");
       Seq = NewSeq;
     }
 
@@ -469,7 +482,8 @@ namespace {
     }
 
     void ResetSequenceProgress(Sequence NewSeq) {
-      Seq = NewSeq;
+      DEBUG(dbgs() << "Resetting sequence progress.\n");
+      SetSeq(NewSeq);
       Partial = false;
       RRI.clear();
     }
@@ -706,7 +720,19 @@ void BBState::MergeSucc(const BBState &Other) {
 
 /// Enable/disable ARC sequence annotations.
 static cl::opt<bool>
-EnableARCAnnotations("enable-objc-arc-annotations", cl::init(false));
+EnableARCAnnotations("enable-objc-arc-annotations", cl::init(false),
+                     cl::desc("Enable emission of arc data flow analysis "
+                              "annotations"));
+static cl::opt<bool>
+DisableCheckForCFGHazards("disable-objc-arc-checkforcfghazards", cl::init(false),
+                          cl::desc("Disable check for cfg hazards when "
+                                   "annotating"));
+static cl::opt<std::string>
+ARCAnnotationTargetIdentifier("objc-arc-annotation-target-identifier",
+                              cl::init(""),
+                              cl::desc("filter out all data flow annotations "
+                                       "but those that apply to the given "
+                                       "target llvm identifier."));
 
 /// This function appends a unique ARCAnnotationProvenanceSourceMDKind id to an
 /// instruction so that we can track backwards when post processing via the llvm
@@ -791,6 +817,12 @@ static void AppendMDNodeToInstForPtr(unsigned NodeId,
 /// state of a pointer at the entrance to a basic block.
 static void GenerateARCBBEntranceAnnotation(const char *Name, BasicBlock *BB,
                                             Value *Ptr, Sequence Seq) {
+  // If we have a target identifier, make sure that we match it before
+  // continuing.
+  if(!ARCAnnotationTargetIdentifier.empty() &&
+     !Ptr->getName().equals(ARCAnnotationTargetIdentifier))
+    return;
+
   Module *M = BB->getParent()->getParent();
   LLVMContext &C = M->getContext();
   Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
@@ -828,6 +860,12 @@ static void GenerateARCBBEntranceAnnotation(const char *Name, BasicBlock *BB,
 /// of the pointer at the bottom of the basic block.
 static void GenerateARCBBTerminatorAnnotation(const char *Name, BasicBlock *BB,
                                               Value *Ptr, Sequence Seq) {
+  // If we have a target identifier, make sure that we match it before emitting
+  // an annotation.
+  if(!ARCAnnotationTargetIdentifier.empty() &&
+     !Ptr->getName().equals(ARCAnnotationTargetIdentifier))
+    return;
+
   Module *M = BB->getParent()->getParent();
   LLVMContext &C = M->getContext();
   Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
@@ -869,6 +907,12 @@ static void GenerateARCAnnotation(unsigned InstMDId,
                                   Sequence OldSeq,
                                   Sequence NewSeq) {
   if (EnableARCAnnotations) {
+    // If we have a target identifier, make sure that we match it before
+    // emitting an annotation.
+    if(!ARCAnnotationTargetIdentifier.empty() &&
+       !Ptr->getName().equals(ARCAnnotationTargetIdentifier))
+      return;
+
     // First generate the source annotation on our pointer. This will return an
     // MDString* if Ptr actually comes from an instruction implying we can put
     // in a source annotation. If AppendMDNodeToSourcePtr returns 0 (i.e. NULL),
@@ -909,27 +953,27 @@ static void GenerateARCAnnotation(unsigned InstMDId,
 
 #define ANNOTATE_BB(_states, _bb, _name, _type, _direction)                   \
   do {                                                                        \
-  if (EnableARCAnnotations) {                                                 \
-    for(BBState::ptr_const_iterator I = (_states)._direction##_ptr_begin(),   \
+    if (EnableARCAnnotations) {                                               \
+      for(BBState::ptr_const_iterator I = (_states)._direction##_ptr_begin(), \
           E = (_states)._direction##_ptr_end(); I != E; ++I) {                \
-      Value *Ptr = const_cast<Value*>(I->first);                              \
-      Sequence Seq = I->second.GetSeq();                                      \
-      GenerateARCBB ## _type ## Annotation(_name, (_bb), Ptr, Seq);           \
+        Value *Ptr = const_cast<Value*>(I->first);                            \
+        Sequence Seq = I->second.GetSeq();                                    \
+        GenerateARCBB ## _type ## Annotation(_name, (_bb), Ptr, Seq);         \
+      }                                                                       \
     }                                                                         \
-  }                                                                           \
-} while (0)
+  } while (0)
 
-#define ANNOTATE_BOTTOMUP_BBSTART(_states, _basicblock) \
+#define ANNOTATE_BOTTOMUP_BBSTART(_states, _basicblock)                       \
     ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.bottomup.bbstart", \
                 Entrance, bottom_up)
-#define ANNOTATE_BOTTOMUP_BBEND(_states, _basicblock) \
-    ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.bottomup.bbend", \
+#define ANNOTATE_BOTTOMUP_BBEND(_states, _basicblock)                         \
+    ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.bottomup.bbend",   \
                 Terminator, bottom_up)
-#define ANNOTATE_TOPDOWN_BBSTART(_states, _basicblock) \
-    ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.topdown.bbstart", \
+#define ANNOTATE_TOPDOWN_BBSTART(_states, _basicblock)                        \
+    ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.topdown.bbstart",  \
                 Entrance, top_down)
-#define ANNOTATE_TOPDOWN_BBEND(_states, _basicblock) \
-    ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.topdown.bbend", \
+#define ANNOTATE_TOPDOWN_BBEND(_states, _basicblock)                          \
+    ANNOTATE_BB(_states, _basicblock, "llvm.arc.annotation.topdown.bbend",    \
                 Terminator, top_down)
 
 #else // !ARC_ANNOTATION
@@ -955,9 +999,6 @@ namespace {
     /// them. These are initialized lazily to avoid cluttering up the Module
     /// with unused declarations.
 
-    /// Declaration for ObjC runtime function
-    /// objc_retainAutoreleasedReturnValue.
-    Constant *RetainRVCallee;
     /// Declaration for ObjC runtime function objc_autoreleaseReturnValue.
     Constant *AutoreleaseRVCallee;
     /// Declaration for ObjC runtime function objc_release.
@@ -991,7 +1032,6 @@ namespace {
     unsigned ARCAnnotationProvenanceSourceMDKind;
 #endif // ARC_ANNOATIONS
 
-    Constant *getRetainRVCallee(Module *M);
     Constant *getAutoreleaseRVCallee(Module *M);
     Constant *getReleaseCallee(Module *M);
     Constant *getRetainCallee(Module *M);
@@ -1000,7 +1040,6 @@ namespace {
 
     bool IsRetainBlockOptimizable(const Instruction *Inst);
 
-    void OptimizeRetainCall(Function &F, Instruction *Retain);
     bool OptimizeRetainRVCall(Function &F, Instruction *RetainRV);
     void OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV,
                                    InstructionClass &Class);
@@ -1059,6 +1098,10 @@ namespace {
 
     void OptimizeReturns(Function &F);
 
+#ifndef NDEBUG
+    void GatherStatistics(Function &F, bool AfterOptimization = false);
+#endif
+
     virtual void getAnalysisUsage(AnalysisUsage &AU) const;
     virtual bool doInitialization(Module &M);
     virtual bool runOnFunction(Function &F);
@@ -1106,22 +1149,6 @@ bool ObjCARCOpt::IsRetainBlockOptimizable(const Instruction *Inst) {
   return true;
 }
 
-Constant *ObjCARCOpt::getRetainRVCallee(Module *M) {
-  if (!RetainRVCallee) {
-    LLVMContext &C = M->getContext();
-    Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
-    Type *Params[] = { I8X };
-    FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false);
-    AttributeSet Attribute =
-      AttributeSet().addAttribute(M->getContext(), AttributeSet::FunctionIndex,
-                                  Attribute::NoUnwind);
-    RetainRVCallee =
-      M->getOrInsertFunction("objc_retainAutoreleasedReturnValue", FTy,
-                             Attribute);
-  }
-  return RetainRVCallee;
-}
-
 Constant *ObjCARCOpt::getAutoreleaseRVCallee(Module *M) {
   if (!AutoreleaseRVCallee) {
     LLVMContext &C = M->getContext();
@@ -1201,38 +1228,6 @@ Constant *ObjCARCOpt::getAutoreleaseCallee(Module *M) {
   return AutoreleaseCallee;
 }
 
-/// Turn objc_retain into objc_retainAutoreleasedReturnValue if the operand is a
-/// return value.
-void
-ObjCARCOpt::OptimizeRetainCall(Function &F, Instruction *Retain) {
-  ImmutableCallSite CS(GetObjCArg(Retain));
-  const Instruction *Call = CS.getInstruction();
-  if (!Call) return;
-  if (Call->getParent() != Retain->getParent()) return;
-
-  // Check that the call is next to the retain.
-  BasicBlock::const_iterator I = Call;
-  ++I;
-  while (IsNoopInstruction(I)) ++I;
-  if (&*I != Retain)
-    return;
-
-  // Turn it to an objc_retainAutoreleasedReturnValue..
-  Changed = true;
-  ++NumPeeps;
-
-  DEBUG(dbgs() << "ObjCARCOpt::OptimizeRetainCall: Transforming "
-                  "objc_retain => objc_retainAutoreleasedReturnValue"
-                  " since the operand is a return value.\n"
-                  "                                Old: "
-               << *Retain << "\n");
-
-  cast<CallInst>(Retain)->setCalledFunction(getRetainRVCallee(F.getParent()));
-
-  DEBUG(dbgs() << "                                New: "
-               << *Retain << "\n");
-}
-
 /// Turn objc_retainAutoreleasedReturnValue into objc_retain if the operand is
 /// not a return value.  Or, if it can be paired with an
 /// objc_autoreleaseReturnValue, delete the pair and return true.
@@ -1269,9 +1264,8 @@ ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) {
       Changed = true;
       ++NumPeeps;
 
-      DEBUG(dbgs() << "ObjCARCOpt::OptimizeRetainRVCall: Erasing " << *I << "\n"
-                   << "                                  Erasing " << *RetainRV
-                   << "\n");
+      DEBUG(dbgs() << "Erasing autoreleaseRV,retainRV pair: " << *I << "\n"
+                   << "Erasing " << *RetainRV << "\n");
 
       EraseInstruction(I);
       EraseInstruction(RetainRV);
@@ -1283,16 +1277,13 @@ ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) {
   Changed = true;
   ++NumPeeps;
 
-  DEBUG(dbgs() << "ObjCARCOpt::OptimizeRetainRVCall: Transforming "
-                  "objc_retainAutoreleasedReturnValue => "
+  DEBUG(dbgs() << "Transforming objc_retainAutoreleasedReturnValue => "
                   "objc_retain since the operand is not a return value.\n"
-                  "                                  Old: "
-               << *RetainRV << "\n");
+                  "Old = " << *RetainRV << "\n");
 
   cast<CallInst>(RetainRV)->setCalledFunction(getRetainCallee(F.getParent()));
 
-  DEBUG(dbgs() << "                                  New: "
-               << *RetainRV << "\n");
+  DEBUG(dbgs() << "New = " << *RetainRV << "\n");
 
   return false;
 }
@@ -1321,12 +1312,10 @@ ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV,
   Changed = true;
   ++NumPeeps;
 
-  DEBUG(dbgs() << "ObjCARCOpt::OptimizeAutoreleaseRVCall: Transforming "
-                  "objc_autoreleaseReturnValue => "
+  DEBUG(dbgs() << "Transforming objc_autoreleaseReturnValue => "
                   "objc_autorelease since its operand is not used as a return "
                   "value.\n"
-                  "                                       Old: "
-               << *AutoreleaseRV << "\n");
+                  "Old = " << *AutoreleaseRV << "\n");
 
   CallInst *AutoreleaseRVCI = cast<CallInst>(AutoreleaseRV);
   AutoreleaseRVCI->
@@ -1334,8 +1323,7 @@ ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV,
   AutoreleaseRVCI->setTailCall(false); // Never tail call objc_autorelease.
   Class = IC_Autorelease;
 
-  DEBUG(dbgs() << "                                       New: "
-               << *AutoreleaseRV << "\n");
+  DEBUG(dbgs() << "New: " << *AutoreleaseRV << "\n");
 
 }
 
@@ -1359,18 +1347,24 @@ ObjCARCOpt::OptimizeRetainBlockCall(Function &F, Instruction *Inst,
   if (!IsRetainBlockOptimizable(Inst))
     return false;
 
+  Changed = true;
+  ++NumPeeps;
+
+  DEBUG(dbgs() << "Strength reduced retainBlock => retain.\n");
+  DEBUG(dbgs() << "Old: " << *Inst << "\n");
   CallInst *RetainBlock = cast<CallInst>(Inst);
   RetainBlock->setCalledFunction(getRetainCallee(F.getParent()));
   // Remove copy_on_escape metadata.
   RetainBlock->setMetadata(CopyOnEscapeMDKind, 0);
   Class = IC_Retain;
-
+  DEBUG(dbgs() << "New: " << *Inst << "\n");
   return true;
 }
 
 /// Visit each call, one at a time, and make simplifications without doing any
 /// additional analysis.
 void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
+  DEBUG(dbgs() << "\n== ObjCARCOpt::OptimizeIndividualCalls ==\n");
   // Reset all the flags in preparation for recomputing them.
   UsedInThisFunction = 0;
 
@@ -1380,8 +1374,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
 
     InstructionClass Class = GetBasicInstructionClass(Inst);
 
-    DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Visiting: Class: "
-          << Class << "; " << *Inst << "\n");
+    DEBUG(dbgs() << "Visiting: Class: " << Class << "; " << *Inst << "\n");
 
     switch (Class) {
     default: break;
@@ -1397,8 +1390,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
     case IC_NoopCast:
       Changed = true;
       ++NumNoops;
-      DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Erasing no-op cast:"
-                   " " << *Inst << "\n");
+      DEBUG(dbgs() << "Erasing no-op cast: " << *Inst << "\n");
       EraseInstruction(Inst);
       continue;
 
@@ -1416,11 +1408,8 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
                       Constant::getNullValue(Ty),
                       CI);
         llvm::Value *NewValue = UndefValue::get(CI->getType());
-        DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: A null "
-                        "pointer-to-weak-pointer is undefined behavior.\n"
-                        "                                     Old = " << *CI <<
-                        "\n                                     New = " <<
-                        *NewValue << "\n");
+        DEBUG(dbgs() << "A null pointer-to-weak-pointer is undefined behavior."
+                       "\nOld = " << *CI << "\nNew = " << *NewValue << "\n");
         CI->replaceAllUsesWith(NewValue);
         CI->eraseFromParent();
         continue;
@@ -1439,11 +1428,8 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
                       CI);
 
         llvm::Value *NewValue = UndefValue::get(CI->getType());
-        DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: A null "
-                        "pointer-to-weak-pointer is undefined behavior.\n"
-                        "                                     Old = " << *CI <<
-                        "\n                                     New = " <<
-                        *NewValue << "\n");
+        DEBUG(dbgs() << "A null pointer-to-weak-pointer is undefined behavior."
+                        "\nOld = " << *CI << "\nNew = " << *NewValue << "\n");
 
         CI->replaceAllUsesWith(NewValue);
         CI->eraseFromParent();
@@ -1452,13 +1438,13 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
       break;
     }
     case IC_RetainBlock:
-      // If we strength reduce an objc_retainBlock to amn objc_retain, continue
+      // If we strength reduce an objc_retainBlock to an objc_retain, continue
       // onto the objc_retain peephole optimizations. Otherwise break.
       if (!OptimizeRetainBlockCall(F, Inst, Class))
         break;
       // FALLTHROUGH
     case IC_Retain:
-      OptimizeRetainCall(F, Inst);
+      ++NumRetainsBeforeOpt;
       break;
     case IC_RetainRV:
       if (OptimizeRetainRVCall(F, Inst))
@@ -1467,6 +1453,9 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
     case IC_AutoreleaseRV:
       OptimizeAutoreleaseRVCall(F, Inst, Class);
       break;
+    case IC_Release:
+      ++NumReleasesBeforeOpt;
+      break;
     }
 
     // objc_autorelease(x) -> objc_release(x) if x is otherwise unused.
@@ -1483,15 +1472,11 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
         CallInst *NewCall =
           CallInst::Create(getReleaseCallee(F.getParent()),
                            Call->getArgOperand(0), "", Call);
-        NewCall->setMetadata(ImpreciseReleaseMDKind,
-                             MDNode::get(C, ArrayRef<Value *>()));
+        NewCall->setMetadata(ImpreciseReleaseMDKind, MDNode::get(C, None));
 
-        DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Replacing "
-                        "objc_autorelease(x) with objc_release(x) since x is "
-                        "otherwise unused.\n"
-                        "                                     Old: " << *Call <<
-                        "\n                                     New: " <<
-                        *NewCall << "\n");
+        DEBUG(dbgs() << "Replacing autorelease{,RV}(x) with objc_release(x) "
+              "since x is otherwise unused.\nOld: " << *Call << "\nNew: "
+              << *NewCall << "\n");
 
         EraseInstruction(Call);
         Inst = NewCall;
@@ -1503,9 +1488,8 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
     // a tail keyword.
     if (IsAlwaysTail(Class)) {
       Changed = true;
-      DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Adding tail keyword"
-            " to function since it can never be passed stack args: " << *Inst <<
-            "\n");
+      DEBUG(dbgs() << "Adding tail keyword to function since it can never be "
+                      "passed stack args: " << *Inst << "\n");
       cast<CallInst>(Inst)->setTailCall();
     }
 
@@ -1513,8 +1497,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
     // semantics of ARC truly do not do so.
     if (IsNeverTail(Class)) {
       Changed = true;
-      DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Removing tail "
-            "keyword from function: " << *Inst <<
+      DEBUG(dbgs() << "Removing tail keyword from function: " << *Inst <<
             "\n");
       cast<CallInst>(Inst)->setTailCall(false);
     }
@@ -1522,8 +1505,8 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
     // Set nounwind as needed.
     if (IsNoThrow(Class)) {
       Changed = true;
-      DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Found no throw"
-            " class. Setting nounwind on: " << *Inst << "\n");
+      DEBUG(dbgs() << "Found no throw class. Setting nounwind on: " << *Inst
+                   << "\n");
       cast<CallInst>(Inst)->setDoesNotThrow();
     }
 
@@ -1538,8 +1521,8 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
     if (IsNullOrUndef(Arg)) {
       Changed = true;
       ++NumNoops;
-      DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: ARC calls with "
-            " null are no-ops. Erasing: " << *Inst << "\n");
+      DEBUG(dbgs() << "ARC calls with  null are no-ops. Erasing: " << *Inst
+            << "\n");
       EraseInstruction(Inst);
       continue;
     }
@@ -1633,10 +1616,9 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
               Clone->setArgOperand(0, Op);
               Clone->insertBefore(InsertPos);
 
-              DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Cloning "
+              DEBUG(dbgs() << "Cloning "
                            << *CInst << "\n"
-                           "                                     And inserting "
-                           "clone at " << *InsertPos << "\n");
+                           "And inserting clone at " << *InsertPos << "\n");
               Worklist.push_back(std::make_pair(Clone, Incoming));
             }
           }
@@ -1648,7 +1630,65 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
       }
     } while (!Worklist.empty());
   }
-  DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: Finished List.\n");
+}
+
+/// Called when the top-down state S is S_Use: fold one successor's bottom-up
+/// state (SuccSSeq, SuccSRRIKnownSafe) into S and the caller's hazard flags.
+static void CheckForUseCFGHazard(const Sequence SuccSSeq,
+                                 const bool SuccSRRIKnownSafe,
+                                 PtrState &S,
+                                 bool &SomeSuccHasSame,
+                                 bool &AllSuccsHaveSame,
+                                 bool &ShouldContinue) {
+  switch (SuccSSeq) {
+  case S_CanRelease: {
+    if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) { // Neither is known safe.
+      S.ClearSequenceProgress();
+      break;
+    }
+    ShouldContinue = true; // Caller moves straight on to the next successor.
+    break;
+  }
+  case S_Use: // Successor is in the same state as S.
+    SomeSuccHasSame = true;
+    break;
+  case S_Stop:
+  case S_Release:
+  case S_MovableRelease:
+    if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) // Neither is known safe.
+      AllSuccsHaveSame = false;
+    break;
+  case S_Retain:
+    llvm_unreachable("bottom-up pointer in retain state!");
+  case S_None: // The caller handles S_None before dispatching here.
+    llvm_unreachable("This should have been handled earlier.");
+  }
+}
+
+/// Called when the top-down state S is S_CanRelease: compare it against one
+/// successor's bottom-up state (SuccSSeq, SuccSRRIKnownSafe) and update the
+/// caller's hazard flags. S itself is only read here.
+static void CheckForCanReleaseCFGHazard(const Sequence SuccSSeq,
+                                        const bool SuccSRRIKnownSafe,
+                                        PtrState &S,
+                                        bool &SomeSuccHasSame,
+                                        bool &AllSuccsHaveSame) {
+  switch (SuccSSeq) {
+  case S_CanRelease: // Successor is in the same state as S.
+    SomeSuccHasSame = true;
+    break;
+  case S_Stop:
+  case S_Release:
+  case S_MovableRelease:
+  case S_Use:
+    if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) // Neither is known safe.
+      AllSuccsHaveSame = false;
+    break;
+  case S_Retain:
+    llvm_unreachable("bottom-up pointer in retain state!");
+  case S_None: // The caller handles S_None before dispatching here.
+    llvm_unreachable("This should have been handled earlier.");
+  }
 }
 
 /// Check for critical edges, loop boundaries, irreducible control flow, or
@@ -1661,106 +1701,82 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
   // If any top-down local-use or possible-dec has a succ which is earlier in
   // the sequence, forget it.
   for (BBState::ptr_iterator I = MyStates.top_down_ptr_begin(),
-       E = MyStates.top_down_ptr_end(); I != E; ++I)
-    switch (I->second.GetSeq()) {
-    default: break;
-    case S_Use: {
-      const Value *Arg = I->first;
-      const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
-      bool SomeSuccHasSame = false;
-      bool AllSuccsHaveSame = true;
-      PtrState &S = I->second;
-      succ_const_iterator SI(TI), SE(TI, false);
-
-      for (; SI != SE; ++SI) {
-        Sequence SuccSSeq = S_None;
-        bool SuccSRRIKnownSafe = false;
-        // If VisitBottomUp has pointer information for this successor, take
-        // what we know about it.
-        DenseMap<const BasicBlock *, BBState>::iterator BBI =
-          BBStates.find(*SI);
-        assert(BBI != BBStates.end());
-        const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg);
-        SuccSSeq = SuccS.GetSeq();
-        SuccSRRIKnownSafe = SuccS.RRI.KnownSafe;
-        switch (SuccSSeq) {
-        case S_None:
-        case S_CanRelease: {
-          if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) {
-            S.ClearSequenceProgress();
-            break;
-          }
-          continue;
-        }
-        case S_Use:
-          SomeSuccHasSame = true;
-          break;
-        case S_Stop:
-        case S_Release:
-        case S_MovableRelease:
-          if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe)
-            AllSuccsHaveSame = false;
-          break;
-        case S_Retain:
-          llvm_unreachable("bottom-up pointer in retain state!");
-        }
-      }
-      // If the state at the other end of any of the successor edges
-      // matches the current state, require all edges to match. This
-      // guards against loops in the middle of a sequence.
-      if (SomeSuccHasSame && !AllSuccsHaveSame)
+         E = MyStates.top_down_ptr_end(); I != E; ++I) {
+    PtrState &S = I->second;
+    const Sequence Seq = I->second.GetSeq();
+
+    // We only care about S_Retain, S_CanRelease, and S_Use.
+    if (Seq == S_None)
+      continue;
+
+    // Make sure that if extra top down states are added in the future that this
+    // code is updated to handle it.
+    assert((Seq == S_Retain || Seq == S_CanRelease || Seq == S_Use) &&
+           "Unknown top down sequence state.");
+
+    const Value *Arg = I->first;
+    const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
+    bool SomeSuccHasSame = false;
+    bool AllSuccsHaveSame = true;
+
+    succ_const_iterator SI(TI), SE(TI, false);
+
+    for (; SI != SE; ++SI) {
+      // If VisitBottomUp has pointer information for this successor, take
+      // what we know about it.
+      const DenseMap<const BasicBlock *, BBState>::iterator BBI =
+        BBStates.find(*SI);
+      assert(BBI != BBStates.end());
+      const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg);
+      const Sequence SuccSSeq = SuccS.GetSeq();
+
+      // If the pointer is in the S_None state bottom up, clear the sequence
+      // progress: the bottom-up sequence has already finished, which
+      // suggests a mismatch between retains and releases. This is true for
+      // all three cases that we are handling here: S_Retain, S_Use, and
+      // S_CanRelease.
+      if (SuccSSeq == S_None) {
         S.ClearSequenceProgress();
-      break;
-    }
-    case S_CanRelease: {
-      const Value *Arg = I->first;
-      const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
-      bool SomeSuccHasSame = false;
-      bool AllSuccsHaveSame = true;
-      PtrState &S = I->second;
-      succ_const_iterator SI(TI), SE(TI, false);
-
-      for (; SI != SE; ++SI) {
-        Sequence SuccSSeq = S_None;
-        bool SuccSRRIKnownSafe = false;
-        // If VisitBottomUp has pointer information for this successor, take
-        // what we know about it.
-        DenseMap<const BasicBlock *, BBState>::iterator BBI =
-          BBStates.find(*SI);
-        assert(BBI != BBStates.end());
-        const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg);
-        SuccSSeq = SuccS.GetSeq();
-        SuccSRRIKnownSafe = SuccS.RRI.KnownSafe;
-        switch (SuccSSeq) {
-        case S_None: {
-          if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) {
-            S.ClearSequenceProgress();
-            break;
-          }
+        continue;
+      }
+
+      // If we have S_Use or S_CanRelease, perform the corresponding CFG
+      // hazard checks.
+      const bool SuccSRRIKnownSafe = SuccS.RRI.KnownSafe;
+
+      // *NOTE* We do not use Seq from above here since we are allowing for
+      // S.GetSeq() to change while we are visiting basic blocks.
+      switch(S.GetSeq()) {
+      case S_Use: {
+        bool ShouldContinue = false;
+        CheckForUseCFGHazard(SuccSSeq, SuccSRRIKnownSafe, S,
+                             SomeSuccHasSame, AllSuccsHaveSame,
+                             ShouldContinue);
+        if (ShouldContinue)
           continue;
-        }
-        case S_CanRelease:
-          SomeSuccHasSame = true;
-          break;
-        case S_Stop:
-        case S_Release:
-        case S_MovableRelease:
-        case S_Use:
-          if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe)
-            AllSuccsHaveSame = false;
-          break;
-        case S_Retain:
-          llvm_unreachable("bottom-up pointer in retain state!");
-        }
+        break;
+      }
+      case S_CanRelease: {
+        CheckForCanReleaseCFGHazard(SuccSSeq, SuccSRRIKnownSafe,
+                                    S, SomeSuccHasSame,
+                                    AllSuccsHaveSame);
+        break;
+      }
+      case S_Retain:
+      case S_None:
+      case S_Stop:
+      case S_Release:
+      case S_MovableRelease:
+        break;
       }
-      // If the state at the other end of any of the successor edges
-      // matches the current state, require all edges to match. This
-      // guards against loops in the middle of a sequence.
-      if (SomeSuccHasSame && !AllSuccsHaveSame)
-        S.ClearSequenceProgress();
-      break;
-    }
     }
+
+    // If the state at the other end of any of the successor edges
+    // matches the current state, require all edges to match. This
+    // guards against loops in the middle of a sequence.
+    if (SomeSuccHasSame && !AllSuccsHaveSame)
+      S.ClearSequenceProgress();
+  }
 }
 
 bool
@@ -1772,6 +1788,8 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
   InstructionClass Class = GetInstructionClass(Inst);
   const Value *Arg = 0;
 
+  DEBUG(dbgs() << "Class: " << Class << "\n");
+
   switch (Class) {
   case IC_Release: {
     Arg = GetObjCArg(Inst);
@@ -1786,8 +1804,7 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
     // pairs by making PtrState hold a stack of states, but this is
     // simple and avoids adding overhead for the non-nested case.
     if (S.GetSeq() == S_Release || S.GetSeq() == S_MovableRelease) {
-      DEBUG(dbgs() << "ObjCARCOpt::VisitInstructionBottomUp: Found nested "
-                      "releases (i.e. a release pair)\n");
+      DEBUG(dbgs() << "Found nested releases (i.e. a release pair)\n");
       NestingDetected = true;
     }
 
@@ -1820,7 +1837,10 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
     case S_Release:
     case S_MovableRelease:
     case S_Use:
-      S.RRI.ReverseInsertPts.clear();
+      // If OldSeq is not S_Use or OldSeq is S_Use and we are tracking an
+      // imprecise release, clear our reverse insertion points.
+      if (OldSeq != S_Use || S.RRI.IsTrackingImpreciseReleases())
+        S.RRI.ReverseInsertPts.clear();
       // FALL THROUGH
     case S_CanRelease:
       // Don't do retain+release tracking for IC_RetainRV, because it's
@@ -1835,7 +1855,8 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
       llvm_unreachable("bottom-up pointer in retain state!");
     }
     ANNOTATE_BOTTOMUP(Inst, Arg, OldSeq, S.GetSeq());
-    return NestingDetected;
+    // A retain moving bottom up can be a use.
+    break;
   }
   case IC_AutoreleasepoolPop:
     // Conservatively, clear MyStates for all known pointers.
@@ -1861,6 +1882,8 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
 
     // Check for possible releases.
     if (CanAlterRefCount(Inst, Ptr, PA, Class)) {
+      DEBUG(dbgs() << "CanAlterRefCount: Seq: " << Seq << "; " << *Ptr
+            << "\n");
       S.ClearKnownPositiveRefCount();
       switch (Seq) {
       case S_Use:
@@ -1883,6 +1906,8 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
     case S_Release:
     case S_MovableRelease:
       if (CanUse(Inst, Ptr, PA, Class)) {
+        DEBUG(dbgs() << "CanUse: Seq: " << Seq << "; " << *Ptr
+              << "\n");
         assert(S.RRI.ReverseInsertPts.empty());
         // If this is an invoke instruction, we're scanning it as part of
         // one of its successor blocks, since we can't insert code after it
@@ -1894,6 +1919,8 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
         S.SetSeq(S_Use);
         ANNOTATE_BOTTOMUP(Inst, Ptr, Seq, S_Use);
       } else if (Seq == S_Release && IsUser(Class)) {
+        DEBUG(dbgs() << "PreciseReleaseUse: Seq: " << Seq << "; " << *Ptr
+              << "\n");
         // Non-movable releases depend on any possible objc pointer use.
         S.SetSeq(S_Stop);
         ANNOTATE_BOTTOMUP(Inst, Ptr, S_Release, S_Stop);
@@ -1907,6 +1934,8 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
       break;
     case S_Stop:
       if (CanUse(Inst, Ptr, PA, Class)) {
+        DEBUG(dbgs() << "PreciseStopUse: Seq: " << Seq << "; " << *Ptr
+              << "\n");
         S.SetSeq(S_Use);
         ANNOTATE_BOTTOMUP(Inst, Ptr, Seq, S_Use);
       }
@@ -1927,6 +1956,9 @@ bool
 ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
                           DenseMap<const BasicBlock *, BBState> &BBStates,
                           MapVector<Value *, RRInfo> &Retains) {
+
+  DEBUG(dbgs() << "\n== ObjCARCOpt::VisitBottomUp ==\n");
+
   bool NestingDetected = false;
   BBState &MyStates = BBStates[BB];
 
@@ -1960,7 +1992,7 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
     if (isa<InvokeInst>(Inst))
       continue;
 
-    DEBUG(dbgs() << "ObjCARCOpt::VisitButtonUp: Visiting " << *Inst << "\n");
+    DEBUG(dbgs() << "Visiting " << *Inst << "\n");
 
     NestingDetected |= VisitInstructionBottomUp(Inst, BB, Retains, MyStates);
   }
@@ -2033,13 +2065,18 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst,
     PtrState &S = MyStates.getPtrTopDownState(Arg);
     S.ClearKnownPositiveRefCount();
 
-    switch (S.GetSeq()) {
+    Sequence OldSeq = S.GetSeq();
+
+    MDNode *ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
+
+    switch (OldSeq) {
     case S_Retain:
     case S_CanRelease:
-      S.RRI.ReverseInsertPts.clear();
+      if (OldSeq == S_Retain || ReleaseMetadata != 0)
+        S.RRI.ReverseInsertPts.clear();
       // FALL THROUGH
     case S_Use:
-      S.RRI.ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
+      S.RRI.ReleaseMetadata = ReleaseMetadata;
       S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
       Releases[Inst] = S.RRI;
       ANNOTATE_TOPDOWN(Inst, Arg, S.GetSeq(), S_None);
@@ -2078,6 +2115,8 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst,
 
     // Check for possible releases.
     if (CanAlterRefCount(Inst, Ptr, PA, Class)) {
+      DEBUG(dbgs() << "CanAlterRefCount: Seq: " << Seq << "; " << *Ptr
+            << "\n");
       S.ClearKnownPositiveRefCount();
       switch (Seq) {
       case S_Retain:
@@ -2105,6 +2144,8 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst,
     switch (Seq) {
     case S_CanRelease:
       if (CanUse(Inst, Ptr, PA, Class)) {
+        DEBUG(dbgs() << "CanUse: Seq: " << Seq << "; " << *Ptr
+              << "\n");
         S.SetSeq(S_Use);
         ANNOTATE_TOPDOWN(Inst, Ptr, Seq, S_Use);
       }
@@ -2127,6 +2168,7 @@ bool
 ObjCARCOpt::VisitTopDown(BasicBlock *BB,
                          DenseMap<const BasicBlock *, BBState> &BBStates,
                          DenseMap<Value *, RRInfo> &Releases) {
+  DEBUG(dbgs() << "\n== ObjCARCOpt::VisitTopDown ==\n");
   bool NestingDetected = false;
   BBState &MyStates = BBStates[BB];
 
@@ -2156,7 +2198,7 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB,
   for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
     Instruction *Inst = I;
 
-    DEBUG(dbgs() << "ObjCARCOpt::VisitTopDown: Visiting " << *Inst << "\n");
+    DEBUG(dbgs() << "Visiting " << *Inst << "\n");
 
     NestingDetected |= VisitInstructionTopDown(Inst, Releases, MyStates);
   }
@@ -2165,6 +2207,9 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB,
   // bottom of the basic block.
   ANNOTATE_TOPDOWN_BBEND(MyStates, BB);
 
+#ifdef ARC_ANNOTATIONS
+  if (!(EnableARCAnnotations && DisableCheckForCFGHazards))
+#endif
   CheckForCFGHazards(BB, BBStates, MyStates);
   return NestingDetected;
 }
@@ -2296,6 +2341,8 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
   Type *ArgTy = Arg->getType();
   Type *ParamTy = PointerType::getUnqual(Type::getInt8Ty(ArgTy->getContext()));
 
+  DEBUG(dbgs() << "== ObjCARCOpt::MoveCalls ==\n");
+
   // Insert the new retain and release calls.
   for (SmallPtrSet<Instruction *, 2>::const_iterator
        PI = ReleasesToMove.ReverseInsertPts.begin(),
@@ -2308,10 +2355,8 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
     Call->setDoesNotThrow();
     Call->setTailCall();
 
-    DEBUG(dbgs() << "ObjCARCOpt::MoveCalls: Inserting new Release: " << *Call
-                 << "\n"
-                    "                       At insertion point: " << *InsertPt
-                 << "\n");
+    DEBUG(dbgs() << "Inserting new Retain: " << *Call << "\n"
+                    "At insertion point: " << *InsertPt << "\n");
   }
   for (SmallPtrSet<Instruction *, 2>::const_iterator
        PI = RetainsToMove.ReverseInsertPts.begin(),
@@ -2328,10 +2373,8 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
     if (ReleasesToMove.IsTailCallRelease)
       Call->setTailCall();
 
-    DEBUG(dbgs() << "ObjCARCOpt::MoveCalls: Inserting new Retain: " << *Call
-                 << "\n"
-                    "                       At insertion point: " << *InsertPt
-                 << "\n");
+    DEBUG(dbgs() << "Inserting new Release: " << *Call << "\n"
+                    "At insertion point: " << *InsertPt << "\n");
   }
 
   // Delete the original retain and release calls.
@@ -2341,8 +2384,7 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
     Instruction *OrigRetain = *AI;
     Retains.blot(OrigRetain);
     DeadInsts.push_back(OrigRetain);
-    DEBUG(dbgs() << "ObjCARCOpt::MoveCalls: Deleting retain: " << *OrigRetain <<
-                    "\n");
+    DEBUG(dbgs() << "Deleting retain: " << *OrigRetain << "\n");
   }
   for (SmallPtrSet<Instruction *, 2>::const_iterator
        AI = ReleasesToMove.Calls.begin(),
@@ -2350,9 +2392,9 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
     Instruction *OrigRelease = *AI;
     Releases.erase(OrigRelease);
     DeadInsts.push_back(OrigRelease);
-    DEBUG(dbgs() << "ObjCARCOpt::MoveCalls: Deleting release: " << *OrigRelease
-                 << "\n");
+    DEBUG(dbgs() << "Deleting release: " << *OrigRelease << "\n");
   }
+
 }
 
 bool
@@ -2506,6 +2548,12 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
   if (OldDelta != 0)
     return false;
 
+#ifdef ARC_ANNOTATIONS
+  // Do not move calls if ARC annotations are requested.
+  if (EnableARCAnnotations)
+    return false;
+#endif // ARC_ANNOTATIONS
+
   Changed = true;
   assert(OldCount != 0 && "Unreachable code?");
   NumRRs += OldCount - NewCount;
@@ -2524,6 +2572,8 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
                                  MapVector<Value *, RRInfo> &Retains,
                                  DenseMap<Value *, RRInfo> &Releases,
                                  Module *M) {
+  DEBUG(dbgs() << "\n== ObjCARCOpt::PerformCodePlacement ==\n");
+
   bool AnyPairsCompletelyEliminated = false;
   RRInfo RetainsToMove;
   RRInfo ReleasesToMove;
@@ -2539,8 +2589,7 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
 
     Instruction *Retain = cast<Instruction>(V);
 
-    DEBUG(dbgs() << "ObjCARCOpt::PerformCodePlacement: Visiting: " << *Retain
-          << "\n");
+    DEBUG(dbgs() << "Visiting: " << *Retain << "\n");
 
     Value *Arg = GetObjCArg(Retain);
 
@@ -2567,12 +2616,6 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
                             ReleasesToMove, Arg, KnownSafe,
                             AnyPairsCompletelyEliminated);
 
-#ifdef ARC_ANNOTATIONS
-    // Do not move calls if ARC annotations are requested. If we were to move
-    // calls in this case, we would not be able
-    PerformMoveCalls = PerformMoveCalls && !EnableARCAnnotations;
-#endif // ARC_ANNOTATIONS
-
     if (PerformMoveCalls) {
       // Ok, everything checks out and we're all set. Let's move/delete some
       // code!
@@ -2597,14 +2640,15 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
 
 /// Weak pointer optimizations.
 void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
+  DEBUG(dbgs() << "\n== ObjCARCOpt::OptimizeWeakCalls ==\n");
+
   // First, do memdep-style RLE and S2L optimizations. We can't use memdep
   // itself because it uses AliasAnalysis and we need to do provenance
   // queries instead.
   for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
     Instruction *Inst = &*I++;
 
-    DEBUG(dbgs() << "ObjCARCOpt::OptimizeWeakCalls: Visiting: " << *Inst <<
-          "\n");
+    DEBUG(dbgs() << "Visiting: " << *Inst << "\n");
 
     InstructionClass Class = GetBasicInstructionClass(Inst);
     if (Class != IC_LoadWeak && Class != IC_LoadWeakRetained)
@@ -2752,9 +2796,6 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
     done:;
     }
   }
-
-  DEBUG(dbgs() << "ObjCARCOpt::OptimizeWeakCalls: Finished List.\n\n");
-
 }
 
 /// Identify program paths which execute sequences of retains and releases which
@@ -2820,17 +2861,17 @@ FindPredecessorRetainWithSafePath(const Value *Arg, BasicBlock *BB,
                    BB, Autorelease, DepInsts, Visited, PA);
   if (DepInsts.size() != 1)
     return 0;
-  
+
   CallInst *Retain =
     dyn_cast_or_null<CallInst>(*DepInsts.begin());
-  
+
   // Check that we found a retain with the same argument.
   if (!Retain ||
       !IsRetain(GetBasicInstructionClass(Retain)) ||
       GetObjCArg(Retain) != Arg) {
     return 0;
   }
-  
+
   return Retain;
 }
 
@@ -2847,7 +2888,7 @@ FindPredecessorAutoreleaseWithSafePath(const Value *Arg, BasicBlock *BB,
                    BB, Ret, DepInsts, V, PA);
   if (DepInsts.size() != 1)
     return 0;
-  
+
   CallInst *Autorelease =
     dyn_cast_or_null<CallInst>(*DepInsts.begin());
   if (!Autorelease)
@@ -2857,7 +2898,7 @@ FindPredecessorAutoreleaseWithSafePath(const Value *Arg, BasicBlock *BB,
     return 0;
   if (GetObjCArg(Autorelease) != Arg)
     return 0;
-  
+
   return Autorelease;
 }
 
@@ -2873,60 +2914,87 @@ void ObjCARCOpt::OptimizeReturns(Function &F) {
   if (!F.getReturnType()->isPointerTy())
     return;
 
+  DEBUG(dbgs() << "\n== ObjCARCOpt::OptimizeReturns ==\n");
+
   SmallPtrSet<Instruction *, 4> DependingInstructions;
   SmallPtrSet<const BasicBlock *, 4> Visited;
   for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
     BasicBlock *BB = FI;
     ReturnInst *Ret = dyn_cast<ReturnInst>(&BB->back());
 
-    DEBUG(dbgs() << "ObjCARCOpt::OptimizeReturns: Visiting: " << *Ret << "\n");
-
     if (!Ret)
       continue;
+
+    DEBUG(dbgs() << "Visiting: " << *Ret << "\n"); // Ret is non-null here.
-    
+
     const Value *Arg = StripPointerCastsAndObjCCalls(Ret->getOperand(0));
-    
-    // Look for an ``autorelease'' instruction that is a predecssor of Ret and
+
+    // Look for an ``autorelease'' instruction that is a predecessor of Ret and
     // dependent on Arg such that there are no instructions dependent on Arg
     // that need a positive ref count in between the autorelease and Ret.
     CallInst *Autorelease =
       FindPredecessorAutoreleaseWithSafePath(Arg, BB, Ret,
                                              DependingInstructions, Visited,
                                              PA);
-    if (Autorelease) {
-      DependingInstructions.clear();
-      Visited.clear();
-      
-      CallInst *Retain =
-        FindPredecessorRetainWithSafePath(Arg, BB, Autorelease,
-                                          DependingInstructions, Visited, PA);
-      if (Retain) {
-        DependingInstructions.clear();
-        Visited.clear();
-        
-        // Check that there is nothing that can affect the reference count
-        // between the retain and the call.  Note that Retain need not be in BB.
-        if (HasSafePathToPredecessorCall(Arg, Retain, DependingInstructions,
-                                         Visited, PA)) {
-          // If so, we can zap the retain and autorelease.
-          Changed = true;
-          ++NumRets;
-          DEBUG(dbgs() << "ObjCARCOpt::OptimizeReturns: Erasing: " << *Retain
-                       << "\n                             Erasing: "
-                       << *Autorelease << "\n");
-          EraseInstruction(Retain);
-          EraseInstruction(Autorelease);
-        }
-      }
-    }
-    
     DependingInstructions.clear();
     Visited.clear();
+
+    if (!Autorelease)
+      continue;
+
+    CallInst *Retain =
+      FindPredecessorRetainWithSafePath(Arg, BB, Autorelease,
+                                        DependingInstructions, Visited, PA);
+    DependingInstructions.clear();
+    Visited.clear();
+
+    if (!Retain)
+      continue;
+
+    // Check that there is nothing that can affect the reference count
+    // between the retain and the call.  Note that Retain need not be in BB.
+    bool HasSafePathToCall = HasSafePathToPredecessorCall(Arg, Retain,
+                                                          DependingInstructions,
+                                                          Visited, PA);
+    DependingInstructions.clear();
+    Visited.clear();
+
+    if (!HasSafePathToCall)
+      continue;
+
+    // If so, we can zap the retain and autorelease.
+    Changed = true;
+    ++NumRets;
+    DEBUG(dbgs() << "Erasing: " << *Retain << "\nErasing: "
+          << *Autorelease << "\n");
+    EraseInstruction(Retain);
+    EraseInstruction(Autorelease);
   }
+}
 
-  DEBUG(dbgs() << "ObjCARCOpt::OptimizeReturns: Finished List.\n\n");
+#ifndef NDEBUG
+void
+ObjCARCOpt::GatherStatistics(Function &F, bool AfterOptimization) {
+  llvm::Statistic &NumRetains =
+    AfterOptimization? NumRetainsAfterOpt : NumRetainsBeforeOpt;
+  llvm::Statistic &NumReleases =
+    AfterOptimization? NumReleasesAfterOpt : NumReleasesBeforeOpt;
 
+  for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
+    Instruction *Inst = &*I++;
+    switch (GetBasicInstructionClass(Inst)) {
+    default:
+      break;
+    case IC_Retain:
+      ++NumRetains;
+      break;
+    case IC_Release:
+      ++NumReleases;
+      break;
+    }
+  }
 }
+#endif
 
 bool ObjCARCOpt::doInitialization(Module &M) {
   if (!EnableARCOpts)
@@ -2958,7 +3026,6 @@ bool ObjCARCOpt::doInitialization(Module &M) {
   // calls finalizers which can have arbitrary side effects.
 
   // These are initialized lazily.
-  RetainRVCallee = 0;
   AutoreleaseRVCallee = 0;
   ReleaseCallee = 0;
   RetainCallee = 0;
@@ -2978,7 +3045,8 @@ bool ObjCARCOpt::runOnFunction(Function &F) {
 
   Changed = false;
 
-  DEBUG(dbgs() << "ObjCARCOpt: Visiting Function: " << F.getName() << "\n");
+  DEBUG(dbgs() << "<<< ObjCARCOpt: Visiting Function: " << F.getName() << " >>>"
+        "\n");
 
   PA.setAA(&getAnalysis<AliasAnalysis>());
 
@@ -2986,7 +3054,7 @@ bool ObjCARCOpt::runOnFunction(Function &F) {
   // when compiling code that isn't ObjC, skip these if the relevant ObjC
   // library functions aren't declared.
 
-  // Preliminary optimizations. This also computs UsedInThisFunction.
+  // Preliminary optimizations. This also computes UsedInThisFunction.
   OptimizeIndividualCalls(F);
 
   // Optimizations for weak pointers.
@@ -3013,6 +3081,13 @@ bool ObjCARCOpt::runOnFunction(Function &F) {
                             (1 << IC_AutoreleaseRV)))
     OptimizeReturns(F);
 
+  // Gather statistics after optimization.
+#ifndef NDEBUG
+  if (AreStatisticsEnabled()) {
+    GatherStatistics(F, true);
+  }
+#endif
+
   DEBUG(dbgs() << "\n");
 
   return Changed;
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 015fd2e..f0d29c8 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -18,6 +18,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/ValueMap.h"
 #include "llvm/Analysis/DominatorInternals.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/InstructionSimplify.h"
@@ -88,7 +89,7 @@ namespace {
     /// Keeps track of non-local addresses that have been sunk into a block.
     /// This allows us to avoid inserting duplicate code for blocks with
     /// multiple load/stores of the same address.
-    DenseMap<Value*, Value*> SunkAddrs;
+    ValueMap<Value*, Value*> SunkAddrs;
 
     /// ModifiedDT - If CFG is modified in anyway, dominator tree may need to
     /// be updated.
@@ -1653,10 +1654,6 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
       // start of the block.
       CurInstIterator = BB->begin();
       SunkAddrs.clear();
-    } else {
-      // This address is now available for reassignment, so erase the table
-      // entry; we don't want to match some completely different instruction.
-      SunkAddrs[Addr] = 0;
     }
   }
   ++NumMemoryInsts;
@@ -1761,7 +1758,7 @@ bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
   if (!DefIsLiveOut)
     return false;
 
-  // Make sure non of the uses are PHI nodes.
+  // Make sure none of the uses are PHI nodes.
   for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
        UI != E; ++UI) {
     Instruction *User = cast<Instruction>(*UI);
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 129af8d..f350b9b 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -498,6 +498,75 @@ void ValueTable::verifyRemoved(const Value *V) const {
 //===----------------------------------------------------------------------===//
 
 namespace {
+  class GVN;
+  struct AvailableValueInBlock {
+    /// BB - The basic block in question.
+    BasicBlock *BB;
+    enum ValType {
+      SimpleVal,  // A simple offsetted value that is accessed.
+      LoadVal,    // A value produced by a load.
+      MemIntrin   // A memory intrinsic which is loaded from.
+    };
+  
+    /// V - The value that is live out of the block.
+    PointerIntPair<Value *, 2, ValType> Val;
+  
+    /// Offset - The byte offset in Val that is interesting for the load query.
+    unsigned Offset;
+  
+    static AvailableValueInBlock get(BasicBlock *BB, Value *V,
+                                     unsigned Offset = 0) {
+      AvailableValueInBlock Res;
+      Res.BB = BB;
+      Res.Val.setPointer(V);
+      Res.Val.setInt(SimpleVal);
+      Res.Offset = Offset;
+      return Res;
+    }
+  
+    static AvailableValueInBlock getMI(BasicBlock *BB, MemIntrinsic *MI,
+                                       unsigned Offset = 0) {
+      AvailableValueInBlock Res;
+      Res.BB = BB;
+      Res.Val.setPointer(MI);
+      Res.Val.setInt(MemIntrin);
+      Res.Offset = Offset;
+      return Res;
+    }
+  
+    static AvailableValueInBlock getLoad(BasicBlock *BB, LoadInst *LI,
+                                         unsigned Offset = 0) {
+      AvailableValueInBlock Res;
+      Res.BB = BB;
+      Res.Val.setPointer(LI);
+      Res.Val.setInt(LoadVal);
+      Res.Offset = Offset;
+      return Res;
+    }
+  
+    bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
+    bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; }
+    bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; }
+  
+    Value *getSimpleValue() const {
+      assert(isSimpleValue() && "Wrong accessor");
+      return Val.getPointer();
+    }
+  
+    LoadInst *getCoercedLoadValue() const {
+      assert(isCoercedLoadValue() && "Wrong accessor");
+      return cast<LoadInst>(Val.getPointer());
+    }
+  
+    MemIntrinsic *getMemIntrinValue() const {
+      assert(isMemIntrinValue() && "Wrong accessor");
+      return cast<MemIntrinsic>(Val.getPointer());
+    }
+  
+    /// MaterializeAdjustedValue - Emit code into this block to adjust the value
+    /// defined here to the specified type.  This handles various coercion cases.
+    Value *MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) const;
+  };
 
   class GVN : public FunctionPass {
     bool NoLoads;
@@ -519,6 +588,11 @@ namespace {
     BumpPtrAllocator TableAllocator;
 
     SmallVector<Instruction*, 8> InstrsToErase;
+
+    typedef SmallVector<NonLocalDepResult, 64> LoadDepVect;
+    typedef SmallVector<AvailableValueInBlock, 64> AvailValInBlkVect;
+    typedef SmallVector<BasicBlock*, 64> UnavailBlkVect;
+
   public:
     static char ID; // Pass identification, replacement for typeid
     explicit GVN(bool noloads = false)
@@ -599,11 +673,17 @@ namespace {
     }
 
 
-    // Helper fuctions
-    // FIXME: eliminate or document these better
+    // Helper functions for redundant load elimination
     bool processLoad(LoadInst *L);
-    bool processInstruction(Instruction *I);
     bool processNonLocalLoad(LoadInst *L);
+    void AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps, 
+                                 AvailValInBlkVect &ValuesPerBlock,
+                                 UnavailBlkVect &UnavailableBlocks);
+    bool PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, 
+                        UnavailBlkVect &UnavailableBlocks);
+
+    // Other helper routines
+    bool processInstruction(Instruction *I);
     bool processBlock(BasicBlock *BB);
     void dump(DenseMap<uint32_t, Value*> &d);
     bool iterateOnFunction(Function &F);
@@ -1159,114 +1239,6 @@ static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
   return ConstantFoldLoadFromConstPtr(Src, &TD);
 }
 
-namespace {
-
-struct AvailableValueInBlock {
-  /// BB - The basic block in question.
-  BasicBlock *BB;
-  enum ValType {
-    SimpleVal,  // A simple offsetted value that is accessed.
-    LoadVal,    // A value produced by a load.
-    MemIntrin   // A memory intrinsic which is loaded from.
-  };
-
-  /// V - The value that is live out of the block.
-  PointerIntPair<Value *, 2, ValType> Val;
-
-  /// Offset - The byte offset in Val that is interesting for the load query.
-  unsigned Offset;
-
-  static AvailableValueInBlock get(BasicBlock *BB, Value *V,
-                                   unsigned Offset = 0) {
-    AvailableValueInBlock Res;
-    Res.BB = BB;
-    Res.Val.setPointer(V);
-    Res.Val.setInt(SimpleVal);
-    Res.Offset = Offset;
-    return Res;
-  }
-
-  static AvailableValueInBlock getMI(BasicBlock *BB, MemIntrinsic *MI,
-                                     unsigned Offset = 0) {
-    AvailableValueInBlock Res;
-    Res.BB = BB;
-    Res.Val.setPointer(MI);
-    Res.Val.setInt(MemIntrin);
-    Res.Offset = Offset;
-    return Res;
-  }
-
-  static AvailableValueInBlock getLoad(BasicBlock *BB, LoadInst *LI,
-                                       unsigned Offset = 0) {
-    AvailableValueInBlock Res;
-    Res.BB = BB;
-    Res.Val.setPointer(LI);
-    Res.Val.setInt(LoadVal);
-    Res.Offset = Offset;
-    return Res;
-  }
-
-  bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
-  bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; }
-  bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; }
-
-  Value *getSimpleValue() const {
-    assert(isSimpleValue() && "Wrong accessor");
-    return Val.getPointer();
-  }
-
-  LoadInst *getCoercedLoadValue() const {
-    assert(isCoercedLoadValue() && "Wrong accessor");
-    return cast<LoadInst>(Val.getPointer());
-  }
-
-  MemIntrinsic *getMemIntrinValue() const {
-    assert(isMemIntrinValue() && "Wrong accessor");
-    return cast<MemIntrinsic>(Val.getPointer());
-  }
-
-  /// MaterializeAdjustedValue - Emit code into this block to adjust the value
-  /// defined here to the specified type.  This handles various coercion cases.
-  Value *MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) const {
-    Value *Res;
-    if (isSimpleValue()) {
-      Res = getSimpleValue();
-      if (Res->getType() != LoadTy) {
-        const DataLayout *TD = gvn.getDataLayout();
-        assert(TD && "Need target data to handle type mismatch case");
-        Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(),
-                                   *TD);
-
-        DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << "  "
-                     << *getSimpleValue() << '\n'
-                     << *Res << '\n' << "\n\n\n");
-      }
-    } else if (isCoercedLoadValue()) {
-      LoadInst *Load = getCoercedLoadValue();
-      if (Load->getType() == LoadTy && Offset == 0) {
-        Res = Load;
-      } else {
-        Res = GetLoadValueForLoad(Load, Offset, LoadTy, BB->getTerminator(),
-                                  gvn);
-
-        DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset << "  "
-                     << *getCoercedLoadValue() << '\n'
-                     << *Res << '\n' << "\n\n\n");
-      }
-    } else {
-      const DataLayout *TD = gvn.getDataLayout();
-      assert(TD && "Need target data to handle type mismatch case");
-      Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset,
-                                   LoadTy, BB->getTerminator(), *TD);
-      DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
-                   << "  " << *getMemIntrinValue() << '\n'
-                   << *Res << '\n' << "\n\n\n");
-    }
-    return Res;
-  }
-};
-
-} // end anonymous namespace
 
 /// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock,
 /// construct SSA form, allowing us to eliminate LI.  This returns the value
@@ -1323,48 +1295,59 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
   return V;
 }
 
+Value *AvailableValueInBlock::MaterializeAdjustedValue(Type *LoadTy, GVN &gvn) const {
+  Value *Res;
+  if (isSimpleValue()) {
+    Res = getSimpleValue();
+    if (Res->getType() != LoadTy) {
+      const DataLayout *TD = gvn.getDataLayout();
+      assert(TD && "Need target data to handle type mismatch case");
+      Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(),
+                                 *TD);
+  
+      DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << "  "
+                   << *getSimpleValue() << '\n'
+                   << *Res << '\n' << "\n\n\n");
+    }
+  } else if (isCoercedLoadValue()) {
+    LoadInst *Load = getCoercedLoadValue();
+    if (Load->getType() == LoadTy && Offset == 0) {
+      Res = Load;
+    } else {
+      Res = GetLoadValueForLoad(Load, Offset, LoadTy, BB->getTerminator(),
+                                gvn);
+  
+      DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset << "  "
+                   << *getCoercedLoadValue() << '\n'
+                   << *Res << '\n' << "\n\n\n");
+    }
+  } else {
+    const DataLayout *TD = gvn.getDataLayout();
+    assert(TD && "Need target data to handle type mismatch case");
+    Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset,
+                                 LoadTy, BB->getTerminator(), *TD);
+    DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
+                 << "  " << *getMemIntrinValue() << '\n'
+                 << *Res << '\n' << "\n\n\n");
+  }
+  return Res;
+}
+
 static bool isLifetimeStart(const Instruction *Inst) {
   if (const IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst))
     return II->getIntrinsicID() == Intrinsic::lifetime_start;
   return false;
 }
 
-/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are
-/// non-local by performing PHI construction.
-bool GVN::processNonLocalLoad(LoadInst *LI) {
-  // Find the non-local dependencies of the load.
-  SmallVector<NonLocalDepResult, 64> Deps;
-  AliasAnalysis::Location Loc = VN.getAliasAnalysis()->getLocation(LI);
-  MD->getNonLocalPointerDependency(Loc, true, LI->getParent(), Deps);
-  //DEBUG(dbgs() << "INVESTIGATING NONLOCAL LOAD: "
-  //             << Deps.size() << *LI << '\n');
-
-  // If we had to process more than one hundred blocks to find the
-  // dependencies, this load isn't worth worrying about.  Optimizing
-  // it will be too expensive.
-  unsigned NumDeps = Deps.size();
-  if (NumDeps > 100)
-    return false;
-
-  // If we had a phi translation failure, we'll have a single entry which is a
-  // clobber in the current block.  Reject this early.
-  if (NumDeps == 1 &&
-      !Deps[0].getResult().isDef() && !Deps[0].getResult().isClobber()) {
-    DEBUG(
-      dbgs() << "GVN: non-local load ";
-      WriteAsOperand(dbgs(), LI);
-      dbgs() << " has unknown dependencies\n";
-    );
-    return false;
-  }
+void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps, 
+                                  AvailValInBlkVect &ValuesPerBlock,
+                                  UnavailBlkVect &UnavailableBlocks) {
 
   // Filter out useless results (non-locals, etc).  Keep track of the blocks
   // where we have a value available in repl, also keep track of whether we see
   // dependencies that produce an unknown value for the load (such as a call
   // that could potentially clobber the load).
-  SmallVector<AvailableValueInBlock, 64> ValuesPerBlock;
-  SmallVector<BasicBlock*, 64> UnavailableBlocks;
-
+  unsigned NumDeps = Deps.size();
   for (unsigned i = 0, e = NumDeps; i != e; ++i) {
     BasicBlock *DepBB = Deps[i].getBB();
     MemDepResult DepInfo = Deps[i].getResult();
@@ -1480,35 +1463,11 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
     }
 
     UnavailableBlocks.push_back(DepBB);
-    continue;
   }
+}
 
-  // If we have no predecessors that produce a known value for this load, exit
-  // early.
-  if (ValuesPerBlock.empty()) return false;
-
-  // If all of the instructions we depend on produce a known value for this
-  // load, then it is fully redundant and we can use PHI insertion to compute
-  // its value.  Insert PHIs and remove the fully redundant value now.
-  if (UnavailableBlocks.empty()) {
-    DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n');
-
-    // Perform PHI construction.
-    Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this);
-    LI->replaceAllUsesWith(V);
-
-    if (isa<PHINode>(V))
-      V->takeName(LI);
-    if (V->getType()->getScalarType()->isPointerTy())
-      MD->invalidateCachedPointerInfo(V);
-    markInstructionForDeletion(LI);
-    ++NumGVNLoad;
-    return true;
-  }
-
-  if (!EnablePRE || !EnableLoadPRE)
-    return false;
-
+bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, 
+                         UnavailBlkVect &UnavailableBlocks) {
   // Okay, we have *some* definitions of the value.  This means that the value
   // is available in some of our (transitive) predecessors.  Lets think about
   // doing PRE of this load.  This will involve inserting a new load into the
@@ -1526,7 +1485,6 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
   BasicBlock *LoadBB = LI->getParent();
   BasicBlock *TmpBB = LoadBB;
 
-  bool allSingleSucc = true;
   while (TmpBB->getSinglePredecessor()) {
     TmpBB = TmpBB->getSinglePredecessor();
     if (TmpBB == LoadBB) // Infinite (unreachable) loop.
@@ -1615,13 +1573,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
     // pointer if it is not available.
     PHITransAddr Address(LI->getPointerOperand(), TD);
     Value *LoadPtr = 0;
-    if (allSingleSucc) {
-      LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred,
-                                                  *DT, NewInsts);
-    } else {
-      Address.PHITranslateValue(LoadBB, UnavailablePred, DT);
-      LoadPtr = Address.getAddr();
-    }
+    LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred,
+                                                *DT, NewInsts);
 
     // If we couldn't find or insert a computation of this phi translated value,
     // we fail PRE.
@@ -1632,24 +1585,6 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
       break;
     }
 
-    // Make sure it is valid to move this load here.  We have to watch out for:
-    //  @1 = getelementptr (i8* p, ...
-    //  test p and branch if == 0
-    //  load @1
-    // It is valid to have the getelementptr before the test, even if p can
-    // be 0, as getelementptr only does address arithmetic.
-    // If we are not pushing the value through any multiple-successor blocks
-    // we do not have this case.  Otherwise, check that the load is safe to
-    // put anywhere; this can be improved, but should be conservatively safe.
-    if (!allSingleSucc &&
-        // FIXME: REEVALUTE THIS.
-        !isSafeToLoadUnconditionally(LoadPtr,
-                                     UnavailablePred->getTerminator(),
-                                     LI->getAlignment(), TD)) {
-      CanDoPRE = false;
-      break;
-    }
-
     I->second = LoadPtr;
   }
 
@@ -1714,6 +1649,72 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
   return true;
 }
 
+/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are
+/// non-local by performing PHI construction.
+bool GVN::processNonLocalLoad(LoadInst *LI) {
+  // Step 1: Find the non-local dependencies of the load.
+  LoadDepVect Deps;
+  AliasAnalysis::Location Loc = VN.getAliasAnalysis()->getLocation(LI);
+  MD->getNonLocalPointerDependency(Loc, true, LI->getParent(), Deps);
+
+  // If we had to process more than one hundred blocks to find the
+  // dependencies, this load isn't worth worrying about.  Optimizing
+  // it will be too expensive.
+  unsigned NumDeps = Deps.size();
+  if (NumDeps > 100)
+    return false;
+
+  // If we had a phi translation failure, we'll have a single entry which is a
+  // clobber in the current block.  Reject this early.
+  if (NumDeps == 1 &&
+      !Deps[0].getResult().isDef() && !Deps[0].getResult().isClobber()) {
+    DEBUG(
+      dbgs() << "GVN: non-local load ";
+      WriteAsOperand(dbgs(), LI);
+      dbgs() << " has unknown dependencies\n";
+    );
+    return false;
+  }
+
+  // Step 2: Analyze the availability of the load
+  AvailValInBlkVect ValuesPerBlock;
+  UnavailBlkVect UnavailableBlocks;
+  AnalyzeLoadAvailability(LI, Deps, ValuesPerBlock, UnavailableBlocks);
+
+  // If we have no predecessors that produce a known value for this load, exit
+  // early.
+  if (ValuesPerBlock.empty())
+    return false;
+
+  // Step 3: Eliminate full redundancy.
+  //
+  // If all of the instructions we depend on produce a known value for this
+  // load, then it is fully redundant and we can use PHI insertion to compute
+  // its value.  Insert PHIs and remove the fully redundant value now.
+  if (UnavailableBlocks.empty()) {
+    DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n');
+
+    // Perform PHI construction.
+    Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this);
+    LI->replaceAllUsesWith(V);
+
+    if (isa<PHINode>(V))
+      V->takeName(LI);
+    if (V->getType()->getScalarType()->isPointerTy())
+      MD->invalidateCachedPointerInfo(V);
+    markInstructionForDeletion(LI);
+    ++NumGVNLoad;
+    return true;
+  }
+
+  // Step 4: Eliminate partial redundancy.
+  if (!EnablePRE || !EnableLoadPRE)
+    return false;
+
+  return PerformLoadPRE(LI, ValuesPerBlock, UnavailableBlocks);
+}
+
+
 static void patchReplacementInstruction(Instruction *I, Value *Repl) {
   // Patch the replacement so that it is not more restrictive than the value
   // being replaced.
diff --git a/lib/Transforms/Scalar/GlobalMerge.cpp b/lib/Transforms/Scalar/GlobalMerge.cpp
index 5d02c68..4796eb2 100644
--- a/lib/Transforms/Scalar/GlobalMerge.cpp
+++ b/lib/Transforms/Scalar/GlobalMerge.cpp
@@ -200,9 +200,8 @@ void GlobalMerge::collectUsedGlobalVariables(Module &M) {
   if (!GV || !GV->hasInitializer()) return;
 
   // Should be an array of 'i8*'.
-  const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
-  if (InitList == 0) return;
- 
+  const ConstantArray *InitList = cast<ConstantArray>(GV->getInitializer());
+
   for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
     if (const GlobalVariable *G =
         dyn_cast<GlobalVariable>(InitList->getOperand(i)->stripPointerCasts()))
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
index e98ae95..14c5655 100644
--- a/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -56,8 +56,8 @@ namespace {
     }
 
     bool runOnLoop(Loop *L, LPPassManager &LPM);
-    void simplifyLoopLatch(Loop *L);
-    bool rotateLoop(Loop *L);
+    bool simplifyLoopLatch(Loop *L);
+    bool rotateLoop(Loop *L, bool SimplifiedLatch);
 
   private:
     LoopInfo *LI;
@@ -84,13 +84,14 @@ bool LoopRotate::runOnLoop(Loop *L, LPPassManager &LPM) {
   // Simplify the loop latch before attempting to rotate the header
   // upward. Rotation may not be needed if the loop tail can be folded into the
   // loop exit.
-  simplifyLoopLatch(L);
+  bool SimplifiedLatch = simplifyLoopLatch(L);
 
   // One loop can be rotated multiple times.
   bool MadeChange = false;
-  while (rotateLoop(L))
+  while (rotateLoop(L, SimplifiedLatch)) {
     MadeChange = true;
-
+    SimplifiedLatch = false;
+  }
   return MadeChange;
 }
 
@@ -212,25 +213,25 @@ static bool shouldSpeculateInstrs(BasicBlock::iterator Begin,
 /// canonical form so downstream passes can handle it.
 ///
 /// I don't believe this invalidates SCEV.
-void LoopRotate::simplifyLoopLatch(Loop *L) {
+bool LoopRotate::simplifyLoopLatch(Loop *L) {
   BasicBlock *Latch = L->getLoopLatch();
   if (!Latch || Latch->hasAddressTaken())
-    return;
+    return false;
 
   BranchInst *Jmp = dyn_cast<BranchInst>(Latch->getTerminator());
   if (!Jmp || !Jmp->isUnconditional())
-    return;
+    return false;
 
   BasicBlock *LastExit = Latch->getSinglePredecessor();
   if (!LastExit || !L->isLoopExiting(LastExit))
-    return;
+    return false;
 
   BranchInst *BI = dyn_cast<BranchInst>(LastExit->getTerminator());
   if (!BI)
-    return;
+    return false;
 
   if (!shouldSpeculateInstrs(Latch->begin(), Jmp))
-    return;
+    return false;
 
   DEBUG(dbgs() << "Folding loop latch " << Latch->getName() << " into "
         << LastExit->getName() << "\n");
@@ -253,10 +254,20 @@ void LoopRotate::simplifyLoopLatch(Loop *L) {
   if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>())
     DT->eraseNode(Latch);
   Latch->eraseFromParent();
+  return true;
 }
 
 /// Rotate loop LP. Return true if the loop is rotated.
-bool LoopRotate::rotateLoop(Loop *L) {
+///
+/// \param SimplifiedLatch is true if the latch was just folded into the final
+/// loop exit. In this case we may want to rotate even though the new latch is
+/// now an exiting branch. This rotation would have happened had the latch not
+/// been simplified. However, if SimplifiedLatch is false, then we avoid
+/// rotating loops in which the latch exits to avoid excessive or endless
+/// rotation. LoopRotate should be repeatable and converge to a canonical
+/// form. This property is satisfied because simplifying the loop latch can only
+/// happen once across multiple invocations of the LoopRotate pass.
+bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
   // If the loop has only one block then there is not much to rotate.
   if (L->getBlocks().size() == 1)
     return false;
@@ -276,7 +287,12 @@ bool LoopRotate::rotateLoop(Loop *L) {
 
   // If the loop latch already contains a branch that leaves the loop then the
   // loop is already rotated.
-  if (OrigLatch == 0 || L->isLoopExiting(OrigLatch))
+  if (OrigLatch == 0)
+    return false;
+
+  // Rotate if either the loop latch does *not* exit the loop, or if the loop
+  // latch was just simplified.
+  if (L->isLoopExiting(OrigLatch) && !SimplifiedLatch)
     return false;
 
   // Check size of original header and reject loop if it is very big or we can't
@@ -505,4 +521,3 @@ bool LoopRotate::rotateLoop(Loop *L) {
   ++NumRotated;
   return true;
 }
-
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index 7ee4027..a3c241d 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -143,13 +143,9 @@ namespace {
     //   So, if Rank(X) < Rank(Y) < Rank(Z), it means X is defined earlier 
     //   than Y which is defined earlier than Z. Permute "x | 1", "Y & 2",
     //   "z" in the order of X-Y-Z is better than any other orders.
-    class PtrSortFunctor {
-      ArrayRef<XorOpnd> A;
-
-    public:
-      PtrSortFunctor(ArrayRef<XorOpnd> Array) : A(Array) {}
-      bool operator()(unsigned LHSIndex, unsigned RHSIndex) {
-        return A[LHSIndex].getSymbolicRank() < A[RHSIndex].getSymbolicRank();
+    struct PtrSortFunctor {
+      bool operator()(XorOpnd * const &LHS, XorOpnd * const &RHS) {
+        return LHS->getSymbolicRank() < RHS->getSymbolicRank();
       }
     };
   private:
@@ -1199,9 +1195,6 @@ bool Reassociate::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, XorOpnd *Opnd2,
   if (X != Opnd2->getSymbolicPart())
     return false;
 
-  const APInt &C1 = Opnd1->getConstPart();
-  const APInt &C2 = Opnd2->getConstPart();
-
   // This many instruction become dead.(At least "Opnd1 ^ Opnd2" will die.)
   int DeadInstNum = 1;
   if (Opnd1->getValue()->hasOneUse())
@@ -1219,6 +1212,8 @@ bool Reassociate::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, XorOpnd *Opnd2,
     if (Opnd2->isOrExpr())
       std::swap(Opnd1, Opnd2);
 
+    const APInt &C1 = Opnd1->getConstPart();
+    const APInt &C2 = Opnd2->getConstPart();
     APInt C3((~C1) ^ C2);
 
     // Do not increase code size!
@@ -1234,6 +1229,8 @@ bool Reassociate::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, XorOpnd *Opnd2,
   } else if (Opnd1->isOrExpr()) {
     // Xor-Rule 3: (x | c1) ^ (x | c2) = (x & c3) ^ c3 where c3 = c1 ^ c2
     //
+    const APInt &C1 = Opnd1->getConstPart();
+    const APInt &C2 = Opnd2->getConstPart();
     APInt C3 = C1 ^ C2;
     
     // Do not increase code size
@@ -1248,6 +1245,8 @@ bool Reassociate::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, XorOpnd *Opnd2,
   } else {
     // Xor-Rule 4: (x & c1) ^ (x & c2) = (x & (c1^c2))
     //
+    const APInt &C1 = Opnd1->getConstPart();
+    const APInt &C2 = Opnd2->getConstPart();
     APInt C3 = C1 ^ C2;
     Res = createAndInstr(I, X, C3);
   }
@@ -1274,7 +1273,7 @@ Value *Reassociate::OptimizeXor(Instruction *I,
     return 0;
 
   SmallVector<XorOpnd, 8> Opnds;
-  SmallVector<unsigned, 8> OpndIndices;
+  SmallVector<XorOpnd*, 8> OpndPtrs;
   Type *Ty = Ops[0].Op->getType();
   APInt ConstOpnd(Ty->getIntegerBitWidth(), 0);
 
@@ -1285,23 +1284,29 @@ Value *Reassociate::OptimizeXor(Instruction *I,
       XorOpnd O(V);
       O.setSymbolicRank(getRank(O.getSymbolicPart()));
       Opnds.push_back(O);
-      OpndIndices.push_back(Opnds.size() - 1);
     } else
       ConstOpnd ^= cast<ConstantInt>(V)->getValue();
   }
 
+  // NOTE: From this point on, do *NOT* add/delete elements to/from "Opnds".
+  //  Doing so would invalidate iterators into "Opnds", and hence invalidate
+  //  the "OpndPtrs" as well. For a similar reason, do not fuse this loop
+  //  with the previous loop --- the iterators of "Opnds" may be invalidated
+  //  when new elements are added to the vector.
+  for (unsigned i = 0, e = Opnds.size(); i != e; ++i)
+    OpndPtrs.push_back(&Opnds[i]);
+
   // Step 2: Sort the Xor-Operands in a way such that the operands containing
   //  the same symbolic value cluster together. For instance, the input operand
   //  sequence ("x | 123", "y & 456", "x & 789") will be sorted into:
   //  ("x | 123", "x & 789", "y & 456").
-  std::sort(OpndIndices.begin(), OpndIndices.end(),
-            XorOpnd::PtrSortFunctor(Opnds));
+  std::sort(OpndPtrs.begin(), OpndPtrs.end(), XorOpnd::PtrSortFunctor());
 
   // Step 3: Combine adjacent operands
   XorOpnd *PrevOpnd = 0;
   bool Changed = false;
   for (unsigned i = 0, e = Opnds.size(); i < e; i++) {
-    XorOpnd *CurrOpnd = &Opnds[OpndIndices[i]];
+    XorOpnd *CurrOpnd = OpndPtrs[i];
     // The combined value
     Value *CV;
 
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index f6bb365..d073e78 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -2322,17 +2322,15 @@ static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V,
   V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()),
                               ConstantVector::get(Mask),
                               Name + ".expand");
-  DEBUG(dbgs() << "    shuffle1: " << *V << "\n");
+  DEBUG(dbgs() << "    shuffle: " << *V << "\n");
 
   Mask.clear();
   for (unsigned i = 0; i != VecTy->getNumElements(); ++i)
-    if (i >= BeginIndex && i < EndIndex)
-      Mask.push_back(IRB.getInt32(i));
-    else
-      Mask.push_back(IRB.getInt32(i + VecTy->getNumElements()));
-  V = IRB.CreateShuffleVector(V, Old, ConstantVector::get(Mask),
-                              Name + "insert");
-  DEBUG(dbgs() << "    shuffle2: " << *V << "\n");
+    Mask.push_back(IRB.getInt1(i >= BeginIndex && i < EndIndex));
+
+  V = IRB.CreateSelect(ConstantVector::get(Mask), V, Old, Name + "blend");
+
+  DEBUG(dbgs() << "    blend: " << *V << "\n");
   return V;
 }
 
@@ -2671,6 +2669,7 @@ private:
 
     StoreInst *NewSI;
     if (BeginOffset == NewAllocaBeginOffset &&
+        EndOffset == NewAllocaEndOffset &&
         canConvertValue(TD, V->getType(), NewAllocaTy)) {
       V = convertValue(TD, IRB, V, NewAllocaTy);
       NewSI = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(),
@@ -3050,16 +3049,16 @@ private:
 
   bool visitSelectInst(SelectInst &SI) {
     DEBUG(dbgs() << "    original: " << SI << "\n");
-
-    // Find the operand we need to rewrite here.
-    bool IsTrueVal = SI.getTrueValue() == OldPtr;
-    if (IsTrueVal)
-      assert(SI.getFalseValue() != OldPtr && "Pointer is both operands!");
-    else
-      assert(SI.getFalseValue() == OldPtr && "Pointer isn't an operand!");
+    assert((SI.getTrueValue() == OldPtr || SI.getFalseValue() == OldPtr) &&
+           "Pointer isn't an operand!");
 
     Value *NewPtr = getAdjustedAllocaPtr(IRB, OldPtr->getType());
-    SI.setOperand(IsTrueVal ? 1 : 2, NewPtr);
+    // Replace the operands which were using the old pointer.
+    if (SI.getOperand(1) == OldPtr)
+      SI.setOperand(1, NewPtr);
+    if (SI.getOperand(2) == OldPtr)
+      SI.setOperand(2, NewPtr);
+
     DEBUG(dbgs() << "          to: " << SI << "\n");
     deleteIfTriviallyDead(OldPtr);
     return false;
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index e590a37..bfde334 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -1462,8 +1462,8 @@ bool SROA::ShouldAttemptScalarRepl(AllocaInst *AI) {
 }
 
 // performScalarRepl - This algorithm is a simple worklist driven algorithm,
-// which runs on all of the alloca instructions in the function, removing them
-// if they are only used by getelementptr instructions.
+// which runs on all of the alloca instructions in the entry block, removing
+// them if they are only used by getelementptr instructions.
 //
 bool SROA::performScalarRepl(Function &F) {
   std::vector<AllocaInst*> WorkList;
@@ -1724,17 +1724,8 @@ void SROA::isSafeGEP(GetElementPtrInst *GEPI,
       continue;
 
     ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPIt.getOperand());
-    if (!IdxVal) {
-      // Non constant GEPs are only a problem on arrays, structs, and pointers
-      // Vectors can be dynamically indexed.
-      // FIXME: Add support for dynamic indexing on arrays.  This should be
-      // ok on any subarrays of the alloca array, eg, a[0][i] is ok, but a[i][0]
-      // isn't.
-      if (!(*GEPIt)->isVectorTy())
-        return MarkUnsafe(Info, GEPI);
-      NonConstant = true;
-      NonConstantIdxSize = TD->getTypeAllocSize(*GEPIt);
-    }
+    if (!IdxVal)
+      return MarkUnsafe(Info, GEPI);
   }
 
   // Compute the offset due to this GEP and check if the alloca has a
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index 63d7a1d..be8d39e 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -87,29 +87,26 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
     assert(VMap.count(I) && "No mapping from source argument specified!");
 #endif
 
-  // Clone any attributes.
-  if (NewFunc->arg_size() == OldFunc->arg_size())
-    NewFunc->copyAttributesFrom(OldFunc);
-  else {
-    //Some arguments were deleted with the VMap. Copy arguments one by one
-    for (Function::const_arg_iterator I = OldFunc->arg_begin(), 
-           E = OldFunc->arg_end(); I != E; ++I)
-      if (Argument* Anew = dyn_cast<Argument>(VMap[I])) {
-        AttributeSet attrs = OldFunc->getAttributes()
-          .getParamAttributes(I->getArgNo() + 1);
-        if (attrs.getNumSlots() > 0)
-          Anew->addAttr(attrs);
-      }
-    NewFunc->setAttributes(NewFunc->getAttributes()
-                           .addAttributes(NewFunc->getContext(),
-                                          AttributeSet::ReturnIndex,
-                                          OldFunc->getAttributes()));
-    NewFunc->setAttributes(NewFunc->getAttributes()
-                           .addAttributes(NewFunc->getContext(),
-                                          AttributeSet::FunctionIndex,
-                                          OldFunc->getAttributes()));
+  AttributeSet OldAttrs = OldFunc->getAttributes();
+  // Clone any argument attributes that are present in the VMap.
+  for (Function::const_arg_iterator I = OldFunc->arg_begin(),
+                                    E = OldFunc->arg_end();
+       I != E; ++I)
+    if (Argument *Anew = dyn_cast<Argument>(VMap[I])) {
+      AttributeSet attrs =
+          OldAttrs.getParamAttributes(I->getArgNo() + 1);
+      if (attrs.getNumSlots() > 0)
+        Anew->addAttr(attrs);
+    }
 
-  }
+  NewFunc->setAttributes(NewFunc->getAttributes()
+                         .addAttributes(NewFunc->getContext(),
+                                        AttributeSet::ReturnIndex,
+                                        OldAttrs.getRetAttributes()));
+  NewFunc->setAttributes(NewFunc->getAttributes()
+                         .addAttributes(NewFunc->getContext(),
+                                        AttributeSet::FunctionIndex,
+                                        OldAttrs.getFnAttributes()));
 
   // Loop over all of the basic blocks in the function, cloning them as
   // appropriate.  Note that we save BE this way in order to handle cloning of
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index e9828d6..dabb67b9 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -758,8 +758,10 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
 
     // If the call site was an invoke instruction, add a branch to the normal
     // destination.
-    if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall))
-      BranchInst::Create(II->getNormalDest(), TheCall);
+    if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {
+      BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall);
+      NewBr->setDebugLoc(Returns[0]->getDebugLoc());
+    }
 
     // If the return instruction returned a value, replace uses of the call with
     // uses of the returned value.
@@ -787,15 +789,16 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
   // "starter" and "ender" blocks.  How we accomplish this depends on whether
   // this is an invoke instruction or a call instruction.
   BasicBlock *AfterCallBB;
+  BranchInst *CreatedBranchToNormalDest = NULL;
   if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {
 
     // Add an unconditional branch to make this look like the CallInst case...
-    BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall);
+    CreatedBranchToNormalDest = BranchInst::Create(II->getNormalDest(), TheCall);
 
     // Split the basic block.  This guarantees that no PHI nodes will have to be
     // updated due to new incoming edges, and make the invoke case more
     // symmetric to the call case.
-    AfterCallBB = OrigBB->splitBasicBlock(NewBr,
+    AfterCallBB = OrigBB->splitBasicBlock(CreatedBranchToNormalDest,
                                           CalledFunc->getName()+".exit");
 
   } else {  // It's a call
@@ -850,11 +853,20 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
 
 
     // Add a branch to the merge points and remove return instructions.
+    DebugLoc Loc;
     for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
       ReturnInst *RI = Returns[i];
-      BranchInst::Create(AfterCallBB, RI);
+      BranchInst* BI = BranchInst::Create(AfterCallBB, RI);
+      Loc = RI->getDebugLoc();
+      BI->setDebugLoc(Loc);
       RI->eraseFromParent();
     }
+    // We need to set the debug location to *somewhere* inside the
+    // inlined function. The line number may be nonsensical, but the
+    // instruction will at least be associated with the right
+    // function.
+    if (CreatedBranchToNormalDest)
+      CreatedBranchToNormalDest->setDebugLoc(Loc);
   } else if (!Returns.empty()) {
     // Otherwise, if there is exactly one return value, just replace anything
     // using the return value of the call with the computed value.
@@ -874,6 +886,9 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
     AfterCallBB->getInstList().splice(AfterCallBB->begin(),
                                       ReturnBB->getInstList());
 
+    if (CreatedBranchToNormalDest)
+      CreatedBranchToNormalDest->setDebugLoc(Returns[0]->getDebugLoc());
+
     // Delete the return instruction now and empty ReturnBB now.
     Returns[0]->eraseFromParent();
     ReturnBB->eraseFromParent();
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index be80d34..12e5b3e 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -832,7 +832,24 @@ unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
 ///  Dbg Intrinsic utilities
 ///
 
-/// Inserts a llvm.dbg.value instrinsic before the stores to an alloca'd value
+/// See if there is a dbg.value intrinsic for DIVar before I.
+static bool LdStHasDebugValue(DIVariable &DIVar, Instruction *I) {
+  // Since we can't guarantee that the original dbg.declare intrinsic
+  // is removed by LowerDbgDeclare(), we need to make sure that we are
+  // not inserting the same dbg.value intrinsic over and over.
+  llvm::BasicBlock::InstListType::iterator PrevI(I);
+  if (PrevI != I->getParent()->getInstList().begin()) {
+    --PrevI;
+    if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(PrevI))
+      if (DVI->getValue() == I->getOperand(0) &&
+          DVI->getOffset() == 0 &&
+          DVI->getVariable() == DIVar)
+        return true;
+  }
+  return false;
+}
+
+/// Inserts a llvm.dbg.value intrinsic before a store to an alloca'd value
 /// that has an associated llvm.dbg.decl intrinsic.
 bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
                                            StoreInst *SI, DIBuilder &Builder) {
@@ -840,6 +857,9 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
   if (!DIVar.Verify())
     return false;
 
+  if (LdStHasDebugValue(DIVar, SI))
+    return true;
+
   Instruction *DbgVal = NULL;
   // If an argument is zero extended then use argument directly. The ZExt
   // may be zapped by an optimization pass in future.
@@ -863,7 +883,7 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
   return true;
 }
 
-/// Inserts a llvm.dbg.value instrinsic before the stores to an alloca'd value
+/// Inserts a llvm.dbg.value intrinsic before a load of an alloca'd value
 /// that has an associated llvm.dbg.decl intrinsic.
 bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
                                            LoadInst *LI, DIBuilder &Builder) {
@@ -871,6 +891,9 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
   if (!DIVar.Verify())
     return false;
 
+  if (LdStHasDebugValue(DIVar, LI))
+    return true;
+
   Instruction *DbgVal = 
     Builder.insertDbgValueIntrinsic(LI->getOperand(0), 0,
                                     DIVar, LI);
@@ -902,6 +925,8 @@ bool llvm::LowerDbgDeclare(Function &F) {
          E = Dbgs.end(); I != E; ++I) {
     DbgDeclareInst *DDI = *I;
     if (AllocaInst *AI = dyn_cast_or_null<AllocaInst>(DDI->getAddress())) {
+      // We only remove the dbg.declare intrinsic if all uses are
+      // converted to dbg.value intrinsics.
       bool RemoveDDI = true;
       for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
            UI != E; ++UI)
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 681bf9c..052ad85 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -59,6 +59,10 @@ static cl::opt<bool>
 SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
        cl::desc("Sink common instructions down to the end block"));
 
+static cl::opt<bool>
+HoistCondStores("simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
+       cl::desc("Hoist conditional stores if an unconditional store preceeds"));
+
 STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
 STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables");
 STATISTIC(NumSinkCommons, "Number of common instructions sunk down to the end block");
@@ -1332,6 +1336,66 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
   return Changed;
 }
 
+/// \brief Determine if we can hoist a sole store instruction out of a
+/// conditional block.
+///
+/// We are looking for code like the following:
+///   BrBB:
+///     store i32 %add, i32* %arrayidx2
+///     ... // No other stores or function calls (we could be calling a memory
+///     ... // function).
+///     %cmp = icmp ult %x, %y
+///     br i1 %cmp, label %EndBB, label %ThenBB
+///   ThenBB:
+///     store i32 %add5, i32* %arrayidx2
+///     br label EndBB
+///   EndBB:
+///     ...
+///   We are going to transform this into:
+///   BrBB:
+///     store i32 %add, i32* %arrayidx2
+///     ... //
+///     %cmp = icmp ult %x, %y
+///     %add.add5 = select i1 %cmp, i32 %add, %add5
+///     store i32 %add.add5, i32* %arrayidx2
+///     ...
+///
+/// \return The pointer to the value of the previous store if the store can be
+///         hoisted into the predecessor block. 0 otherwise.
+Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,
+                              BasicBlock *StoreBB, BasicBlock *EndBB) {
+  StoreInst *StoreToHoist = dyn_cast<StoreInst>(I);
+  if (!StoreToHoist)
+    return 0;
+
+  // Volatile or atomic.
+  if (!StoreToHoist->isSimple())
+    return 0;
+
+  Value *StorePtr = StoreToHoist->getPointerOperand();
+
+  // Look for a store to the same pointer in BrBB.
+  unsigned MaxNumInstToLookAt = 10;
+  for (BasicBlock::reverse_iterator RI = BrBB->rbegin(),
+       RE = BrBB->rend(); RI != RE && (--MaxNumInstToLookAt); ++RI) {
+    Instruction *CurI = &*RI;
+
+    // Could be an instruction that affects memory, like a call to free().
+    if (CurI->mayHaveSideEffects() && !isa<StoreInst>(CurI))
+      return 0;
+
+    StoreInst *SI = dyn_cast<StoreInst>(CurI);
+    // If this is a store, make sure it stores to the same location.
+    if (SI && SI->getPointerOperand() == StorePtr)
+      // Found the previous store, return its value operand.
+      return SI->getValueOperand();
+    else if (SI)
+      return 0; // Unknown store.
+  }
+
+  return 0;
+}
+
 /// \brief Speculate a conditional basic block flattening the CFG.
 ///
 /// Note that this is a very risky transform currently. Speculating
@@ -1395,6 +1459,8 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) {
   SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;
 
   unsigned SpeculationCost = 0;
+  Value *SpeculatedStoreValue = 0;
+  StoreInst *SpeculatedStore = 0;
   for (BasicBlock::iterator BBI = ThenBB->begin(),
                             BBE = llvm::prior(ThenBB->end());
        BBI != BBE; ++BBI) {
@@ -1410,13 +1476,21 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) {
       return false;
 
     // Don't hoist the instruction if it's unsafe or expensive.
-    if (!isSafeToSpeculativelyExecute(I))
+    if (!isSafeToSpeculativelyExecute(I) &&
+        !(HoistCondStores &&
+          (SpeculatedStoreValue = isSafeToSpeculateStore(I, BB, ThenBB,
+                                                         EndBB))))
       return false;
-    if (ComputeSpeculationCost(I) > PHINodeFoldingThreshold)
+    if (!SpeculatedStoreValue &&
+        ComputeSpeculationCost(I) > PHINodeFoldingThreshold)
       return false;
 
+    // Store the store speculation candidate.
+    if (SpeculatedStoreValue)
+      SpeculatedStore = cast<StoreInst>(I);
+
     // Do not hoist the instruction if any of its operands are defined but not
-    // used in this BB. The transformation will prevent the operand from
+    // used in BB. The transformation will prevent the operand from
     // being sunk into the use block.
     for (User::op_iterator i = I->op_begin(), e = I->op_end();
          i != e; ++i) {
@@ -1473,12 +1547,24 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) {
 
   // If there are no PHIs to process, bail early. This helps ensure idempotence
   // as well.
-  if (!HaveRewritablePHIs)
+  if (!HaveRewritablePHIs && !(HoistCondStores && SpeculatedStoreValue))
     return false;
 
   // If we get here, we can hoist the instruction and if-convert.
   DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
 
+  // Insert a select of the value of the speculated store.
+  if (SpeculatedStoreValue) {
+    IRBuilder<true, NoFolder> Builder(BI);
+    Value *TrueV = SpeculatedStore->getValueOperand();
+    Value *FalseV = SpeculatedStoreValue;
+    if (Invert)
+      std::swap(TrueV, FalseV);
+    Value *S = Builder.CreateSelect(BrCond, TrueV, FalseV, TrueV->getName() +
+                                    "." + FalseV->getName());
+    SpeculatedStore->setOperand(0, S);
+  }
+
   // Hoist the instructions.
   BB->getInstList().splice(BI, ThenBB->getInstList(), ThenBB->begin(),
                            llvm::prior(ThenBB->end()));
@@ -3073,7 +3159,12 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) {
   Value *Sub = SI->getCondition();
   if (!Offset->isNullValue())
     Sub = Builder.CreateAdd(Sub, Offset, Sub->getName()+".off");
-  Value *Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
+  Value *Cmp;
+  // If NumCases overflowed, then all possible values jump to the successor.
+  if (NumCases->isNullValue() && SI->getNumCases() != 0)
+    Cmp = ConstantInt::getTrue(SI->getContext());
+  else
+    Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
   BranchInst *NewBI = Builder.CreateCondBr(
       Cmp, SI->case_begin().getCaseSuccessor(), SI->getDefaultDest());
 
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
index c231704..6bea2dd 100644
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -1518,6 +1518,12 @@ struct FPrintFOpt : public LibCallOptimization {
     if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr))
       return 0;
 
+    // Do not do any of the following transformations if the fprintf return
+    // value is used; in general the fprintf return value is not compatible
+    // with fwrite(), fputc() or fputs().
+    if (!CI->use_empty())
+      return 0;
+
     // fprintf(F, "foo") --> fwrite("foo", 3, 1, F)
     if (CI->getNumArgOperands() == 2) {
       for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)
@@ -1527,11 +1533,10 @@ struct FPrintFOpt : public LibCallOptimization {
       // These optimizations require DataLayout.
       if (!TD) return 0;
 
-      Value *NewCI = EmitFWrite(CI->getArgOperand(1),
-                                ConstantInt::get(TD->getIntPtrType(*Context),
-                                                 FormatStr.size()),
-                                CI->getArgOperand(0), B, TD, TLI);
-      return NewCI ? ConstantInt::get(CI->getType(), FormatStr.size()) : 0;
+      return EmitFWrite(CI->getArgOperand(1),
+                        ConstantInt::get(TD->getIntPtrType(*Context),
+                                         FormatStr.size()),
+                        CI->getArgOperand(0), B, TD, TLI);
     }
 
     // The remaining optimizations require the format string to be "%s" or "%c"
@@ -1544,14 +1549,12 @@ struct FPrintFOpt : public LibCallOptimization {
     if (FormatStr[1] == 'c') {
       // fprintf(F, "%c", chr) --> fputc(chr, F)
       if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0;
-      Value *NewCI = EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B,
-                               TD, TLI);
-      return NewCI ? ConstantInt::get(CI->getType(), 1) : 0;
+      return EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI);
     }
 
     if (FormatStr[1] == 's') {
       // fprintf(F, "%s", str) --> fputs(str, F)
-      if (!CI->getArgOperand(2)->getType()->isPointerTy() || !CI->use_empty())
+      if (!CI->getArgOperand(2)->getType()->isPointerTy())
         return 0;
       return EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI);
     }
diff --git a/lib/Transforms/Utils/Utils.cpp b/lib/Transforms/Utils/Utils.cpp
index 5812d46..c3df215 100644
--- a/lib/Transforms/Utils/Utils.cpp
+++ b/lib/Transforms/Utils/Utils.cpp
@@ -13,6 +13,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/InitializePasses.h"
+#include "llvm/PassRegistry.h"
 #include "llvm-c/Initialization.h"
 
 using namespace llvm;
diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp
index b5941bd..544c5ee 100644
--- a/lib/Transforms/Utils/ValueMapper.cpp
+++ b/lib/Transforms/Utils/ValueMapper.cpp
@@ -57,7 +57,7 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
       return VM[V] = const_cast<Value*>(V);
     
     // Create a dummy node in case we have a metadata cycle.
-    MDNode *Dummy = MDNode::getTemporary(V->getContext(), ArrayRef<Value*>());
+    MDNode *Dummy = MDNode::getTemporary(V->getContext(), None);
     VM[V] = Dummy;
     
     // Check all operands to see if any need to be remapped.
diff --git a/lib/Transforms/Vectorize/CMakeLists.txt b/lib/Transforms/Vectorize/CMakeLists.txt
index e64034a..7ae082f 100644
--- a/lib/Transforms/Vectorize/CMakeLists.txt
+++ b/lib/Transforms/Vectorize/CMakeLists.txt
@@ -2,6 +2,8 @@ add_llvm_library(LLVMVectorize
   BBVectorize.cpp
   Vectorize.cpp
   LoopVectorize.cpp
+  SLPVectorizer.cpp
+  VecUtils.cpp
   )
 
 add_dependencies(LLVMVectorize intrinsics_gen)
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index acf2b81..08d3725 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8,9 +8,9 @@
 //===----------------------------------------------------------------------===//
 //
 // This is the LLVM loop vectorizer. This pass modifies 'vectorizable' loops
-// and generates target-independent LLVM-IR. Legalization of the IR is done
-// in the codegen. However, the vectorizer uses (will use) the codegen
-// interfaces to generate IR that is likely to result in an optimal binary.
+// and generates target-independent LLVM-IR.
+// The vectorizer uses the TargetTransformInfo analysis to estimate the costs
+// of instructions in order to estimate the profitability of vectorization.
 //
 // The loop vectorizer combines consecutive loop iterations into a single
 // 'wide' iteration. After this transformation the index is incremented
@@ -78,7 +78,9 @@
 #include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/PatternMatch.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ValueHandle.h"
 #include "llvm/Target/TargetLibraryInfo.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -87,6 +89,7 @@
 #include <map>
 
 using namespace llvm;
+using namespace llvm::PatternMatch;
 
 static cl::opt<unsigned>
 VectorizationFactor("force-vector-width", cl::init(0), cl::Hidden,
@@ -112,9 +115,9 @@ TinyTripCountVectorThreshold("vectorizer-min-trip-count", cl::init(16),
 /// We don't unroll loops with a known constant trip count below this number.
 static const unsigned TinyTripCountUnrollThreshold = 128;
 
-/// When performing a runtime memory check, do not check more than this
-/// number of pointers. Notice that the check is quadratic!
-static const unsigned RuntimeMemoryCheckThreshold = 4;
+/// When performing memory disambiguation checks at runtime do not make more
+/// than this number of comparisons.
+static const unsigned RuntimeMemoryCheckThreshold = 8;
 
 /// We use a metadata with this name  to indicate that a scalar loop was
 /// vectorized and that we don't need to re-vectorize it if we run into it
@@ -214,7 +217,7 @@ private:
   /// This function adds 0, 1, 2 ... to each vector element, starting at zero.
   /// If Negate is set then negative numbers are added e.g. (0, -1, -2, ...).
   /// The sequence starts at StartIndex.
-  Value *getConsecutiveVector(Value* Val, unsigned StartIdx, bool Negate);
+  Value *getConsecutiveVector(Value* Val, int StartIdx, bool Negate);
 
   /// When we go over instructions in the basic block we rely on previous
   /// values within the current basic block or on loop invariant values.
@@ -333,7 +336,7 @@ public:
                             DominatorTree *DT, TargetTransformInfo* TTI,
                             AliasAnalysis *AA, TargetLibraryInfo *TLI)
       : TheLoop(L), SE(SE), DL(DL), DT(DT), TTI(TTI), AA(AA), TLI(TLI),
-        Induction(0) {}
+        Induction(0), HasFunNoNaNAttr(false) {}
 
   /// This enum represents the kinds of reductions that we support.
   enum ReductionKind {
@@ -343,8 +346,10 @@ public:
     RK_IntegerOr,   ///< Bitwise or logical OR of numbers.
     RK_IntegerAnd,  ///< Bitwise or logical AND of numbers.
     RK_IntegerXor,  ///< Bitwise or logical XOR of numbers.
+    RK_IntegerMinMax, ///< Min/max implemented in terms of select(cmp()).
     RK_FloatAdd,    ///< Sum of floats.
-    RK_FloatMult    ///< Product of floats.
+    RK_FloatMult,   ///< Product of floats.
+    RK_FloatMinMax  ///< Min/max implemented in terms of select(cmp()).
   };
 
   /// This enum represents the kinds of inductions that we support.
@@ -356,21 +361,52 @@ public:
     IK_ReversePtrInduction  ///< Reverse ptr indvar. Step = - sizeof(elem).
   };
 
+  // This enum represents the kind of minmax reduction.
+  enum MinMaxReductionKind {
+    MRK_Invalid,
+    MRK_UIntMin,
+    MRK_UIntMax,
+    MRK_SIntMin,
+    MRK_SIntMax,
+    MRK_FloatMin,
+    MRK_FloatMax
+  };
+
   /// This POD struct holds information about reduction variables.
   struct ReductionDescriptor {
     ReductionDescriptor() : StartValue(0), LoopExitInstr(0),
-      Kind(RK_NoReduction) {}
+      Kind(RK_NoReduction), MinMaxKind(MRK_Invalid) {}
 
-    ReductionDescriptor(Value *Start, Instruction *Exit, ReductionKind K)
-        : StartValue(Start), LoopExitInstr(Exit), Kind(K) {}
+    ReductionDescriptor(Value *Start, Instruction *Exit, ReductionKind K,
+                        MinMaxReductionKind MK)
+        : StartValue(Start), LoopExitInstr(Exit), Kind(K), MinMaxKind(MK) {}
 
     // The starting value of the reduction.
     // It does not have to be zero!
-    Value *StartValue;
+    TrackingVH<Value> StartValue;
     // The instruction who's value is used outside the loop.
     Instruction *LoopExitInstr;
     // The kind of the reduction.
     ReductionKind Kind;
+    // If this is a min/max reduction, the kind of reduction.
+    MinMaxReductionKind MinMaxKind;
+  };
+
+  /// This POD struct holds information about a potential reduction operation.
+  struct ReductionInstDesc {
+    ReductionInstDesc(bool IsRedux, Instruction *I) :
+      IsReduction(IsRedux), PatternLastInst(I), MinMaxKind(MRK_Invalid) {}
+
+    ReductionInstDesc(Instruction *I, MinMaxReductionKind K) :
+      IsReduction(true), PatternLastInst(I), MinMaxKind(K) {}
+
+    // Is this instruction a reduction candidate.
+    bool IsReduction;
+    // The last instruction in a min/max pattern (select of the select(icmp())
+    // pattern), or the current reduction instruction otherwise.
+    Instruction *PatternLastInst;
+    // If this is a min/max pattern the comparison predicate.
+    MinMaxReductionKind MinMaxKind;
   };
 
   // This POD struct holds information about the memory runtime legality
@@ -387,16 +423,18 @@ public:
     }
 
     /// Insert a pointer and calculate the start and end SCEVs.
-    void insert(ScalarEvolution *SE, Loop *Lp, Value *Ptr);
+    void insert(ScalarEvolution *SE, Loop *Lp, Value *Ptr, bool WritePtr);
 
     /// This flag indicates if we need to add the runtime check.
     bool Need;
     /// Holds the pointers that we need to check.
-    SmallVector<Value*, 2> Pointers;
+    SmallVector<TrackingVH<Value>, 2> Pointers;
     /// Holds the pointer value at the beginning of the loop.
     SmallVector<const SCEV*, 2> Starts;
     /// Holds the pointer value at the end of the loop.
     SmallVector<const SCEV*, 2> Ends;
+    /// Holds the information if this pointer is used for writing to memory.
+    SmallVector<bool, 2> IsWritePtr;
   };
 
   /// A POD for saving information about induction variables.
@@ -404,7 +442,7 @@ public:
     InductionInfo(Value *Start, InductionKind K) : StartValue(Start), IK(K) {}
     InductionInfo() : StartValue(0), IK(IK_NoInduction) {}
     /// Start value.
-    Value *StartValue;
+    TrackingVH<Value> StartValue;
     /// Induction kind.
     InductionKind IK;
   };
@@ -461,6 +499,10 @@ public:
 
   /// Returns the information that we collected about runtime memory check.
   RuntimePointerCheck *getRuntimePointerCheck() { return &PtrRtCheck; }
+
+  /// This function returns the identity element (or neutral element) for
+  /// the operation K.
+  static Constant *getReductionIdentity(ReductionKind K, Type *Tp);
 private:
   /// Check if a single basic block loop is vectorizable.
   /// At this point we know that this is a loop with a constant trip count
@@ -487,9 +529,17 @@ private:
   /// Returns True, if 'Phi' is the kind of reduction variable for type
   /// 'Kind'. If this is a reduction variable, it adds it to ReductionList.
   bool AddReductionVar(PHINode *Phi, ReductionKind Kind);
-  /// Returns true if the instruction I can be a reduction variable of type
-  /// 'Kind'.
-  bool isReductionInstr(Instruction *I, ReductionKind Kind);
+  /// Returns a struct describing if the instruction 'I' can be a reduction
+  /// variable of type 'Kind'. If the reduction is a min/max pattern of
+  /// select(icmp()) this function advances the instruction pointer 'I' from the
+  /// compare instruction to the select instruction and stores this pointer in
+  /// 'PatternLastInst' member of the returned struct.
+  ReductionInstDesc isReductionInstr(Instruction *I, ReductionKind Kind,
+                                     ReductionInstDesc &Desc);
+  /// Returns a struct describing whether the instruction is part of a
+  /// Select(ICmp(X, Y), X, Y) pattern computing min(X, Y) or max(X, Y).
+  static ReductionInstDesc isMinMaxSelectCmpPattern(Instruction *I,
+                                                    ReductionInstDesc &Prev);
   /// Returns the induction kind of Phi. This function may return NoInduction
   /// if the PHI is not an induction variable.
   InductionKind isInductionVariable(PHINode *Phi);
@@ -540,6 +590,8 @@ private:
   /// We need to check that all of the pointers in this list are disjoint
   /// at runtime.
   RuntimePointerCheck PtrRtCheck;
+  /// Can we assume the absence of NaNs.
+  bool HasFunNoNaNAttr;
 };
 
 /// LoopVectorizationCostModel - estimates the expected speedups due to
@@ -662,6 +714,11 @@ struct LoopVectorize : public LoopPass {
     AA = getAnalysisIfAvailable<AliasAnalysis>();
     TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
 
+    if (DL == NULL) {
+      DEBUG(dbgs() << "LV: Not vectorizing because of missing data layout\n");
+      return false;
+    }
+
     DEBUG(dbgs() << "LV: Checking a loop in \"" <<
           L->getHeader()->getParent()->getName() << "\"\n");
 
@@ -737,7 +794,8 @@ struct LoopVectorize : public LoopPass {
 
 void
 LoopVectorizationLegality::RuntimePointerCheck::insert(ScalarEvolution *SE,
-                                                       Loop *Lp, Value *Ptr) {
+                                                       Loop *Lp, Value *Ptr,
+                                                       bool WritePtr) {
   const SCEV *Sc = SE->getSCEV(Ptr);
   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
   assert(AR && "Invalid addrec expression");
@@ -746,6 +804,7 @@ LoopVectorizationLegality::RuntimePointerCheck::insert(ScalarEvolution *SE,
   Pointers.push_back(Ptr);
   Starts.push_back(AR->getStart());
   Ends.push_back(ScEnd);
+  IsWritePtr.push_back(WritePtr);
 }
 
 Value *InnerLoopVectorizer::getBroadcastInstrs(Value *V) {
@@ -771,7 +830,7 @@ Value *InnerLoopVectorizer::getBroadcastInstrs(Value *V) {
   return Shuf;
 }
 
-Value *InnerLoopVectorizer::getConsecutiveVector(Value* Val, unsigned StartIdx,
+Value *InnerLoopVectorizer::getConsecutiveVector(Value* Val, int StartIdx,
                                                  bool Negate) {
   assert(Val->getType()->isVectorTy() && "Must be a vector");
   assert(Val->getType()->getScalarType()->isIntegerTy() &&
@@ -784,8 +843,8 @@ Value *InnerLoopVectorizer::getConsecutiveVector(Value* Val, unsigned StartIdx,
 
   // Create a vector of consecutive numbers from zero to VF.
   for (int i = 0; i < VLen; ++i) {
-    int Idx = Negate ? (-i): i;
-    Indices.push_back(ConstantInt::get(ITy, StartIdx + Idx));
+    int64_t Idx = Negate ? (-i) : i;
+    Indices.push_back(ConstantInt::get(ITy, StartIdx + Idx, Negate));
   }
 
   // Add the consecutive indices to the vector value.
@@ -906,12 +965,18 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
   Value *Ptr = LI ? LI->getPointerOperand() : SI->getPointerOperand();
   unsigned Alignment = LI ? LI->getAlignment() : SI->getAlignment();
 
+  unsigned ScalarAllocatedSize = DL->getTypeAllocSize(ScalarDataTy);
+  unsigned VectorElementSize = DL->getTypeStoreSize(DataTy)/VF;
+
+  if (ScalarAllocatedSize != VectorElementSize)
+    return scalarizeInstruction(Instr);
+
   // If the pointer is loop invariant or if it is non consecutive,
   // scalarize the load.
-  int Stride = Legal->isConsecutivePtr(Ptr);
-  bool Reverse = Stride < 0;
+  int ConsecutiveStride = Legal->isConsecutivePtr(Ptr);
+  bool Reverse = ConsecutiveStride < 0;
   bool UniformLoad = LI && Legal->isUniform(Ptr);
-  if (Stride == 0 || UniformLoad)
+  if (!ConsecutiveStride || UniformLoad)
     return scalarizeInstruction(Instr);
 
   Constant *Zero = Builder.getInt32(0);
@@ -1040,10 +1105,10 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr) {
   // Create a new entry in the WidenMap and initialize it to Undef or Null.
   VectorParts &VecResults = WidenMap.splat(Instr, UndefVec);
 
-  // For each scalar that we create:
-  for (unsigned Width = 0; Width < VF; ++Width) {
-    // For each vector unroll 'part':
-    for (unsigned Part = 0; Part < UF; ++Part) {
+  // For each vector unroll 'part':
+  for (unsigned Part = 0; Part < UF; ++Part) {
+    // For each scalar that we create:
+    for (unsigned Width = 0; Width < VF; ++Width) {
       Instruction *Cloned = Instr->clone();
       if (!IsVoidRetTy)
         Cloned->setName(Instr->getName() + ".cloned");
@@ -1110,6 +1175,10 @@ InnerLoopVectorizer::addRuntimeCheck(LoopVectorizationLegality *Legal,
 
   for (unsigned i = 0; i < NumPointers; ++i) {
     for (unsigned j = i+1; j < NumPointers; ++j) {
+      // No need to check if two readonly pointers intersect.
+      if (!PtrRtCheck->IsWritePtr[i] && !PtrRtCheck->IsWritePtr[j])
+        continue;
+
       Value *Start0 = ChkBuilder.CreateBitCast(Starts[i], PtrArithTy, "bc");
       Value *Start1 = ChkBuilder.CreateBitCast(Starts[j], PtrArithTy, "bc");
       Value *End0 =   ChkBuilder.CreateBitCast(Ends[i],   PtrArithTy, "bc");
@@ -1167,7 +1236,7 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
 
   // Mark the old scalar loop with metadata that tells us not to vectorize this
   // loop again if we run into it.
-  MDNode *MD = MDNode::get(OldBasicBlock->getContext(), ArrayRef<Value*>());
+  MDNode *MD = MDNode::get(OldBasicBlock->getContext(), None);
   OldBasicBlock->getTerminator()->setMetadata(AlreadyVectorizedMDName, MD);
 
   // Some loops have a single integer induction variable, while other loops
@@ -1436,24 +1505,24 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
 
 /// This function returns the identity element (or neutral element) for
 /// the operation K.
-static Constant*
-getReductionIdentity(LoopVectorizationLegality::ReductionKind K, Type *Tp) {
+Constant*
+LoopVectorizationLegality::getReductionIdentity(ReductionKind K, Type *Tp) {
   switch (K) {
-  case LoopVectorizationLegality:: RK_IntegerXor:
-  case LoopVectorizationLegality:: RK_IntegerAdd:
-  case LoopVectorizationLegality:: RK_IntegerOr:
+  case RK_IntegerXor:
+  case RK_IntegerAdd:
+  case RK_IntegerOr:
     // Adding, Xoring, Oring zero to a number does not change it.
     return ConstantInt::get(Tp, 0);
-  case LoopVectorizationLegality:: RK_IntegerMult:
+  case RK_IntegerMult:
     // Multiplying a number by 1 does not change it.
     return ConstantInt::get(Tp, 1);
-  case LoopVectorizationLegality:: RK_IntegerAnd:
+  case RK_IntegerAnd:
     // AND-ing a number with an all-1 value does not change it.
     return ConstantInt::get(Tp, -1, true);
-  case LoopVectorizationLegality:: RK_FloatMult:
+  case  RK_FloatMult:
     // Multiplying a number by 1 does not change it.
     return ConstantFP::get(Tp, 1.0L);
-  case LoopVectorizationLegality:: RK_FloatAdd:
+  case  RK_FloatAdd:
     // Adding zero to a number does not change it.
     return ConstantFP::get(Tp, 0.0L);
   default:
@@ -1566,7 +1635,7 @@ getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) {
 }
 
 /// This function translates the reduction kind to an LLVM binary operator.
-static Instruction::BinaryOps
+static unsigned
 getReductionBinOp(LoopVectorizationLegality::ReductionKind Kind) {
   switch (Kind) {
     case LoopVectorizationLegality::RK_IntegerAdd:
@@ -1583,11 +1652,53 @@ getReductionBinOp(LoopVectorizationLegality::ReductionKind Kind) {
       return Instruction::FMul;
     case LoopVectorizationLegality::RK_FloatAdd:
       return Instruction::FAdd;
+    case LoopVectorizationLegality::RK_IntegerMinMax:
+      return Instruction::ICmp;
+    case LoopVectorizationLegality::RK_FloatMinMax:
+      return Instruction::FCmp;
     default:
       llvm_unreachable("Unknown reduction operation");
   }
 }
 
+/// Emit a compare followed by a select that yields the minimum or maximum of
+/// \p Left and \p Right, as requested by the min/max reduction kind \p RK.
+Value *createMinMaxOp(IRBuilder<> &Builder,
+                      LoopVectorizationLegality::MinMaxReductionKind RK,
+                      Value *Left, Value *Right) {
+  // Placeholder only: every valid kind below overwrites the predicate.
+  CmpInst::Predicate P = CmpInst::ICMP_NE;
+  switch (RK) {
+  default:
+    llvm_unreachable("Unknown min/max reduction kind");
+  case LoopVectorizationLegality::MRK_UIntMin:
+    P = CmpInst::ICMP_ULT;
+    break;
+  case LoopVectorizationLegality::MRK_UIntMax:
+    P = CmpInst::ICMP_UGT;
+    break;
+  case LoopVectorizationLegality::MRK_SIntMin:
+    P = CmpInst::ICMP_SLT;
+    break;
+  case LoopVectorizationLegality::MRK_SIntMax:
+    P = CmpInst::ICMP_SGT;
+    break;
+  case LoopVectorizationLegality::MRK_FloatMin:
+    P = CmpInst::FCMP_OLT;
+    break;
+  case LoopVectorizationLegality::MRK_FloatMax:
+    P = CmpInst::FCMP_OGT;
+    break;
+  }
+
+  // Floating point kinds need an fcmp, all integer kinds an icmp.
+  const bool IsFP = RK == LoopVectorizationLegality::MRK_FloatMin ||
+                    RK == LoopVectorizationLegality::MRK_FloatMax;
+  Value *Cmp = IsFP ? Builder.CreateFCmp(P, Left, Right, "rdx.minmax.cmp")
+                    : Builder.CreateICmp(P, Left, Right, "rdx.minmax.cmp");
+  return Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select");
+}
+
 void
 InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
   //===------------------------------------------------===//
@@ -1651,13 +1762,24 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
 
     // Find the reduction identity variable. Zero for addition, or, xor,
     // one for multiplication, -1 for And.
-    Constant *Iden = getReductionIdentity(RdxDesc.Kind, VecTy->getScalarType());
-    Constant *Identity = ConstantVector::getSplat(VF, Iden);
-
-    // This vector is the Identity vector where the first element is the
-    // incoming scalar reduction.
-    Value *VectorStart = Builder.CreateInsertElement(Identity,
-                                                     RdxDesc.StartValue, Zero);
+    Value *Identity;
+    Value *VectorStart;
+    if (RdxDesc.Kind == LoopVectorizationLegality::RK_IntegerMinMax ||
+        RdxDesc.Kind == LoopVectorizationLegality::RK_FloatMinMax) {
+      // MinMax reductions have the start value as their identity.
+      VectorStart = Identity = Builder.CreateVectorSplat(VF, RdxDesc.StartValue,
+                                                         "minmax.ident");
+    } else {
+      Constant *Iden =
+        LoopVectorizationLegality::getReductionIdentity(RdxDesc.Kind,
+                                                        VecTy->getScalarType());
+      Identity = ConstantVector::getSplat(VF, Iden);
+
+      // This vector is the Identity vector where the first element is the
+      // incoming scalar reduction.
+      VectorStart = Builder.CreateInsertElement(Identity,
+                                                RdxDesc.StartValue, Zero);
+    }
 
     // Fix the vector-loop phi.
     // We created the induction variable so we know that the
@@ -1699,10 +1821,15 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
 
     // Reduce all of the unrolled parts into a single vector.
     Value *ReducedPartRdx = RdxParts[0];
+    unsigned Op = getReductionBinOp(RdxDesc.Kind);
     for (unsigned part = 1; part < UF; ++part) {
-      Instruction::BinaryOps Op = getReductionBinOp(RdxDesc.Kind);
-      ReducedPartRdx = Builder.CreateBinOp(Op, RdxParts[part], ReducedPartRdx,
-                                           "bin.rdx");
+      if (Op != Instruction::ICmp && Op != Instruction::FCmp)
+        ReducedPartRdx = Builder.CreateBinOp((Instruction::BinaryOps)Op,
+                                             RdxParts[part], ReducedPartRdx,
+                                             "bin.rdx");
+      else
+        ReducedPartRdx = createMinMaxOp(Builder, RdxDesc.MinMaxKind,
+                                        ReducedPartRdx, RdxParts[part]);
     }
 
     // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
@@ -1727,8 +1854,11 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
                                     ConstantVector::get(ShuffleMask),
                                     "rdx.shuf");
 
-      Instruction::BinaryOps Op = getReductionBinOp(RdxDesc.Kind);
-      TmpVec = Builder.CreateBinOp(Op, TmpVec, Shuf, "bin.rdx");
+      if (Op != Instruction::ICmp && Op != Instruction::FCmp)
+        TmpVec = Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf,
+                                     "bin.rdx");
+      else
+        TmpVec = createMinMaxOp(Builder, RdxDesc.MinMaxKind, TmpVec, Shuf);
     }
 
     // The result is in the first element of the vector.
@@ -1861,18 +1991,33 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
         // We know that all PHIs in non header blocks are converted into
         // selects, so we don't have to worry about the insertion order and we
         // can just use the builder.
-
         // At this point we generate the predication tree. There may be
         // duplications since this is a simple recursive scan, but future
         // optimizations will clean it up.
-        VectorParts Cond = createEdgeMask(P->getIncomingBlock(0),
-                                               P->getParent());
 
-        for (unsigned part = 0; part < UF; ++part) {
-        VectorParts &In0 = getVectorValue(P->getIncomingValue(0));
-        VectorParts &In1 = getVectorValue(P->getIncomingValue(1));
-          Entry[part] = Builder.CreateSelect(Cond[part], In0[part], In1[part],
-                                             "predphi");
+        unsigned NumIncoming = P->getNumIncomingValues();
+        assert(NumIncoming > 1 && "Invalid PHI");
+
+        // Generate a sequence of selects of the form:
+        // SELECT(Mask3, In3,
+        //      SELECT(Mask2, In2,
+        //                   ( ...)))
+        for (unsigned In = 0; In < NumIncoming; In++) {
+          VectorParts Cond = createEdgeMask(P->getIncomingBlock(In),
+                                            P->getParent());
+          VectorParts &In0 = getVectorValue(P->getIncomingValue(In));
+
+          for (unsigned part = 0; part < UF; ++part) {
+            // We don't need to 'select' the first PHI operand because it is
+            // the default value if all of the other masks don't match.
+            if (In == 0)
+              Entry[part] = In0[part];
+            else
+              // Select between the current value and the previous incoming edge
+              // based on the incoming mask.
+              Entry[part] = Builder.CreateSelect(Cond[part], In0[part],
+                                                 Entry[part], "predphi");
+          }
         }
         continue;
       }
@@ -1928,7 +2073,8 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
           // After broadcasting the induction variable we need to make the
           // vector consecutive by adding  ... -3, -2, -1, 0.
           for (unsigned part = 0; part < UF; ++part)
-            Entry[part] = getConsecutiveVector(Broadcasted, -VF * part, true);
+            Entry[part] = getConsecutiveVector(Broadcasted, -(int)VF * part,
+                                               true);
           continue;
         }
 
@@ -2152,12 +2298,6 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
     if (!isa<BranchInst>(BB->getTerminator()))
       return false;
 
-    // We must have at most two predecessors because we need to convert
-    // all PHIs to selects.
-    unsigned Preds = std::distance(pred_begin(BB), pred_end(BB));
-    if (Preds > 2)
-      return false;
-
     // We must be able to predicate all blocks that need to be predicated.
     if (blockNeedsPredication(BB) && !blockCanBePredicated(BB))
       return false;
@@ -2168,7 +2308,10 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
 }
 
 bool LoopVectorizationLegality::canVectorize() {
-  assert(TheLoop->getLoopPreheader() && "No preheader!!");
+  // We must have a loop in canonical form. Loops with indirectbr in them cannot
+  // be canonicalized.
+  if (!TheLoop->getLoopPreheader())
+    return false;
 
   // We can only vectorize innermost loops.
   if (TheLoop->getSubLoopsVector().size())
@@ -2235,6 +2378,26 @@ bool LoopVectorizationLegality::canVectorize() {
   return true;
 }
 
+/// \brief Return true if \p Inst is not one of the identified \p Reductions
+/// and has at least one user outside of \p TheLoop.
+static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
+                               SmallPtrSet<Value *, 4> &Reductions) {
+  // Reduction instructions are allowed to have exit users. All other
+  // instructions must not have external users.
+  if (Reductions.count(Inst))
+    return false;
+  // Look for a user of the instruction that lives outside of the loop.
+  for (Value::use_iterator UI = Inst->use_begin(), UE = Inst->use_end();
+       UI != UE; ++UI) {
+    Instruction *User = cast<Instruction>(*UI);
+    if (TheLoop->contains(User))
+      continue;
+    DEBUG(dbgs() << "LV: Found an outside user for : " << *User << "\n");
+    return true;
+  }
+  return false;
+}
+
 bool LoopVectorizationLegality::canVectorizeInstrs() {
   BasicBlock *PreHeader = TheLoop->getLoopPreheader();
   BasicBlock *Header = TheLoop->getHeader();
@@ -2246,6 +2409,13 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
     return false;
   }
 
+  // Look for the attribute signaling the absence of NaNs.
+  Function &F = *Header->getParent();
+  if (F.hasFnAttribute("no-nans-fp-math"))
+    HasFunNoNaNAttr = F.getAttributes().getAttribute(
+      AttributeSet::FunctionIndex,
+      "no-nans-fp-math").getValueAsString() == "true";
+
   // For each block in the loop.
   for (Loop::block_iterator bb = TheLoop->block_begin(),
        be = TheLoop->block_end(); bb != be; ++bb) {
@@ -2255,12 +2425,6 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
          ++it) {
 
       if (PHINode *Phi = dyn_cast<PHINode>(it)) {
-        // This should not happen because the loop should be normalized.
-        if (Phi->getNumIncomingValues() != 2) {
-          DEBUG(dbgs() << "LV: Found an invalid PHI.\n");
-          return false;
-        }
-
         // Check that this PHI type is allowed.
         if (!Phi->getType()->isIntegerTy() &&
             !Phi->getType()->isFloatingPointTy() &&
@@ -2272,8 +2436,19 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
         // If this PHINode is not in the header block, then we know that we
         // can convert it to select during if-conversion. No need to check if
         // the PHIs in this block are induction or reduction variables.
-        if (*bb != Header)
-          continue;
+        if (*bb != Header) {
+          // Check that this instruction has no outside users or is an
+          // identified reduction value with an outside user.
+          if(!hasOutsideLoopUser(TheLoop, it, AllowedExit))
+            continue;
+          return false;
+        }
+
+        // Only if-converted PHIs may have more than two incoming values.
+        if (Phi->getNumIncomingValues() != 2) {
+          DEBUG(dbgs() << "LV: Found an invalid PHI.\n");
+          return false;
+        }
 
         // This is the value coming from the preheader.
         Value *StartValue = Phi->getIncomingValueForBlock(PreHeader);
@@ -2315,6 +2490,10 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
           DEBUG(dbgs() << "LV: Found a XOR reduction PHI."<< *Phi <<"\n");
           continue;
         }
+        if (AddReductionVar(Phi, RK_IntegerMinMax)) {
+          DEBUG(dbgs() << "LV: Found a MINMAX reduction PHI."<< *Phi <<"\n");
+          continue;
+        }
         if (AddReductionVar(Phi, RK_FloatMult)) {
           DEBUG(dbgs() << "LV: Found an FMult reduction PHI."<< *Phi <<"\n");
           continue;
@@ -2323,6 +2502,10 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
           DEBUG(dbgs() << "LV: Found an FAdd reduction PHI."<< *Phi <<"\n");
           continue;
         }
+        if (AddReductionVar(Phi, RK_FloatMinMax)) {
+          DEBUG(dbgs() << "LV: Found an float MINMAX reduction PHI."<< *Phi <<"\n");
+          continue;
+        }
 
         DEBUG(dbgs() << "LV: Found an unidentified PHI."<< *Phi <<"\n");
         return false;
@@ -2352,17 +2535,9 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
 
       // Reduction instructions are allowed to have exit users.
       // All other instructions must not have external users.
-      if (!AllowedExit.count(it))
-        //Check that all of the users of the loop are inside the BB.
-        for (Value::use_iterator I = it->use_begin(), E = it->use_end();
-             I != E; ++I) {
-          Instruction *U = cast<Instruction>(*I);
-          // This user may be a reduction exit value.
-          if (!TheLoop->contains(U)) {
-            DEBUG(dbgs() << "LV: Found an outside user for : "<< *U << "\n");
-            return false;
-          }
-        }
+      if (hasOutsideLoopUser(TheLoop, it, AllowedExit))
+        return false;
+
     } // next instr.
 
   }
@@ -2442,13 +2617,6 @@ LoopVectorizationLegality::hasPossibleGlobalWriteReorder(
 
 bool LoopVectorizationLegality::canVectorizeMemory() {
 
-  if (TheLoop->isAnnotatedParallel()) {
-    DEBUG(dbgs()
-          << "LV: A loop annotated parallel, ignore memory dependency "
-          << "checks.\n");
-    return true;
-  }
-
   typedef SmallVector<Value*, 16> ValueVector;
   typedef SmallPtrSet<Value*, 16> ValueSet;
   // Holds the Load and Store *instructions*.
@@ -2457,6 +2625,8 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
   PtrRtCheck.Pointers.clear();
   PtrRtCheck.Need = false;
 
+  const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
+
   // For each block.
   for (Loop::block_iterator bb = TheLoop->block_begin(),
        be = TheLoop->block_end(); bb != be; ++bb) {
@@ -2471,7 +2641,7 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
       if (it->mayReadFromMemory()) {
         LoadInst *Ld = dyn_cast<LoadInst>(it);
         if (!Ld) return false;
-        if (!Ld->isSimple()) {
+        if (!Ld->isSimple() && !IsAnnotatedParallel) {
           DEBUG(dbgs() << "LV: Found a non-simple load.\n");
           return false;
         }
@@ -2483,7 +2653,7 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
       if (it->mayWriteToMemory()) {
         StoreInst *St = dyn_cast<StoreInst>(it);
         if (!St) return false;
-        if (!St->isSimple()) {
+        if (!St->isSimple() && !IsAnnotatedParallel) {
           DEBUG(dbgs() << "LV: Found a non-simple store.\n");
           return false;
         }
@@ -2530,6 +2700,13 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
       ReadWrites.insert(std::make_pair(Ptr, ST));
   }
 
+  if (IsAnnotatedParallel) {
+    DEBUG(dbgs()
+          << "LV: A loop annotated parallel, ignore memory dependency "
+          << "checks.\n");
+    return true;
+  }
+
   for (I = Loads.begin(), IE = Loads.end(); I != IE; ++I) {
     LoadInst *LD = cast<LoadInst>(*I);
     Value* Ptr = LD->getPointerOperand();
@@ -2552,6 +2729,9 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
     return true;
   }
 
+  unsigned NumReadPtrs = 0;
+  unsigned NumWritePtrs = 0;
+
   // Find pointers with computable bounds. We are going to use this information
   // to place a runtime bound check.
   bool CanDoRT = true;
@@ -2559,7 +2739,8 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
   for (MI = ReadWrites.begin(), ME = ReadWrites.end(); MI != ME; ++MI) {
     Value *V = (*MI).first;
     if (hasComputableBounds(V)) {
-      PtrRtCheck.insert(SE, TheLoop, V);
+      PtrRtCheck.insert(SE, TheLoop, V, true);
+      NumWritePtrs++;
       DEBUG(dbgs() << "LV: Found a runtime check ptr:" << *V <<"\n");
     } else {
       CanDoRT = false;
@@ -2569,7 +2750,8 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
   for (MI = Reads.begin(), ME = Reads.end(); MI != ME; ++MI) {
     Value *V = (*MI).first;
     if (hasComputableBounds(V)) {
-      PtrRtCheck.insert(SE, TheLoop, V);
+      PtrRtCheck.insert(SE, TheLoop, V, false);
+      NumReadPtrs++;
       DEBUG(dbgs() << "LV: Found a runtime check ptr:" << *V <<"\n");
     } else {
       CanDoRT = false;
@@ -2579,7 +2761,9 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
 
   // Check that we did not collect too many pointers or found a
   // unsizeable pointer.
-  if (!CanDoRT || PtrRtCheck.Pointers.size() > RuntimeMemoryCheckThreshold) {
+  unsigned NumComparisons = (NumWritePtrs * (NumReadPtrs + NumWritePtrs - 1));
+  DEBUG(dbgs() << "LV: We need to compare " << NumComparisons << " ptrs.\n");
+  if (!CanDoRT || NumComparisons > RuntimeMemoryCheckThreshold) {
     PtrRtCheck.reset();
     CanDoRT = false;
   }
@@ -2642,8 +2826,8 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
                                         Inst,
                                         WriteObjects,
                                         MaxByteWidth)) {
-        DEBUG(dbgs() << "LV: Found a possible write-write reorder:"
-              << *UI <<"\n");
+        DEBUG(dbgs() << "LV: Found a possible write-write reorder:" << **UI
+                     << "\n");
         return false;
       }
 
@@ -2686,8 +2870,8 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
                                         Inst,
                                         WriteObjects,
                                         MaxByteWidth)) {
-        DEBUG(dbgs() << "LV: Found a possible read-write reorder:"
-              << *UI <<"\n");
+        DEBUG(dbgs() << "LV: Found a possible read-write reorder:" << **UI
+                     << "\n");
         return false;
       }
     }
@@ -2733,7 +2917,18 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
   // used as reduction variables (such as ADD). We may have a single
   // out-of-block user. The cycle must end with the original PHI.
   Instruction *Iter = Phi;
-  while (true) {
+
+  // To recognize min/max patterns formed by an icmp/select sequence, we store
+  // the number of instructions we saw from the recognized min/max pattern,
+  // such that we don't stop when we see the phi has two uses (one by the select
+  // and one by the icmp) and to make sure we only see exactly the two
+  // instructions.
+  unsigned NumCmpSelectPatternInst = 0;
+  ReductionInstDesc ReduxDesc(false, 0);
+
+  // Avoid cycles in the chain.
+  SmallPtrSet<Instruction *, 8> VisitedInsts;
+  while (VisitedInsts.insert(Iter)) {
     // If the instruction has no users then this is a broken
     // chain and can't be a reduction variable.
     if (Iter->use_empty())
@@ -2747,9 +2942,6 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
     // Is this a bin op ?
     FoundBinOp |= !isa<PHINode>(Iter);
 
-    // Remember the current instruction.
-    Instruction *OldIter = Iter;
-
     // For each of the *users* of iter.
     for (Value::use_iterator it = Iter->use_begin(), e = Iter->use_end();
          it != e; ++it) {
@@ -2778,25 +2970,35 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
           Iter->hasNUsesOrMore(2))
         continue;
 
-      // We can't have multiple inside users.
-      if (FoundInBlockUser)
+      // We can't have multiple inside users except for a combination of
+      // icmp/select both using the phi.
+      if (FoundInBlockUser && !NumCmpSelectPatternInst)
         return false;
       FoundInBlockUser = true;
 
       // Any reduction instr must be of one of the allowed kinds.
-      if (!isReductionInstr(U, Kind))
+      ReduxDesc = isReductionInstr(U, Kind, ReduxDesc);
+      if (!ReduxDesc.IsReduction)
         return false;
 
+      if (Kind == RK_IntegerMinMax && (isa<ICmpInst>(U) || isa<SelectInst>(U)))
+          ++NumCmpSelectPatternInst;
+      if (Kind == RK_FloatMinMax && (isa<FCmpInst>(U) || isa<SelectInst>(U)))
+          ++NumCmpSelectPatternInst;
+
       // Reductions of instructions such as Div, and Sub is only
       // possible if the LHS is the reduction variable.
-      if (!U->isCommutative() && !isa<PHINode>(U) && U->getOperand(0) != Iter)
+      if (!U->isCommutative() && !isa<PHINode>(U) && !isa<SelectInst>(U) &&
+          !isa<ICmpInst>(U) && !isa<FCmpInst>(U) && U->getOperand(0) != Iter)
         return false;
 
-      Iter = U;
+      Iter = ReduxDesc.PatternLastInst;
     }
 
-    // If all uses were skipped this can't be a reduction variable.
-    if (Iter == OldIter)
+    // This means we have seen one but not the other instruction of the
+    // pattern or more than just a select and cmp.
+    if ((Kind == RK_IntegerMinMax || Kind == RK_FloatMinMax) &&
+        NumCmpSelectPatternInst != 2)
       return false;
 
     // We found a reduction var if we have reached the original
@@ -2807,47 +3009,107 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
       AllowedExit.insert(ExitInstruction);
 
       // Save the description of this reduction variable.
-      ReductionDescriptor RD(RdxStart, ExitInstruction, Kind);
+      ReductionDescriptor RD(RdxStart, ExitInstruction, Kind,
+                             ReduxDesc.MinMaxKind);
       Reductions[Phi] = RD;
       // We've ended the cycle. This is a reduction variable if we have an
       // outside user and it has a binary op.
       return FoundBinOp && ExitInstruction;
     }
   }
+
+  return false;
 }
 
-bool
+/// Describes whether \p I is part of a Select(Cmp(X, Y), X, Y) pattern that
+/// corresponds to min(X, Y) or max(X, Y); \p Prev supplies the kind for a cmp.
+LoopVectorizationLegality::ReductionInstDesc
+LoopVectorizationLegality::isMinMaxSelectCmpPattern(Instruction *I,
+                                                    ReductionInstDesc &Prev) {
+  // I may be either half of the pattern: the compare or the select.
+  assert((isa<ICmpInst>(I) || isa<FCmpInst>(I) || isa<SelectInst>(I)) &&
+         "Expect a compare or select instruction");
+  Instruction *Cmp = 0;
+  SelectInst *Select = 0;
+
+  // We must handle the select(cmp()) as a single instruction. Advance to the
+  // select.
+  if ((Cmp = dyn_cast<ICmpInst>(I)) || (Cmp = dyn_cast<FCmpInst>(I))) {
+    if (!Cmp->hasOneUse() || !(Select = dyn_cast<SelectInst>(*I->use_begin())))
+      return ReductionInstDesc(false, I);
+    return ReductionInstDesc(Select, Prev.MinMaxKind);
+  }
+
+  // Only handle single use cases for now.
+  if (!(Select = dyn_cast<SelectInst>(I)))
+    return ReductionInstDesc(false, I);
+  if (!(Cmp = dyn_cast<ICmpInst>(I->getOperand(0))) &&
+      !(Cmp = dyn_cast<FCmpInst>(I->getOperand(0))))
+    return ReductionInstDesc(false, I);
+  if (!Cmp->hasOneUse())
+    return ReductionInstDesc(false, I);
+
+  Value *CmpLeft;
+  Value *CmpRight;
+
+  // Look for a min/max pattern.
+  if (m_UMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+    return ReductionInstDesc(Select, MRK_UIntMin);
+  else if (m_UMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+    return ReductionInstDesc(Select, MRK_UIntMax);
+  else if (m_SMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+    return ReductionInstDesc(Select, MRK_SIntMax);
+  else if (m_SMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+    return ReductionInstDesc(Select, MRK_SIntMin);
+  else if (m_OrdFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+    return ReductionInstDesc(Select, MRK_FloatMin);
+  else if (m_OrdFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+    return ReductionInstDesc(Select, MRK_FloatMax);
+  else if (m_UnordFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+    return ReductionInstDesc(Select, MRK_FloatMin);
+  else if (m_UnordFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
+    return ReductionInstDesc(Select, MRK_FloatMax);
+
+  return ReductionInstDesc(false, I);
+}
+
+LoopVectorizationLegality::ReductionInstDesc
 LoopVectorizationLegality::isReductionInstr(Instruction *I,
-                                            ReductionKind Kind) {
+                                            ReductionKind Kind,
+                                            ReductionInstDesc &Prev) {
   bool FP = I->getType()->isFloatingPointTy();
   bool FastMath = (FP && I->isCommutative() && I->isAssociative());
-
   switch (I->getOpcode()) {
   default:
-    return false;
+    return ReductionInstDesc(false, I);
   case Instruction::PHI:
-      if (FP && (Kind != RK_FloatMult && Kind != RK_FloatAdd))
-        return false;
-    // possibly.
-    return true;
+      if (FP && (Kind != RK_FloatMult && Kind != RK_FloatAdd &&
+                 Kind != RK_FloatMinMax))
+        return ReductionInstDesc(false, I);
+    return ReductionInstDesc(I, Prev.MinMaxKind);
   case Instruction::Sub:
   case Instruction::Add:
-    return Kind == RK_IntegerAdd;
-  case Instruction::SDiv:
-  case Instruction::UDiv:
+    return ReductionInstDesc(Kind == RK_IntegerAdd, I);
   case Instruction::Mul:
-    return Kind == RK_IntegerMult;
+    return ReductionInstDesc(Kind == RK_IntegerMult, I);
   case Instruction::And:
-    return Kind == RK_IntegerAnd;
+    return ReductionInstDesc(Kind == RK_IntegerAnd, I);
   case Instruction::Or:
-    return Kind == RK_IntegerOr;
+    return ReductionInstDesc(Kind == RK_IntegerOr, I);
   case Instruction::Xor:
-    return Kind == RK_IntegerXor;
+    return ReductionInstDesc(Kind == RK_IntegerXor, I);
   case Instruction::FMul:
-    return Kind == RK_FloatMult && FastMath;
+    return ReductionInstDesc(Kind == RK_FloatMult && FastMath, I);
   case Instruction::FAdd:
-    return Kind == RK_FloatAdd && FastMath;
-   }
+    return ReductionInstDesc(Kind == RK_FloatAdd && FastMath, I);
+  case Instruction::FCmp:
+  case Instruction::ICmp:
+  case Instruction::Select:
+    if (Kind != RK_IntegerMinMax &&
+        (!HasFunNoNaNAttr || Kind != RK_FloatMinMax))
+      return ReductionInstDesc(false, I);
+    return isMinMaxSelectCmpPattern(I, Prev);
+  }
 }
 
 LoopVectorizationLegality::InductionKind
@@ -3380,9 +3642,11 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
         TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS);
 
     // Scalarized loads/stores.
-    int Stride = Legal->isConsecutivePtr(Ptr);
-    bool Reverse = Stride < 0;
-    if (0 == Stride) {
+    int ConsecutiveStride = Legal->isConsecutivePtr(Ptr);
+    bool Reverse = ConsecutiveStride < 0;
+    unsigned ScalarAllocatedSize = DL->getTypeAllocSize(ValTy);
+    unsigned VectorElementSize = DL->getTypeStoreSize(VectorTy)/VF;
+    if (!ConsecutiveStride || ScalarAllocatedSize != VectorElementSize) {
       unsigned Cost = 0;
       // The cost of extracting from the value vector and pointer vector.
       Type *PtrTy = ToVectorTy(Ptr->getType(), VF);
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
new file mode 100644
index 0000000..cc30cc9
--- /dev/null
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -0,0 +1,348 @@
+//===- SLPVectorizer.cpp - A bottom up SLP Vectorizer ---------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This pass implements the Bottom Up SLP vectorizer. It detects consecutive
+// stores that can be put together into vector-stores. Next, it attempts to
+// construct vectorizable tree using the use-def chains. If a profitable tree
+// was found, the SLP vectorizer performs vectorization on the tree.
+//
+// The pass is inspired by the work described in the paper:
+//  "Loop-Aware SLP in GCC" by Ira Rosen, Dorit Nuzman, Ayal Zaks.
+//
+//===----------------------------------------------------------------------===//
+#define SV_NAME "slp-vectorizer"
+#define DEBUG_TYPE SV_NAME
+
+#include "VecUtils.h"
+#include "llvm/Transforms/Vectorize.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <map>
+
+using namespace llvm;
+
+static cl::opt<int>
+SLPCostThreshold("slp-threshold", cl::init(0), cl::Hidden,
+                 cl::desc("Only vectorize trees if the gain is above this "
+                          "number. (gain = -cost of vectorization)"));
+namespace {
+
+/// The SLPVectorizer Pass.
+struct SLPVectorizer : public FunctionPass {
+  typedef std::map<Value*, BoUpSLP::StoreList> StoreListMap;
+
+  /// Pass identification, replacement for typeid
+  static char ID;
+
+  explicit SLPVectorizer() : FunctionPass(ID) {
+    initializeSLPVectorizerPass(*PassRegistry::getPassRegistry());
+  }
+  // Analyses used by the pass; all are re-fetched at the top of runOnFunction.
+  ScalarEvolution *SE;
+  DataLayout *DL;
+  TargetTransformInfo *TTI;
+  AliasAnalysis *AA;
+  LoopInfo *LI;
+
+  virtual bool runOnFunction(Function &F) {
+    SE = &getAnalysis<ScalarEvolution>();
+    DL = getAnalysisIfAvailable<DataLayout>();
+    TTI = &getAnalysis<TargetTransformInfo>();
+    AA = &getAnalysis<AliasAnalysis>();
+    LI = &getAnalysis<LoopInfo>();
+
+    StoreRefs.clear();
+    bool Changed = false;
+
+    // Must have DataLayout. We can't require it because some tests run w/o
+    // triple.
+    if (!DL)
+      return false;
+
+    for (Function::iterator it = F.begin(), e = F.end(); it != e; ++it) {
+      BasicBlock *BB = it;
+      bool BBChanged = false;
+
+      // Use the bottom-up SLP vectorizer to construct chains that start with
+      // the store instructions.
+      BoUpSLP R(BB, SE, DL, TTI, AA, LI->getLoopFor(BB));
+
+      // Vectorize trees that end at reductions.
+      BBChanged |= vectorizeReductions(BB, R);
+
+      // Vectorize trees that end at stores.
+      if (unsigned count = collectStores(BB, R)) {
+        (void)count;
+        DEBUG(dbgs()<<"SLP: Found " << count << " stores to vectorize.\n");
+        BBChanged |= vectorizeStoreChains(R);
+      }
+
+      // Try to hoist some of the scalarization code to the preheader.
+      if (BBChanged) hoistGatherSequence(LI, BB, R);
+
+      Changed |= BBChanged;
+    }
+
+    if (Changed) {
+      DEBUG(dbgs()<<"SLP: vectorized \""<<F.getName()<<"\"\n");
+      DEBUG(verifyFunction(F));
+    }
+    return Changed;
+  }
+
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    FunctionPass::getAnalysisUsage(AU);
+    AU.addRequired<ScalarEvolution>();
+    AU.addRequired<AliasAnalysis>();
+    AU.addRequired<TargetTransformInfo>();
+    AU.addRequired<LoopInfo>();
+  }
+
+private:
+
+  /// \brief Collect memory references and sort them according to their base
+  /// object. We sort the stores to their base objects to reduce the cost of the
+  /// quadratic search on the stores. TODO: We can further reduce this cost
+  /// if we flush the chain creation every time we run into a memory barrier.
+  unsigned collectStores(BasicBlock *BB, BoUpSLP &R);
+
+  /// \brief Try to vectorize a chain that starts at two arithmetic instrs.
+  bool tryToVectorizePair(Value *A, Value *B,  BoUpSLP &R);
+
+  /// \brief Try to vectorize a list of operands.
+  bool tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R);
+
+  /// \brief Try to vectorize a chain that may start at the operands of \p V.
+  bool tryToVectorize(BinaryOperator *V,  BoUpSLP &R);
+
+  /// \brief Vectorize the stores that were collected in StoreRefs.
+  bool vectorizeStoreChains(BoUpSLP &R);
+
+  /// \brief Try to hoist gather sequences outside of the loop in cases where
+  /// all of the sources are loop invariant.
+  void hoistGatherSequence(LoopInfo *LI, BasicBlock *BB, BoUpSLP &R);
+
+  /// \brief Scan the basic block and look for reductions that may start a
+  /// vectorization chain.
+  bool vectorizeReductions(BasicBlock *BB, BoUpSLP &R);
+
+private:
+  StoreListMap StoreRefs;
+};
+
+unsigned SLPVectorizer::collectStores(BasicBlock *BB, BoUpSLP &R) {
+  StoreRefs.clear();
+  unsigned NumStores = 0;
+  for (BasicBlock::iterator I = BB->begin(), End = BB->end(); I != End; ++I) {
+    // Only store instructions are of interest here.
+    StoreInst *Store = dyn_cast<StoreInst>(I);
+    if (!Store)
+      continue;
+    // Give up on the whole block when a stored value is not a scalar.
+    Type *ValTy = Store->getValueOperand()->getType();
+    if (ValTy->isVectorTy() || ValTy->isAggregateType())
+      return 0;
+
+    // Bucket the store under the base pointer of its GEP, if it has one.
+    Value *Base = Store->getPointerOperand();
+    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Base))
+      Base = GEP->getPointerOperand();
+
+    // Record the store and count it.
+    StoreRefs[Base].push_back(Store);
+    ++NumStores;
+  }
+  return NumStores;
+}
+
+bool SLPVectorizer::tryToVectorizePair(Value *A, Value *B,  BoUpSLP &R) {
+  if (!A || !B) return false;
+  Value *VL[] = { A, B };
+  return tryToVectorizeList(VL, R);
+}
+
+bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R) {
+  DEBUG(dbgs()<<"SLP: Vectorizing a list of length = " << VL.size() << ".\n");
+  if (VL.empty()) return false; // The cost queries below read VL[0].
+  // Check that all of the parts are scalar.
+  for (unsigned i = 0, e = VL.size(); i < e; ++i) {
+    Type *Ty = VL[i]->getType();
+    if (Ty->isAggregateType() || Ty->isVectorTy())
+      return false;
+  }
+  // Vectorize only when tree cost plus extract cost is below -SLPCostThreshold.
+  int Cost = R.getTreeCost(VL);
+  int ExtrCost = R.getScalarizationCost(VL);
+  DEBUG(dbgs()<<"SLP: Cost of pair:" << Cost <<
+        " Cost of extract:" << ExtrCost << ".\n");
+  if ((Cost+ExtrCost) >= -SLPCostThreshold) return false;
+  DEBUG(dbgs()<<"SLP: Vectorizing pair.\n");
+  R.vectorizeArith(VL);
+  return true;
+}
+
+bool SLPVectorizer::tryToVectorize(BinaryOperator *V,  BoUpSLP &R) {
+  if (!V) return false;
+  // Try to vectorize V.
+  if (tryToVectorizePair(V->getOperand(0), V->getOperand(1), R))
+    return true;
+  // Otherwise look one level deeper through whichever operand has one use.
+  BinaryOperator *A = dyn_cast<BinaryOperator>(V->getOperand(0));
+  BinaryOperator *B = dyn_cast<BinaryOperator>(V->getOperand(1));
+  // Try to skip B.
+  if (B && B->hasOneUse()) {
+    BinaryOperator *B0 = dyn_cast<BinaryOperator>(B->getOperand(0));
+    BinaryOperator *B1 = dyn_cast<BinaryOperator>(B->getOperand(1));
+    if (tryToVectorizePair(A, B0, R)) {
+      B->moveBefore(V);
+      return true;
+    }
+    if (tryToVectorizePair(A, B1, R)) {
+      B->moveBefore(V);
+      return true;
+    }
+  }
+
+  // Try to skip A.
+  if (A && A->hasOneUse()) {
+    BinaryOperator *A0 = dyn_cast<BinaryOperator>(A->getOperand(0));
+    BinaryOperator *A1 = dyn_cast<BinaryOperator>(A->getOperand(1));
+    if (tryToVectorizePair(A0, B, R)) {
+      A->moveBefore(V);
+      return true;
+    }
+    if (tryToVectorizePair(A1, B, R)) {
+      A->moveBefore(V);
+      return true;
+    }
+  }
+  return false;
+}
+
+bool SLPVectorizer::vectorizeReductions(BasicBlock *BB, BoUpSLP &R) {
+  bool Changed = false;
+  for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
+    if (isa<DbgInfoIntrinsic>(it)) continue;
+
+    // Try to vectorize reductions that use PHINodes.
+    if (PHINode *P = dyn_cast<PHINode>(it)) {
+      // Check that the PHI is a reduction PHI.
+      if (P->getNumIncomingValues() != 2) return Changed;
+      Value *Rdx = (P->getIncomingBlock(0) == BB ? P->getIncomingValue(0) :
+                    (P->getIncomingBlock(1) == BB ? P->getIncomingValue(1) :
+                     0));
+      // Check if this is a Binary Operator.
+      BinaryOperator *BI = dyn_cast_or_null<BinaryOperator>(Rdx);
+      if (!BI)
+        continue;
+
+      Value *Inst = BI->getOperand(0);
+      if (Inst == P) Inst = BI->getOperand(1);
+      Changed |= tryToVectorize(dyn_cast<BinaryOperator>(Inst), R);
+      continue;
+    }
+
+    // Try to vectorize trees that start at compare instructions.
+    if (CmpInst *CI = dyn_cast<CmpInst>(it)) {
+      if (tryToVectorizePair(CI->getOperand(0), CI->getOperand(1), R)) {
+        Changed |= true;
+        continue;
+      }
+      for (int i = 0; i < 2; ++i)
+        if (BinaryOperator *BI = dyn_cast<BinaryOperator>(CI->getOperand(i)))
+          Changed |= tryToVectorizePair(BI->getOperand(0), BI->getOperand(1), R);
+      continue;
+    }
+  }
+
+  return Changed;
+}
+
+bool SLPVectorizer::vectorizeStoreChains(BoUpSLP &R) {
+  bool MadeChange = false;
+  // Walk every group of stores sharing a base and try to vectorize it.
+  for (StoreListMap::iterator Group = StoreRefs.begin(),
+       GroupEnd = StoreRefs.end(); Group != GroupEnd; ++Group) {
+    // A chain needs at least two stores.
+    if (Group->second.size() < 2)
+      continue;
+    DEBUG(dbgs()<<"SLP: Analyzing a store chain of length " <<
+          Group->second.size() << ".\n");
+
+    MadeChange |= R.vectorizeStores(Group->second, -SLPCostThreshold);
+  }
+  return MadeChange;
+}
+
+void SLPVectorizer::hoistGatherSequence(LoopInfo *LI, BasicBlock *BB,
+                                        BoUpSLP &R) {
+  // Check if this block is inside a loop.
+  Loop *L = LI->getLoopFor(BB);
+  if (!L)
+    return;
+
+  // Check if it has a preheader.
+  BasicBlock *PreHeader = L->getLoopPreheader();
+  if (!PreHeader)
+    return;
+
+  // Hoisted instructions are placed right before the preheader's terminator.
+  Instruction *Location = PreHeader->getTerminator();
+
+  BoUpSLP::ValueList &Gathers = R.getGatherSeqInstructions();
+  for (BoUpSLP::ValueList::iterator it = Gathers.begin(), e = Gathers.end();
+       it != e; ++it) {
+    InsertElementInst *Insert = dyn_cast<InsertElementInst>(*it);
+
+    // The InsertElement sequence can be simplified into a constant.
+    if (!Insert)
+      continue;
+
+    // If the vector or the element that we insert into it are
+    // instructions that are defined inside this loop then we can't
+    // hoist this instruction.
+    Instruction *CurrVec = dyn_cast<Instruction>(Insert->getOperand(0));
+    Instruction *NewElem = dyn_cast<Instruction>(Insert->getOperand(1));
+    if (CurrVec && L->contains(CurrVec)) continue;
+    if (NewElem && L->contains(NewElem)) continue;
+
+    // We can hoist this instruction. Move it to the pre-header.
+    Insert->moveBefore(Location);
+  }
+}
+
+} // end anonymous namespace
+
+char SLPVectorizer::ID = 0;
+static const char lv_name[] = "SLP Vectorizer";
+INITIALIZE_PASS_BEGIN(SLPVectorizer, SV_NAME, lv_name, false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_END(SLPVectorizer, SV_NAME, lv_name, false, false)
+
+namespace llvm {
+  Pass *createSLPVectorizerPass() {
+    return new SLPVectorizer();
+  }
+}
+
diff --git a/lib/Transforms/Vectorize/VecUtils.cpp b/lib/Transforms/Vectorize/VecUtils.cpp
new file mode 100644
index 0000000..9b94366
--- /dev/null
+++ b/lib/Transforms/Vectorize/VecUtils.cpp
@@ -0,0 +1,730 @@
+//===- VecUtils.cpp --- Vectorization Utilities ---------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "SLP"
+
+#include "VecUtils.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <algorithm>
+#include <map>
+
+using namespace llvm;
+
+static const unsigned MinVecRegSize = 128;
+
+static const unsigned RecursionMaxDepth = 6;
+
+namespace llvm {
+
+BoUpSLP::BoUpSLP(BasicBlock *Bb, ScalarEvolution *S, DataLayout *Dl,
+                 TargetTransformInfo *Tti, AliasAnalysis *Aa, Loop *Lp) :
+  BB(Bb), SE(S), DL(Dl), TTI(Tti), AA(Aa), L(Lp)  {
+  numberInstructions();
+}
+
+void BoUpSLP::numberInstructions() {
+  int Loc = 0;
+  InstrIdx.clear();
+  InstrVec.clear();
+  // Number the instructions in the block.
+  for (BasicBlock::iterator it=BB->begin(), e=BB->end(); it != e; ++it) {
+    InstrIdx[it] = Loc++;
+    InstrVec.push_back(it);
+    assert(InstrVec[InstrIdx[it]] == it && "Invalid allocation");
+  }
+}
+
+Value *BoUpSLP::getPointerOperand(Value *I) {
+  if (LoadInst *LI = dyn_cast<LoadInst>(I)) return LI->getPointerOperand();
+  if (StoreInst *SI = dyn_cast<StoreInst>(I)) return SI->getPointerOperand();
+  return 0;
+}
+
+unsigned BoUpSLP::getAddressSpaceOperand(Value *I) {
+  if (LoadInst *LI=dyn_cast<LoadInst>(I)) return LI->getPointerAddressSpace();
+  if (StoreInst *SI=dyn_cast<StoreInst>(I)) return SI->getPointerAddressSpace();
+  return ~0U; // Neither a load nor a store (same value the old -1 produced).
+}
+
+bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B) {
+  Value *PtrA = getPointerOperand(A);
+  Value *PtrB = getPointerOperand(B);
+  unsigned ASA = getAddressSpaceOperand(A);
+  unsigned ASB = getAddressSpaceOperand(B);
+
+  // Check that the address spaces match and that the pointers are valid.
+  if (!PtrA || !PtrB || (ASA != ASB)) return false;
+
+  // Check that A and B are of the same type.
+  if (PtrA->getType() != PtrB->getType()) return false;
+
+  // Calculate the distance.
+  const SCEV *PtrSCEVA = SE->getSCEV(PtrA);
+  const SCEV *PtrSCEVB = SE->getSCEV(PtrB);
+  const SCEV *OffsetSCEV = SE->getMinusSCEV(PtrSCEVA, PtrSCEVB);
+  const SCEVConstant *ConstOffSCEV = dyn_cast<SCEVConstant>(OffsetSCEV);
+
+  // Non constant distance.
+  if (!ConstOffSCEV) return false;
+
+  int64_t Offset = ConstOffSCEV->getValue()->getSExtValue();
+  Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
+  // The instructions are consecutive if B sits exactly one store size of the
+  // pointee type after A. Offset is PtrA - PtrB, hence the negation below.
+  int64_t Sz = DL->getTypeStoreSize(Ty);
+  return ((-Offset) == Sz);
+}
+
+bool BoUpSLP::vectorizeStoreChain(ArrayRef<Value *> Chain, int CostThreshold) {
+  Type *StoreTy = cast<StoreInst>(Chain[0])->getValueOperand()->getType();
+  unsigned Sz = DL->getTypeSizeInBits(StoreTy);
+  // Validate the size before dividing by it; zero is not a power of two.
+  if (!isPowerOf2_32(Sz)) return false;
+  unsigned VF = MinVecRegSize / Sz;
+  if (VF < 2) return false;
+  bool Changed = false;
+  // Look for profitable vectorizable trees at all offsets, starting at zero.
+  for (unsigned i = 0, e = Chain.size(); i < e; ++i) {
+    if (i + VF > e) return Changed;
+    DEBUG(dbgs()<<"SLP: Analyzing " << VF << " stores at offset "<< i << "\n");
+    ArrayRef<Value *> Operands = Chain.slice(i, VF);
+
+    int Cost = getTreeCost(Operands);
+    DEBUG(dbgs() << "SLP: Found cost=" << Cost << " for VF=" << VF << "\n");
+    if (Cost < CostThreshold) {
+      DEBUG(dbgs() << "SLP: Decided to vectorize cost=" << Cost << "\n");
+      vectorizeTree(Operands, VF);
+      i += VF - 1;
+      Changed = true;
+    }
+  }
+
+  return Changed;
+}
+
+bool BoUpSLP::vectorizeStores(ArrayRef<StoreInst *> Stores, int costThreshold) {
+  ValueSet Heads, Tails;
+  SmallDenseMap<Value*, Value*> ConsecutiveChain;
+
+  // We may run into multiple chains that merge into a single chain. We mark the
+  // stores that we vectorized so that we don't visit the same store twice.
+  ValueSet VectorizedStores;
+  bool Changed = false;
+
+  // Do a quadratic search on all of the given stores and find
+  // all of the pairs of stores that follow each other.
+  for (unsigned i = 0, e = Stores.size(); i < e; ++i)
+    for (unsigned j = 0; j < e; ++j) {
+      if (i == j) continue;
+      if (isConsecutiveAccess(Stores[i], Stores[j])) {
+        Tails.insert(Stores[j]);
+        Heads.insert(Stores[i]);
+        ConsecutiveChain[Stores[i]] = Stores[j];
+      }
+    }
+
+  // For stores that start but don't end a link in the chain:
+  for (ValueSet::iterator it = Heads.begin(), e = Heads.end();it != e; ++it) {
+    if (Tails.count(*it)) continue;
+
+    // We found a store instr that starts a chain. Now follow the chain and try
+    // to vectorize it.
+    ValueList Operands;
+    Value *I = *it;
+    // Collect the chain into a list.
+    while (Tails.count(I) || Heads.count(I)) {
+      if (VectorizedStores.count(I)) break;
+      Operands.push_back(I);
+      // Move to the next value in the chain.
+      I = ConsecutiveChain[I];
+    }
+
+    bool Vectorized = vectorizeStoreChain(Operands, costThreshold);
+
+    // Mark the vectorized stores so that we don't vectorize them again.
+    if (Vectorized)
+      VectorizedStores.insert(Operands.begin(), Operands.end());
+    Changed |= Vectorized;
+  }
+
+  return Changed;
+}
+
+int BoUpSLP::getScalarizationCost(ArrayRef<Value *> VL) {
+  // Find the type of the operands in VL.
+  Type *ScalarTy = VL[0]->getType();
+  if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
+    ScalarTy = SI->getValueOperand()->getType();
+  VectorType *VecTy = VectorType::get(ScalarTy, VL.size());
+  // Find the cost of inserting/extracting values from the vector.
+  return getScalarizationCost(VecTy);
+}
+
+int BoUpSLP::getScalarizationCost(Type *Ty) {
+  int Cost = 0;
+  for (unsigned i = 0, e = cast<VectorType>(Ty)->getNumElements(); i < e; ++i)
+    Cost += TTI->getVectorInstrCost(Instruction::InsertElement, Ty, i);
+  return Cost;
+}
+
+AliasAnalysis::Location BoUpSLP::getLocation(Instruction *I) {
+  if (StoreInst *SI = dyn_cast<StoreInst>(I)) return AA->getLocation(SI);
+  if (LoadInst *LI = dyn_cast<LoadInst>(I)) return AA->getLocation(LI);
+  return AliasAnalysis::Location();
+}
+
+Value *BoUpSLP::isUnsafeToSink(Instruction *Src, Instruction *Dst) {
+  assert(Src->getParent() == Dst->getParent() && "Not the same BB");
+  BasicBlock::iterator I = Src, E = Dst;
+  // The location accessed by Src does not change during the scan; get it once.
+  AliasAnalysis::Location SrcLoc = getLocation(Src);
+  // Scan the instructions strictly between Src and Dst for a memory conflict.
+  for (++I; I != E; ++I) {
+    // Ignore instructions that are on the memory-barrier ignore list.
+    if (MemBarrierIgnoreList.count(I)) continue;
+    if (Src->mayWriteToMemory()) /* Write */ {
+      if (!I->mayReadOrWriteMemory()) continue;
+    } else /* Read */ {
+      if (!I->mayWriteToMemory()) continue;
+    }
+    AliasAnalysis::Location A = getLocation(&*I);
+    // A location without a pointer is treated the same as an aliasing one.
+    if (!A.Ptr || !SrcLoc.Ptr || AA->alias(A, SrcLoc))
+      return I;
+  }
+  return 0;
+}
+
+void BoUpSLP::vectorizeArith(ArrayRef<Value *> Operands) {
+  Value *Vec = vectorizeTree(Operands, Operands.size());
+  BasicBlock::iterator Loc = cast<Instruction>(Vec);
+  IRBuilder<> Builder(++Loc);
+  // After vectorizing the operands we need to generate extractelement
+  // instructions and replace all of the uses of the scalar values with
+  // the values that we extracted from the vectorized tree.
+  for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
+    Value *S = Builder.CreateExtractElement(Vec, Builder.getInt32(i));
+    Operands[i]->replaceAllUsesWith(S);
+  }
+}
+
+int BoUpSLP::getTreeCost(ArrayRef<Value *> VL) {
+  // Get rid of the list of stores that were removed, and from the
+  // lists of instructions with multiple users.
+  MemBarrierIgnoreList.clear();
+  LaneMap.clear();
+  MultiUserVals.clear();
+  MustScalarize.clear();
+
+  // Scan the tree and find which value is used by which lane, and which values
+  // must be scalarized.
+  getTreeUses_rec(VL, 0);
+
+  // Check that instructions with multiple users can be vectorized. Mark unsafe
+  // instructions.
+  for (ValueSet::iterator it = MultiUserVals.begin(),
+       e = MultiUserVals.end(); it != e; ++it) {
+    // Check that all of the users of this instr are within the tree
+    // and that they are all from the same lane.
+    int Lane = -1;
+    for (Value::use_iterator I = (*it)->use_begin(), E = (*it)->use_end();
+         I != E; ++I) {
+      if (LaneMap.find(*I) == LaneMap.end()) {
+        MustScalarize.insert(*it);
+        DEBUG(dbgs()<<"SLP: Adding " << **it <<
+              " to MustScalarize because of an out of tree usage.\n");
+        break;
+      }
+      if (Lane == -1) Lane = LaneMap[*I];
+      if (Lane != LaneMap[*I]) {
+        MustScalarize.insert(*it);
+        DEBUG(dbgs()<<"Adding " << **it <<
+              " to MustScalarize because multiple lane use it: "
+              << Lane << " and " << LaneMap[*I] << ".\n");
+        break;
+      }
+    }
+  }
+
+  // Now calculate the cost of vectorizing the tree.
+  return getTreeCost_rec(VL, 0);
+}
+
+void BoUpSLP::getTreeUses_rec(ArrayRef<Value *> VL, unsigned Depth) {
+  if (Depth == RecursionMaxDepth) return;
+
+  // Don't handle vectors.
+  if (VL[0]->getType()->isVectorTy()) return;
+  if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
+    if (SI->getValueOperand()->getType()->isVectorTy()) return;
+
+  // Check if all of the operands are constants.
+  bool AllConst = true;
+  bool AllSameScalar = true;
+  for (unsigned i = 0, e = VL.size(); i < e; ++i) {
+    AllConst &= isa<Constant>(VL[i]);
+    AllSameScalar &= (VL[0] == VL[i]);
+    Instruction *I = dyn_cast<Instruction>(VL[i]);
+    // If one of the instructions is out of this BB, we need to scalarize all.
+    if (I && I->getParent() != BB) return;
+  }
+
+  // If all of the operands are identical or constant we have a simple solution.
+  if (AllConst || AllSameScalar) return;
+
+  // Scalarize unknown structures.
+  Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
+  if (!VL0) return;
+
+  unsigned Opcode = VL0->getOpcode();
+  for (unsigned i = 0, e = VL.size(); i < e; ++i) {
+    Instruction *I = dyn_cast<Instruction>(VL[i]);
+    // If not all of the instructions are identical then we have to scalarize.
+    if (!I || Opcode != I->getOpcode()) return;
+  }
+
+  // Mark instructions with multiple users.
+  for (unsigned i = 0, e = VL.size(); i < e; ++i) {
+    Instruction *I = dyn_cast<Instruction>(VL[i]);
+    // Remember to check if all of the users of this instr are vectorized
+    // within our tree.
+    if (I && I->getNumUses() > 1) MultiUserVals.insert(I);
+  }
+
+  for (int i = 0, e = VL.size(); i < e; ++i) {
+    // Check that the instruction is only used within
+    // one lane.
+    if (LaneMap.count(VL[i]) && LaneMap[VL[i]] != i) return;
+    // Make this instruction as 'seen' and remember the lane.
+    LaneMap[VL[i]] = i;
+  }
+
+  switch (Opcode) {
+    case Instruction::ZExt:
+    case Instruction::SExt:
+    case Instruction::FPToUI:
+    case Instruction::FPToSI:
+    case Instruction::FPExt:
+    case Instruction::PtrToInt:
+    case Instruction::IntToPtr:
+    case Instruction::SIToFP:
+    case Instruction::UIToFP:
+    case Instruction::Trunc:
+    case Instruction::FPTrunc:
+    case Instruction::BitCast:
+    case Instruction::Add:
+    case Instruction::FAdd:
+    case Instruction::Sub:
+    case Instruction::FSub:
+    case Instruction::Mul:
+    case Instruction::FMul:
+    case Instruction::UDiv:
+    case Instruction::SDiv:
+    case Instruction::FDiv:
+    case Instruction::URem:
+    case Instruction::SRem:
+    case Instruction::FRem:
+    case Instruction::Shl:
+    case Instruction::LShr:
+    case Instruction::AShr:
+    case Instruction::And:
+    case Instruction::Or:
+    case Instruction::Xor: {
+      for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
+        ValueList Operands;
+        // Prepare the operand vector.
+        for (unsigned j = 0; j < VL.size(); ++j)
+          Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));
+
+        getTreeUses_rec(Operands, Depth+1);
+      }
+      return;
+    }
+    case Instruction::Store: {
+      ValueList Operands;
+      for (unsigned j = 0; j < VL.size(); ++j)
+        Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));
+      getTreeUses_rec(Operands, Depth+1);
+      return;
+    }
+    default:
+    return;
+  }
+}
+
+/// \returns the cost of vectorizing the bundle \p VL and, recursively, its
+/// operands, relative to keeping the scalars (negative means profitable).
+/// Returns max_cost when the bundle must not be vectorized.
+int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {
+  Type *ScalarTy = VL[0]->getType();
+  if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
+    ScalarTy = SI->getValueOperand()->getType(); // Type of the stored value.
+  /// Don't mess with vectors.
+  if (ScalarTy->isVectorTy()) return max_cost;
+  VectorType *VecTy = VectorType::get(ScalarTy, VL.size());
+
+  if (Depth == RecursionMaxDepth) return getScalarizationCost(VecTy);
+
+  // Classify the bundle: all constants, a splat, or lanes we must not touch.
+  bool AllConst = true;
+  bool AllSameScalar = true;
+  bool MustScalarizeFlag = false;
+  for (unsigned i = 0, e = VL.size(); i < e; ++i) {
+    AllConst &= isa<Constant>(VL[i]);
+    AllSameScalar &= (VL[0] == VL[i]);
+    // Remember whether any lane was marked as must-scalarize.
+    MustScalarizeFlag |= MustScalarize.count(VL[i]);
+    Instruction *I = dyn_cast<Instruction>(VL[i]);
+    // This instruction is outside the basic block.
+    if (I && I->getParent() != BB)
+      return getScalarizationCost(VecTy);
+  }
+
+  // A vector made only of constants costs nothing.
+  if (AllConst) return 0;
+
+  // If all of the operands are identical we can broadcast them.
+  Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
+  if (AllSameScalar) {
+    // If we are in a loop, and this is not an instruction (e.g. constant or
+    // argument) or the instruction is defined outside the loop then assume
+    // that the cost is zero.
+    if (L && (!VL0 || !L->contains(VL0)))
+      return 0;
+
+    // We need to broadcast the scalar.
+    return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy, 0);
+  }
+
+  // At least one lane is in the MustScalarize set, so the whole bundle has
+  // to be built from scalars.
+  if (MustScalarizeFlag)
+    return getScalarizationCost(VecTy);
+
+  if (!VL0) return getScalarizationCost(VecTy);
+  assert(VL0->getParent() == BB && "Wrong BB");
+
+  unsigned Opcode = VL0->getOpcode();
+  for (unsigned i = 0, e = VL.size(); i < e; ++i) {
+    Instruction *I = dyn_cast<Instruction>(VL[i]);
+    // If not all of the instructions are identical then we have to scalarize.
+    if (!I || Opcode != I->getOpcode()) return getScalarizationCost(VecTy);
+  }
+
+  // Check if it is safe to sink the loads or the stores.
+  if (Opcode == Instruction::Load || Opcode == Instruction::Store) {
+    int MaxIdx = InstrIdx[VL0];
+    for (unsigned i = 1, e = VL.size(); i < e; ++i)
+      MaxIdx = std::max(MaxIdx, InstrIdx[VL[i]]);
+
+    Instruction *Last = InstrVec[MaxIdx];
+    for (unsigned i = 0, e = VL.size(); i < e; ++i) {
+      if (VL[i] == Last) continue;
+      Value *Barrier = isUnsafeToSink(cast<Instruction>(VL[i]), Last);
+      if (Barrier) {
+        DEBUG(dbgs() << "SLP: Can't sink " << *VL[i] << "\n down to " <<
+              *Last << "\n because of " << *Barrier << "\n");
+        return max_cost;
+      }
+    }
+  }
+
+  switch (Opcode) {
+  case Instruction::ZExt:
+  case Instruction::SExt:
+  case Instruction::FPToUI:
+  case Instruction::FPToSI:
+  case Instruction::FPExt:
+  case Instruction::PtrToInt:
+  case Instruction::IntToPtr:
+  case Instruction::SIToFP:
+  case Instruction::UIToFP:
+  case Instruction::Trunc:
+  case Instruction::FPTrunc:
+  case Instruction::BitCast: {
+    int Cost = 0;
+    ValueList Operands;
+    Type *SrcTy = VL0->getOperand(0)->getType();
+    // Prepare the operand vector.
+    for (unsigned j = 0; j < VL.size(); ++j) {
+      Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));
+      // Check that the source type of the cast is the same in every lane.
+      if (cast<Instruction>(VL[j])->getOperand(0)->getType() != SrcTy)
+        return getScalarizationCost(VecTy);
+    }
+
+    Cost += getTreeCost_rec(Operands, Depth+1);
+    if (Cost >= max_cost) return max_cost;
+
+    // Calculate the cost of this instruction.
+    int ScalarCost = VL.size() * TTI->getCastInstrCost(VL0->getOpcode(),
+                                                       VL0->getType(), SrcTy);
+
+    VectorType *SrcVecTy = VectorType::get(SrcTy, VL.size());
+    int VecCost = TTI->getCastInstrCost(VL0->getOpcode(), VecTy, SrcVecTy);
+    Cost += (VecCost - ScalarCost);
+    return Cost;
+  }
+  case Instruction::Add:
+  case Instruction::FAdd:
+  case Instruction::Sub:
+  case Instruction::FSub:
+  case Instruction::Mul:
+  case Instruction::FMul:
+  case Instruction::UDiv:
+  case Instruction::SDiv:
+  case Instruction::FDiv:
+  case Instruction::URem:
+  case Instruction::SRem:
+  case Instruction::FRem:
+  case Instruction::Shl:
+  case Instruction::LShr:
+  case Instruction::AShr:
+  case Instruction::And:
+  case Instruction::Or:
+  case Instruction::Xor: {
+    int Cost = 0;
+    // Calculate the cost of all of the operands.
+    for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
+      ValueList Operands;
+      // Prepare the operand vector.
+      for (unsigned j = 0; j < VL.size(); ++j)
+        Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));
+
+      Cost += getTreeCost_rec(Operands, Depth+1);
+      if (Cost >= max_cost) return max_cost;
+    }
+
+    // Calculate the cost of this instruction.
+    int ScalarCost = VecTy->getNumElements() *
+      TTI->getArithmeticInstrCost(Opcode, ScalarTy);
+
+    int VecCost = TTI->getArithmeticInstrCost(Opcode, VecTy);
+    Cost += (VecCost - ScalarCost);
+    return Cost;
+  }
+  case Instruction::Load: {
+    // Non-consecutive loads stay scalar; pay the cost of forming the vector.
+    for (unsigned i = 0, e = VL.size()-1; i < e; ++i)
+      if (!isConsecutiveAccess(VL[i], VL[i+1]))
+        return getScalarizationCost(VecTy);
+
+    // Cost of one wide load minus the cost of the scalar loads it replaces.
+    int ScalarLdCost = VecTy->getNumElements() *
+      TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);
+    int VecLdCost = TTI->getMemoryOpCost(Instruction::Load, VecTy, 1, 0);
+    return VecLdCost - ScalarLdCost;
+  }
+  case Instruction::Store: {
+    // We know that we can merge the stores. Calculate the cost.
+    int ScalarStCost = VecTy->getNumElements() *
+      TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1, 0);
+    int VecStCost = TTI->getMemoryOpCost(Instruction::Store, VecTy, 1, 0);
+    int StoreCost = VecStCost - ScalarStCost;
+
+    ValueList Operands;
+    for (unsigned j = 0; j < VL.size(); ++j) {
+      Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));
+      MemBarrierIgnoreList.insert(VL[j]);
+    }
+
+    return StoreCost + getTreeCost_rec(Operands, Depth + 1);
+  }
+  default:
+    // Unable to vectorize unknown instructions.
+    return getScalarizationCost(VecTy);
+  }
+}
+
+Instruction *BoUpSLP::GetLastInstr(ArrayRef<Value *> VL, unsigned VF) {
+  int LastIdx = InstrIdx[BB->getFirstNonPHI()]; // Lower bound: first non-PHI.
+  for (unsigned Lane = 0; Lane != VF; ++Lane)
+    LastIdx = std::max(LastIdx, InstrIdx[VL[Lane]]);
+  return InstrVec[LastIdx + 1]; // The slot right after the latest member.
+}
+
+Value *BoUpSLP::Scalarize(ArrayRef<Value *> VL, VectorType *Ty) {
+  const unsigned NumLanes = Ty->getNumElements();
+  IRBuilder<> Builder(GetLastInstr(VL, NumLanes));
+  Value *Gathered = UndefValue::get(Ty);
+  // Build the vector one lane at a time with a chain of 'InsertElement's.
+  for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
+    Gathered = Builder.CreateInsertElement(Gathered, VL[Lane],
+                                           Builder.getInt32(Lane));
+    // Record every link of the 'gather' chain; the caller of the vectorizer
+    // can try to hoist the sequence if its users are outside of the block.
+    GatherInstructions.push_back(Gathered);
+  }
+  return Gathered;
+}
+
+Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL, int VF) {
+  Value *Root = vectorizeTree_rec(VL, VF);
+  // Vectorization moved instructions around, so the cached numbering is stale;
+  // renumber the block before any further analysis runs.
+  numberInstructions();
+  MustScalarize.clear();
+  return Root;
+}
+
+/// Emit vector code for the first \p VF values of \p VL (gathering the lanes
+/// when they cannot be vectorized). \returns the vector, or null for stores.
+Value *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) {
+  Type *ScalarTy = VL[0]->getType();
+  if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
+    ScalarTy = SI->getValueOperand()->getType();
+  VectorType *VecTy = VectorType::get(ScalarTy, VF);
+  // Check if all of the operands are constants or identical.
+  bool AllConst = true;
+  bool AllSameScalar = true;
+  for (unsigned i = 0, e = VF; i < e; ++i) {
+    AllConst &= isa<Constant>(VL[i]);
+    AllSameScalar &= (VL[0] == VL[i]);
+    // The instruction must be in the same BB, and it must be vectorizable.
+    Instruction *I = dyn_cast<Instruction>(VL[i]);
+    if (MustScalarize.count(VL[i]) || (I && I->getParent() != BB))
+      return Scalarize(VL, VecTy);
+  }
+
+  // Constant vectors and splats are simply gathered lane by lane.
+  if (AllConst || AllSameScalar) return Scalarize(VL, VecTy);
+
+  // Scalarize unknown structures.
+  Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
+  if (!VL0) return Scalarize(VL, VecTy);
+  // Reuse the vector recorded earlier for a bundle that starts with VL0.
+  if (VectorizedValues.count(VL0)) return VectorizedValues[VL0];
+
+  unsigned Opcode = VL0->getOpcode();
+  for (unsigned i = 0, e = VF; i < e; ++i) {
+    Instruction *I = dyn_cast<Instruction>(VL[i]);
+    // If not all of the instructions are identical then we have to scalarize.
+    if (!I || Opcode != I->getOpcode()) return Scalarize(VL, VecTy);
+  }
+
+  switch (Opcode) {
+  case Instruction::ZExt:
+  case Instruction::SExt:
+  case Instruction::FPToUI:
+  case Instruction::FPToSI:
+  case Instruction::FPExt:
+  case Instruction::PtrToInt:
+  case Instruction::IntToPtr:
+  case Instruction::SIToFP:
+  case Instruction::UIToFP:
+  case Instruction::Trunc:
+  case Instruction::FPTrunc:
+  case Instruction::BitCast: {
+    ValueList INVL;
+    for (int i = 0; i < VF; ++i)
+      INVL.push_back(cast<Instruction>(VL[i])->getOperand(0));
+    Value *InVec = vectorizeTree_rec(INVL, VF);
+    IRBuilder<> Builder(GetLastInstr(VL, VF));
+    CastInst *CI = cast<CastInst>(VL0); // The opcode guarantees a CastInst.
+    Value *V = Builder.CreateCast(CI->getOpcode(), InVec, VecTy);
+    VectorizedValues[VL0] = V;
+    return V;
+  }
+  case Instruction::Add:
+  case Instruction::FAdd:
+  case Instruction::Sub:
+  case Instruction::FSub:
+  case Instruction::Mul:
+  case Instruction::FMul:
+  case Instruction::UDiv:
+  case Instruction::SDiv:
+  case Instruction::FDiv:
+  case Instruction::URem:
+  case Instruction::SRem:
+  case Instruction::FRem:
+  case Instruction::Shl:
+  case Instruction::LShr:
+  case Instruction::AShr:
+  case Instruction::And:
+  case Instruction::Or:
+  case Instruction::Xor: {
+    ValueList LHSVL, RHSVL;
+    for (int i = 0; i < VF; ++i) {
+      LHSVL.push_back(cast<Instruction>(VL[i])->getOperand(0));
+      RHSVL.push_back(cast<Instruction>(VL[i])->getOperand(1));
+    }
+
+    Value *LHS = vectorizeTree_rec(LHSVL, VF);
+    Value *RHS = vectorizeTree_rec(RHSVL, VF);
+    IRBuilder<> Builder(GetLastInstr(VL, VF));
+    BinaryOperator *BinOp = cast<BinaryOperator>(VL0);
+    Value *V = Builder.CreateBinOp(BinOp->getOpcode(), LHS, RHS);
+    VectorizedValues[VL0] = V;
+    return V;
+  }
+  case Instruction::Load: {
+    LoadInst *LI = cast<LoadInst>(VL0);
+    unsigned Alignment = LI->getAlignment();
+
+    // Check if all of the loads are consecutive.
+    for (unsigned i = 1, e = VF; i < e; ++i)
+      if (!isConsecutiveAccess(VL[i-1], VL[i]))
+        return Scalarize(VL, VecTy);
+
+    IRBuilder<> Builder(GetLastInstr(VL, VF));
+    Value *VecPtr = Builder.CreateBitCast(LI->getPointerOperand(),
+                                          VecTy->getPointerTo());
+    LI = Builder.CreateLoad(VecPtr);
+    LI->setAlignment(Alignment);
+    VectorizedValues[VL0] = LI;
+    return LI;
+  }
+  case Instruction::Store: {
+    StoreInst *SI = cast<StoreInst>(VL0);
+    unsigned Alignment = SI->getAlignment();
+
+    ValueList ValueOp;
+    for (int i = 0; i < VF; ++i)
+      ValueOp.push_back(cast<StoreInst>(VL[i])->getValueOperand());
+    Value *VecValue = vectorizeTree_rec(ValueOp, VF);
+
+    IRBuilder<> Builder(GetLastInstr(VL, VF));
+    Value *VecPtr = Builder.CreateBitCast(SI->getPointerOperand(),
+                                          VecTy->getPointerTo());
+    Builder.CreateStore(VecValue, VecPtr)->setAlignment(Alignment);
+    // The wide store replaces the scalar stores; erase them.
+    for (int i = 0; i < VF; ++i)
+      cast<Instruction>(VL[i])->eraseFromParent();
+    return 0;
+  }
+  default:
+    Value *S = Scalarize(VL, VecTy);
+    VectorizedValues[VL0] = S;
+    return S;
+  }
+}
+
+} // end of namespace
diff --git a/lib/Transforms/Vectorize/VecUtils.h b/lib/Transforms/Vectorize/VecUtils.h
new file mode 100644
index 0000000..5456c6c
--- /dev/null
+++ b/lib/Transforms/Vectorize/VecUtils.h
@@ -0,0 +1,164 @@
+//===- VecUtils.h - Vectorization Utilities -------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This family of classes and functions manipulate vectors and chains of
+// vectors.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_VECTORIZE_VECUTILS_H
+#define LLVM_TRANSFORMS_VECTORIZE_VECUTILS_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include <vector>
+
+namespace llvm {
+
+class BasicBlock; class Instruction; class Type;
+class VectorType; class StoreInst; class Value;
+class ScalarEvolution; class DataLayout;
+class TargetTransformInfo; class AliasAnalysis;
+class Loop;
+
+/// Bottom Up SLP vectorization utility class.
+struct BoUpSLP  {
+  typedef SmallVector<Value*, 8> ValueList;
+  typedef SmallPtrSet<Value*, 16> ValueSet;
+  typedef SmallVector<StoreInst*, 8> StoreList;
+  static const int max_cost = 1<<20;
+
+  /// \brief C'tor.
+  BoUpSLP(BasicBlock *Bb, ScalarEvolution *Se, DataLayout *Dl,
+         TargetTransformInfo *Tti, AliasAnalysis *Aa, Loop *Lp);
+
+  /// \brief Take the pointer operand from the Load/Store instruction.
+  /// \returns NULL if this is not a valid Load/Store instruction.
+  static Value *getPointerOperand(Value *I);
+
+  /// \brief Take the address space operand from the Load/Store instruction.
+  /// \returns -1 if this is not a valid Load/Store instruction.
+  static unsigned getAddressSpaceOperand(Value *I);
+
+  /// \returns true if the memory operations A and B are consecutive.
+  bool isConsecutiveAccess(Value *A, Value *B);
+
+  /// \brief Vectorize the tree that starts with the elements in \p VL.
+  /// \returns the vectorized value.
+  Value *vectorizeTree(ArrayRef<Value *> VL, int VF);
+
+  /// \returns the vectorization cost of the subtree that starts at \p VL.
+  /// A negative number means that this is profitable.
+  int getTreeCost(ArrayRef<Value *> VL);
+
+  /// \returns the scalarization cost for this list of values. Assuming that
+  /// this subtree gets vectorized, we may need to extract the values from the
+  /// roots. This method calculates the cost of extracting the values.
+  int getScalarizationCost(ArrayRef<Value *> VL);
+
+  /// \brief Attempts to order and vectorize a sequence of stores. This
+  /// function does a quadratic scan of the given stores.
+  /// \returns true if the basic block was modified.
+  bool vectorizeStores(ArrayRef<StoreInst *> Stores, int costThreshold);
+
+  /// \brief Vectorize a group of scalars into a vector tree.
+  void vectorizeArith(ArrayRef<Value *> Operands);
+
+  /// \returns the list of new instructions that were added in order to collect
+  /// scalars into vectors. This list can be used to further optimize the gather
+  /// sequences.
+  ValueList &getGatherSeqInstructions() {return GatherInstructions; }
+
+private:
+  /// \brief This method contains the recursive part of getTreeCost.
+  int getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth);
+
+  /// \brief This recursive method looks for vectorization hazards such as
+  /// values that are used by multiple users and checks that values are used
+  /// by only one vector lane. It updates the variables LaneMap, MultiUserVals.
+  void getTreeUses_rec(ArrayRef<Value *> VL, unsigned Depth);
+
+  /// \brief This method contains the recursive part of vectorizeTree.
+  Value *vectorizeTree_rec(ArrayRef<Value *> VL, int VF);
+
+  /// \brief Number all of the instructions in the block.
+  void numberInstructions();
+
+  /// \brief Vectorize a sorted sequence of stores.
+  bool vectorizeStoreChain(ArrayRef<Value *> Chain, int CostThreshold);
+
+  /// \returns the scalarization cost for this type. Scalarization in this
+  /// context means the creation of vectors from a group of scalars.
+  int getScalarizationCost(Type *Ty);
+
+  /// \returns the AA location that is being accessed by the instruction.
+  AliasAnalysis::Location getLocation(Instruction *I);
+
+  /// \brief Checks if it is possible to sink an instruction from
+  /// \p Src to \p Dst.
+  /// \returns the pointer to the barrier instruction if we can't sink.
+  Value *isUnsafeToSink(Instruction *Src, Instruction *Dst);
+
+  /// \returns the instruction that follows the last one (in BB order) among
+  /// the first \p VF elements of \p VL; new code is inserted before it.
+  Instruction *GetLastInstr(ArrayRef<Value *> VL, unsigned VF);
+
+  /// \returns a vector from a collection of scalars in \p VL.
+  Value *Scalarize(ArrayRef<Value *> VL, VectorType *Ty);
+
+private:
+  /// Maps instructions to their index in InstrVec.
+  SmallDenseMap<Value*, int> InstrIdx;
+  /// Maps indices back to Instructions (the inverse of InstrIdx).
+  std::vector<Instruction*> InstrVec;
+
+  // -- containers that are used during getTreeCost -- //
+
+  /// Contains values that must be scalarized because they are used
+  /// by multiple lanes, or by users outside the tree.
+  /// NOTICE: The vectorization methods also use this set.
+  ValueSet MustScalarize;
+
+  /// Contains a list of values that are used outside the current tree. This
+  /// set must be reset between runs.
+  ValueSet MultiUserVals;
+  /// Maps values in the tree to the vector lanes that use them. This map must
+  /// be reset between runs of getCost.
+  std::map<Value*, int> LaneMap; // NOTE(review): <map> is not included here.
+  /// A list of instructions to ignore while sinking
+  /// memory instructions. This map must be reset between runs of getCost.
+  SmallPtrSet<Value *, 8> MemBarrierIgnoreList;
+
+  // -- Containers that are used during vectorizeTree -- //
+
+  /// Maps the first scalar of a vectorized bundle to its vector value. This
+  /// map must be reset between runs.
+  DenseMap<Value*, Value*> VectorizedValues;
+
+  // -- Containers that are used after vectorization by the caller -- //
+
+  /// A list of instructions that are used when gathering scalars into vectors.
+  /// In many cases these instructions can be hoisted outside of the BB.
+  /// Iterating over this list is faster than calling LICM.
+  ValueList GatherInstructions;
+
+  // Analysis and block reference.
+  BasicBlock *BB;
+  ScalarEvolution *SE;
+  DataLayout *DL;
+  TargetTransformInfo *TTI;
+  AliasAnalysis *AA;
+  Loop *L;
+};
+
+} // end of namespace
+
+#endif // LLVM_TRANSFORMS_VECTORIZE_VECUTILS_H
diff --git a/lib/Transforms/Vectorize/Vectorize.cpp b/lib/Transforms/Vectorize/Vectorize.cpp
index 19eefd2..a927fe1 100644
--- a/lib/Transforms/Vectorize/Vectorize.cpp
+++ b/lib/Transforms/Vectorize/Vectorize.cpp
@@ -1,4 +1,4 @@
-   //===-- Vectorize.cpp -----------------------------------------------------===//
+//===-- Vectorize.cpp -----------------------------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -28,6 +28,7 @@ using namespace llvm;
 void llvm::initializeVectorization(PassRegistry &Registry) {
   initializeBBVectorizePass(Registry);
   initializeLoopVectorizePass(Registry);
+  initializeSLPVectorizerPass(Registry);
 }
 
 void LLVMInitializeVectorization(LLVMPassRegistryRef R) {
@@ -41,3 +42,7 @@ void LLVMAddBBVectorizePass(LLVMPassManagerRef PM) {
 void LLVMAddLoopVectorizePass(LLVMPassManagerRef PM) {
   unwrap(PM)->add(createLoopVectorizePass());
 }
+
+void LLVMAddSLPVectorizePass(LLVMPassManagerRef PM) {
+  unwrap(PM)->add(createSLPVectorizerPass());
+}
diff --git a/projects/sample/autoconf/configure.ac b/projects/sample/autoconf/configure.ac
index 283bc12..c0a1067 100644
--- a/projects/sample/autoconf/configure.ac
+++ b/projects/sample/autoconf/configure.ac
@@ -312,6 +312,7 @@ AC_CACHE_CHECK([target architecture],[llvm_cv_target_arch],
   hexagon-*)              llvm_cv_target_arch="Hexagon" ;;
   mblaze-*)               llvm_cv_target_arch="MBlaze" ;;
   nvptx-*)                llvm_cv_target_arch="NVPTX" ;;
+  s390x-*)                llvm_cv_target_arch="SystemZ" ;;
   *)                      llvm_cv_target_arch="Unknown" ;;
 esac])
 
@@ -482,6 +483,7 @@ else
     Hexagon)     AC_SUBST(TARGET_HAS_JIT,0) ;;
     MBlaze)      AC_SUBST(TARGET_HAS_JIT,0) ;;
     NVPTX)       AC_SUBST(TARGET_HAS_JIT,0) ;;
+    SystemZ)     AC_SUBST(TARGET_HAS_JIT,1) ;;
     *)           AC_SUBST(TARGET_HAS_JIT,0) ;;
   esac
 fi
@@ -592,13 +594,13 @@ TARGETS_TO_BUILD=""
 AC_ARG_ENABLE([targets],AS_HELP_STRING([--enable-targets],
     [Build specific host targets: all or target1,target2,... Valid targets are:
      host, x86, x86_64, sparc, powerpc, arm, mips, spu, hexagon,
-     xcore, msp430, nvptx, cbe, and cpp (default=all)]),,
+     xcore, msp430, nvptx, systemz, and cpp (default=all)]),,
     enableval=all)
 if test "$enableval" = host-only ; then
   enableval=host
 fi
 case "$enableval" in
-  all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM AArch64 Mips XCore MSP430 Hexagon CppBackend MBlaze NVPTX" ;;
+  all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM AArch64 Mips XCore MSP430 Hexagon CppBackend MBlaze NVPTX SystemZ" ;;
   *)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do
       case "$a_target" in
         x86)      TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
@@ -614,6 +616,7 @@ case "$enableval" in
         cpp)      TARGETS_TO_BUILD="CppBackend $TARGETS_TO_BUILD" ;;
         mblaze)   TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;;
         nvptx)    TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;;
+        systemz)  TARGETS_TO_BUILD="SystemZ $TARGETS_TO_BUILD" ;;
         host) case "$llvm_cv_target_arch" in
             x86)         TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
             x86_64)      TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
@@ -627,6 +630,7 @@ case "$enableval" in
             MSP430)      TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
             Hexagon)     TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
             NVPTX)       TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;;
+            SystemZ)     TARGETS_TO_BUILD="SystemZ $TARGETS_TO_BUILD" ;;
             *)       AC_MSG_ERROR([Can not set target to build]) ;;
           esac ;;
         *) AC_MSG_ERROR([Unrecognized target $a_target]) ;;
diff --git a/projects/sample/configure b/projects/sample/configure
index a8fc4bf..a2c70c6 100755
--- a/projects/sample/configure
+++ b/projects/sample/configure
@@ -1406,7 +1406,7 @@ Optional Features:
   --enable-targets        Build specific host targets: all or
                           target1,target2,... Valid targets are: host, x86,
                           x86_64, sparc, powerpc, arm, mips, spu, hexagon,
-                          xcore, msp430, nvptx, cbe, and cpp (default=all)
+                          xcore, msp430, nvptx, systemz, and cpp (default=all)
   --enable-bindings       Build specific language bindings:
                           all,auto,none,{binding-name} (default=auto)
   --enable-libffi         Check for the presence of libffi (default is NO)
@@ -3852,6 +3852,7 @@ else
   hexagon-*)              llvm_cv_target_arch="Hexagon" ;;
   mblaze-*)               llvm_cv_target_arch="MBlaze" ;;
   nvptx-*)                llvm_cv_target_arch="NVPTX" ;;
+  s390x-*)                llvm_cv_target_arch="SystemZ" ;;
   *)                      llvm_cv_target_arch="Unknown" ;;
 esac
 fi
@@ -5116,6 +5117,8 @@ else
  ;;
     NVPTX)       TARGET_HAS_JIT=0
  ;;
+    SystemZ)     TARGET_HAS_JIT=1
+ ;;
     *)           TARGET_HAS_JIT=0
  ;;
   esac
@@ -5300,7 +5303,7 @@ if test "$enableval" = host-only ; then
   enableval=host
 fi
 case "$enableval" in
-  all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM AArch64 Mips XCore MSP430 Hexagon CppBackend MBlaze NVPTX" ;;
+  all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM AArch64 Mips XCore MSP430 Hexagon CppBackend MBlaze NVPTX SystemZ" ;;
   *)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do
       case "$a_target" in
         x86)      TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
@@ -5316,6 +5319,7 @@ case "$enableval" in
         cpp)      TARGETS_TO_BUILD="CppBackend $TARGETS_TO_BUILD" ;;
         mblaze)   TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;;
         nvptx)    TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;;
+        systemz)  TARGETS_TO_BUILD="SystemZ $TARGETS_TO_BUILD" ;;
         host) case "$llvm_cv_target_arch" in
             x86)         TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
             x86_64)      TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
@@ -5329,6 +5333,7 @@ case "$enableval" in
             MSP430)      TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
             Hexagon)     TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
             NVPTX)       TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;;
+            SystemZ)     TARGETS_TO_BUILD="SystemZ $TARGETS_TO_BUILD" ;;
             *)       { { echo "$as_me:$LINENO: error: Can not set target to build" >&5
 echo "$as_me: error: Can not set target to build" >&2;}
    { (exit 1); exit 1; }; } ;;
@@ -10353,7 +10358,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<EOF
-#line 10356 "configure"
+#line 10361 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
diff --git a/test/Analysis/BasicAA/invariant_load.ll b/test/Analysis/BasicAA/invariant_load.ll
index cd6ddb9..09b5401 100644
--- a/test/Analysis/BasicAA/invariant_load.ll
+++ b/test/Analysis/BasicAA/invariant_load.ll
@@ -10,10 +10,10 @@
 
 define i32 @foo(i32* nocapture %p, i8* nocapture %q) {
 entry:
-  %0 = load i32* %p, align 4, !tbaa !0, !invariant.load !3
+  %0 = load i32* %p, align 4, !invariant.load !3
   %conv = trunc i32 %0 to i8
-  store i8 %conv, i8* %q, align 1, !tbaa !1
-  %1 = load i32* %p, align 4, !tbaa !0, !invariant.load !3
+  store i8 %conv, i8* %q, align 1
+  %1 = load i32* %p, align 4, !invariant.load !3
   %add = add nsw i32 %1, 1
   ret i32 %add
 
@@ -23,7 +23,4 @@ entry:
 ; CHECK: %add = add nsw i32 %0, 1
 }
 
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
 !3 = metadata !{}
diff --git a/test/Analysis/BasicAA/phi-spec-order.ll b/test/Analysis/BasicAA/phi-spec-order.ll
index 27d47bc..4172d09 100644
--- a/test/Analysis/BasicAA/phi-spec-order.ll
+++ b/test/Analysis/BasicAA/phi-spec-order.ll
@@ -24,23 +24,23 @@ for.body4:                                        ; preds = %for.body4, %for.con
   %lsr.iv46 = bitcast [16000 x double]* %lsr.iv4 to <4 x double>*
   %lsr.iv12 = bitcast [16000 x double]* %lsr.iv1 to <4 x double>*
   %scevgep11 = getelementptr <4 x double>* %lsr.iv46, i64 -2
-  %i6 = load <4 x double>* %scevgep11, align 32, !tbaa !0
+  %i6 = load <4 x double>* %scevgep11, align 32
   %add = fadd <4 x double> %i6, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
-  store <4 x double> %add, <4 x double>* %lsr.iv12, align 32, !tbaa !0
+  store <4 x double> %add, <4 x double>* %lsr.iv12, align 32
   %scevgep10 = getelementptr <4 x double>* %lsr.iv46, i64 -1
-  %i7 = load <4 x double>* %scevgep10, align 32, !tbaa !0
+  %i7 = load <4 x double>* %scevgep10, align 32
   %add.4 = fadd <4 x double> %i7, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
   %scevgep9 = getelementptr <4 x double>* %lsr.iv12, i64 1
-  store <4 x double> %add.4, <4 x double>* %scevgep9, align 32, !tbaa !0
-  %i8 = load <4 x double>* %lsr.iv46, align 32, !tbaa !0
+  store <4 x double> %add.4, <4 x double>* %scevgep9, align 32
+  %i8 = load <4 x double>* %lsr.iv46, align 32
   %add.8 = fadd <4 x double> %i8, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
   %scevgep8 = getelementptr <4 x double>* %lsr.iv12, i64 2
-  store <4 x double> %add.8, <4 x double>* %scevgep8, align 32, !tbaa !0
+  store <4 x double> %add.8, <4 x double>* %scevgep8, align 32
   %scevgep7 = getelementptr <4 x double>* %lsr.iv46, i64 1
-  %i9 = load <4 x double>* %scevgep7, align 32, !tbaa !0
+  %i9 = load <4 x double>* %scevgep7, align 32
   %add.12 = fadd <4 x double> %i9, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
   %scevgep3 = getelementptr <4 x double>* %lsr.iv12, i64 3
-  store <4 x double> %add.12, <4 x double>* %scevgep3, align 32, !tbaa !0
+  store <4 x double> %add.12, <4 x double>* %scevgep3, align 32
 
 ; CHECK: NoAlias:{{[ \t]+}}<4 x double>* %scevgep11, <4 x double>* %scevgep7
 ; CHECK: NoAlias:{{[ \t]+}}<4 x double>* %scevgep10, <4 x double>* %scevgep7
@@ -65,7 +65,3 @@ for.end:                                          ; preds = %for.body4
 for.end10:                                        ; preds = %for.end
   ret i32 0
 }
-
-!0 = metadata !{metadata !"double", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/Analysis/CostModel/ARM/cast.ll b/test/Analysis/CostModel/ARM/cast.ll
index ba9d84c..0cdd61c 100644
--- a/test/Analysis/CostModel/ARM/cast.ll
+++ b/test/Analysis/CostModel/ARM/cast.ll
@@ -175,9 +175,9 @@ define i32 @casts() {
   %rext_5 = zext <4 x i16> undef to <4 x i64>
 
   ; Vector cast cost of instructions lowering the cast to the stack.
-  ; CHECK: cost of 19 {{.*}} trunc
+  ; CHECK: cost of 3 {{.*}} trunc
   %r74 = trunc <8 x i32> undef to <8 x i8>
-  ; CHECK: cost of 38 {{.*}} trunc
+  ; CHECK: cost of 6 {{.*}} trunc
   %r75 = trunc <16 x i32> undef to <16 x i8>
 
   ; Floating point truncation costs.
diff --git a/test/Analysis/CostModel/ARM/divrem.ll b/test/Analysis/CostModel/ARM/divrem.ll
new file mode 100644
index 0000000..c4ac59b
--- /dev/null
+++ b/test/Analysis/CostModel/ARM/divrem.ll
@@ -0,0 +1,450 @@
+; RUN: opt < %s  -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -mcpu=cortex-a9 | FileCheck %s
+
+define <2 x i8> @sdiv_v2_i8(<2 x i8>  %a, <2 x i8> %b) {
+  ; CHECK: sdiv_v2_i8
+  ; CHECK: cost of 40 {{.*}} sdiv
+
+  %1 = sdiv <2 x i8>  %a, %b
+  ret <2 x i8> %1
+}
+define <2 x i16> @sdiv_v2_i16(<2 x i16>  %a, <2 x i16> %b) {
+  ; CHECK: sdiv_v2_i16
+  ; CHECK: cost of 40 {{.*}} sdiv
+
+  %1 = sdiv <2 x i16>  %a, %b
+  ret <2 x i16> %1
+}
+define <2 x i32> @sdiv_v2_i32(<2 x i32>  %a, <2 x i32> %b) {
+  ; CHECK: sdiv_v2_i32
+  ; CHECK: cost of 40 {{.*}} sdiv
+
+  %1 = sdiv <2 x i32>  %a, %b
+  ret <2 x i32> %1
+}
+define <2 x i64> @sdiv_v2_i64(<2 x i64>  %a, <2 x i64> %b) {
+  ; CHECK: sdiv_v2_i64
+  ; CHECK: cost of 40 {{.*}} sdiv
+
+  %1 = sdiv <2 x i64>  %a, %b
+  ret <2 x i64> %1
+}
+define <4 x i8> @sdiv_v4_i8(<4 x i8>  %a, <4 x i8> %b) {
+  ; CHECK: sdiv_v4_i8
+  ; CHECK: cost of 10 {{.*}} sdiv
+
+  %1 = sdiv <4 x i8>  %a, %b
+  ret <4 x i8> %1
+}
+define <4 x i16> @sdiv_v4_i16(<4 x i16>  %a, <4 x i16> %b) {
+  ; CHECK: sdiv_v4_i16
+  ; CHECK: cost of 10 {{.*}} sdiv
+
+  %1 = sdiv <4 x i16>  %a, %b
+  ret <4 x i16> %1
+}
+define <4 x i32> @sdiv_v4_i32(<4 x i32>  %a, <4 x i32> %b) {
+  ; CHECK: sdiv_v4_i32
+  ; CHECK: cost of 80 {{.*}} sdiv
+
+  %1 = sdiv <4 x i32>  %a, %b
+  ret <4 x i32> %1
+}
+define <4 x i64> @sdiv_v4_i64(<4 x i64>  %a, <4 x i64> %b) {
+  ; CHECK: sdiv_v4_i64
+  ; CHECK: cost of 80 {{.*}} sdiv
+
+  %1 = sdiv <4 x i64>  %a, %b
+  ret <4 x i64> %1
+}
+define <8 x i8> @sdiv_v8_i8(<8 x i8>  %a, <8 x i8> %b) {
+  ; CHECK: sdiv_v8_i8
+  ; CHECK: cost of 10 {{.*}} sdiv
+
+  %1 = sdiv <8 x i8>  %a, %b
+  ret <8 x i8> %1
+}
+define <8 x i16> @sdiv_v8_i16(<8 x i16>  %a, <8 x i16> %b) {
+  ; CHECK: sdiv_v8_i16
+  ; CHECK: cost of 160 {{.*}} sdiv
+
+  %1 = sdiv <8 x i16>  %a, %b
+  ret <8 x i16> %1
+}
+define <8 x i32> @sdiv_v8_i32(<8 x i32>  %a, <8 x i32> %b) {
+  ; CHECK: sdiv_v8_i32
+  ; CHECK: cost of 160 {{.*}} sdiv
+
+  %1 = sdiv <8 x i32>  %a, %b
+  ret <8 x i32> %1
+}
+define <8 x i64> @sdiv_v8_i64(<8 x i64>  %a, <8 x i64> %b) {
+  ; CHECK: sdiv_v8_i64
+  ; CHECK: cost of 160 {{.*}} sdiv
+
+  %1 = sdiv <8 x i64>  %a, %b
+  ret <8 x i64> %1
+}
+define <16 x i8> @sdiv_v16_i8(<16 x i8>  %a, <16 x i8> %b) {
+  ; CHECK: sdiv_v16_i8
+  ; CHECK: cost of 320 {{.*}} sdiv
+
+  %1 = sdiv <16 x i8>  %a, %b
+  ret <16 x i8> %1
+}
+define <16 x i16> @sdiv_v16_i16(<16 x i16>  %a, <16 x i16> %b) {
+  ; CHECK: sdiv_v16_i16
+  ; CHECK: cost of 320 {{.*}} sdiv
+
+  %1 = sdiv <16 x i16>  %a, %b
+  ret <16 x i16> %1
+}
+define <16 x i32> @sdiv_v16_i32(<16 x i32>  %a, <16 x i32> %b) {
+  ; CHECK: sdiv_v16_i32
+  ; CHECK: cost of 320 {{.*}} sdiv
+
+  %1 = sdiv <16 x i32>  %a, %b
+  ret <16 x i32> %1
+}
+define <16 x i64> @sdiv_v16_i64(<16 x i64>  %a, <16 x i64> %b) {
+  ; CHECK: sdiv_v16_i64
+  ; CHECK: cost of 320 {{.*}} sdiv
+
+  %1 = sdiv <16 x i64>  %a, %b
+  ret <16 x i64> %1
+}
+define <2 x i8> @udiv_v2_i8(<2 x i8>  %a, <2 x i8> %b) {
+  ; CHECK: udiv_v2_i8
+  ; CHECK: cost of 40 {{.*}} udiv
+
+  %1 = udiv <2 x i8>  %a, %b
+  ret <2 x i8> %1
+}
+define <2 x i16> @udiv_v2_i16(<2 x i16>  %a, <2 x i16> %b) {
+  ; CHECK: udiv_v2_i16
+  ; CHECK: cost of 40 {{.*}} udiv
+
+  %1 = udiv <2 x i16>  %a, %b
+  ret <2 x i16> %1
+}
+define <2 x i32> @udiv_v2_i32(<2 x i32>  %a, <2 x i32> %b) {
+  ; CHECK: udiv_v2_i32
+  ; CHECK: cost of 40 {{.*}} udiv
+
+  %1 = udiv <2 x i32>  %a, %b
+  ret <2 x i32> %1
+}
+define <2 x i64> @udiv_v2_i64(<2 x i64>  %a, <2 x i64> %b) {
+  ; CHECK: udiv_v2_i64
+  ; CHECK: cost of 40 {{.*}} udiv
+
+  %1 = udiv <2 x i64>  %a, %b
+  ret <2 x i64> %1
+}
+define <4 x i8> @udiv_v4_i8(<4 x i8>  %a, <4 x i8> %b) {
+  ; CHECK: udiv_v4_i8
+  ; CHECK: cost of 10 {{.*}} udiv
+
+  %1 = udiv <4 x i8>  %a, %b
+  ret <4 x i8> %1
+}
+define <4 x i16> @udiv_v4_i16(<4 x i16>  %a, <4 x i16> %b) {
+  ; CHECK: udiv_v4_i16
+  ; CHECK: cost of 10 {{.*}} udiv
+
+  %1 = udiv <4 x i16>  %a, %b
+  ret <4 x i16> %1
+}
+define <4 x i32> @udiv_v4_i32(<4 x i32>  %a, <4 x i32> %b) {
+  ; CHECK: udiv_v4_i32
+  ; CHECK: cost of 80 {{.*}} udiv
+
+  %1 = udiv <4 x i32>  %a, %b
+  ret <4 x i32> %1
+}
+define <4 x i64> @udiv_v4_i64(<4 x i64>  %a, <4 x i64> %b) {
+  ; CHECK: udiv_v4_i64
+  ; CHECK: cost of 80 {{.*}} udiv
+
+  %1 = udiv <4 x i64>  %a, %b
+  ret <4 x i64> %1
+}
+define <8 x i8> @udiv_v8_i8(<8 x i8>  %a, <8 x i8> %b) {
+  ; CHECK: udiv_v8_i8
+  ; CHECK: cost of 10 {{.*}} udiv
+
+  %1 = udiv <8 x i8>  %a, %b
+  ret <8 x i8> %1
+}
+define <8 x i16> @udiv_v8_i16(<8 x i16>  %a, <8 x i16> %b) {
+  ; CHECK: udiv_v8_i16
+  ; CHECK: cost of 160 {{.*}} udiv
+
+  %1 = udiv <8 x i16>  %a, %b
+  ret <8 x i16> %1
+}
+define <8 x i32> @udiv_v8_i32(<8 x i32>  %a, <8 x i32> %b) {
+  ; CHECK: udiv_v8_i32
+  ; CHECK: cost of 160 {{.*}} udiv
+
+  %1 = udiv <8 x i32>  %a, %b
+  ret <8 x i32> %1
+}
+define <8 x i64> @udiv_v8_i64(<8 x i64>  %a, <8 x i64> %b) {
+  ; CHECK: udiv_v8_i64
+  ; CHECK: cost of 160 {{.*}} udiv
+
+  %1 = udiv <8 x i64>  %a, %b
+  ret <8 x i64> %1
+}
+define <16 x i8> @udiv_v16_i8(<16 x i8>  %a, <16 x i8> %b) {
+  ; CHECK: udiv_v16_i8
+  ; CHECK: cost of 320 {{.*}} udiv
+
+  %1 = udiv <16 x i8>  %a, %b
+  ret <16 x i8> %1
+}
+define <16 x i16> @udiv_v16_i16(<16 x i16>  %a, <16 x i16> %b) {
+  ; CHECK: udiv_v16_i16
+  ; CHECK: cost of 320 {{.*}} udiv
+
+  %1 = udiv <16 x i16>  %a, %b
+  ret <16 x i16> %1
+}
+define <16 x i32> @udiv_v16_i32(<16 x i32>  %a, <16 x i32> %b) {
+  ; CHECK: udiv_v16_i32
+  ; CHECK: cost of 320 {{.*}} udiv
+
+  %1 = udiv <16 x i32>  %a, %b
+  ret <16 x i32> %1
+}
+define <16 x i64> @udiv_v16_i64(<16 x i64>  %a, <16 x i64> %b) {
+  ; CHECK: udiv_v16_i64
+  ; CHECK: cost of 320 {{.*}} udiv
+
+  %1 = udiv <16 x i64>  %a, %b
+  ret <16 x i64> %1
+}
+define <2 x i8> @srem_v2_i8(<2 x i8>  %a, <2 x i8> %b) {
+  ; CHECK: srem_v2_i8
+  ; CHECK: cost of 40 {{.*}} srem
+
+  %1 = srem <2 x i8>  %a, %b
+  ret <2 x i8> %1
+}
+define <2 x i16> @srem_v2_i16(<2 x i16>  %a, <2 x i16> %b) {
+  ; CHECK: srem_v2_i16
+  ; CHECK: cost of 40 {{.*}} srem
+
+  %1 = srem <2 x i16>  %a, %b
+  ret <2 x i16> %1
+}
+define <2 x i32> @srem_v2_i32(<2 x i32>  %a, <2 x i32> %b) {
+  ; CHECK: srem_v2_i32
+  ; CHECK: cost of 40 {{.*}} srem
+
+  %1 = srem <2 x i32>  %a, %b
+  ret <2 x i32> %1
+}
+define <2 x i64> @srem_v2_i64(<2 x i64>  %a, <2 x i64> %b) {
+  ; CHECK: srem_v2_i64
+  ; CHECK: cost of 40 {{.*}} srem
+
+  %1 = srem <2 x i64>  %a, %b
+  ret <2 x i64> %1
+}
+define <4 x i8> @srem_v4_i8(<4 x i8>  %a, <4 x i8> %b) {
+  ; CHECK: srem_v4_i8
+  ; CHECK: cost of 80 {{.*}} srem
+
+  %1 = srem <4 x i8>  %a, %b
+  ret <4 x i8> %1
+}
+define <4 x i16> @srem_v4_i16(<4 x i16>  %a, <4 x i16> %b) {
+  ; CHECK: srem_v4_i16
+  ; CHECK: cost of 80 {{.*}} srem
+
+  %1 = srem <4 x i16>  %a, %b
+  ret <4 x i16> %1
+}
+define <4 x i32> @srem_v4_i32(<4 x i32>  %a, <4 x i32> %b) {
+  ; CHECK: srem_v4_i32
+  ; CHECK: cost of 80 {{.*}} srem
+
+  %1 = srem <4 x i32>  %a, %b
+  ret <4 x i32> %1
+}
+define <4 x i64> @srem_v4_i64(<4 x i64>  %a, <4 x i64> %b) {
+  ; CHECK: srem_v4_i64
+  ; CHECK: cost of 80 {{.*}} srem
+
+  %1 = srem <4 x i64>  %a, %b
+  ret <4 x i64> %1
+}
+define <8 x i8> @srem_v8_i8(<8 x i8>  %a, <8 x i8> %b) {
+  ; CHECK: srem_v8_i8
+  ; CHECK: cost of 160 {{.*}} srem
+
+  %1 = srem <8 x i8>  %a, %b
+  ret <8 x i8> %1
+}
+define <8 x i16> @srem_v8_i16(<8 x i16>  %a, <8 x i16> %b) {
+  ; CHECK: srem_v8_i16
+  ; CHECK: cost of 160 {{.*}} srem
+
+  %1 = srem <8 x i16>  %a, %b
+  ret <8 x i16> %1
+}
+define <8 x i32> @srem_v8_i32(<8 x i32>  %a, <8 x i32> %b) {
+  ; CHECK: srem_v8_i32
+  ; CHECK: cost of 160 {{.*}} srem
+
+  %1 = srem <8 x i32>  %a, %b
+  ret <8 x i32> %1
+}
+define <8 x i64> @srem_v8_i64(<8 x i64>  %a, <8 x i64> %b) {
+  ; CHECK: srem_v8_i64
+  ; CHECK: cost of 160 {{.*}} srem
+
+  %1 = srem <8 x i64>  %a, %b
+  ret <8 x i64> %1
+}
+define <16 x i8> @srem_v16_i8(<16 x i8>  %a, <16 x i8> %b) {
+  ; CHECK: srem_v16_i8
+  ; CHECK: cost of 320 {{.*}} srem
+
+  %1 = srem <16 x i8>  %a, %b
+  ret <16 x i8> %1
+}
+define <16 x i16> @srem_v16_i16(<16 x i16>  %a, <16 x i16> %b) {
+  ; CHECK: srem_v16_i16
+  ; CHECK: cost of 320 {{.*}} srem
+
+  %1 = srem <16 x i16>  %a, %b
+  ret <16 x i16> %1
+}
+define <16 x i32> @srem_v16_i32(<16 x i32>  %a, <16 x i32> %b) {
+  ; CHECK: srem_v16_i32
+  ; CHECK: cost of 320 {{.*}} srem
+
+  %1 = srem <16 x i32>  %a, %b
+  ret <16 x i32> %1
+}
+define <16 x i64> @srem_v16_i64(<16 x i64>  %a, <16 x i64> %b) {
+  ; CHECK: srem_v16_i64
+  ; CHECK: cost of 320 {{.*}} srem
+
+  %1 = srem <16 x i64>  %a, %b
+  ret <16 x i64> %1
+}
+define <2 x i8> @urem_v2_i8(<2 x i8>  %a, <2 x i8> %b) {
+  ; CHECK: urem_v2_i8
+  ; CHECK: cost of 40 {{.*}} urem
+
+  %1 = urem <2 x i8>  %a, %b
+  ret <2 x i8> %1
+}
+define <2 x i16> @urem_v2_i16(<2 x i16>  %a, <2 x i16> %b) {
+  ; CHECK: urem_v2_i16
+  ; CHECK: cost of 40 {{.*}} urem
+
+  %1 = urem <2 x i16>  %a, %b
+  ret <2 x i16> %1
+}
+define <2 x i32> @urem_v2_i32(<2 x i32>  %a, <2 x i32> %b) {
+  ; CHECK: urem_v2_i32
+  ; CHECK: cost of 40 {{.*}} urem
+
+  %1 = urem <2 x i32>  %a, %b
+  ret <2 x i32> %1
+}
+define <2 x i64> @urem_v2_i64(<2 x i64>  %a, <2 x i64> %b) {
+  ; CHECK: urem_v2_i64
+  ; CHECK: cost of 40 {{.*}} urem
+
+  %1 = urem <2 x i64>  %a, %b
+  ret <2 x i64> %1
+}
+define <4 x i8> @urem_v4_i8(<4 x i8>  %a, <4 x i8> %b) {
+  ; CHECK: urem_v4_i8
+  ; CHECK: cost of 80 {{.*}} urem
+
+  %1 = urem <4 x i8>  %a, %b
+  ret <4 x i8> %1
+}
+define <4 x i16> @urem_v4_i16(<4 x i16>  %a, <4 x i16> %b) {
+  ; CHECK: urem_v4_i16
+  ; CHECK: cost of 80 {{.*}} urem
+
+  %1 = urem <4 x i16>  %a, %b
+  ret <4 x i16> %1
+}
+define <4 x i32> @urem_v4_i32(<4 x i32>  %a, <4 x i32> %b) {
+  ; CHECK: urem_v4_i32
+  ; CHECK: cost of 80 {{.*}} urem
+
+  %1 = urem <4 x i32>  %a, %b
+  ret <4 x i32> %1
+}
+define <4 x i64> @urem_v4_i64(<4 x i64>  %a, <4 x i64> %b) {
+  ; CHECK: urem_v4_i64
+  ; CHECK: cost of 80 {{.*}} urem
+
+  %1 = urem <4 x i64>  %a, %b
+  ret <4 x i64> %1
+}
+define <8 x i8> @urem_v8_i8(<8 x i8>  %a, <8 x i8> %b) {
+  ; CHECK: urem_v8_i8
+  ; CHECK: cost of 160 {{.*}} urem
+
+  %1 = urem <8 x i8>  %a, %b
+  ret <8 x i8> %1
+}
+define <8 x i16> @urem_v8_i16(<8 x i16>  %a, <8 x i16> %b) {
+  ; CHECK: urem_v8_i16
+  ; CHECK: cost of 160 {{.*}} urem
+
+  %1 = urem <8 x i16>  %a, %b
+  ret <8 x i16> %1
+}
+define <8 x i32> @urem_v8_i32(<8 x i32>  %a, <8 x i32> %b) {
+  ; CHECK: urem_v8_i32
+  ; CHECK: cost of 160 {{.*}} urem
+
+  %1 = urem <8 x i32>  %a, %b
+  ret <8 x i32> %1
+}
+define <8 x i64> @urem_v8_i64(<8 x i64>  %a, <8 x i64> %b) {
+  ; CHECK: urem_v8_i64
+  ; CHECK: cost of 160 {{.*}} urem
+
+  %1 = urem <8 x i64>  %a, %b
+  ret <8 x i64> %1
+}
+define <16 x i8> @urem_v16_i8(<16 x i8>  %a, <16 x i8> %b) {
+  ; CHECK: urem_v16_i8
+  ; CHECK: cost of 320 {{.*}} urem
+
+  %1 = urem <16 x i8>  %a, %b
+  ret <16 x i8> %1
+}
+define <16 x i16> @urem_v16_i16(<16 x i16>  %a, <16 x i16> %b) {
+  ; CHECK: urem_v16_i16
+  ; CHECK: cost of 320 {{.*}} urem
+
+  %1 = urem <16 x i16>  %a, %b
+  ret <16 x i16> %1
+}
+define <16 x i32> @urem_v16_i32(<16 x i32>  %a, <16 x i32> %b) {
+  ; CHECK: urem_v16_i32
+  ; CHECK: cost of 320 {{.*}} urem
+
+  %1 = urem <16 x i32>  %a, %b
+  ret <16 x i32> %1
+}
+define <16 x i64> @urem_v16_i64(<16 x i64>  %a, <16 x i64> %b) {
+  ; CHECK: urem_v16_i64
+  ; CHECK: cost of 320 {{.*}} urem
+
+  %1 = urem <16 x i64>  %a, %b
+  ret <16 x i64> %1
+}
diff --git a/test/Analysis/CostModel/X86/arith.ll b/test/Analysis/CostModel/X86/arith.ll
index 85b4425..92f5a1e 100644
--- a/test/Analysis/CostModel/X86/arith.ll
+++ b/test/Analysis/CostModel/X86/arith.ll
@@ -66,9 +66,9 @@ define void @avx2mull() {
 
 ; CHECK: fmul
 define i32 @fmul(i32 %arg) {
-  ;CHECK: cost of 1 {{.*}} fmul
+  ;CHECK: cost of 2 {{.*}} fmul
   %A = fmul <4 x float> undef, undef
-  ;CHECK: cost of 1 {{.*}} fmul
+  ;CHECK: cost of 2 {{.*}} fmul
   %B = fmul <8 x float> undef, undef
   ret i32 undef
 }
diff --git a/test/Analysis/CostModel/X86/loop_v2.ll b/test/Analysis/CostModel/X86/loop_v2.ll
index 260a606..348444e 100644
--- a/test/Analysis/CostModel/X86/loop_v2.ll
+++ b/test/Analysis/CostModel/X86/loop_v2.ll
@@ -20,10 +20,10 @@ vector.body:                                      ; preds = %vector.body, %vecto
   ;CHECK: cost of 1 {{.*}} extract
   %6 = extractelement <2 x i64> %3, i32 1
   %7 = getelementptr inbounds i32* %A, i64 %6
-  %8 = load i32* %5, align 4, !tbaa !0
+  %8 = load i32* %5, align 4
   ;CHECK: cost of 1 {{.*}} insert
   %9 = insertelement <2 x i32> undef, i32 %8, i32 0
-  %10 = load i32* %7, align 4, !tbaa !0
+  %10 = load i32* %7, align 4
   ;CHECK: cost of 1 {{.*}} insert
   %11 = insertelement <2 x i32> %9, i32 %10, i32 1
   %12 = add nsw <2 x i32> %11, %vec.phi
@@ -37,7 +37,3 @@ for.end:                                          ; preds = %vector.body
   %16 = add i32 %14, %15
   ret i32 %16
 }
-
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/Analysis/CostModel/X86/sitofp.ll b/test/Analysis/CostModel/X86/sitofp.ll
new file mode 100644
index 0000000..338d974
--- /dev/null
+++ b/test/Analysis/CostModel/X86/sitofp.ll
@@ -0,0 +1,281 @@
+; RUN: opt -mtriple=x86_64-apple-darwin -mcpu=core2 -cost-model -analyze < %s | FileCheck --check-prefix=SSE2 %s
+
+define <2 x double> @sitofpv2i8v2double(<2 x i8> %a) {
+  ; SSE2: sitofpv2i8v2double
+  ; SSE2: cost of 20 {{.*}} sitofp
+  %1 = sitofp <2 x i8> %a to <2 x double>
+  ret <2 x double> %1
+}
+
+define <4 x double> @sitofpv4i8v4double(<4 x i8> %a) {
+  ; SSE2: sitofpv4i8v4double
+  ; SSE2: cost of 40 {{.*}} sitofp
+  %1 = sitofp <4 x i8> %a to <4 x double>
+  ret <4 x double> %1
+}
+
+define <8 x double> @sitofpv8i8v8double(<8 x i8> %a) {
+  ; SSE2: sitofpv8i8v8double
+  ; SSE2: cost of 80 {{.*}} sitofp
+  %1 = sitofp <8 x i8> %a to <8 x double>
+  ret <8 x double> %1
+}
+
+define <16 x double> @sitofpv16i8v16double(<16 x i8> %a) {
+  ; SSE2: sitofpv16i8v16double
+  ; SSE2: cost of 160 {{.*}} sitofp
+  %1 = sitofp <16 x i8> %a to <16 x double>
+  ret <16 x double> %1
+}
+
+define <32 x double> @sitofpv32i8v32double(<32 x i8> %a) {
+  ; SSE2: sitofpv32i8v32double
+  ; SSE2: cost of 320 {{.*}} sitofp
+  %1 = sitofp <32 x i8> %a to <32 x double>
+  ret <32 x double> %1
+}
+
+define <2 x double> @sitofpv2i16v2double(<2 x i16> %a) {
+  ; SSE2: sitofpv2i16v2double
+  ; SSE2: cost of 20 {{.*}} sitofp
+  %1 = sitofp <2 x i16> %a to <2 x double>
+  ret <2 x double> %1
+}
+
+define <4 x double> @sitofpv4i16v4double(<4 x i16> %a) {
+  ; SSE2: sitofpv4i16v4double
+  ; SSE2: cost of 40 {{.*}} sitofp
+  %1 = sitofp <4 x i16> %a to <4 x double>
+  ret <4 x double> %1
+}
+
+define <8 x double> @sitofpv8i16v8double(<8 x i16> %a) {
+  ; SSE2: sitofpv8i16v8double
+  ; SSE2: cost of 80 {{.*}} sitofp
+  %1 = sitofp <8 x i16> %a to <8 x double>
+  ret <8 x double> %1
+}
+
+define <16 x double> @sitofpv16i16v16double(<16 x i16> %a) {
+  ; SSE2: sitofpv16i16v16double
+  ; SSE2: cost of 160 {{.*}} sitofp
+  %1 = sitofp <16 x i16> %a to <16 x double>
+  ret <16 x double> %1
+}
+
+define <32 x double> @sitofpv32i16v32double(<32 x i16> %a) {
+  ; SSE2: sitofpv32i16v32double
+  ; SSE2: cost of 320 {{.*}} sitofp
+  %1 = sitofp <32 x i16> %a to <32 x double>
+  ret <32 x double> %1
+}
+
+define <2 x double> @sitofpv2i32v2double(<2 x i32> %a) {
+  ; SSE2: sitofpv2i32v2double
+  ; SSE2: cost of 20 {{.*}} sitofp
+  %1 = sitofp <2 x i32> %a to <2 x double>
+  ret <2 x double> %1
+}
+
+define <4 x double> @sitofpv4i32v4double(<4 x i32> %a) {
+  ; SSE2: sitofpv4i32v4double
+  ; SSE2: cost of 40 {{.*}} sitofp
+  %1 = sitofp <4 x i32> %a to <4 x double>
+  ret <4 x double> %1
+}
+
+define <8 x double> @sitofpv8i32v8double(<8 x i32> %a) {
+  ; SSE2: sitofpv8i32v8double
+  ; SSE2: cost of 80 {{.*}} sitofp
+  %1 = sitofp <8 x i32> %a to <8 x double>
+  ret <8 x double> %1
+}
+
+define <16 x double> @sitofpv16i32v16double(<16 x i32> %a) {
+  ; SSE2: sitofpv16i32v16double
+  ; SSE2: cost of 160 {{.*}} sitofp
+  %1 = sitofp <16 x i32> %a to <16 x double>
+  ret <16 x double> %1
+}
+
+define <32 x double> @sitofpv32i32v32double(<32 x i32> %a) {
+  ; SSE2: sitofpv32i32v32double
+  ; SSE2: cost of 320 {{.*}} sitofp
+  %1 = sitofp <32 x i32> %a to <32 x double>
+  ret <32 x double> %1
+}
+
+define <2 x double> @sitofpv2i64v2double(<2 x i64> %a) {
+  ; SSE2: sitofpv2i64v2double
+  ; SSE2: cost of 20 {{.*}} sitofp
+  %1 = sitofp <2 x i64> %a to <2 x double>
+  ret <2 x double> %1
+}
+
+define <4 x double> @sitofpv4i64v4double(<4 x i64> %a) {
+  ; SSE2: sitofpv4i64v4double
+  ; SSE2: cost of 40 {{.*}} sitofp
+  %1 = sitofp <4 x i64> %a to <4 x double>
+  ret <4 x double> %1
+}
+
+define <8 x double> @sitofpv8i64v8double(<8 x i64> %a) {
+  ; SSE2: sitofpv8i64v8double
+  ; SSE2: cost of 80 {{.*}} sitofp
+  %1 = sitofp <8 x i64> %a to <8 x double>
+  ret <8 x double> %1
+}
+
+define <16 x double> @sitofpv16i64v16double(<16 x i64> %a) {
+  ; SSE2: sitofpv16i64v16double
+  ; SSE2: cost of 160 {{.*}} sitofp
+  %1 = sitofp <16 x i64> %a to <16 x double>
+  ret <16 x double> %1
+}
+
+define <32 x double> @sitofpv32i64v32double(<32 x i64> %a) {
+  ; SSE2: sitofpv32i64v32double
+  ; SSE2: cost of 320 {{.*}} sitofp
+  %1 = sitofp <32 x i64> %a to <32 x double>
+  ret <32 x double> %1
+}
+
+define <2 x float> @sitofpv2i8v2float(<2 x i8> %a) {
+  ; SSE2: sitofpv2i8v2float
+  ; SSE2: cost of 15 {{.*}} sitofp
+  %1 = sitofp <2 x i8> %a to <2 x float>
+  ret <2 x float> %1
+}
+
+define <4 x float> @sitofpv4i8v4float(<4 x i8> %a) {
+  ; SSE2: sitofpv4i8v4float
+  ; SSE2: cost of 15 {{.*}} sitofp
+  %1 = sitofp <4 x i8> %a to <4 x float>
+  ret <4 x float> %1
+}
+
+define <8 x float> @sitofpv8i8v8float(<8 x i8> %a) {
+  ; SSE2: sitofpv8i8v8float
+  ; SSE2: cost of 15 {{.*}} sitofp
+  %1 = sitofp <8 x i8> %a to <8 x float>
+  ret <8 x float> %1
+}
+
+define <16 x float> @sitofpv16i8v16float(<16 x i8> %a) {
+  ; SSE2: sitofpv16i8v16float
+  ; SSE2: cost of 8 {{.*}} sitofp
+  %1 = sitofp <16 x i8> %a to <16 x float>
+  ret <16 x float> %1
+}
+
+define <32 x float> @sitofpv32i8v32float(<32 x i8> %a) {
+  ; SSE2: sitofpv32i8v32float
+  ; SSE2: cost of 16 {{.*}} sitofp
+  %1 = sitofp <32 x i8> %a to <32 x float>
+  ret <32 x float> %1
+}
+
+define <2 x float> @sitofpv2i16v2float(<2 x i16> %a) {
+  ; SSE2: sitofpv2i16v2float
+  ; SSE2: cost of 15 {{.*}} sitofp
+  %1 = sitofp <2 x i16> %a to <2 x float>
+  ret <2 x float> %1
+}
+
+define <4 x float> @sitofpv4i16v4float(<4 x i16> %a) {
+  ; SSE2: sitofpv4i16v4float
+  ; SSE2: cost of 15 {{.*}} sitofp
+  %1 = sitofp <4 x i16> %a to <4 x float>
+  ret <4 x float> %1
+}
+
+define <8 x float> @sitofpv8i16v8float(<8 x i16> %a) {
+  ; SSE2: sitofpv8i16v8float
+  ; SSE2: cost of 15 {{.*}} sitofp
+  %1 = sitofp <8 x i16> %a to <8 x float>
+  ret <8 x float> %1
+}
+
+define <16 x float> @sitofpv16i16v16float(<16 x i16> %a) {
+  ; SSE2: sitofpv16i16v16float
+  ; SSE2: cost of 30 {{.*}} sitofp
+  %1 = sitofp <16 x i16> %a to <16 x float>
+  ret <16 x float> %1
+}
+
+define <32 x float> @sitofpv32i16v32float(<32 x i16> %a) {
+  ; SSE2: sitofpv32i16v32float
+  ; SSE2: cost of 60 {{.*}} sitofp
+  %1 = sitofp <32 x i16> %a to <32 x float>
+  ret <32 x float> %1
+}
+
+define <2 x float> @sitofpv2i32v2float(<2 x i32> %a) {
+  ; SSE2: sitofpv2i32v2float
+  ; SSE2: cost of 15 {{.*}} sitofp
+  %1 = sitofp <2 x i32> %a to <2 x float>
+  ret <2 x float> %1
+}
+
+define <4 x float> @sitofpv4i32v4float(<4 x i32> %a) {
+  ; SSE2: sitofpv4i32v4float
+  ; SSE2: cost of 15 {{.*}} sitofp
+  %1 = sitofp <4 x i32> %a to <4 x float>
+  ret <4 x float> %1
+}
+
+define <8 x float> @sitofpv8i32v8float(<8 x i32> %a) {
+  ; SSE2: sitofpv8i32v8float
+  ; SSE2: cost of 30 {{.*}} sitofp
+  %1 = sitofp <8 x i32> %a to <8 x float>
+  ret <8 x float> %1
+}
+
+define <16 x float> @sitofpv16i32v16float(<16 x i32> %a) {
+  ; SSE2: sitofpv16i32v16float
+  ; SSE2: cost of 60 {{.*}} sitofp
+  %1 = sitofp <16 x i32> %a to <16 x float>
+  ret <16 x float> %1
+}
+
+define <32 x float> @sitofpv32i32v32float(<32 x i32> %a) {
+  ; SSE2: sitofpv32i32v32float
+  ; SSE2: cost of 120 {{.*}} sitofp
+  %1 = sitofp <32 x i32> %a to <32 x float>
+  ret <32 x float> %1
+}
+
+define <2 x float> @sitofpv2i64v2float(<2 x i64> %a) {
+  ; SSE2: sitofpv2i64v2float
+  ; SSE2: cost of 15 {{.*}} sitofp
+  %1 = sitofp <2 x i64> %a to <2 x float>
+  ret <2 x float> %1
+}
+
+define <4 x float> @sitofpv4i64v4float(<4 x i64> %a) {
+  ; SSE2: sitofpv4i64v4float
+  ; SSE2: cost of 30 {{.*}} sitofp
+  %1 = sitofp <4 x i64> %a to <4 x float>
+  ret <4 x float> %1
+}
+
+define <8 x float> @sitofpv8i64v8float(<8 x i64> %a) {
+  ; SSE2: sitofpv8i64v8float
+  ; SSE2: cost of 60 {{.*}} sitofp
+  %1 = sitofp <8 x i64> %a to <8 x float>
+  ret <8 x float> %1
+}
+
+define <16 x float> @sitofpv16i64v16float(<16 x i64> %a) {
+  ; SSE2: sitofpv16i64v16float
+  ; SSE2: cost of 120 {{.*}} sitofp
+  %1 = sitofp <16 x i64> %a to <16 x float>
+  ret <16 x float> %1
+}
+
+define <32 x float> @sitofpv32i64v32float(<32 x i64> %a) {
+  ; SSE2: sitofpv32i64v32float
+  ; SSE2: cost of 240 {{.*}} sitofp
+  %1 = sitofp <32 x i64> %a to <32 x float>
+  ret <32 x float> %1
+}
diff --git a/test/Analysis/CostModel/X86/testshiftashr.ll b/test/Analysis/CostModel/X86/testshiftashr.ll
index f35eea8..d96a92f 100644
--- a/test/Analysis/CostModel/X86/testshiftashr.ll
+++ b/test/Analysis/CostModel/X86/testshiftashr.ll
@@ -113,7 +113,7 @@ entry:
 define %shifttype32i32 @shift32i32(%shifttype32i32 %a, %shifttype32i32 %b) {
 entry:
   ; SSE2: shift32i32
-  ; SSE2: cost of 256 {{.*}} ashr
+  ; SSE2: cost of 320 {{.*}} ashr
   ; SSE2-CODEGEN: shift32i32
   ; SSE2-CODEGEN: sarl %cl
 
@@ -173,7 +173,7 @@ entry:
 define %shifttype32i64 @shift32i64(%shifttype32i64 %a, %shifttype32i64 %b) {
 entry:
   ; SSE2: shift32i64
-  ; SSE2: cost of 256 {{.*}} ashr
+  ; SSE2: cost of 320 {{.*}} ashr
   ; SSE2-CODEGEN: shift32i64
   ; SSE2-CODEGEN: sarq %cl
 
@@ -373,7 +373,7 @@ define %shifttypec32i32 @shift32i32c(%shifttypec32i32 %a, %shifttypec32i32 %b) {
 entry:
   ; SSE2: shift32i32c
   ; getTypeConversion fails here and promotes this to a i64.
-  ; SSE2: cost of 256 {{.*}} ashr
+  ; SSE2: cost of 8 {{.*}} ashr
   ; SSE2-CODEGEN: shift32i32c
   ; SSE2-CODEGEN: psrad $3
   %0 = ashr %shifttypec32i32 %a , <i32 3, i32 3, i32 3, i32 3,
@@ -443,7 +443,7 @@ entry:
 define %shifttypec32i64 @shift32i64c(%shifttypec32i64 %a, %shifttypec32i64 %b) {
 entry:
   ; SSE2: shift32i64c
-  ; SSE2: cost of 256 {{.*}} ashr
+  ; SSE2: cost of 320 {{.*}} ashr
   ; SSE2-CODEGEN: shift32i64c
   ; SSE2-CODEGEN: sarq $3
 
diff --git a/test/Analysis/CostModel/X86/testshiftlshr.ll b/test/Analysis/CostModel/X86/testshiftlshr.ll
index 8d6ef38..7bc8d89 100644
--- a/test/Analysis/CostModel/X86/testshiftlshr.ll
+++ b/test/Analysis/CostModel/X86/testshiftlshr.ll
@@ -113,7 +113,7 @@ entry:
 define %shifttype32i32 @shift32i32(%shifttype32i32 %a, %shifttype32i32 %b) {
 entry:
   ; SSE2: shift32i32
-  ; SSE2: cost of 256 {{.*}} lshr
+  ; SSE2: cost of 320 {{.*}} lshr
   ; SSE2-CODEGEN: shift32i32
   ; SSE2-CODEGEN: shrl %cl
 
@@ -173,7 +173,7 @@ entry:
 define %shifttype32i64 @shift32i64(%shifttype32i64 %a, %shifttype32i64 %b) {
 entry:
   ; SSE2: shift32i64
-  ; SSE2: cost of 256 {{.*}} lshr
+  ; SSE2: cost of 320 {{.*}} lshr
   ; SSE2-CODEGEN: shift32i64
   ; SSE2-CODEGEN: shrq %cl
 
@@ -372,8 +372,7 @@ entry:
 define %shifttypec32i32 @shift32i32c(%shifttypec32i32 %a, %shifttypec32i32 %b) {
 entry:
   ; SSE2: shift32i32c
-  ; getTypeConversion fails here and promotes this to a i64.
-  ; SSE2: cost of 256 {{.*}} lshr
+  ; SSE2: cost of 8 {{.*}} lshr
   ; SSE2-CODEGEN: shift32i32c
   ; SSE2-CODEGEN: psrld $3
   %0 = lshr %shifttypec32i32 %a , <i32 3, i32 3, i32 3, i32 3,
@@ -443,7 +442,7 @@ entry:
 define %shifttypec32i64 @shift32i64c(%shifttypec32i64 %a, %shifttypec32i64 %b) {
 entry:
   ; SSE2: shift32i64c
-  ; SSE2: cost of 256 {{.*}} lshr
+  ; SSE2: cost of 16 {{.*}} lshr
   ; SSE2-CODEGEN: shift32i64c
   ; SSE2-CODEGEN: psrlq $3
 
diff --git a/test/Analysis/CostModel/X86/testshiftshl.ll b/test/Analysis/CostModel/X86/testshiftshl.ll
index f45a698..40effd0 100644
--- a/test/Analysis/CostModel/X86/testshiftshl.ll
+++ b/test/Analysis/CostModel/X86/testshiftshl.ll
@@ -113,7 +113,7 @@ entry:
 define %shifttype32i32 @shift32i32(%shifttype32i32 %a, %shifttype32i32 %b) {
 entry:
   ; SSE2: shift32i32
-  ; SSE2: cost of 256 {{.*}} shl
+  ; SSE2: cost of 80 {{.*}} shl
   ; SSE2-CODEGEN: shift32i32
   ; SSE2-CODEGEN: pmuludq
 
@@ -173,7 +173,7 @@ entry:
 define %shifttype32i64 @shift32i64(%shifttype32i64 %a, %shifttype32i64 %b) {
 entry:
   ; SSE2: shift32i64
-  ; SSE2: cost of 256 {{.*}} shl
+  ; SSE2: cost of 320 {{.*}} shl
   ; SSE2-CODEGEN: shift32i64
   ; SSE2-CODEGEN: shlq %cl
 
@@ -372,8 +372,7 @@ entry:
 define %shifttypec32i32 @shift32i32c(%shifttypec32i32 %a, %shifttypec32i32 %b) {
 entry:
   ; SSE2: shift32i32c
-  ; getTypeConversion fails here and promotes this to a i64.
-  ; SSE2: cost of 256 {{.*}} shl
+  ; SSE2: cost of 8 {{.*}} shl
   ; SSE2-CODEGEN: shift32i32c
   ; SSE2-CODEGEN: pslld $3
   %0 = shl %shifttypec32i32 %a , <i32 3, i32 3, i32 3, i32 3,
@@ -443,7 +442,7 @@ entry:
 define %shifttypec32i64 @shift32i64c(%shifttypec32i64 %a, %shifttypec32i64 %b) {
 entry:
   ; SSE2: shift32i64c
-  ; SSE2: cost of 256 {{.*}} shl
+  ; SSE2: cost of 16 {{.*}} shl
   ; SSE2-CODEGEN: shift32i64c
   ; SSE2-CODEGEN: psllq $3
 
diff --git a/test/Analysis/CostModel/X86/uitofp.ll b/test/Analysis/CostModel/X86/uitofp.ll
new file mode 100644
index 0000000..a41a04d
--- /dev/null
+++ b/test/Analysis/CostModel/X86/uitofp.ll
@@ -0,0 +1,368 @@
+; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=core2 < %s | FileCheck --check-prefix=SSE2-CODEGEN %s
+; RUN: opt -mtriple=x86_64-apple-darwin -mcpu=core2 -cost-model -analyze < %s | FileCheck --check-prefix=SSE2 %s
+
+; In X86TargetTransformInfo::getCastInstrCost we have code that depends on
+; getSimpleVT on a value type. On AVX2 we execute this code. Make sure we exit
+; early if the type is not a simple value type before we call this function.
+; RUN: opt -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -cost-model -analyze < %s
+
+define <2 x double> @uitofpv2i8v2double(<2 x i8> %a) {
+  ; SSE2: uitofpv2i8v2double
+  ; SSE2: cost of 20 {{.*}} uitofp
+  ; SSE2-CODEGEN: uitofpv2i8v2double
+  ; SSE2-CODEGEN: movapd  LCPI
+  ; SSE2-CODEGEN: subpd
+  ; SSE2-CODEGEN: addpd
+  %1 = uitofp <2 x i8> %a to <2 x double>
+  ret <2 x double> %1
+}
+
+define <4 x double> @uitofpv4i8v4double(<4 x i8> %a) {
+  ; SSE2: uitofpv4i8v4double
+  ; SSE2: cost of 40 {{.*}} uitofp
+  ; SSE2-CODEGEN: uitofpv4i8v4double
+  ; SSE2-CODEGEN: movapd  LCPI
+  ; SSE2-CODEGEN: subpd
+  ; SSE2-CODEGEN: addpd
+  %1 = uitofp <4 x i8> %a to <4 x double>
+  ret <4 x double> %1
+}
+
+define <8 x double> @uitofpv8i8v8double(<8 x i8> %a) {
+  ; SSE2: uitofpv8i8v8double
+  ; SSE2: cost of 80 {{.*}} uitofp
+  ; SSE2-CODEGEN: uitofpv8i8v8double
+  ; SSE2-CODEGEN: movapd  LCPI
+  ; SSE2-CODEGEN: subpd
+  ; SSE2-CODEGEN: addpd
+  %1 = uitofp <8 x i8> %a to <8 x double>
+  ret <8 x double> %1
+}
+
+define <16 x double> @uitofpv16i8v16double(<16 x i8> %a) {
+  ; SSE2: uitofpv16i8v16double
+  ; SSE2: cost of 160 {{.*}} uitofp
+  ; SSE2-CODEGEN: uitofpv16i8v16double
+  ; SSE2-CODEGEN: movapd  LCPI
+  ; SSE2-CODEGEN: subpd
+  ; SSE2-CODEGEN: addpd
+  %1 = uitofp <16 x i8> %a to <16 x double>
+  ret <16 x double> %1
+}
+
+define <32 x double> @uitofpv32i8v32double(<32 x i8> %a) {
+  ; SSE2: uitofpv32i8v32double
+  ; SSE2: cost of 320 {{.*}} uitofp
+  ; SSE2-CODEGEN: uitofpv32i8v32double
+  ; SSE2-CODEGEN: movapd  LCPI
+  ; SSE2-CODEGEN: subpd
+  ; SSE2-CODEGEN: addpd
+  %1 = uitofp <32 x i8> %a to <32 x double>
+  ret <32 x double> %1
+}
+
+define <2 x double> @uitofpv2i16v2double(<2 x i16> %a) {
+  ; SSE2: uitofpv2i16v2double
+  ; SSE2: cost of 20 {{.*}} uitofp
+  ; SSE2-CODEGEN: uitofpv2i16v2double
+  ; SSE2-CODEGEN: movapd  LCPI
+  ; SSE2-CODEGEN: subpd
+  ; SSE2-CODEGEN: addpd
+  %1 = uitofp <2 x i16> %a to <2 x double>
+  ret <2 x double> %1
+}
+
+define <4 x double> @uitofpv4i16v4double(<4 x i16> %a) {
+  ; SSE2: uitofpv4i16v4double
+  ; SSE2: cost of 40 {{.*}} uitofp
+  ; SSE2-CODEGEN: uitofpv4i16v4double
+  ; SSE2-CODEGEN: movapd  LCPI
+  ; SSE2-CODEGEN: subpd
+  ; SSE2-CODEGEN: addpd
+  %1 = uitofp <4 x i16> %a to <4 x double>
+  ret <4 x double> %1
+}
+
+define <8 x double> @uitofpv8i16v8double(<8 x i16> %a) {
+  ; SSE2: uitofpv8i16v8double
+  ; SSE2: cost of 80 {{.*}} uitofp
+  ; SSE2-CODEGEN: uitofpv8i16v8double
+  ; SSE2-CODEGEN: movapd  LCPI
+  ; SSE2-CODEGEN: subpd
+  ; SSE2-CODEGEN: addpd
+  %1 = uitofp <8 x i16> %a to <8 x double>
+  ret <8 x double> %1
+}
+
+define <16 x double> @uitofpv16i16v16double(<16 x i16> %a) {
+  ; SSE2: uitofpv16i16v16double
+  ; SSE2: cost of 160 {{.*}} uitofp
+  ; SSE2-CODEGEN: uitofpv16i16v16double
+  ; SSE2-CODEGEN: movapd  LCPI
+  ; SSE2-CODEGEN: subpd
+  ; SSE2-CODEGEN: addpd
+  %1 = uitofp <16 x i16> %a to <16 x double>
+  ret <16 x double> %1
+}
+
+define <32 x double> @uitofpv32i16v32double(<32 x i16> %a) {
+  ; SSE2: uitofpv32i16v32double
+  ; SSE2: cost of 320 {{.*}} uitofp
+  ; SSE2-CODEGEN: uitofpv32i16v32double
+  ; SSE2-CODEGEN: movapd  LCPI
+  ; SSE2-CODEGEN: subpd
+  ; SSE2-CODEGEN: addpd
+  %1 = uitofp <32 x i16> %a to <32 x double>
+  ret <32 x double> %1
+}
+
+define <2 x double> @uitofpv2i32v2double(<2 x i32> %a) {
+  ; SSE2: uitofpv2i32v2double
+  ; SSE2: cost of 20 {{.*}} uitofp
+  ; SSE2-CODEGEN: uitofpv2i32v2double
+  ; SSE2-CODEGEN: movapd  LCPI
+  ; SSE2-CODEGEN: subpd
+  ; SSE2-CODEGEN: addpd
+  %1 = uitofp <2 x i32> %a to <2 x double>
+  ret <2 x double> %1
+}
+
+define <4 x double> @uitofpv4i32v4double(<4 x i32> %a) {
+  ; SSE2: uitofpv4i32v4double
+  ; SSE2: cost of 40 {{.*}} uitofp
+  ; SSE2-CODEGEN: uitofpv4i32v4double
+  ; SSE2-CODEGEN: movapd  LCPI
+  ; SSE2-CODEGEN: subpd
+  ; SSE2-CODEGEN: addpd
+  %1 = uitofp <4 x i32> %a to <4 x double>
+  ret <4 x double> %1
+}
+
+define <8 x double> @uitofpv8i32v8double(<8 x i32> %a) {
+  ; SSE2: uitofpv8i32v8double
+  ; SSE2: cost of 80 {{.*}} uitofp
+  ; SSE2-CODEGEN: uitofpv8i32v8double
+  ; SSE2-CODEGEN: movapd  LCPI
+  ; SSE2-CODEGEN: subpd
+  ; SSE2-CODEGEN: addpd
+  %1 = uitofp <8 x i32> %a to <8 x double>
+  ret <8 x double> %1
+}
+
+define <16 x double> @uitofpv16i32v16double(<16 x i32> %a) {
+  ; SSE2: uitofpv16i32v16double
+  ; SSE2: cost of 160 {{.*}} uitofp
+  ; SSE2-CODEGEN: uitofpv16i32v16double
+  ; SSE2-CODEGEN: movapd  LCPI
+  ; SSE2-CODEGEN: subpd
+  ; SSE2-CODEGEN: addpd
+  %1 = uitofp <16 x i32> %a to <16 x double>
+  ret <16 x double> %1
+}
+
+define <32 x double> @uitofpv32i32v32double(<32 x i32> %a) {
+  ; SSE2: uitofpv32i32v32double
+  ; SSE2: cost of 320 {{.*}} uitofp
+  ; SSE2-CODEGEN: uitofpv32i32v32double
+  ; SSE2-CODEGEN: movapd  LCPI
+  ; SSE2-CODEGEN: subpd
+  ; SSE2-CODEGEN: addpd
+  %1 = uitofp <32 x i32> %a to <32 x double>
+  ret <32 x double> %1
+}
+
+define <2 x double> @uitofpv2i64v2double(<2 x i64> %a) {
+  ; SSE2: uitofpv2i64v2double
+  ; SSE2: cost of 20 {{.*}} uitofp
+  ; SSE2-CODEGEN: uitofpv2i64v2double
+  ; SSE2-CODEGEN: movapd  LCPI
+  ; SSE2-CODEGEN: subpd
+  ; SSE2-CODEGEN: addpd
+  %1 = uitofp <2 x i64> %a to <2 x double>
+  ret <2 x double> %1
+}
+
+define <4 x double> @uitofpv4i64v4double(<4 x i64> %a) {
+  ; SSE2: uitofpv4i64v4double
+  ; SSE2: cost of 40 {{.*}} uitofp
+  ; SSE2-CODEGEN: uitofpv4i64v4double
+  ; SSE2-CODEGEN: movapd  LCPI
+  ; SSE2-CODEGEN: subpd
+  ; SSE2-CODEGEN: addpd
+  %1 = uitofp <4 x i64> %a to <4 x double>
+  ret <4 x double> %1
+}
+
+define <8 x double> @uitofpv8i64v8double(<8 x i64> %a) {
+  ; SSE2: uitofpv8i64v8double
+  ; SSE2: cost of 80 {{.*}} uitofp
+  ; SSE2-CODEGEN: uitofpv8i64v8double
+  ; SSE2-CODEGEN: movapd  LCPI
+  ; SSE2-CODEGEN: subpd
+  ; SSE2-CODEGEN: addpd
+  %1 = uitofp <8 x i64> %a to <8 x double>
+  ret <8 x double> %1
+}
+
+define <16 x double> @uitofpv16i64v16double(<16 x i64> %a) {
+  ; SSE2: uitofpv16i64v16double
+  ; SSE2: cost of 160 {{.*}} uitofp
+  ; SSE2-CODEGEN: uitofpv16i64v16double
+  ; SSE2-CODEGEN: movapd  LCPI
+  ; SSE2-CODEGEN: subpd
+  ; SSE2-CODEGEN: addpd
+  %1 = uitofp <16 x i64> %a to <16 x double>
+  ret <16 x double> %1
+}
+
+define <32 x double> @uitofpv32i64v32double(<32 x i64> %a) {
+  ; SSE2: uitofpv32i64v32double
+  ; SSE2: cost of 320 {{.*}} uitofp
+  ; SSE2-CODEGEN: uitofpv32i64v32double
+  ; SSE2-CODEGEN: movapd  LCPI
+  ; SSE2-CODEGEN: subpd
+  ; SSE2-CODEGEN: addpd
+  %1 = uitofp <32 x i64> %a to <32 x double>
+  ret <32 x double> %1
+}
+
+define <2 x float> @uitofpv2i8v2float(<2 x i8> %a) {
+  ; SSE2: uitofpv2i8v2float
+  ; SSE2: cost of 15 {{.*}} uitofp
+  %1 = uitofp <2 x i8> %a to <2 x float>
+  ret <2 x float> %1
+}
+
+define <4 x float> @uitofpv4i8v4float(<4 x i8> %a) {
+  ; SSE2: uitofpv4i8v4float
+  ; SSE2: cost of 15 {{.*}} uitofp
+  %1 = uitofp <4 x i8> %a to <4 x float>
+  ret <4 x float> %1
+}
+
+define <8 x float> @uitofpv8i8v8float(<8 x i8> %a) {
+  ; SSE2: uitofpv8i8v8float
+  ; SSE2: cost of 15 {{.*}} uitofp
+  %1 = uitofp <8 x i8> %a to <8 x float>
+  ret <8 x float> %1
+}
+
+define <16 x float> @uitofpv16i8v16float(<16 x i8> %a) {
+  ; SSE2: uitofpv16i8v16float
+  ; SSE2: cost of 8 {{.*}} uitofp
+  %1 = uitofp <16 x i8> %a to <16 x float>
+  ret <16 x float> %1
+}
+
+define <32 x float> @uitofpv32i8v32float(<32 x i8> %a) {
+  ; SSE2: uitofpv32i8v32float
+  ; SSE2: cost of 16 {{.*}} uitofp
+  %1 = uitofp <32 x i8> %a to <32 x float>
+  ret <32 x float> %1
+}
+
+define <2 x float> @uitofpv2i16v2float(<2 x i16> %a) {
+  ; SSE2: uitofpv2i16v2float
+  ; SSE2: cost of 15 {{.*}} uitofp
+  %1 = uitofp <2 x i16> %a to <2 x float>
+  ret <2 x float> %1
+}
+
+define <4 x float> @uitofpv4i16v4float(<4 x i16> %a) {
+  ; SSE2: uitofpv4i16v4float
+  ; SSE2: cost of 15 {{.*}} uitofp
+  %1 = uitofp <4 x i16> %a to <4 x float>
+  ret <4 x float> %1
+}
+
+define <8 x float> @uitofpv8i16v8float(<8 x i16> %a) {
+  ; SSE2: uitofpv8i16v8float
+  ; SSE2: cost of 15 {{.*}} uitofp
+  %1 = uitofp <8 x i16> %a to <8 x float>
+  ret <8 x float> %1
+}
+
+define <16 x float> @uitofpv16i16v16float(<16 x i16> %a) {
+  ; SSE2: uitofpv16i16v16float
+  ; SSE2: cost of 30 {{.*}} uitofp
+  %1 = uitofp <16 x i16> %a to <16 x float>
+  ret <16 x float> %1
+}
+
+define <32 x float> @uitofpv32i16v32float(<32 x i16> %a) {
+  ; SSE2: uitofpv32i16v32float
+  ; SSE2: cost of 60 {{.*}} uitofp
+  %1 = uitofp <32 x i16> %a to <32 x float>
+  ret <32 x float> %1
+}
+
+define <2 x float> @uitofpv2i32v2float(<2 x i32> %a) {
+  ; SSE2: uitofpv2i32v2float
+  ; SSE2: cost of 15 {{.*}} uitofp
+  %1 = uitofp <2 x i32> %a to <2 x float>
+  ret <2 x float> %1
+}
+
+define <4 x float> @uitofpv4i32v4float(<4 x i32> %a) {
+  ; SSE2: uitofpv4i32v4float
+  ; SSE2: cost of 15 {{.*}} uitofp
+  %1 = uitofp <4 x i32> %a to <4 x float>
+  ret <4 x float> %1
+}
+
+define <8 x float> @uitofpv8i32v8float(<8 x i32> %a) {
+  ; SSE2: uitofpv8i32v8float
+  ; SSE2: cost of 30 {{.*}} uitofp
+  %1 = uitofp <8 x i32> %a to <8 x float>
+  ret <8 x float> %1
+}
+
+define <16 x float> @uitofpv16i32v16float(<16 x i32> %a) {
+  ; SSE2: uitofpv16i32v16float
+  ; SSE2: cost of 60 {{.*}} uitofp
+  %1 = uitofp <16 x i32> %a to <16 x float>
+  ret <16 x float> %1
+}
+
+define <32 x float> @uitofpv32i32v32float(<32 x i32> %a) {
+  ; SSE2: uitofpv32i32v32float
+  ; SSE2: cost of 120 {{.*}} uitofp
+  %1 = uitofp <32 x i32> %a to <32 x float>
+  ret <32 x float> %1
+}
+
+define <2 x float> @uitofpv2i64v2float(<2 x i64> %a) {
+  ; SSE2: uitofpv2i64v2float
+  ; SSE2: cost of 15 {{.*}} uitofp
+  %1 = uitofp <2 x i64> %a to <2 x float>
+  ret <2 x float> %1
+}
+
+define <4 x float> @uitofpv4i64v4float(<4 x i64> %a) {
+  ; SSE2: uitofpv4i64v4float
+  ; SSE2: cost of 30 {{.*}} uitofp
+  %1 = uitofp <4 x i64> %a to <4 x float>
+  ret <4 x float> %1
+}
+
+define <8 x float> @uitofpv8i64v8float(<8 x i64> %a) {
+  ; SSE2: uitofpv8i64v8float
+  ; SSE2: cost of 60 {{.*}} uitofp
+  %1 = uitofp <8 x i64> %a to <8 x float>
+  ret <8 x float> %1
+}
+
+define <16 x float> @uitofpv16i64v16float(<16 x i64> %a) {
+  ; SSE2: uitofpv16i64v16float
+  ; SSE2: cost of 120 {{.*}} uitofp
+  %1 = uitofp <16 x i64> %a to <16 x float>
+  ret <16 x float> %1
+}
+
+define <32 x float> @uitofpv32i64v32float(<32 x i64> %a) {
+  ; SSE2: uitofpv32i64v32float
+  ; SSE2: cost of 240 {{.*}} uitofp
+  %1 = uitofp <32 x i64> %a to <32 x float>
+  ret <32 x float> %1
+}
+
diff --git a/test/Analysis/CostModel/X86/vectorized-loop.ll b/test/Analysis/CostModel/X86/vectorized-loop.ll
index 25b1114..af7d1df 100644
--- a/test/Analysis/CostModel/X86/vectorized-loop.ll
+++ b/test/Analysis/CostModel/X86/vectorized-loop.ll
@@ -54,14 +54,14 @@ for.body:                                         ; preds = %middle.block, %for.
   %13 = add nsw i64 %indvars.iv, 2
   %arrayidx = getelementptr inbounds i32* %B, i64 %13
   ;CHECK: cost of 1 {{.*}} load
-  %14 = load i32* %arrayidx, align 4, !tbaa !0
+  %14 = load i32* %arrayidx, align 4
   ;CHECK: cost of 1 {{.*}} mul
   %mul = mul nsw i32 %14, 5
   %arrayidx2 = getelementptr inbounds i32* %A, i64 %indvars.iv
   ;CHECK: cost of 1 {{.*}} load
-  %15 = load i32* %arrayidx2, align 4, !tbaa !0
+  %15 = load i32* %arrayidx2, align 4
   %add3 = add nsw i32 %15, %mul
-  store i32 %add3, i32* %arrayidx2, align 4, !tbaa !0
+  store i32 %add3, i32* %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   ;CHECK: cost of 0 {{.*}} trunc
   %16 = trunc i64 %indvars.iv.next to i32
@@ -73,7 +73,3 @@ for.end:                                          ; preds = %middle.block, %for.
   ;CHECK: cost of 0 {{.*}} ret
   ret i32 undef
 }
-
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/Analysis/GlobalsModRef/volatile-instrs.ll b/test/Analysis/GlobalsModRef/volatile-instrs.ll
index 49bce67..46d3d76 100644
--- a/test/Analysis/GlobalsModRef/volatile-instrs.ll
+++ b/test/Analysis/GlobalsModRef/volatile-instrs.ll
@@ -22,13 +22,9 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32,
 define i32 @main() nounwind uwtable ssp {
 main_entry:
   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast (%struct.anon* @b to i8*), i8* bitcast (%struct.anon* @a to i8*), i64 12, i32 4, i1 false)
-  %0 = load volatile i32* getelementptr inbounds (%struct.anon* @b, i64 0, i32 0), align 4, !tbaa !0
-  store i32 %0, i32* @c, align 4, !tbaa !0
+  %0 = load volatile i32* getelementptr inbounds (%struct.anon* @b, i64 0, i32 0), align 4
+  store i32 %0, i32* @c, align 4
   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast (%struct.anon* @b to i8*), i8* bitcast (%struct.anon* @a to i8*), i64 12, i32 4, i1 false) nounwind
   %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %0) nounwind
   ret i32 0
 }
-
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/Analysis/MemoryDependenceAnalysis/lit.local.cfg b/test/Analysis/MemoryDependenceAnalysis/lit.local.cfg
new file mode 100644
index 0000000..c6106e4
--- /dev/null
+++ b/test/Analysis/MemoryDependenceAnalysis/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll']
diff --git a/test/Analysis/MemoryDependenceAnalysis/memdep_requires_dominator_tree.ll b/test/Analysis/MemoryDependenceAnalysis/memdep_requires_dominator_tree.ll
new file mode 100644
index 0000000..3c95770
--- /dev/null
+++ b/test/Analysis/MemoryDependenceAnalysis/memdep_requires_dominator_tree.ll
@@ -0,0 +1,19 @@
+; RUN: opt -memdep -gvn < %s
+
+define void @__memdep_requires_dominator_tree(i32* nocapture %bufUInt, i32* nocapture %pattern) nounwind {
+entry:
+  br label %for.body
+
+for.exit:                                         ; preds = %for.body
+  ret void
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.01 = phi i32 [ 0, %entry ], [ %tmp8.7, %for.body ]
+  %arrayidx = getelementptr i32* %bufUInt, i32 %i.01
+  %arrayidx5 = getelementptr i32* %pattern, i32 %i.01
+  %tmp6 = load i32* %arrayidx5, align 4
+  store i32 %tmp6, i32* %arrayidx, align 4
+  %tmp8.7 = add i32 %i.01, 8
+  %cmp.7 = icmp ult i32 %tmp8.7, 1024
+  br i1 %cmp.7, label %for.body, label %for.exit
+}
diff --git a/test/Analysis/Profiling/lit.local.cfg b/test/Analysis/Profiling/lit.local.cfg
index 444b7dc..d40fa4f 100644
--- a/test/Analysis/Profiling/lit.local.cfg
+++ b/test/Analysis/Profiling/lit.local.cfg
@@ -7,10 +7,5 @@ def getRoot(config):
 
 root = getRoot(config)
 
-# Most profiling tests rely on a JIT being present to gather their data; AArch64
-# doesn't have any JIT at present so they will fail when run there.
-if root.host_arch in ['AArch64']:
-    config.unsupported = True
-
 if 'hexagon' in root.target_triple:
     config.unsupported = True
diff --git a/test/Analysis/RegionInfo/unreachable_bb.ll b/test/Analysis/RegionInfo/unreachable_bb.ll
new file mode 100644
index 0000000..626ccbe
--- /dev/null
+++ b/test/Analysis/RegionInfo/unreachable_bb.ll
@@ -0,0 +1,29 @@
+; RUN: opt -regions -analyze < %s | FileCheck %s
+
+; We should not crash if there are some bbs that are not reachable.
+define void @f() {
+entry:
+  br label %for.pre
+
+notintree:                                        ; No predecessors!
+  br label %ret
+
+for.pre:                                          ; preds = %entry
+  br label %for
+
+for:                                              ; preds = %for.inc, %for.pre
+  %indvar = phi i64 [ 0, %for.pre ], [ %indvar.next, %for.inc ]
+  %exitcond = icmp ne i64 %indvar, 200
+  br i1 %exitcond, label %for.inc, label %ret
+
+for.inc:                                          ; preds = %for
+  %indvar.next = add i64 %indvar, 1
+  br label %for
+
+ret:                                              ; preds = %for, %notintree
+  ret void
+}
+
+; CHECK: [0] entry => <Function Return>
+; CHECK:   [1] for => ret
+
diff --git a/test/Analysis/ScalarEvolution/2012-03-26-LoadConstant.ll b/test/Analysis/ScalarEvolution/2012-03-26-LoadConstant.ll
index 138c015..b88e33f 100644
--- a/test/Analysis/ScalarEvolution/2012-03-26-LoadConstant.ll
+++ b/test/Analysis/ScalarEvolution/2012-03-26-LoadConstant.ll
@@ -15,24 +15,24 @@ entry:
 
 lbl_818:                                          ; preds = %for.end, %entry
   call void (...)* @func_27()
-  store i32 0, i32* @g_814, align 4, !tbaa !0
+  store i32 0, i32* @g_814, align 4
   br label %for.cond
 
 for.cond:                                         ; preds = %for.body, %lbl_818
-  %0 = load i32* @g_814, align 4, !tbaa !0
+  %0 = load i32* @g_814, align 4
   %cmp = icmp sle i32 %0, 0
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
   %idxprom = sext i32 %0 to i64
   %arrayidx = getelementptr inbounds [0 x i32]* getelementptr inbounds ([1 x [0 x i32]]* @g_244, i32 0, i64 0), i32 0, i64 %idxprom
-  %1 = load i32* %arrayidx, align 1, !tbaa !0
-  store i32 %1, i32* @func_21_l_773, align 4, !tbaa !0
-  store i32 1, i32* @g_814, align 4, !tbaa !0
+  %1 = load i32* %arrayidx, align 1
+  store i32 %1, i32* @func_21_l_773, align 4
+  store i32 1, i32* @g_814, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %2 = load i32* @func_21_l_773, align 4, !tbaa !0
+  %2 = load i32* @func_21_l_773, align 4
   %tobool = icmp ne i32 %2, 0
   br i1 %tobool, label %lbl_818, label %if.end
 
@@ -41,7 +41,3 @@ if.end:                                           ; preds = %for.end
 }
 
 declare void @func_27(...)
-
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/Analysis/TypeBasedAliasAnalysis/tbaa-path.ll b/test/Analysis/TypeBasedAliasAnalysis/tbaa-path.ll
new file mode 100644
index 0000000..ee52763
--- /dev/null
+++ b/test/Analysis/TypeBasedAliasAnalysis/tbaa-path.ll
@@ -0,0 +1,392 @@
+; RUN: opt < %s -tbaa -basicaa -struct-path-tbaa -aa-eval -evaluate-tbaa -print-no-aliases -print-may-aliases -disable-output 2>&1 | FileCheck %s
+; RUN: opt < %s -tbaa -basicaa -struct-path-tbaa -gvn -S | FileCheck %s --check-prefix=OPT
+; Generated from clang/test/CodeGen/tbaa.cpp with "-O1 -struct-path-tbaa -disable-llvm-optzns".
+
+%struct.StructA = type { i16, i32, i16, i32 }
+%struct.StructB = type { i16, %struct.StructA, i32 }
+%struct.StructS = type { i16, i32 }
+%struct.StructS2 = type { i16, i32 }
+%struct.StructC = type { i16, %struct.StructB, i32 }
+%struct.StructD = type { i16, %struct.StructB, i32, i8 }
+
+define i32 @_Z1gPjP7StructAy(i32* %s, %struct.StructA* %A, i64 %count) #0 {
+entry:
+; Access to i32* and &(A->f32).
+; CHECK: Function
+; CHECK: MayAlias:   store i32 4, i32* %f32, align 4, !tbaa !8 <->   store i32 1, i32* %0, align 4, !tbaa !6
+; OPT: define
+; OPT: store i32 1
+; OPT: store i32 4
+; OPT: %[[RET:.*]] = load i32*
+; OPT: ret i32 %[[RET]]
+  %s.addr = alloca i32*, align 8
+  %A.addr = alloca %struct.StructA*, align 8
+  %count.addr = alloca i64, align 8
+  store i32* %s, i32** %s.addr, align 8, !tbaa !0
+  store %struct.StructA* %A, %struct.StructA** %A.addr, align 8, !tbaa !0
+  store i64 %count, i64* %count.addr, align 8, !tbaa !4
+  %0 = load i32** %s.addr, align 8, !tbaa !0
+  store i32 1, i32* %0, align 4, !tbaa !6
+  %1 = load %struct.StructA** %A.addr, align 8, !tbaa !0
+  %f32 = getelementptr inbounds %struct.StructA* %1, i32 0, i32 1
+  store i32 4, i32* %f32, align 4, !tbaa !8
+  %2 = load i32** %s.addr, align 8, !tbaa !0
+  %3 = load i32* %2, align 4, !tbaa !6
+  ret i32 %3
+}
+
+define i32 @_Z2g2PjP7StructAy(i32* %s, %struct.StructA* %A, i64 %count) #0 {
+entry:
+; Access to i32* and &(A->f16).
+; CHECK: Function
+; CHECK: NoAlias:   store i16 4, i16* %f16, align 2, !tbaa !8 <->   store i32 1, i32* %0, align 4, !tbaa !6
+; OPT: define
+; OPT: store i32 1
+; OPT: store i16 4
+; Remove a load and propagate the value from store.
+; OPT: ret i32 1
+  %s.addr = alloca i32*, align 8
+  %A.addr = alloca %struct.StructA*, align 8
+  %count.addr = alloca i64, align 8
+  store i32* %s, i32** %s.addr, align 8, !tbaa !0
+  store %struct.StructA* %A, %struct.StructA** %A.addr, align 8, !tbaa !0
+  store i64 %count, i64* %count.addr, align 8, !tbaa !4
+  %0 = load i32** %s.addr, align 8, !tbaa !0
+  store i32 1, i32* %0, align 4, !tbaa !6
+  %1 = load %struct.StructA** %A.addr, align 8, !tbaa !0
+  %f16 = getelementptr inbounds %struct.StructA* %1, i32 0, i32 0
+  store i16 4, i16* %f16, align 2, !tbaa !11
+  %2 = load i32** %s.addr, align 8, !tbaa !0
+  %3 = load i32* %2, align 4, !tbaa !6
+  ret i32 %3
+}
+
+define i32 @_Z2g3P7StructAP7StructBy(%struct.StructA* %A, %struct.StructB* %B, i64 %count) #0 {
+entry:
+; Access to &(A->f32) and &(B->a.f32).
+; CHECK: Function
+; CHECK: MayAlias:   store i32 4, i32* %f321, align 4, !tbaa !10 <->   store i32 1, i32* %f32, align 4, !tbaa !6
+; OPT: define
+; OPT: store i32 1
+; OPT: store i32 4
+; OPT: %[[RET:.*]] = load i32*
+; OPT: ret i32 %[[RET]]
+  %A.addr = alloca %struct.StructA*, align 8
+  %B.addr = alloca %struct.StructB*, align 8
+  %count.addr = alloca i64, align 8
+  store %struct.StructA* %A, %struct.StructA** %A.addr, align 8, !tbaa !0
+  store %struct.StructB* %B, %struct.StructB** %B.addr, align 8, !tbaa !0
+  store i64 %count, i64* %count.addr, align 8, !tbaa !4
+  %0 = load %struct.StructA** %A.addr, align 8, !tbaa !0
+  %f32 = getelementptr inbounds %struct.StructA* %0, i32 0, i32 1
+  store i32 1, i32* %f32, align 4, !tbaa !8
+  %1 = load %struct.StructB** %B.addr, align 8, !tbaa !0
+  %a = getelementptr inbounds %struct.StructB* %1, i32 0, i32 1
+  %f321 = getelementptr inbounds %struct.StructA* %a, i32 0, i32 1
+  store i32 4, i32* %f321, align 4, !tbaa !12
+  %2 = load %struct.StructA** %A.addr, align 8, !tbaa !0
+  %f322 = getelementptr inbounds %struct.StructA* %2, i32 0, i32 1
+  %3 = load i32* %f322, align 4, !tbaa !8
+  ret i32 %3
+}
+
+define i32 @_Z2g4P7StructAP7StructBy(%struct.StructA* %A, %struct.StructB* %B, i64 %count) #0 {
+entry:
+; Access to &(A->f32) and &(B->a.f16).
+; CHECK: Function
+; CHECK: NoAlias:   store i16 4, i16* %f16, align 2, !tbaa !10 <->   store i32 1, i32* %f32, align 4, !tbaa !6
+; OPT: define
+; OPT: store i32 1
+; OPT: store i16 4
+; Remove a load and propagate the value from store.
+; OPT: ret i32 1
+  %A.addr = alloca %struct.StructA*, align 8
+  %B.addr = alloca %struct.StructB*, align 8
+  %count.addr = alloca i64, align 8
+  store %struct.StructA* %A, %struct.StructA** %A.addr, align 8, !tbaa !0
+  store %struct.StructB* %B, %struct.StructB** %B.addr, align 8, !tbaa !0
+  store i64 %count, i64* %count.addr, align 8, !tbaa !4
+  %0 = load %struct.StructA** %A.addr, align 8, !tbaa !0
+  %f32 = getelementptr inbounds %struct.StructA* %0, i32 0, i32 1
+  store i32 1, i32* %f32, align 4, !tbaa !8
+  %1 = load %struct.StructB** %B.addr, align 8, !tbaa !0
+  %a = getelementptr inbounds %struct.StructB* %1, i32 0, i32 1
+  %f16 = getelementptr inbounds %struct.StructA* %a, i32 0, i32 0
+  store i16 4, i16* %f16, align 2, !tbaa !14
+  %2 = load %struct.StructA** %A.addr, align 8, !tbaa !0
+  %f321 = getelementptr inbounds %struct.StructA* %2, i32 0, i32 1
+  %3 = load i32* %f321, align 4, !tbaa !8
+  ret i32 %3
+}
+
+define i32 @_Z2g5P7StructAP7StructBy(%struct.StructA* %A, %struct.StructB* %B, i64 %count) #0 {
+entry:
+; Access to &(A->f32) and &(B->f32).
+; CHECK: Function
+; CHECK: NoAlias:   store i32 4, i32* %f321, align 4, !tbaa !10 <->   store i32 1, i32* %f32, align 4, !tbaa !6
+; OPT: define
+; OPT: store i32 1
+; OPT: store i32 4
+; Remove a load and propagate the value from store.
+; OPT: ret i32 1
+  %A.addr = alloca %struct.StructA*, align 8
+  %B.addr = alloca %struct.StructB*, align 8
+  %count.addr = alloca i64, align 8
+  store %struct.StructA* %A, %struct.StructA** %A.addr, align 8, !tbaa !0
+  store %struct.StructB* %B, %struct.StructB** %B.addr, align 8, !tbaa !0
+  store i64 %count, i64* %count.addr, align 8, !tbaa !4
+  %0 = load %struct.StructA** %A.addr, align 8, !tbaa !0
+  %f32 = getelementptr inbounds %struct.StructA* %0, i32 0, i32 1
+  store i32 1, i32* %f32, align 4, !tbaa !8
+  %1 = load %struct.StructB** %B.addr, align 8, !tbaa !0
+  %f321 = getelementptr inbounds %struct.StructB* %1, i32 0, i32 2
+  store i32 4, i32* %f321, align 4, !tbaa !15
+  %2 = load %struct.StructA** %A.addr, align 8, !tbaa !0
+  %f322 = getelementptr inbounds %struct.StructA* %2, i32 0, i32 1
+  %3 = load i32* %f322, align 4, !tbaa !8
+  ret i32 %3
+}
+
+define i32 @_Z2g6P7StructAP7StructBy(%struct.StructA* %A, %struct.StructB* %B, i64 %count) #0 {
+entry:
+; Access to &(A->f32) and &(B->a.f32_2).
+; CHECK: Function
+; CHECK: NoAlias:   store i32 4, i32* %f32_2, align 4, !tbaa !10 <->   store i32 1, i32* %f32, align 4, !tbaa !6
+; OPT: define
+; OPT: store i32 1
+; OPT: store i32 4
+; Remove a load and propagate the value from store.
+; OPT: ret i32 1
+  %A.addr = alloca %struct.StructA*, align 8
+  %B.addr = alloca %struct.StructB*, align 8
+  %count.addr = alloca i64, align 8
+  store %struct.StructA* %A, %struct.StructA** %A.addr, align 8, !tbaa !0
+  store %struct.StructB* %B, %struct.StructB** %B.addr, align 8, !tbaa !0
+  store i64 %count, i64* %count.addr, align 8, !tbaa !4
+  %0 = load %struct.StructA** %A.addr, align 8, !tbaa !0
+  %f32 = getelementptr inbounds %struct.StructA* %0, i32 0, i32 1
+  store i32 1, i32* %f32, align 4, !tbaa !8
+  %1 = load %struct.StructB** %B.addr, align 8, !tbaa !0
+  %a = getelementptr inbounds %struct.StructB* %1, i32 0, i32 1
+  %f32_2 = getelementptr inbounds %struct.StructA* %a, i32 0, i32 3
+  store i32 4, i32* %f32_2, align 4, !tbaa !16
+  %2 = load %struct.StructA** %A.addr, align 8, !tbaa !0
+  %f321 = getelementptr inbounds %struct.StructA* %2, i32 0, i32 1
+  %3 = load i32* %f321, align 4, !tbaa !8
+  ret i32 %3
+}
+
+define i32 @_Z2g7P7StructAP7StructSy(%struct.StructA* %A, %struct.StructS* %S, i64 %count) #0 {
+entry:
+; Access to &(A->f32) and &(S->f32).
+; CHECK: Function
+; CHECK: NoAlias:   store i32 4, i32* %f321, align 4, !tbaa !10 <->   store i32 1, i32* %f32, align 4, !tbaa !6
+; OPT: define
+; OPT: store i32 1
+; OPT: store i32 4
+; Remove a load and propagate the value from store.
+; OPT: ret i32 1
+  %A.addr = alloca %struct.StructA*, align 8
+  %S.addr = alloca %struct.StructS*, align 8
+  %count.addr = alloca i64, align 8
+  store %struct.StructA* %A, %struct.StructA** %A.addr, align 8, !tbaa !0
+  store %struct.StructS* %S, %struct.StructS** %S.addr, align 8, !tbaa !0
+  store i64 %count, i64* %count.addr, align 8, !tbaa !4
+  %0 = load %struct.StructA** %A.addr, align 8, !tbaa !0
+  %f32 = getelementptr inbounds %struct.StructA* %0, i32 0, i32 1
+  store i32 1, i32* %f32, align 4, !tbaa !8
+  %1 = load %struct.StructS** %S.addr, align 8, !tbaa !0
+  %f321 = getelementptr inbounds %struct.StructS* %1, i32 0, i32 1
+  store i32 4, i32* %f321, align 4, !tbaa !17
+  %2 = load %struct.StructA** %A.addr, align 8, !tbaa !0
+  %f322 = getelementptr inbounds %struct.StructA* %2, i32 0, i32 1
+  %3 = load i32* %f322, align 4, !tbaa !8
+  ret i32 %3
+}
+
+define i32 @_Z2g8P7StructAP7StructSy(%struct.StructA* %A, %struct.StructS* %S, i64 %count) #0 {
+entry:
+; Access to &(A->f32) and &(S->f16).
+; CHECK: Function
+; CHECK: NoAlias:   store i16 4, i16* %f16, align 2, !tbaa !10 <->   store i32 1, i32* %f32, align 4, !tbaa !6
+; OPT: define
+; OPT: store i32 1
+; OPT: store i16 4
+; Remove a load and propagate the value from store.
+; OPT: ret i32 1
+  %A.addr = alloca %struct.StructA*, align 8
+  %S.addr = alloca %struct.StructS*, align 8
+  %count.addr = alloca i64, align 8
+  store %struct.StructA* %A, %struct.StructA** %A.addr, align 8, !tbaa !0
+  store %struct.StructS* %S, %struct.StructS** %S.addr, align 8, !tbaa !0
+  store i64 %count, i64* %count.addr, align 8, !tbaa !4
+  %0 = load %struct.StructA** %A.addr, align 8, !tbaa !0
+  %f32 = getelementptr inbounds %struct.StructA* %0, i32 0, i32 1
+  store i32 1, i32* %f32, align 4, !tbaa !8
+  %1 = load %struct.StructS** %S.addr, align 8, !tbaa !0
+  %f16 = getelementptr inbounds %struct.StructS* %1, i32 0, i32 0
+  store i16 4, i16* %f16, align 2, !tbaa !19
+  %2 = load %struct.StructA** %A.addr, align 8, !tbaa !0
+  %f321 = getelementptr inbounds %struct.StructA* %2, i32 0, i32 1
+  %3 = load i32* %f321, align 4, !tbaa !8
+  ret i32 %3
+}
+
+define i32 @_Z2g9P7StructSP8StructS2y(%struct.StructS* %S, %struct.StructS2* %S2, i64 %count) #0 {
+entry:
+; Access to &(S->f32) and &(S2->f32).
+; CHECK: Function
+; CHECK: NoAlias:   store i32 4, i32* %f321, align 4, !tbaa !10 <->   store i32 1, i32* %f32, align 4, !tbaa !6
+; OPT: define
+; OPT: store i32 1
+; OPT: store i32 4
+; Remove a load and propagate the value from store.
+; OPT: ret i32 1
+  %S.addr = alloca %struct.StructS*, align 8
+  %S2.addr = alloca %struct.StructS2*, align 8
+  %count.addr = alloca i64, align 8
+  store %struct.StructS* %S, %struct.StructS** %S.addr, align 8, !tbaa !0
+  store %struct.StructS2* %S2, %struct.StructS2** %S2.addr, align 8, !tbaa !0
+  store i64 %count, i64* %count.addr, align 8, !tbaa !4
+  %0 = load %struct.StructS** %S.addr, align 8, !tbaa !0
+  %f32 = getelementptr inbounds %struct.StructS* %0, i32 0, i32 1
+  store i32 1, i32* %f32, align 4, !tbaa !17
+  %1 = load %struct.StructS2** %S2.addr, align 8, !tbaa !0
+  %f321 = getelementptr inbounds %struct.StructS2* %1, i32 0, i32 1
+  store i32 4, i32* %f321, align 4, !tbaa !20
+  %2 = load %struct.StructS** %S.addr, align 8, !tbaa !0
+  %f322 = getelementptr inbounds %struct.StructS* %2, i32 0, i32 1
+  %3 = load i32* %f322, align 4, !tbaa !17
+  ret i32 %3
+}
+
+define i32 @_Z3g10P7StructSP8StructS2y(%struct.StructS* %S, %struct.StructS2* %S2, i64 %count) #0 {
+entry:
+; Access to &(S->f32) and &(S2->f16).
+; CHECK: Function
+; CHECK: NoAlias:   store i16 4, i16* %f16, align 2, !tbaa !10 <->   store i32 1, i32* %f32, align 4, !tbaa !6
+; OPT: define
+; OPT: store i32 1
+; OPT: store i16 4
+; Remove a load and propagate the value from store.
+; OPT: ret i32 1
+  %S.addr = alloca %struct.StructS*, align 8
+  %S2.addr = alloca %struct.StructS2*, align 8
+  %count.addr = alloca i64, align 8
+  store %struct.StructS* %S, %struct.StructS** %S.addr, align 8, !tbaa !0
+  store %struct.StructS2* %S2, %struct.StructS2** %S2.addr, align 8, !tbaa !0
+  store i64 %count, i64* %count.addr, align 8, !tbaa !4
+  %0 = load %struct.StructS** %S.addr, align 8, !tbaa !0
+  %f32 = getelementptr inbounds %struct.StructS* %0, i32 0, i32 1
+  store i32 1, i32* %f32, align 4, !tbaa !17
+  %1 = load %struct.StructS2** %S2.addr, align 8, !tbaa !0
+  %f16 = getelementptr inbounds %struct.StructS2* %1, i32 0, i32 0
+  store i16 4, i16* %f16, align 2, !tbaa !22
+  %2 = load %struct.StructS** %S.addr, align 8, !tbaa !0
+  %f321 = getelementptr inbounds %struct.StructS* %2, i32 0, i32 1
+  %3 = load i32* %f321, align 4, !tbaa !17
+  ret i32 %3
+}
+
+define i32 @_Z3g11P7StructCP7StructDy(%struct.StructC* %C, %struct.StructD* %D, i64 %count) #0 {
+entry:
+; Access to &(C->b.a.f32) and &(D->b.a.f32).
+; CHECK: Function
+; CHECK: NoAlias:   store i32 4, i32* %f323, align 4, !tbaa !12 <->   store i32 1, i32* %f32, align 4, !tbaa !6
+; OPT: define
+; OPT: store i32 1
+; OPT: store i32 4
+; Remove a load and propagate the value from store.
+; OPT: ret i32 1
+  %C.addr = alloca %struct.StructC*, align 8
+  %D.addr = alloca %struct.StructD*, align 8
+  %count.addr = alloca i64, align 8
+  store %struct.StructC* %C, %struct.StructC** %C.addr, align 8, !tbaa !0
+  store %struct.StructD* %D, %struct.StructD** %D.addr, align 8, !tbaa !0
+  store i64 %count, i64* %count.addr, align 8, !tbaa !4
+  %0 = load %struct.StructC** %C.addr, align 8, !tbaa !0
+  %b = getelementptr inbounds %struct.StructC* %0, i32 0, i32 1
+  %a = getelementptr inbounds %struct.StructB* %b, i32 0, i32 1
+  %f32 = getelementptr inbounds %struct.StructA* %a, i32 0, i32 1
+  store i32 1, i32* %f32, align 4, !tbaa !23
+  %1 = load %struct.StructD** %D.addr, align 8, !tbaa !0
+  %b1 = getelementptr inbounds %struct.StructD* %1, i32 0, i32 1
+  %a2 = getelementptr inbounds %struct.StructB* %b1, i32 0, i32 1
+  %f323 = getelementptr inbounds %struct.StructA* %a2, i32 0, i32 1
+  store i32 4, i32* %f323, align 4, !tbaa !25
+  %2 = load %struct.StructC** %C.addr, align 8, !tbaa !0
+  %b4 = getelementptr inbounds %struct.StructC* %2, i32 0, i32 1
+  %a5 = getelementptr inbounds %struct.StructB* %b4, i32 0, i32 1
+  %f326 = getelementptr inbounds %struct.StructA* %a5, i32 0, i32 1
+  %3 = load i32* %f326, align 4, !tbaa !23
+  ret i32 %3
+}
+
+define i32 @_Z3g12P7StructCP7StructDy(%struct.StructC* %C, %struct.StructD* %D, i64 %count) #0 {
+entry:
+; Access to &(b1->a.f32) and &(b2->a.f32).
+; CHECK: Function
+; CHECK: MayAlias:   store i32 4, i32* %f325, align 4, !tbaa !6 <->   store i32 1, i32* %f32, align 4, !tbaa !6
+; OPT: define
+; OPT: store i32 1
+; OPT: store i32 4
+; OPT: %[[RET:.*]] = load i32*
+; OPT: ret i32 %[[RET]]
+  %C.addr = alloca %struct.StructC*, align 8
+  %D.addr = alloca %struct.StructD*, align 8
+  %count.addr = alloca i64, align 8
+  %b1 = alloca %struct.StructB*, align 8
+  %b2 = alloca %struct.StructB*, align 8
+  store %struct.StructC* %C, %struct.StructC** %C.addr, align 8, !tbaa !0
+  store %struct.StructD* %D, %struct.StructD** %D.addr, align 8, !tbaa !0
+  store i64 %count, i64* %count.addr, align 8, !tbaa !4
+  %0 = load %struct.StructC** %C.addr, align 8, !tbaa !0
+  %b = getelementptr inbounds %struct.StructC* %0, i32 0, i32 1
+  store %struct.StructB* %b, %struct.StructB** %b1, align 8, !tbaa !0
+  %1 = load %struct.StructD** %D.addr, align 8, !tbaa !0
+  %b3 = getelementptr inbounds %struct.StructD* %1, i32 0, i32 1
+  store %struct.StructB* %b3, %struct.StructB** %b2, align 8, !tbaa !0
+  %2 = load %struct.StructB** %b1, align 8, !tbaa !0
+  %a = getelementptr inbounds %struct.StructB* %2, i32 0, i32 1
+  %f32 = getelementptr inbounds %struct.StructA* %a, i32 0, i32 1
+  store i32 1, i32* %f32, align 4, !tbaa !12
+  %3 = load %struct.StructB** %b2, align 8, !tbaa !0
+  %a4 = getelementptr inbounds %struct.StructB* %3, i32 0, i32 1
+  %f325 = getelementptr inbounds %struct.StructA* %a4, i32 0, i32 1
+  store i32 4, i32* %f325, align 4, !tbaa !12
+  %4 = load %struct.StructB** %b1, align 8, !tbaa !0
+  %a6 = getelementptr inbounds %struct.StructB* %4, i32 0, i32 1
+  %f327 = getelementptr inbounds %struct.StructA* %a6, i32 0, i32 1
+  %5 = load i32* %f327, align 4, !tbaa !12
+  ret i32 %5
+}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!0 = metadata !{metadata !1, metadata !1, i64 0}
+!1 = metadata !{metadata !"any pointer", metadata !2}
+!2 = metadata !{metadata !"omnipotent char", metadata !3}
+!3 = metadata !{metadata !"Simple C/C++ TBAA"}
+!4 = metadata !{metadata !5, metadata !5, i64 0}
+!5 = metadata !{metadata !"long long", metadata !2}
+!6 = metadata !{metadata !7, metadata !7, i64 0}
+!7 = metadata !{metadata !"int", metadata !2}
+!8 = metadata !{metadata !9, metadata !7, i64 4}
+!9 = metadata !{metadata !"_ZTS7StructA", metadata !10, i64 0, metadata !7, i64 4, metadata !10, i64 8, metadata !7, i64 12}
+!10 = metadata !{metadata !"short", metadata !2}
+!11 = metadata !{metadata !9, metadata !10, i64 0}
+!12 = metadata !{metadata !13, metadata !7, i64 8}
+!13 = metadata !{metadata !"_ZTS7StructB", metadata !10, i64 0, metadata !9, i64 4, metadata !7, i64 20}
+!14 = metadata !{metadata !13, metadata !10, i64 4}
+!15 = metadata !{metadata !13, metadata !7, i64 20}
+!16 = metadata !{metadata !13, metadata !7, i64 16}
+!17 = metadata !{metadata !18, metadata !7, i64 4}
+!18 = metadata !{metadata !"_ZTS7StructS", metadata !10, i64 0, metadata !7, i64 4}
+!19 = metadata !{metadata !18, metadata !10, i64 0}
+!20 = metadata !{metadata !21, metadata !7, i64 4}
+!21 = metadata !{metadata !"_ZTS8StructS2", metadata !10, i64 0, metadata !7, i64 4}
+!22 = metadata !{metadata !21, metadata !10, i64 0}
+!23 = metadata !{metadata !24, metadata !7, i64 12}
+!24 = metadata !{metadata !"_ZTS7StructC", metadata !10, i64 0, metadata !13, i64 4, metadata !7, i64 28}
+!25 = metadata !{metadata !26, metadata !7, i64 12}
+!26 = metadata !{metadata !"_ZTS7StructD", metadata !10, i64 0, metadata !13, i64 4, metadata !7, i64 28, metadata !2, i64 32}
diff --git a/test/CodeGen/AArch64/adrp-relocation.ll b/test/CodeGen/AArch64/adrp-relocation.ll
index c33b442..cf41116 100644
--- a/test/CodeGen/AArch64/adrp-relocation.ll
+++ b/test/CodeGen/AArch64/adrp-relocation.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -filetype=obj < %s | elf-dump | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -filetype=obj < %s | llvm-readobj -s -r | FileCheck %s
 
 define i64 @testfn() nounwind {
 entry:
@@ -19,17 +19,9 @@ entry:
 ; relative offsets of testfn and foo) because its value depends on where this
 ; object file's .text section gets relocated in memory.
 
-; CHECK: .rela.text
-
-; CHECK: # Relocation 0
-; CHECK-NEXT: (('r_offset', 0x0000000000000010)
-; CHECK-NEXT:  ('r_sym', 0x00000007)
-; CHECK-NEXT:  ('r_type', 0x00000113)
-; CHECK-NEXT:  ('r_addend', 0x0000000000000000)
-; CHECK-NEXT: ),
-; CHECK-NEXT:  Relocation 1
-; CHECK-NEXT: (('r_offset', 0x0000000000000014)
-; CHECK-NEXT:  ('r_sym', 0x00000007)
-; CHECK-NEXT:  ('r_type', 0x00000115)
-; CHECK-NEXT:  ('r_addend', 0x0000000000000000)
-; CHECK-NEXT: ),
+; CHECK:      Relocations [
+; CHECK-NEXT:   Section (1) .text {
+; CHECK-NEXT:     0x10 R_AARCH64_ADR_PREL_PG_HI21 testfn 0x0
+; CHECK-NEXT:     0x14 R_AARCH64_ADD_ABS_LO12_NC testfn 0x0
+; CHECK-NEXT:   }
+; CHECK-NEXT: ]
diff --git a/test/CodeGen/AArch64/atomic-ops-not-barriers.ll b/test/CodeGen/AArch64/atomic-ops-not-barriers.ll
index 3c03e47..9888a74 100644
--- a/test/CodeGen/AArch64/atomic-ops-not-barriers.ll
+++ b/test/CodeGen/AArch64/atomic-ops-not-barriers.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
 
 define i32 @foo(i32* %var, i1 %cond) {
 ; CHECK: foo:
@@ -9,7 +9,9 @@ simple_ver:
   store i32 %newval, i32* %var
   br label %somewhere
 atomic_ver:
-  %val = atomicrmw add i32* %var, i32 -1 seq_cst
+  fence seq_cst
+  %val = atomicrmw add i32* %var, i32 -1 monotonic
+  fence seq_cst
   br label %somewhere
 ; CHECK: dmb
 ; CHECK: ldxr
diff --git a/test/CodeGen/AArch64/atomic-ops.ll b/test/CodeGen/AArch64/atomic-ops.ll
index f3c1617..5e87f21 100644
--- a/test/CodeGen/AArch64/atomic-ops.ll
+++ b/test/CodeGen/AArch64/atomic-ops.ll
@@ -8,18 +8,18 @@
 define i8 @test_atomic_load_add_i8(i8 %offset) nounwind {
 ; CHECK: test_atomic_load_add_i8:
    %old = atomicrmw add i8* @var8, i8 %offset seq_cst
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK-NEXT: ldaxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: add [[NEW:w[0-9]+]], w[[OLD]], w0
-; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: stlxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i8 %old
@@ -27,19 +27,19 @@ define i8 @test_atomic_load_add_i8(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_add_i16(i16 %offset) nounwind {
 ; CHECK: test_atomic_load_add_i16:
-   %old = atomicrmw add i16* @var16, i16 %offset seq_cst
-; CHECK: dmb ish
+   %old = atomicrmw add i16* @var16, i16 %offset acquire
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK-NEXT: ldaxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: add [[NEW:w[0-9]+]], w[[OLD]], w0
 ; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i16 %old
@@ -47,8 +47,8 @@ define i16 @test_atomic_load_add_i16(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_add_i32(i32 %offset) nounwind {
 ; CHECK: test_atomic_load_add_i32:
-   %old = atomicrmw add i32* @var32, i32 %offset seq_cst
-; CHECK: dmb ish
+   %old = atomicrmw add i32* @var32, i32 %offset release
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
 
@@ -57,9 +57,9 @@ define i32 @test_atomic_load_add_i32(i32 %offset) nounwind {
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: add [[NEW:w[0-9]+]], w[[OLD]], w0
-; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i32 %old
@@ -67,8 +67,8 @@ define i32 @test_atomic_load_add_i32(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_add_i64(i64 %offset) nounwind {
 ; CHECK: test_atomic_load_add_i64:
-   %old = atomicrmw add i64* @var64, i64 %offset seq_cst
-; CHECK: dmb ish
+   %old = atomicrmw add i64* @var64, i64 %offset monotonic
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
 
@@ -79,7 +79,7 @@ define i64 @test_atomic_load_add_i64(i64 %offset) nounwind {
 ; CHECK-NEXT: add [[NEW:x[0-9]+]], x[[OLD]], x0
 ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i64 %old
@@ -87,8 +87,8 @@ define i64 @test_atomic_load_add_i64(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_sub_i8(i8 %offset) nounwind {
 ; CHECK: test_atomic_load_sub_i8:
-   %old = atomicrmw sub i8* @var8, i8 %offset seq_cst
-; CHECK: dmb ish
+   %old = atomicrmw sub i8* @var8, i8 %offset monotonic
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
 
@@ -99,7 +99,7 @@ define i8 @test_atomic_load_sub_i8(i8 %offset) nounwind {
 ; CHECK-NEXT: sub [[NEW:w[0-9]+]], w[[OLD]], w0
 ; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i8 %old
@@ -107,8 +107,8 @@ define i8 @test_atomic_load_sub_i8(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_sub_i16(i16 %offset) nounwind {
 ; CHECK: test_atomic_load_sub_i16:
-   %old = atomicrmw sub i16* @var16, i16 %offset seq_cst
-; CHECK: dmb ish
+   %old = atomicrmw sub i16* @var16, i16 %offset release
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
 
@@ -117,9 +117,9 @@ define i16 @test_atomic_load_sub_i16(i16 %offset) nounwind {
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: sub [[NEW:w[0-9]+]], w[[OLD]], w0
-; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: stlxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i16 %old
@@ -127,19 +127,19 @@ define i16 @test_atomic_load_sub_i16(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_sub_i32(i32 %offset) nounwind {
 ; CHECK: test_atomic_load_sub_i32:
-   %old = atomicrmw sub i32* @var32, i32 %offset seq_cst
-; CHECK: dmb ish
+   %old = atomicrmw sub i32* @var32, i32 %offset acquire
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK-NEXT: ldaxr w[[OLD:[0-9]+]], [x[[ADDR]]]
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: sub [[NEW:w[0-9]+]], w[[OLD]], w0
 ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i32 %old
@@ -148,18 +148,18 @@ define i32 @test_atomic_load_sub_i32(i32 %offset) nounwind {
 define i64 @test_atomic_load_sub_i64(i64 %offset) nounwind {
 ; CHECK: test_atomic_load_sub_i64:
    %old = atomicrmw sub i64* @var64, i64 %offset seq_cst
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK-NEXT: ldaxr x[[OLD:[0-9]+]], [x[[ADDR]]]
   ; x0 below is a reasonable guess but could change: it certainly comes into the
   ; function there.
 ; CHECK-NEXT: sub [[NEW:x[0-9]+]], x[[OLD]], x0
-; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i64 %old
@@ -167,8 +167,8 @@ define i64 @test_atomic_load_sub_i64(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_and_i8(i8 %offset) nounwind {
 ; CHECK: test_atomic_load_and_i8:
-   %old = atomicrmw and i8* @var8, i8 %offset seq_cst
-; CHECK: dmb ish
+   %old = atomicrmw and i8* @var8, i8 %offset release
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
 
@@ -177,9 +177,9 @@ define i8 @test_atomic_load_and_i8(i8 %offset) nounwind {
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: and [[NEW:w[0-9]+]], w[[OLD]], w0
-; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: stlxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i8 %old
@@ -187,8 +187,8 @@ define i8 @test_atomic_load_and_i8(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_and_i16(i16 %offset) nounwind {
 ; CHECK: test_atomic_load_and_i16:
-   %old = atomicrmw and i16* @var16, i16 %offset seq_cst
-; CHECK: dmb ish
+   %old = atomicrmw and i16* @var16, i16 %offset monotonic
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
 
@@ -199,7 +199,7 @@ define i16 @test_atomic_load_and_i16(i16 %offset) nounwind {
 ; CHECK-NEXT: and [[NEW:w[0-9]+]], w[[OLD]], w0
 ; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i16 %old
@@ -208,18 +208,18 @@ define i16 @test_atomic_load_and_i16(i16 %offset) nounwind {
 define i32 @test_atomic_load_and_i32(i32 %offset) nounwind {
 ; CHECK: test_atomic_load_and_i32:
    %old = atomicrmw and i32* @var32, i32 %offset seq_cst
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK-NEXT: ldaxr w[[OLD:[0-9]+]], [x[[ADDR]]]
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: and [[NEW:w[0-9]+]], w[[OLD]], w0
-; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i32 %old
@@ -227,19 +227,19 @@ define i32 @test_atomic_load_and_i32(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_and_i64(i64 %offset) nounwind {
 ; CHECK: test_atomic_load_and_i64:
-   %old = atomicrmw and i64* @var64, i64 %offset seq_cst
-; CHECK: dmb ish
+   %old = atomicrmw and i64* @var64, i64 %offset acquire
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK-NEXT: ldaxr x[[OLD:[0-9]+]], [x[[ADDR]]]
   ; x0 below is a reasonable guess but could change: it certainly comes into the
   ; function there.
 ; CHECK-NEXT: and [[NEW:x[0-9]+]], x[[OLD]], x0
 ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i64 %old
@@ -248,18 +248,18 @@ define i64 @test_atomic_load_and_i64(i64 %offset) nounwind {
 define i8 @test_atomic_load_or_i8(i8 %offset) nounwind {
 ; CHECK: test_atomic_load_or_i8:
    %old = atomicrmw or i8* @var8, i8 %offset seq_cst
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK-NEXT: ldaxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: orr [[NEW:w[0-9]+]], w[[OLD]], w0
-; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: stlxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i8 %old
@@ -267,8 +267,8 @@ define i8 @test_atomic_load_or_i8(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_or_i16(i16 %offset) nounwind {
 ; CHECK: test_atomic_load_or_i16:
-   %old = atomicrmw or i16* @var16, i16 %offset seq_cst
-; CHECK: dmb ish
+   %old = atomicrmw or i16* @var16, i16 %offset monotonic
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
 
@@ -279,7 +279,7 @@ define i16 @test_atomic_load_or_i16(i16 %offset) nounwind {
 ; CHECK-NEXT: orr [[NEW:w[0-9]+]], w[[OLD]], w0
 ; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i16 %old
@@ -287,19 +287,19 @@ define i16 @test_atomic_load_or_i16(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_or_i32(i32 %offset) nounwind {
 ; CHECK: test_atomic_load_or_i32:
-   %old = atomicrmw or i32* @var32, i32 %offset seq_cst
-; CHECK: dmb ish
+   %old = atomicrmw or i32* @var32, i32 %offset acquire
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK-NEXT: ldaxr w[[OLD:[0-9]+]], [x[[ADDR]]]
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: orr [[NEW:w[0-9]+]], w[[OLD]], w0
 ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i32 %old
@@ -307,8 +307,8 @@ define i32 @test_atomic_load_or_i32(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_or_i64(i64 %offset) nounwind {
 ; CHECK: test_atomic_load_or_i64:
-   %old = atomicrmw or i64* @var64, i64 %offset seq_cst
-; CHECK: dmb ish
+   %old = atomicrmw or i64* @var64, i64 %offset release
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
 
@@ -317,9 +317,9 @@ define i64 @test_atomic_load_or_i64(i64 %offset) nounwind {
   ; x0 below is a reasonable guess but could change: it certainly comes into the
   ; function there.
 ; CHECK-NEXT: orr [[NEW:x[0-9]+]], x[[OLD]], x0
-; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i64 %old
@@ -327,19 +327,19 @@ define i64 @test_atomic_load_or_i64(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_xor_i8(i8 %offset) nounwind {
 ; CHECK: test_atomic_load_xor_i8:
-   %old = atomicrmw xor i8* @var8, i8 %offset seq_cst
-; CHECK: dmb ish
+   %old = atomicrmw xor i8* @var8, i8 %offset acquire
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK-NEXT: ldaxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: eor [[NEW:w[0-9]+]], w[[OLD]], w0
 ; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i8 %old
@@ -347,8 +347,8 @@ define i8 @test_atomic_load_xor_i8(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_xor_i16(i16 %offset) nounwind {
 ; CHECK: test_atomic_load_xor_i16:
-   %old = atomicrmw xor i16* @var16, i16 %offset seq_cst
-; CHECK: dmb ish
+   %old = atomicrmw xor i16* @var16, i16 %offset release
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
 
@@ -357,9 +357,9 @@ define i16 @test_atomic_load_xor_i16(i16 %offset) nounwind {
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: eor [[NEW:w[0-9]+]], w[[OLD]], w0
-; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: stlxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i16 %old
@@ -368,18 +368,18 @@ define i16 @test_atomic_load_xor_i16(i16 %offset) nounwind {
 define i32 @test_atomic_load_xor_i32(i32 %offset) nounwind {
 ; CHECK: test_atomic_load_xor_i32:
    %old = atomicrmw xor i32* @var32, i32 %offset seq_cst
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK-NEXT: ldaxr w[[OLD:[0-9]+]], [x[[ADDR]]]
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: eor [[NEW:w[0-9]+]], w[[OLD]], w0
-; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i32 %old
@@ -387,8 +387,8 @@ define i32 @test_atomic_load_xor_i32(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_xor_i64(i64 %offset) nounwind {
 ; CHECK: test_atomic_load_xor_i64:
-   %old = atomicrmw xor i64* @var64, i64 %offset seq_cst
-; CHECK: dmb ish
+   %old = atomicrmw xor i64* @var64, i64 %offset monotonic
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
 
@@ -399,7 +399,7 @@ define i64 @test_atomic_load_xor_i64(i64 %offset) nounwind {
 ; CHECK-NEXT: eor [[NEW:x[0-9]+]], x[[OLD]], x0
 ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i64 %old
@@ -407,8 +407,8 @@ define i64 @test_atomic_load_xor_i64(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_xchg_i8(i8 %offset) nounwind {
 ; CHECK: test_atomic_load_xchg_i8:
-   %old = atomicrmw xchg i8* @var8, i8 %offset seq_cst
-; CHECK: dmb ish
+   %old = atomicrmw xchg i8* @var8, i8 %offset monotonic
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
 
@@ -418,7 +418,7 @@ define i8 @test_atomic_load_xchg_i8(i8 %offset) nounwind {
   ; function there.
 ; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], w0, [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i8 %old
@@ -427,17 +427,17 @@ define i8 @test_atomic_load_xchg_i8(i8 %offset) nounwind {
 define i16 @test_atomic_load_xchg_i16(i16 %offset) nounwind {
 ; CHECK: test_atomic_load_xchg_i16:
    %old = atomicrmw xchg i16* @var16, i16 %offset seq_cst
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK-NEXT: ldaxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ; function there.
-; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], w0, [x[[ADDR]]]
+; CHECK-NEXT: stlxrh [[STATUS:w[0-9]+]], w0, [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i16 %old
@@ -445,8 +445,8 @@ define i16 @test_atomic_load_xchg_i16(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_xchg_i32(i32 %offset) nounwind {
 ; CHECK: test_atomic_load_xchg_i32:
-   %old = atomicrmw xchg i32* @var32, i32 %offset seq_cst
-; CHECK: dmb ish
+   %old = atomicrmw xchg i32* @var32, i32 %offset release
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
 
@@ -454,9 +454,9 @@ define i32 @test_atomic_load_xchg_i32(i32 %offset) nounwind {
 ; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
-; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], w0, [x[[ADDR]]]
+; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], w0, [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i32 %old
@@ -464,18 +464,18 @@ define i32 @test_atomic_load_xchg_i32(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_xchg_i64(i64 %offset) nounwind {
 ; CHECK: test_atomic_load_xchg_i64:
-   %old = atomicrmw xchg i64* @var64, i64 %offset seq_cst
-; CHECK: dmb ish
+   %old = atomicrmw xchg i64* @var64, i64 %offset acquire
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK-NEXT: ldaxr x[[OLD:[0-9]+]], [x[[ADDR]]]
   ; x0 below is a reasonable guess but could change: it certainly comes into the
   ; function there.
 ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], x0, [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i64 %old
@@ -484,20 +484,20 @@ define i64 @test_atomic_load_xchg_i64(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_min_i8(i8 %offset) nounwind {
 ; CHECK: test_atomic_load_min_i8:
-   %old = atomicrmw min i8* @var8, i8 %offset seq_cst
-; CHECK: dmb ish
+   %old = atomicrmw min i8* @var8, i8 %offset acquire
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK-NEXT: ldaxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: cmp w0, w[[OLD]], sxtb
 ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt
 ; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i8 %old
@@ -505,8 +505,8 @@ define i8 @test_atomic_load_min_i8(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_min_i16(i16 %offset) nounwind {
 ; CHECK: test_atomic_load_min_i16:
-   %old = atomicrmw min i16* @var16, i16 %offset seq_cst
-; CHECK: dmb ish
+   %old = atomicrmw min i16* @var16, i16 %offset release
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
 
@@ -516,9 +516,9 @@ define i16 @test_atomic_load_min_i16(i16 %offset) nounwind {
   ;  function there.
 ; CHECK-NEXT: cmp w0, w[[OLD]], sxth
 ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt
-; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: stlxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i16 %old
@@ -526,8 +526,8 @@ define i16 @test_atomic_load_min_i16(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_min_i32(i32 %offset) nounwind {
 ; CHECK: test_atomic_load_min_i32:
-   %old = atomicrmw min i32* @var32, i32 %offset seq_cst
-; CHECK: dmb ish
+   %old = atomicrmw min i32* @var32, i32 %offset monotonic
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
 
@@ -539,7 +539,7 @@ define i32 @test_atomic_load_min_i32(i32 %offset) nounwind {
 ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt
 ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i32 %old
@@ -548,19 +548,19 @@ define i32 @test_atomic_load_min_i32(i32 %offset) nounwind {
 define i64 @test_atomic_load_min_i64(i64 %offset) nounwind {
 ; CHECK: test_atomic_load_min_i64:
    %old = atomicrmw min i64* @var64, i64 %offset seq_cst
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK-NEXT: ldaxr x[[OLD:[0-9]+]], [x[[ADDR]]]
   ; x0 below is a reasonable guess but could change: it certainly comes into the
   ; function there.
 ; CHECK-NEXT: cmp x0, x[[OLD]]
 ; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, gt
-; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i64 %old
@@ -569,19 +569,19 @@ define i64 @test_atomic_load_min_i64(i64 %offset) nounwind {
 define i8 @test_atomic_load_max_i8(i8 %offset) nounwind {
 ; CHECK: test_atomic_load_max_i8:
    %old = atomicrmw max i8* @var8, i8 %offset seq_cst
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK-NEXT: ldaxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: cmp w0, w[[OLD]], sxtb
 ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lt
-; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: stlxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i8 %old
@@ -589,20 +589,20 @@ define i8 @test_atomic_load_max_i8(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_max_i16(i16 %offset) nounwind {
 ; CHECK: test_atomic_load_max_i16:
-   %old = atomicrmw max i16* @var16, i16 %offset seq_cst
-; CHECK: dmb ish
+   %old = atomicrmw max i16* @var16, i16 %offset acquire
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK-NEXT: ldaxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: cmp w0, w[[OLD]], sxth
 ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lt
 ; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i16 %old
@@ -610,8 +610,8 @@ define i16 @test_atomic_load_max_i16(i16 %offset) nounwind {
 
 define i32 @test_atomic_load_max_i32(i32 %offset) nounwind {
 ; CHECK: test_atomic_load_max_i32:
-   %old = atomicrmw max i32* @var32, i32 %offset seq_cst
-; CHECK: dmb ish
+   %old = atomicrmw max i32* @var32, i32 %offset release
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
 
@@ -621,9 +621,9 @@ define i32 @test_atomic_load_max_i32(i32 %offset) nounwind {
   ;  function there.
 ; CHECK-NEXT: cmp w0, w[[OLD]]
 ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lt
-; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i32 %old
@@ -631,8 +631,8 @@ define i32 @test_atomic_load_max_i32(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_max_i64(i64 %offset) nounwind {
 ; CHECK: test_atomic_load_max_i64:
-   %old = atomicrmw max i64* @var64, i64 %offset seq_cst
-; CHECK: dmb ish
+   %old = atomicrmw max i64* @var64, i64 %offset monotonic
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
 
@@ -644,7 +644,7 @@ define i64 @test_atomic_load_max_i64(i64 %offset) nounwind {
 ; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, lt
 ; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i64 %old
@@ -652,8 +652,8 @@ define i64 @test_atomic_load_max_i64(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_umin_i8(i8 %offset) nounwind {
 ; CHECK: test_atomic_load_umin_i8:
-   %old = atomicrmw umin i8* @var8, i8 %offset seq_cst
-; CHECK: dmb ish
+   %old = atomicrmw umin i8* @var8, i8 %offset monotonic
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
 
@@ -665,7 +665,7 @@ define i8 @test_atomic_load_umin_i8(i8 %offset) nounwind {
 ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi
 ; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i8 %old
@@ -673,20 +673,20 @@ define i8 @test_atomic_load_umin_i8(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_umin_i16(i16 %offset) nounwind {
 ; CHECK: test_atomic_load_umin_i16:
-   %old = atomicrmw umin i16* @var16, i16 %offset seq_cst
-; CHECK: dmb ish
+   %old = atomicrmw umin i16* @var16, i16 %offset acquire
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK-NEXT: ldaxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: cmp w0, w[[OLD]], uxth
 ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi
 ; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i16 %old
@@ -695,19 +695,19 @@ define i16 @test_atomic_load_umin_i16(i16 %offset) nounwind {
 define i32 @test_atomic_load_umin_i32(i32 %offset) nounwind {
 ; CHECK: test_atomic_load_umin_i32:
    %old = atomicrmw umin i32* @var32, i32 %offset seq_cst
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK-NEXT: ldaxr w[[OLD:[0-9]+]], [x[[ADDR]]]
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: cmp w0, w[[OLD]]
 ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi
-; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i32 %old
@@ -715,20 +715,20 @@ define i32 @test_atomic_load_umin_i32(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_umin_i64(i64 %offset) nounwind {
 ; CHECK: test_atomic_load_umin_i64:
-   %old = atomicrmw umin i64* @var64, i64 %offset seq_cst
-; CHECK: dmb ish
+   %old = atomicrmw umin i64* @var64, i64 %offset acq_rel
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldxr x[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK-NEXT: ldaxr x[[OLD:[0-9]+]], [x[[ADDR]]]
   ; x0 below is a reasonable guess but could change: it certainly comes into the
   ; function there.
 ; CHECK-NEXT: cmp x0, x[[OLD]]
 ; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, hi
-; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i64 %old
@@ -736,20 +736,20 @@ define i64 @test_atomic_load_umin_i64(i64 %offset) nounwind {
 
 define i8 @test_atomic_load_umax_i8(i8 %offset) nounwind {
 ; CHECK: test_atomic_load_umax_i8:
-   %old = atomicrmw umax i8* @var8, i8 %offset seq_cst
-; CHECK: dmb ish
+   %old = atomicrmw umax i8* @var8, i8 %offset acq_rel
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK-NEXT: ldaxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: cmp w0, w[[OLD]], uxtb
 ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lo
-; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: stlxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i8 %old
@@ -757,8 +757,8 @@ define i8 @test_atomic_load_umax_i8(i8 %offset) nounwind {
 
 define i16 @test_atomic_load_umax_i16(i16 %offset) nounwind {
 ; CHECK: test_atomic_load_umax_i16:
-   %old = atomicrmw umax i16* @var16, i16 %offset seq_cst
-; CHECK: dmb ish
+   %old = atomicrmw umax i16* @var16, i16 %offset monotonic
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
 
@@ -770,7 +770,7 @@ define i16 @test_atomic_load_umax_i16(i16 %offset) nounwind {
 ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lo
 ; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i16 %old
@@ -779,19 +779,19 @@ define i16 @test_atomic_load_umax_i16(i16 %offset) nounwind {
 define i32 @test_atomic_load_umax_i32(i32 %offset) nounwind {
 ; CHECK: test_atomic_load_umax_i32:
    %old = atomicrmw umax i32* @var32, i32 %offset seq_cst
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
 
 ; CHECK: .LBB{{[0-9]+}}_1:
-; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK-NEXT: ldaxr w[[OLD:[0-9]+]], [x[[ADDR]]]
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: cmp w0, w[[OLD]]
 ; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lo
-; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i32 %old
@@ -799,8 +799,8 @@ define i32 @test_atomic_load_umax_i32(i32 %offset) nounwind {
 
 define i64 @test_atomic_load_umax_i64(i64 %offset) nounwind {
 ; CHECK: test_atomic_load_umax_i64:
-   %old = atomicrmw umax i64* @var64, i64 %offset seq_cst
-; CHECK: dmb ish
+   %old = atomicrmw umax i64* @var64, i64 %offset release
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
 
@@ -810,9 +810,9 @@ define i64 @test_atomic_load_umax_i64(i64 %offset) nounwind {
   ; function there.
 ; CHECK-NEXT: cmp x0, x[[OLD]]
 ; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, lo
-; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i64 %old
@@ -820,13 +820,13 @@ define i64 @test_atomic_load_umax_i64(i64 %offset) nounwind {
 
 define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
 ; CHECK: test_atomic_cmpxchg_i8:
-   %old = cmpxchg i8* @var8, i8 %wanted, i8 %new seq_cst
-; CHECK: dmb ish
+   %old = cmpxchg i8* @var8, i8 %wanted, i8 %new acquire
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
 
 ; CHECK: [[STARTAGAIN:.LBB[0-9]+_[0-9]+]]:
-; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK-NEXT: ldaxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: cmp w[[OLD]], w0
@@ -834,7 +834,7 @@ define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
   ; As above, w1 is a reasonable guess.
 ; CHECK: stxrb [[STATUS:w[0-9]+]], w1, [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]]
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i8 %old
@@ -843,20 +843,20 @@ define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
 define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
 ; CHECK: test_atomic_cmpxchg_i16:
    %old = cmpxchg i16* @var16, i16 %wanted, i16 %new seq_cst
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var16
 
 ; CHECK: [[STARTAGAIN:.LBB[0-9]+_[0-9]+]]:
-; CHECK-NEXT: ldxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
+; CHECK-NEXT: ldaxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
   ; w0 below is a reasonable guess but could change: it certainly comes into the
   ;  function there.
 ; CHECK-NEXT: cmp w[[OLD]], w0
 ; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]]
   ; As above, w1 is a reasonable guess.
-; CHECK: stxrh [[STATUS:w[0-9]+]], w1, [x[[ADDR]]]
+; CHECK: stlxrh [[STATUS:w[0-9]+]], w1, [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]]
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i16 %old
@@ -864,8 +864,8 @@ define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
 
 define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
 ; CHECK: test_atomic_cmpxchg_i32:
-   %old = cmpxchg i32* @var32, i32 %wanted, i32 %new seq_cst
-; CHECK: dmb ish
+   %old = cmpxchg i32* @var32, i32 %wanted, i32 %new release
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var32
 
@@ -876,9 +876,9 @@ define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
 ; CHECK-NEXT: cmp w[[OLD]], w0
 ; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]]
   ; As above, w1 is a reasonable guess.
-; CHECK: stxr [[STATUS:w[0-9]+]], w1, [x[[ADDR]]]
+; CHECK: stlxr [[STATUS:w[0-9]+]], w1, [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]]
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i32 %old
@@ -886,8 +886,8 @@ define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
 
 define i64 @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind {
 ; CHECK: test_atomic_cmpxchg_i64:
-   %old = cmpxchg i64* @var64, i64 %wanted, i64 %new seq_cst
-; CHECK: dmb ish
+   %old = cmpxchg i64* @var64, i64 %wanted, i64 %new monotonic
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var64
 
@@ -900,7 +900,7 @@ define i64 @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind {
   ; As above, w1 is a reasonable guess.
 ; CHECK: stxr [[STATUS:w[0-9]+]], x1, [x[[ADDR]]]
 ; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]]
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
 ; CHECK: mov x0, x[[OLD]]
    ret i64 %old
@@ -933,19 +933,26 @@ define i8 @test_atomic_load_monotonic_regoff_i8(i64 %base, i64 %off) nounwind {
 define i8 @test_atomic_load_acquire_i8() nounwind {
 ; CHECK: test_atomic_load_acquire_i8:
   %val = load atomic i8* @var8 acquire, align 1
+; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
+; CHECK-NOT: dmb
 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], #:lo12:var8
-
+; CHECK-NOT: dmb
 ; CHECK: ldarb w0, [x[[ADDR]]]
+; CHECK-NOT: dmb
   ret i8 %val
 }
 
 define i8 @test_atomic_load_seq_cst_i8() nounwind {
 ; CHECK: test_atomic_load_seq_cst_i8:
   %val = load atomic i8* @var8 seq_cst, align 1
-; CHECK: adrp x[[HIADDR:[0-9]+]], var8
-; CHECK: ldrb w0, [x[[HIADDR]], #:lo12:var8]
-; CHECK: dmb ish
+; CHECK-NOT: dmb
+; CHECK: adrp [[HIADDR:x[0-9]+]], var8
+; CHECK-NOT: dmb
+; CHECK: add x[[ADDR:[0-9]+]], [[HIADDR]], #:lo12:var8
+; CHECK-NOT: dmb
+; CHECK: ldarb w0, [x[[ADDR]]]
+; CHECK-NOT: dmb
   ret i8 %val
 }
 
@@ -954,6 +961,7 @@ define i16 @test_atomic_load_monotonic_i16() nounwind {
   %val = load atomic i16* @var16 monotonic, align 2
 ; CHECK-NOT: dmb
 ; CHECK: adrp x[[HIADDR:[0-9]+]], var16
+; CHECK-NOT: dmb
 ; CHECK: ldrh w0, [x[[HIADDR]], #:lo12:var16]
 ; CHECK-NOT: dmb
 
@@ -976,9 +984,13 @@ define i32 @test_atomic_load_monotonic_regoff_i32(i64 %base, i64 %off) nounwind
 define i64 @test_atomic_load_seq_cst_i64() nounwind {
 ; CHECK: test_atomic_load_seq_cst_i64:
   %val = load atomic i64* @var64 seq_cst, align 8
-; CHECK: adrp x[[HIADDR:[0-9]+]], var64
-; CHECK: ldr x0, [x[[HIADDR]], #:lo12:var64]
-; CHECK: dmb ish
+; CHECK-NOT: dmb
+; CHECK: adrp [[HIADDR:x[0-9]+]], var64
+; CHECK-NOT: dmb
+; CHECK: add x[[ADDR:[0-9]+]], [[HIADDR]], #:lo12:var64
+; CHECK-NOT: dmb
+; CHECK: ldar x0, [x[[ADDR]]]
+; CHECK-NOT: dmb
   ret i64 %val
 }
 
@@ -1005,20 +1017,26 @@ define void @test_atomic_store_monotonic_regoff_i8(i64 %base, i64 %off, i8 %val)
 define void @test_atomic_store_release_i8(i8 %val) nounwind {
 ; CHECK: test_atomic_store_release_i8:
   store atomic i8 %val, i8* @var8 release, align 1
+; CHECK-NOT: dmb
 ; CHECK: adrp [[HIADDR:x[0-9]+]], var8
+; CHECK-NOT: dmb
 ; CHECK: add x[[ADDR:[0-9]+]], [[HIADDR]], #:lo12:var8
+; CHECK-NOT: dmb
 ; CHECK: stlrb w0, [x[[ADDR]]]
-
+; CHECK-NOT: dmb
   ret void
 }
 
 define void @test_atomic_store_seq_cst_i8(i8 %val) nounwind {
 ; CHECK: test_atomic_store_seq_cst_i8:
   store atomic i8 %val, i8* @var8 seq_cst, align 1
+; CHECK-NOT: dmb
 ; CHECK: adrp [[HIADDR:x[0-9]+]], var8
+; CHECK-NOT: dmb
 ; CHECK: add x[[ADDR:[0-9]+]], [[HIADDR]], #:lo12:var8
+; CHECK-NOT: dmb
 ; CHECK: stlrb w0, [x[[ADDR]]]
-; CHECK: dmb ish
+; CHECK-NOT: dmb
 
   ret void
 }
@@ -1026,9 +1044,11 @@ define void @test_atomic_store_seq_cst_i8(i8 %val) nounwind {
 define void @test_atomic_store_monotonic_i16(i16 %val) nounwind {
 ; CHECK: test_atomic_store_monotonic_i16:
   store atomic i16 %val, i16* @var16 monotonic, align 2
+; CHECK-NOT: dmb
 ; CHECK: adrp x[[HIADDR:[0-9]+]], var16
+; CHECK-NOT: dmb
 ; CHECK: strh w0, [x[[HIADDR]], #:lo12:var16]
-
+; CHECK-NOT: dmb
   ret void
 }
 
@@ -1039,7 +1059,9 @@ define void @test_atomic_store_monotonic_regoff_i32(i64 %base, i64 %off, i32 %va
   %addr = inttoptr i64 %addr_int to i32*
 
   store atomic i32 %val, i32* %addr monotonic, align 4
+; CHECK-NOT: dmb
 ; CHECK: str w2, [x0, x1]
+; CHECK-NOT: dmb
 
   ret void
 }
@@ -1047,9 +1069,12 @@ define void @test_atomic_store_monotonic_regoff_i32(i64 %base, i64 %off, i32 %va
 define void @test_atomic_store_release_i64(i64 %val) nounwind {
 ; CHECK: test_atomic_store_release_i64:
   store atomic i64 %val, i64* @var64 release, align 8
+; CHECK-NOT: dmb
 ; CHECK: adrp [[HIADDR:x[0-9]+]], var64
+; CHECK-NOT: dmb
 ; CHECK: add x[[ADDR:[0-9]+]], [[HIADDR]], #:lo12:var64
+; CHECK-NOT: dmb
 ; CHECK: stlr x0, [x[[ADDR]]]
-
+; CHECK-NOT: dmb
   ret void
 }
diff --git a/test/CodeGen/AArch64/blockaddress.ll b/test/CodeGen/AArch64/blockaddress.ll
index 3d0a5cf..5e85057 100644
--- a/test/CodeGen/AArch64/blockaddress.ll
+++ b/test/CodeGen/AArch64/blockaddress.ll
@@ -1,4 +1,5 @@
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -code-model=large -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-LARGE %s
 
 @addr = global i8* null
 
@@ -13,6 +14,14 @@ define void @test_blockaddress() {
 ; CHECK: ldr [[NEWDEST:x[0-9]+]]
 ; CHECK: br [[NEWDEST]]
 
+; CHECK-LARGE: movz [[ADDR_REG:x[0-9]+]], #:abs_g3:[[DEST_LBL:.Ltmp[0-9]+]]
+; CHECK-LARGE: movk [[ADDR_REG]], #:abs_g2_nc:[[DEST_LBL]]
+; CHECK-LARGE: movk [[ADDR_REG]], #:abs_g1_nc:[[DEST_LBL]]
+; CHECK-LARGE: movk [[ADDR_REG]], #:abs_g0_nc:[[DEST_LBL]]
+; CHECK-LARGE: str [[ADDR_REG]],
+; CHECK-LARGE: ldr [[NEWDEST:x[0-9]+]]
+; CHECK-LARGE: br [[NEWDEST]]
+
 block:
   ret void
 }
diff --git a/test/CodeGen/AArch64/code-model-large-abs.ll b/test/CodeGen/AArch64/code-model-large-abs.ll
new file mode 100644
index 0000000..a365568
--- /dev/null
+++ b/test/CodeGen/AArch64/code-model-large-abs.ll
@@ -0,0 +1,61 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -code-model=large < %s | FileCheck %s
+
+@var8 = global i8 0
+@var16 = global i16 0
+@var32 = global i32 0
+@var64 = global i64 0
+
+define i8* @global_addr() {
+; CHECK: global_addr:
+  ret i8* @var8
+  ; The movz/movk calculation should end up returned directly in x0.
+; CHECK: movz x0, #:abs_g3:var8
+; CHECK: movk x0, #:abs_g2_nc:var8
+; CHECK: movk x0, #:abs_g1_nc:var8
+; CHECK: movk x0, #:abs_g0_nc:var8
+; CHECK-NEXT: ret
+}
+
+define i8 @global_i8() {
+; CHECK: global_i8:
+  %val = load i8* @var8
+  ret i8 %val
+; CHECK: movz x[[ADDR_REG:[0-9]+]], #:abs_g3:var8
+; CHECK: movk x[[ADDR_REG]], #:abs_g2_nc:var8
+; CHECK: movk x[[ADDR_REG]], #:abs_g1_nc:var8
+; CHECK: movk x[[ADDR_REG]], #:abs_g0_nc:var8
+; CHECK: ldrb w0, [x[[ADDR_REG]]]
+}
+
+define i16 @global_i16() {
+; CHECK: global_i16:
+  %val = load i16* @var16
+  ret i16 %val
+; CHECK: movz x[[ADDR_REG:[0-9]+]], #:abs_g3:var16
+; CHECK: movk x[[ADDR_REG]], #:abs_g2_nc:var16
+; CHECK: movk x[[ADDR_REG]], #:abs_g1_nc:var16
+; CHECK: movk x[[ADDR_REG]], #:abs_g0_nc:var16
+; CHECK: ldrh w0, [x[[ADDR_REG]]]
+}
+
+define i32 @global_i32() {
+; CHECK: global_i32:
+  %val = load i32* @var32
+  ret i32 %val
+; CHECK: movz x[[ADDR_REG:[0-9]+]], #:abs_g3:var32
+; CHECK: movk x[[ADDR_REG]], #:abs_g2_nc:var32
+; CHECK: movk x[[ADDR_REG]], #:abs_g1_nc:var32
+; CHECK: movk x[[ADDR_REG]], #:abs_g0_nc:var32
+; CHECK: ldr w0, [x[[ADDR_REG]]]
+}
+
+define i64 @global_i64() {
+; CHECK: global_i64:
+  %val = load i64* @var64
+  ret i64 %val
+; CHECK: movz x[[ADDR_REG:[0-9]+]], #:abs_g3:var64
+; CHECK: movk x[[ADDR_REG]], #:abs_g2_nc:var64
+; CHECK: movk x[[ADDR_REG]], #:abs_g1_nc:var64
+; CHECK: movk x[[ADDR_REG]], #:abs_g0_nc:var64
+; CHECK: ldr x0, [x[[ADDR_REG]]]
+}
diff --git a/test/CodeGen/AArch64/elf-extern.ll b/test/CodeGen/AArch64/elf-extern.ll
index ee89d8d..8bf1b2f 100644
--- a/test/CodeGen/AArch64/elf-extern.ll
+++ b/test/CodeGen/AArch64/elf-extern.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -filetype=obj | elf-dump | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -filetype=obj | llvm-readobj -r | FileCheck %s
 
 ; External symbols are a different concept to global variables but should still
 ; get relocations and so on when used.
@@ -10,12 +10,8 @@ define i32 @check_extern() {
   ret i32 0
 }
 
-; CHECK: .rela.text
-; CHECK: ('r_sym', 0x00000009)
-; CHECK-NEXT: ('r_type', 0x0000011b)
-
-; CHECK: .symtab
-; CHECK: Symbol 9
-; CHECK-NEXT: memcpy
-
-
+; CHECK: Relocations [
+; CHECK:   Section (1) .text {
+; CHECK:     0x{{[0-9,A-F]+}} R_AARCH64_CALL26 memcpy
+; CHECK:   }
+; CHECK: ]
diff --git a/test/CodeGen/AArch64/extern-weak.ll b/test/CodeGen/AArch64/extern-weak.ll
index 3d3d867..bc0acc2 100644
--- a/test/CodeGen/AArch64/extern-weak.ll
+++ b/test/CodeGen/AArch64/extern-weak.ll
@@ -1,4 +1,5 @@
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -o - < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -code-model=large -o - < %s | FileCheck --check-prefix=CHECK-LARGE %s
 
 declare extern_weak i32 @var()
 
@@ -11,6 +12,12 @@ define i32()* @foo() {
 
 ; CHECK: ldr x0, [{{x[0-9]+}}, #:lo12:.LCPI0_0]
 
+  ; In the large model, the usual relocations are absolute and can
+  ; materialise 0.
+; CHECK-LARGE: movz x0, #:abs_g3:var
+; CHECK-LARGE: movk x0, #:abs_g2_nc:var
+; CHECK-LARGE: movk x0, #:abs_g1_nc:var
+; CHECK-LARGE: movk x0, #:abs_g0_nc:var
 }
 
 
@@ -24,6 +31,13 @@ define i32* @bar() {
 ; CHECK: ldr [[BASE:x[0-9]+]], [{{x[0-9]+}}, #:lo12:.LCPI1_0]
 ; CHECK: add x0, [[BASE]], #20
   ret i32* %addr
+
+  ; In the large model, the usual relocations are absolute and can
+  ; materialise 0.
+; CHECK-LARGE: movz x0, #:abs_g3:arr_var
+; CHECK-LARGE: movk x0, #:abs_g2_nc:arr_var
+; CHECK-LARGE: movk x0, #:abs_g1_nc:arr_var
+; CHECK-LARGE: movk x0, #:abs_g0_nc:arr_var
 }
 
 @defined_weak_var = internal unnamed_addr global i32 0
@@ -32,4 +46,9 @@ define i32* @wibble() {
   ret i32* @defined_weak_var
 ; CHECK: adrp [[BASE:x[0-9]+]], defined_weak_var
 ; CHECK: add x0, [[BASE]], #:lo12:defined_weak_var
+
+; CHECK-LARGE: movz x0, #:abs_g3:defined_weak_var
+; CHECK-LARGE: movk x0, #:abs_g2_nc:defined_weak_var
+; CHECK-LARGE: movk x0, #:abs_g1_nc:defined_weak_var
+; CHECK-LARGE: movk x0, #:abs_g0_nc:defined_weak_var
 }
 \ No newline at end of file
diff --git a/test/CodeGen/AArch64/jump-table.ll b/test/CodeGen/AArch64/jump-table.ll
index dcf9f4e..3c7f5f9 100644
--- a/test/CodeGen/AArch64/jump-table.ll
+++ b/test/CodeGen/AArch64/jump-table.ll
@@ -1,5 +1,6 @@
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -filetype=obj | elf-dump | FileCheck %s -check-prefix=CHECK-ELF
+; RUN: llc -code-model=large -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck --check-prefix=CHECK-LARGE %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -filetype=obj | llvm-readobj -r | FileCheck %s -check-prefix=CHECK-ELF
 
 define i32 @test_jumptable(i32 %in) {
 ; CHECK: test_jumptable
@@ -15,6 +16,13 @@ define i32 @test_jumptable(i32 %in) {
 ; CHECK: ldr [[DEST:x[0-9]+]], [x[[JT]], {{x[0-9]+}}, lsl #3]
 ; CHECK: br [[DEST]]
 
+; CHECK-LARGE: movz x[[JTADDR:[0-9]+]], #:abs_g3:.LJTI0_0
+; CHECK-LARGE: movk x[[JTADDR]], #:abs_g2_nc:.LJTI0_0
+; CHECK-LARGE: movk x[[JTADDR]], #:abs_g1_nc:.LJTI0_0
+; CHECK-LARGE: movk x[[JTADDR]], #:abs_g0_nc:.LJTI0_0
+; CHECK-LARGE: ldr [[DEST:x[0-9]+]], [x[[JTADDR]], {{x[0-9]+}}, lsl #3]
+; CHECK-LARGE: br [[DEST]]
+
 def:
   ret i32 0
 
@@ -44,13 +52,15 @@ lbl4:
 ; ELF tests:
 
 ; First make sure we get a page/lo12 pair in .text to pick up the jump-table
-; CHECK-ELF: .rela.text
-; CHECK-ELF: ('r_sym', 0x00000008)
-; CHECK-ELF-NEXT: ('r_type', 0x00000113)
-; CHECK-ELF: ('r_sym', 0x00000008)
-; CHECK-ELF-NEXT: ('r_type', 0x00000115)
+
+; CHECK-ELF:      Relocations [
+; CHECK-ELF:        Section ({{[0-9]+}}) .text {
+; CHECK-ELF-NEXT:     0x{{[0-9,A-F]+}} R_AARCH64_ADR_PREL_PG_HI21 .rodata
+; CHECK-ELF-NEXT:     0x{{[0-9,A-F]+}} R_AARCH64_ADD_ABS_LO12_NC .rodata
+; CHECK-ELF:        }
 
 ; Also check the targets in .rodata are relocated
-; CHECK-ELF: .rela.rodata
-; CHECK-ELF: ('r_sym', 0x00000005)
-; CHECK-ELF-NEXT: ('r_type', 0x00000101)
-\ No newline at end of file
+; CHECK-ELF:        Section ({{[0-9]+}}) .rodata {
+; CHECK-ELF-NEXT:     0x{{[0-9,A-F]+}} R_AARCH64_ABS64 .text
+; CHECK-ELF:        }
+; CHECK-ELF:      ]
diff --git a/test/CodeGen/AArch64/literal_pools.ll b/test/CodeGen/AArch64/literal_pools.ll
index e090841..9cfa8c5 100644
--- a/test/CodeGen/AArch64/literal_pools.ll
+++ b/test/CodeGen/AArch64/literal_pools.ll
@@ -1,4 +1,5 @@
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -code-model=large | FileCheck --check-prefix=CHECK-LARGE %s
 
 @var32 = global i32 0
 @var64 = global i64 0
@@ -13,21 +14,45 @@ define void @foo() {
 ; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI0_[0-9]+]]
 ; CHECK: ldr {{w[0-9]+}}, [x[[LITBASE]], #:lo12:[[CURLIT]]]
 
+; CHECK-LARGE: movz x[[LITADDR:[0-9]+]], #:abs_g3:[[CURLIT:.LCPI0_[0-9]+]]
+; CHECK-LARGE: movk x[[LITADDR]], #:abs_g2_nc:[[CURLIT]]
+; CHECK-LARGE: movk x[[LITADDR]], #:abs_g1_nc:[[CURLIT]]
+; CHECK-LARGE: movk x[[LITADDR]], #:abs_g0_nc:[[CURLIT]]
+; CHECK-LARGE: ldr {{w[0-9]+}}, [x[[LITADDR]]]
+
     %val64_lit32 = and i64 %val64, 305402420
     store volatile i64 %val64_lit32, i64* @var64
 ; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI0_[0-9]+]]
 ; CHECK: ldr {{w[0-9]+}}, [x[[LITBASE]], #:lo12:[[CURLIT]]]
 
+; CHECK-LARGE: movz x[[LITADDR:[0-9]+]], #:abs_g3:[[CURLIT:.LCPI0_[0-9]+]]
+; CHECK-LARGE: movk x[[LITADDR]], #:abs_g2_nc:[[CURLIT]]
+; CHECK-LARGE: movk x[[LITADDR]], #:abs_g1_nc:[[CURLIT]]
+; CHECK-LARGE: movk x[[LITADDR]], #:abs_g0_nc:[[CURLIT]]
+; CHECK-LARGE: ldr {{w[0-9]+}}, [x[[LITADDR]]]
+
     %val64_lit32signed = and i64 %val64, -12345678
     store volatile i64 %val64_lit32signed, i64* @var64
 ; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI0_[0-9]+]]
 ; CHECK: ldrsw {{x[0-9]+}}, [x[[LITBASE]], #:lo12:[[CURLIT]]]
 
+; CHECK-LARGE: movz x[[LITADDR:[0-9]+]], #:abs_g3:[[CURLIT:.LCPI0_[0-9]+]]
+; CHECK-LARGE: movk x[[LITADDR]], #:abs_g2_nc:[[CURLIT]]
+; CHECK-LARGE: movk x[[LITADDR]], #:abs_g1_nc:[[CURLIT]]
+; CHECK-LARGE: movk x[[LITADDR]], #:abs_g0_nc:[[CURLIT]]
+; CHECK-LARGE: ldrsw {{x[0-9]+}}, [x[[LITADDR]]]
+
     %val64_lit64 = and i64 %val64, 1234567898765432
     store volatile i64 %val64_lit64, i64* @var64
 ; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI0_[0-9]+]]
 ; CHECK: ldr {{x[0-9]+}}, [x[[LITBASE]], #:lo12:[[CURLIT]]]
 
+; CHECK-LARGE: movz x[[LITADDR:[0-9]+]], #:abs_g3:[[CURLIT:.LCPI0_[0-9]+]]
+; CHECK-LARGE: movk x[[LITADDR]], #:abs_g2_nc:[[CURLIT]]
+; CHECK-LARGE: movk x[[LITADDR]], #:abs_g1_nc:[[CURLIT]]
+; CHECK-LARGE: movk x[[LITADDR]], #:abs_g0_nc:[[CURLIT]]
+; CHECK-LARGE: ldr {{x[0-9]+}}, [x[[LITADDR]]]
+
     ret void
 }
 
@@ -42,6 +67,14 @@ define void @floating_lits() {
 ; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI1_[0-9]+]]
 ; CHECK: ldr {{s[0-9]+}}, [x[[LITBASE]], #:lo12:[[CURLIT]]]
 ; CHECK: fadd
+
+; CHECK-LARGE: movz x[[LITADDR:[0-9]+]], #:abs_g3:[[CURLIT:.LCPI1_[0-9]+]]
+; CHECK-LARGE: movk x[[LITADDR]], #:abs_g2_nc:[[CURLIT]]
+; CHECK-LARGE: movk x[[LITADDR]], #:abs_g1_nc:[[CURLIT]]
+; CHECK-LARGE: movk x[[LITADDR]], #:abs_g0_nc:[[CURLIT]]
+; CHECK-LARGE: ldr {{s[0-9]+}}, [x[[LITADDR]]]
+; CHECK-LARGE: fadd
+
   store float %newfloat, float* @varfloat
 
   %doubleval = load double* @vardouble
@@ -49,6 +82,13 @@ define void @floating_lits() {
 ; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI1_[0-9]+]]
 ; CHECK: ldr {{d[0-9]+}}, [x[[LITBASE]], #:lo12:[[CURLIT]]]
 ; CHECK: fadd
+
+; CHECK-LARGE: movz x[[LITADDR:[0-9]+]], #:abs_g3:[[CURLIT:.LCPI1_[0-9]+]]
+; CHECK-LARGE: movk x[[LITADDR]], #:abs_g2_nc:[[CURLIT]]
+; CHECK-LARGE: movk x[[LITADDR]], #:abs_g1_nc:[[CURLIT]]
+; CHECK-LARGE: movk x[[LITADDR]], #:abs_g0_nc:[[CURLIT]]
+; CHECK-LARGE: ldr {{d[0-9]+}}, [x[[LITADDR]]]
+
   store double %newdouble, double* @vardouble
 
   ret void
diff --git a/test/CodeGen/ARM/2010-08-04-StackVariable.ll b/test/CodeGen/ARM/2010-08-04-StackVariable.ll
index 91a9903..112512f 100644
--- a/test/CodeGen/ARM/2010-08-04-StackVariable.ll
+++ b/test/CodeGen/ARM/2010-08-04-StackVariable.ll
@@ -79,7 +79,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !0 = metadata !{i32 786478, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"", metadata !2, i32 11, metadata !14, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786451, metadata !2, metadata !"SVal", metadata !2, i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_structure_type ]
 !2 = metadata !{i32 786473, metadata !48} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786449, i32 4, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, metadata !47, metadata !47, metadata !46, metadata !47, metadata !""} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786449, i32 4, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, metadata !47, metadata !47, metadata !46, metadata !47,  metadata !47, metadata !""} ; [ DW_TAG_compile_unit ]
 !4 = metadata !{metadata !5, metadata !7, metadata !0, metadata !9}
 !5 = metadata !{i32 786445, metadata !1, metadata !"Data", metadata !2, i32 7, i64 64, i64 64, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ]
 !6 = metadata !{i32 786447, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
diff --git a/test/CodeGen/ARM/2010-10-19-mc-elf-objheader.ll b/test/CodeGen/ARM/2010-10-19-mc-elf-objheader.ll
index 36d1575..b253fef 100644
--- a/test/CodeGen/ARM/2010-10-19-mc-elf-objheader.ll
+++ b/test/CodeGen/ARM/2010-10-19-mc-elf-objheader.ll
@@ -1,36 +1,47 @@
 ; RUN: llc  %s -mtriple=arm-linux-gnueabi -filetype=obj -o - | \
-; RUN:    elf-dump --dump-section-data | FileCheck  -check-prefix=BASIC %s 
+; RUN:    llvm-readobj -s -sd | FileCheck  -check-prefix=BASIC %s 
 ; RUN: llc  %s -mtriple=armv7-linux-gnueabi -march=arm -mcpu=cortex-a8 \
 ; RUN:    -mattr=-neon,-vfp3,+vfp2 \
 ; RUN:    -arm-reserve-r9 -filetype=obj -o - | \
-; RUN:    elf-dump --dump-section-data | FileCheck  -check-prefix=CORTEXA8 %s
+; RUN:    llvm-readobj -s -sd | FileCheck  -check-prefix=CORTEXA8 %s
 
 
 ; This tests that the extpected ARM attributes are emitted.
 ;
-; BASIC:        .ARM.attributes
-; BASIC-NEXT:         0x70000003
-; BASIC-NEXT:         0x00000000
-; BASIC-NEXT:         0x00000000
-; BASIC-NEXT:         0x0000003c
-; BASIC-NEXT:         0x00000022
-; BASIC-NEXT:         0x00000000
-; BASIC-NEXT:         0x00000000
-; BASIC-NEXT:         0x00000001
-; BASIC-NEXT:         0x00000000
-; BASIC-NEXT:         '41210000 00616561 62690001 17000000 060a0741 08010902 14011501 17031801 1901'
+; BASIC:        Section {
+; BASIC:          Name: .ARM.attributes
+; BASIC-NEXT:     Type: SHT_ARM_ATTRIBUTES
+; BASIC-NEXT:     Flags [ (0x0)
+; BASIC-NEXT:     ]
+; BASIC-NEXT:     Address: 0x0
+; BASIC-NEXT:     Offset: 0x3C
+; BASIC-NEXT:     Size: 34
+; BASIC-NEXT:     Link: 0
+; BASIC-NEXT:     Info: 0
+; BASIC-NEXT:     AddressAlignment: 1
+; BASIC-NEXT:     EntrySize: 0
+; BASIC-NEXT:     SectionData (
+; BASIC-NEXT:       0000: 41210000 00616561 62690001 17000000
+; BASIC-NEXT:       0010: 060A0741 08010902 14011501 17031801
+; BASIC-NEXT:       0020: 1901
+; BASIC-NEXT:     )
 
-; CORTEXA8:        .ARM.attributes
-; CORTEXA8-NEXT:         0x70000003
-; CORTEXA8-NEXT:         0x00000000
-; CORTEXA8-NEXT:         0x00000000
-; CORTEXA8-NEXT:         0x0000003c
-; CORTEXA8-NEXT:         0x0000002f
-; CORTEXA8-NEXT:         0x00000000
-; CORTEXA8-NEXT:         0x00000000
-; CORTEXA8-NEXT:         0x00000001
-; CORTEXA8-NEXT:         0x00000000
-; CORTEXA8-NEXT:         '412e0000 00616561 62690001 24000000 05434f52 5445582d 41380006 0a074108 0109020a 02140115 01170318 011901'
+; CORTEXA8:        Name: .ARM.attributes
+; CORTEXA8-NEXT:     Type: SHT_ARM_ATTRIBUTES
+; CORTEXA8-NEXT:     Flags [ (0x0)
+; CORTEXA8-NEXT:     ]
+; CORTEXA8-NEXT:     Address: 0x0
+; CORTEXA8-NEXT:     Offset: 0x3C
+; CORTEXA8-NEXT:     Size: 47
+; CORTEXA8-NEXT:     Link: 0
+; CORTEXA8-NEXT:     Info: 0
+; CORTEXA8-NEXT:     AddressAlignment: 1
+; CORTEXA8-NEXT:     EntrySize: 0
+; CORTEXA8-NEXT:     SectionData (
+; CORTEXA8-NEXT:       0000: 412E0000 00616561 62690001 24000000
+; CORTEXA8-NEXT:       0010: 05434F52 5445582D 41380006 0A074108
+; CORTEXA8-NEXT:       0020: 0109020A 02140115 01170318 011901
+; CORTEXA8-NEXT:     )
 
 define i32 @f(i64 %z) {
        ret i32 0
diff --git a/test/CodeGen/ARM/2010-11-30-reloc-movt.ll b/test/CodeGen/ARM/2010-11-30-reloc-movt.ll
index 94a0541..9eecd04 100644
--- a/test/CodeGen/ARM/2010-11-30-reloc-movt.ll
+++ b/test/CodeGen/ARM/2010-11-30-reloc-movt.ll
@@ -1,5 +1,5 @@
 ; RUN: llc  %s -mtriple=armv7-linux-gnueabi -filetype=obj -o - | \
-; RUN:    elf-dump --dump-section-data | FileCheck  -check-prefix=OBJ %s
+; RUN:    llvm-readobj -s -sr -sd | FileCheck  -check-prefix=OBJ %s
 
 target triple = "armv7-none-linux-gnueabi"
 
@@ -9,32 +9,17 @@ define arm_aapcs_vfpcc i32 @barf() nounwind {
 entry:
   %0 = tail call arm_aapcs_vfpcc  i32 @foo(i8* @a) nounwind
   ret i32 %0
-; OBJ:         '.text'
-; OBJ-NEXT:    'sh_type'
-; OBJ-NEXT:    'sh_flags'
-; OBJ-NEXT:    'sh_addr'
-; OBJ-NEXT:    'sh_offset'
-; OBJ-NEXT:    'sh_size'
-; OBJ-NEXT:    'sh_link'
-; OBJ-NEXT:    'sh_info'
-; OBJ-NEXT:    'sh_addralign'
-; OBJ-NEXT:    'sh_entsize'
-; OBJ-NEXT:    '_section_data', '00482de9 000000e3 000040e3 feffffeb 0088bde8'
-
-; OBJ:            Relocation 0
-; OBJ-NEXT:       'r_offset', 0x00000004
-; OBJ-NEXT:       'r_sym', 0x000009
-; OBJ-NEXT:        'r_type', 0x2b
-
-; OBJ:          Relocation 1
-; OBJ-NEXT:       'r_offset', 0x00000008
-; OBJ-NEXT:       'r_sym'
-; OBJ-NEXT:        'r_type', 0x2c
-
-; OBJ:          # Relocation 2
-; OBJ-NEXT:       'r_offset', 0x0000000c
-; OBJ-NEXT:       'r_sym', 0x00000a
-; OBJ-NEXT:       'r_type', 0x1c
+; OBJ:        Section {
+; OBJ:          Name: .text
+; OBJ:          Relocations [
+; OBJ-NEXT:       0x4 R_ARM_MOVW_ABS_NC a
+; OBJ-NEXT:       0x8 R_ARM_MOVT_ABS
+; OBJ-NEXT:       0xC R_ARM_CALL foo
+; OBJ-NEXT:     ]
+; OBJ-NEXT:     SectionData (
+; OBJ-NEXT:       0000: 00482DE9 000000E3 000040E3 FEFFFFEB
+; OBJ-NEXT:       0010: 0088BDE8
+; OBJ-NEXT:     )
 
 }
 
diff --git a/test/CodeGen/ARM/2010-12-08-tpsoft.ll b/test/CodeGen/ARM/2010-12-08-tpsoft.ll
index b8ed819..1351a26 100644
--- a/test/CodeGen/ARM/2010-12-08-tpsoft.ll
+++ b/test/CodeGen/ARM/2010-12-08-tpsoft.ll
@@ -1,9 +1,9 @@
 ; RUN: llc  %s -mtriple=armv7-linux-gnueabi -o - | \
 ; RUN:    FileCheck  -check-prefix=ELFASM %s 
 ; RUN: llc  %s -mtriple=armv7-linux-gnueabi -filetype=obj -o - | \
-; RUN:    elf-dump --dump-section-data | FileCheck  -check-prefix=ELFOBJ %s
+; RUN:    llvm-readobj -s -sd | FileCheck  -check-prefix=ELFOBJ %s
 
-;; Make sure that bl __aeabi_read_tp is materiazlied and fixed up correctly
+;; Make sure that bl __aeabi_read_tp is materialized and fixed up correctly
 ;; in the obj case. 
 
 @i = external thread_local global i32
@@ -24,19 +24,13 @@ bb:                                               ; preds = %entry
 ; ELFASM:       	bl	__aeabi_read_tp
 
 
-; ELFOBJ:   '.text'
-; ELFOBJ-NEXT:  'sh_type'
-; ELFOBJ-NEXT:  'sh_flags'
-; ELFOBJ-NEXT:  'sh_addr'
-; ELFOBJ-NEXT:  'sh_offset'
-; ELFOBJ-NEXT:  'sh_size'
-; ELFOBJ-NEXT:  'sh_link'
-; ELFOBJ-NEXT:  'sh_info'
-; ELFOBJ-NEXT:  'sh_addralign'
-; ELFOBJ-NEXT:  'sh_entsize'
-;;;               BL __aeabi_read_tp is ---+
-;;;                                        V
-; ELFOBJ-NEXT:  00482de9 3c009fe5 00109fe7 feffffeb
+; ELFOBJ:      Sections [
+; ELFOBJ:        Section {
+; ELFOBJ:          Name: .text
+; ELFOBJ:          SectionData (
+;;;                  BL __aeabi_read_tp is ---------+
+;;;                                                 V
+; ELFOBJ-NEXT:     0000: 00482DE9 3C009FE5 00109FE7 FEFFFFEB
 
 
 bb1:                                              ; preds = %entry
diff --git a/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll b/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll
index 1272a25..f13bc12 100644
--- a/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll
+++ b/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll
@@ -1,5 +1,5 @@
 ; RUN: llc  %s -mtriple=armv7-linux-gnueabi -filetype=obj -o - | \
-; RUN:    elf-dump --dump-section-data | FileCheck  -check-prefix=OBJ %s
+; RUN:    llvm-readobj -s -t | FileCheck  -check-prefix=OBJ %s
 ; RUN: llc  %s -mtriple=armv7-linux-gnueabi -o - | \
 ; RUN:    FileCheck  -check-prefix=ASM %s
 
@@ -15,17 +15,20 @@
 ; ASM-NEXT:     .type   _MergedGlobals,%object  @ @_MergedGlobals
 
 
-
-; OBJ:          Section 4
-; OBJ-NEXT:     '.bss'
-
-; OBJ:          'array00'
-; OBJ-NEXT:     'st_value', 0x00000000
-; OBJ-NEXT:     'st_size', 0x00000050
-; OBJ-NEXT:     'st_bind', 0x0
-; OBJ-NEXT:     'st_type', 0x1
-; OBJ-NEXT:     'st_other', 0x00
-; OBJ-NEXT:     'st_shndx', 0x0004
+; OBJ:      Sections [
+; OBJ:        Section {
+; OBJ:          Index: 4
+; OBJ-NEXT:     Name: .bss
+
+; OBJ:      Symbols [
+; OBJ:        Symbol {
+; OBJ:          Name: array00
+; OBJ-NEXT:     Value: 0x0
+; OBJ-NEXT:     Size: 80
+; OBJ-NEXT:     Binding: Local
+; OBJ-NEXT:     Type: Object
+; OBJ-NEXT:     Other: 0
+; OBJ-NEXT:     Section: .bss
 
 define i32 @main(i32 %argc) nounwind {
   %1 = load i32* @sum, align 4
diff --git a/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll b/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll
index 1d1b89a..98c0af3 100644
--- a/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll
+++ b/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll
@@ -79,7 +79,7 @@ entry:
 
 !0 = metadata !{i32 786478, metadata !1, metadata !"get1", metadata !"get1", metadata !"get1", metadata !1, i32 4, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get1, null, null, metadata !42, i32 4} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786473, metadata !47} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, metadata !47, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build 2369.8)", i1 true, metadata !"", i32 0, null, null, metadata !40, metadata !41, metadata !""} ; [ DW_TAG_compile_unit ]
+!2 = metadata !{i32 786449, metadata !47, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build 2369.8)", i1 true, metadata !"", i32 0, null, null, metadata !40, metadata !41,  metadata !41, metadata !""} ; [ DW_TAG_compile_unit ]
 !3 = metadata !{i32 786453, metadata !1, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !4 = metadata !{metadata !5, metadata !5}
 !5 = metadata !{i32 786468, metadata !1, metadata !1, metadata !"_Bool", i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ]
diff --git a/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll b/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll
index 266609b8..7a7ca8e 100644
--- a/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll
+++ b/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll
@@ -74,7 +74,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, metadata !47, i32 12, metadata !"clang", i1 true, metadata !"", i32 0, null, null, metadata !40, metadata !41, null} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, metadata !47, i32 12, metadata !"clang", i1 true, metadata !"", i32 0, null, null, metadata !40, metadata !41,  metadata !41, null} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 786478, metadata !2, metadata !"get1", metadata !"get1", metadata !"", metadata !2, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @get1, null, null, metadata !42, i32 5} ; [ DW_TAG_subprogram ]
 !2 = metadata !{i32 786473, metadata !47} ; [ DW_TAG_file_type ]
 !3 = metadata !{i32 786453, metadata !2, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
diff --git a/test/CodeGen/ARM/2011-12-14-machine-sink.ll b/test/CodeGen/ARM/2011-12-14-machine-sink.ll
index 1b21f75..9334bf3 100644
--- a/test/CodeGen/ARM/2011-12-14-machine-sink.ll
+++ b/test/CodeGen/ARM/2011-12-14-machine-sink.ll
@@ -15,13 +15,13 @@ for.cond:                                         ; preds = %for.body, %entry
 
 for.body:                                         ; preds = %for.cond
   %v.5 = select i1 undef, i32 undef, i32 0
-  %0 = load i8* undef, align 1, !tbaa !0
+  %0 = load i8* undef, align 1
   %conv88 = zext i8 %0 to i32
   %sub89 = sub nsw i32 0, %conv88
   %v.8 = select i1 undef, i32 undef, i32 %sub89
-  %1 = load i8* null, align 1, !tbaa !0
+  %1 = load i8* null, align 1
   %conv108 = zext i8 %1 to i32
-  %2 = load i8* undef, align 1, !tbaa !0
+  %2 = load i8* undef, align 1
   %conv110 = zext i8 %2 to i32
   %sub111 = sub nsw i32 %conv108, %conv110
   %cmp112 = icmp slt i32 %sub111, 0
@@ -44,6 +44,3 @@ if.end299:                                        ; preds = %for.body, %for.cond
   %s.10 = phi i32 [ %add172, %for.body ], [ 0, %for.cond ]
   ret i32 %s.10
 }
-
-!0 = metadata !{metadata !"omnipotent char", metadata !1}
-!1 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll b/test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll
index 926daaf..0f1c452 100644
--- a/test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll
+++ b/test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll
@@ -18,7 +18,7 @@ bb3:                                              ; preds = %bb4, %bb2
   br i1 %tmp, label %bb4, label %bb67
 
 bb4:                                              ; preds = %bb3
-  %tmp5 = load <4 x i32>* undef, align 16, !tbaa !0
+  %tmp5 = load <4 x i32>* undef, align 16
   %tmp6 = and <4 x i32> %tmp5, <i32 8388607, i32 8388607, i32 8388607, i32 8388607>
   %tmp7 = or <4 x i32> %tmp6, <i32 1065353216, i32 1065353216, i32 1065353216, i32 1065353216>
   %tmp8 = bitcast <4 x i32> %tmp7 to <4 x float>
@@ -41,9 +41,9 @@ bb4:                                              ; preds = %bb3
   %tmp24 = trunc i128 %tmp23 to i64
   %tmp25 = insertvalue [2 x i64] undef, i64 %tmp24, 0
   %tmp26 = insertvalue [2 x i64] %tmp25, i64 0, 1
-  %tmp27 = load float* undef, align 4, !tbaa !2
+  %tmp27 = load float* undef, align 4
   %tmp28 = insertelement <4 x float> undef, float %tmp27, i32 3
-  %tmp29 = load <4 x i32>* undef, align 16, !tbaa !0
+  %tmp29 = load <4 x i32>* undef, align 16
   %tmp30 = and <4 x i32> %tmp29, <i32 8388607, i32 8388607, i32 8388607, i32 8388607>
   %tmp31 = or <4 x i32> %tmp30, <i32 1065353216, i32 1065353216, i32 1065353216, i32 1065353216>
   %tmp32 = bitcast <4 x i32> %tmp31 to <4 x float>
@@ -52,10 +52,10 @@ bb4:                                              ; preds = %bb3
   %tmp35 = fmul <4 x float> %tmp34, undef
   %tmp36 = fmul <4 x float> %tmp35, undef
   %tmp37 = call arm_aapcs_vfpcc  i8* undef(i8* undef) nounwind
-  %tmp38 = load float* undef, align 4, !tbaa !2
+  %tmp38 = load float* undef, align 4
   %tmp39 = insertelement <2 x float> undef, float %tmp38, i32 0
   %tmp40 = call arm_aapcs_vfpcc  i8* undef(i8* undef) nounwind
-  %tmp41 = load float* undef, align 4, !tbaa !2
+  %tmp41 = load float* undef, align 4
   %tmp42 = insertelement <4 x float> undef, float %tmp41, i32 3
   %tmp43 = shufflevector <2 x float> %tmp39, <2 x float> undef, <4 x i32> zeroinitializer
   %tmp44 = fmul <4 x float> %tmp33, %tmp43
@@ -64,10 +64,10 @@ bb4:                                              ; preds = %bb3
   %tmp47 = fmul <4 x float> %tmp46, %tmp36
   %tmp48 = fadd <4 x float> undef, %tmp47
   %tmp49 = call arm_aapcs_vfpcc  i8* undef(i8* undef) nounwind
-  %tmp50 = load float* undef, align 4, !tbaa !2
+  %tmp50 = load float* undef, align 4
   %tmp51 = insertelement <4 x float> undef, float %tmp50, i32 3
   %tmp52 = call arm_aapcs_vfpcc float* null(i8* undef) nounwind
-  %tmp54 = load float* %tmp52, align 4, !tbaa !2
+  %tmp54 = load float* %tmp52, align 4
   %tmp55 = insertelement <4 x float> undef, float %tmp54, i32 3
   %tmp56 = fsub <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %tmp22
   %tmp57 = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %tmp56, <4 x float> %tmp55) nounwind
@@ -99,7 +99,3 @@ declare <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float>, <4 x float>) nounwin
 declare <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float>, <4 x float>) nounwind readnone
 
 declare <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float>) nounwind readnone
-
-!0 = metadata !{metadata !"omnipotent char", metadata !1}
-!1 = metadata !{metadata !"Simple C/C++ TBAA", null}
-!2 = metadata !{metadata !"float", metadata !0}
diff --git a/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll b/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll
index f1c85f1..61623ec 100644
--- a/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll
+++ b/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll
@@ -7,7 +7,7 @@ target triple = "armv7-none-linux-eabi"
 ; This test case is exercising REG_SEQUENCE, and chains of REG_SEQUENCE.
 define arm_aapcs_vfpcc void @foo(i8* nocapture %arg, i8* %arg1) nounwind align 2 {
 bb:
-  %tmp = load <2 x float>* undef, align 8, !tbaa !0
+  %tmp = load <2 x float>* undef, align 8
   %tmp2 = extractelement <2 x float> %tmp, i32 0
   %tmp3 = insertelement <4 x float> undef, float %tmp2, i32 0
   %tmp4 = insertelement <4 x float> %tmp3, float 0.000000e+00, i32 1
@@ -70,6 +70,3 @@ entry:
 declare arm_aapcs_vfpcc void @bar(i8*, float, float, float)
 declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind
 declare void @llvm.arm.neon.vst2.v4f32(i8*, <4 x float>, <4 x float>, i32) nounwind
-
-!0 = metadata !{metadata !"omnipotent char", metadata !1}
-!1 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/ARM/2012-01-26-CopyPropKills.ll b/test/CodeGen/ARM/2012-01-26-CopyPropKills.ll
index 5f24e42..a9e2ebb 100644
--- a/test/CodeGen/ARM/2012-01-26-CopyPropKills.ll
+++ b/test/CodeGen/ARM/2012-01-26-CopyPropKills.ll
@@ -56,9 +56,9 @@ bb3:                                              ; preds = %bb2
   %tmp39 = shufflevector <2 x i64> %tmp38, <2 x i64> undef, <1 x i32> zeroinitializer
   %tmp40 = bitcast <1 x i64> %tmp39 to <2 x float>
   %tmp41 = shufflevector <2 x float> %tmp40, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-  %tmp42 = load <4 x float>* null, align 16, !tbaa !0
+  %tmp42 = load <4 x float>* null, align 16
   %tmp43 = fmul <4 x float> %tmp42, %tmp41
-  %tmp44 = load <4 x float>* undef, align 16, !tbaa !0
+  %tmp44 = load <4 x float>* undef, align 16
   %tmp45 = fadd <4 x float> undef, %tmp43
   %tmp46 = fadd <4 x float> undef, %tmp45
   %tmp47 = bitcast <4 x float> %tmp36 to <2 x i64>
@@ -108,7 +108,7 @@ bb3:                                              ; preds = %bb2
   %tmp89 = fmul <4 x float> undef, %tmp88
   %tmp90 = fadd <4 x float> %tmp89, undef
   %tmp91 = fadd <4 x float> undef, %tmp90
-  store <4 x float> %tmp91, <4 x float>* undef, align 16, !tbaa !0
+  store <4 x float> %tmp91, <4 x float>* undef, align 16
   unreachable
 
 bb92:                                             ; preds = %bb2
@@ -116,6 +116,3 @@ bb92:                                             ; preds = %bb2
 }
 
 declare arm_aapcs_vfpcc void @bar(i8* noalias nocapture sret, [8 x i64]) nounwind uwtable inlinehint
-
-!0 = metadata !{metadata !"omnipotent char", metadata !1}
-!1 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/ARM/2012-04-02-TwoAddrInstrCrash.ll b/test/CodeGen/ARM/2012-04-02-TwoAddrInstrCrash.ll
index 33ad187..0843fdc 100644
--- a/test/CodeGen/ARM/2012-04-02-TwoAddrInstrCrash.ll
+++ b/test/CodeGen/ARM/2012-04-02-TwoAddrInstrCrash.ll
@@ -9,16 +9,13 @@ define arm_aapcs_vfpcc void @foo() nounwind align 2 {
 ; <label>:1                                       ; preds = %0
   %2 = shufflevector <1 x i64> zeroinitializer, <1 x i64> undef, <2 x i32> <i32 0, i32 1>
   %3 = bitcast <2 x i64> %2 to <4 x float>
-  store <4 x float> zeroinitializer, <4 x float>* undef, align 16, !tbaa !0
-  store <4 x float> zeroinitializer, <4 x float>* undef, align 16, !tbaa !0
-  store <4 x float> %3, <4 x float>* undef, align 16, !tbaa !0
+  store <4 x float> zeroinitializer, <4 x float>* undef, align 16
+  store <4 x float> zeroinitializer, <4 x float>* undef, align 16
+  store <4 x float> %3, <4 x float>* undef, align 16
   %4 = insertelement <4 x float> %3, float 8.000000e+00, i32 2
-  store <4 x float> %4, <4 x float>* undef, align 16, !tbaa !0
+  store <4 x float> %4, <4 x float>* undef, align 16
   unreachable
 
 ; <label>:5                                       ; preds = %0
   ret void
 }
-
-!0 = metadata !{metadata !"omnipotent char", metadata !1}
-!1 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/ARM/2012-04-10-DAGCombine.ll b/test/CodeGen/ARM/2012-04-10-DAGCombine.ll
index 6f50f27..089dc91 100644
--- a/test/CodeGen/ARM/2012-04-10-DAGCombine.ll
+++ b/test/CodeGen/ARM/2012-04-10-DAGCombine.ll
@@ -20,12 +20,9 @@ bb5:                                              ; preds = %bb4
   %tmp15 = shufflevector <2 x float> %tmp14, <2 x float> undef, <4 x i32> zeroinitializer
   %tmp16 = fmul <4 x float> zeroinitializer, %tmp15
   %tmp17 = fadd <4 x float> %tmp16, %arg
-  store <4 x float> %tmp17, <4 x float>* undef, align 8, !tbaa !0
+  store <4 x float> %tmp17, <4 x float>* undef, align 8
   br label %bb18
 
 bb18:                                             ; preds = %bb5, %bb4
   ret void
 }
-
-!0 = metadata !{metadata !"omnipotent char", metadata !1}
-!1 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/ARM/2012-06-12-SchedMemLatency.ll b/test/CodeGen/ARM/2012-06-12-SchedMemLatency.ll
index ca0964a..a288015 100644
--- a/test/CodeGen/ARM/2012-06-12-SchedMemLatency.ll
+++ b/test/CodeGen/ARM/2012-06-12-SchedMemLatency.ll
@@ -26,18 +26,14 @@
 ; CHECK: Successors:
 define i32 @f1(i32* nocapture %p1, i32* nocapture %p2) nounwind {
 entry:
-  store volatile i32 65540, i32* %p1, align 4, !tbaa !0
-  %0 = load volatile i32* %p2, align 4, !tbaa !0
+  store volatile i32 65540, i32* %p1, align 4
+  %0 = load volatile i32* %p2, align 4
   ret i32 %0
 }
 
 define i32 @f2(i32* nocapture %p1, i32* nocapture %p2) nounwind {
 entry:
-  store i32 65540, i32* %p1, align 4, !tbaa !0
-  %0 = load i32* %p2, align 4, !tbaa !0
+  store i32 65540, i32* %p1, align 4
+  %0 = load i32* %p2, align 4
   ret i32 %0
 }
-
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/ARM/2012-08-04-DtripleSpillReload.ll b/test/CodeGen/ARM/2012-08-04-DtripleSpillReload.ll
index e4ad45b..adb5c7e 100644
--- a/test/CodeGen/ARM/2012-08-04-DtripleSpillReload.ll
+++ b/test/CodeGen/ARM/2012-08-04-DtripleSpillReload.ll
@@ -129,7 +129,7 @@ define arm_aapcs_vfpcc void @foo(float, i1 zeroext, i1 zeroext) nounwind uwtable
   %45 = fmul <4 x float> undef, undef
   %46 = fmul <4 x float> %45, %43
   %47 = fmul <4 x float> undef, %44
-  %48 = load <4 x float>* undef, align 8, !tbaa !1
+  %48 = load <4 x float>* undef, align 8
   %49 = bitcast <4 x float> %48 to <2 x i64>
   %50 = shufflevector <2 x i64> %49, <2 x i64> undef, <1 x i32> <i32 1>
   %51 = bitcast <1 x i64> %50 to <2 x float>
@@ -145,10 +145,10 @@ define arm_aapcs_vfpcc void @foo(float, i1 zeroext, i1 zeroext) nounwind uwtable
   %61 = fmul <4 x float> %59, %60
   %62 = fmul <4 x float> %61, <float 6.000000e+01, float 6.000000e+01, float 6.000000e+01, float 6.000000e+01>
   %63 = fadd <4 x float> %47, %62
-  store <4 x float> %46, <4 x float>* undef, align 8, !tbaa !1
+  store <4 x float> %46, <4 x float>* undef, align 8
   call arm_aapcs_vfpcc  void @bar(%0* undef, float 0.000000e+00) nounwind
   call arm_aapcs_vfpcc  void @bar(%0* undef, float 0.000000e+00) nounwind
-  store <4 x float> %63, <4 x float>* undef, align 8, !tbaa !1
+  store <4 x float> %63, <4 x float>* undef, align 8
   unreachable
 
 ; <label>:64                                      ; preds = %41, %40
@@ -170,5 +170,3 @@ define arm_aapcs_vfpcc void @foo(float, i1 zeroext, i1 zeroext) nounwind uwtable
 declare arm_aapcs_vfpcc void @bar(%0*, float)
 
 !0 = metadata !{metadata !"branch_weights", i32 64, i32 4}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/ARM/2013-01-21-PR14992.ll b/test/CodeGen/ARM/2013-01-21-PR14992.ll
index 38b9e0e..05abded 100644
--- a/test/CodeGen/ARM/2013-01-21-PR14992.ll
+++ b/test/CodeGen/ARM/2013-01-21-PR14992.ll
@@ -6,11 +6,11 @@
 ;CHECK: foo:
 define i32 @foo(i32* %a) nounwind optsize {
 entry:
-  %0 = load i32* %a, align 4, !tbaa !0
+  %0 = load i32* %a, align 4
   %arrayidx1 = getelementptr inbounds i32* %a, i32 1
-  %1 = load i32* %arrayidx1, align 4, !tbaa !0
+  %1 = load i32* %arrayidx1, align 4
   %arrayidx2 = getelementptr inbounds i32* %a, i32 2
-  %2 = load i32* %arrayidx2, align 4, !tbaa !0
+  %2 = load i32* %arrayidx2, align 4
   %add.ptr = getelementptr inbounds i32* %a, i32 3
 ;Make sure we do not have a duplicated register in the front of the reg list
 ;EXPECTED:  ldm [[BASE:r[0-9]+]]!, {[[REG:r[0-9]+]], {{r[0-9]+}},
@@ -22,7 +22,3 @@ entry:
 }
 
 declare void @bar(i32*) optsize
-
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/ARM/2013-04-05-Small-ByVal-Structs-PR15293.ll b/test/CodeGen/ARM/2013-04-05-Small-ByVal-Structs-PR15293.ll
new file mode 100644
index 0000000..4a5ca9d
--- /dev/null
+++ b/test/CodeGen/ARM/2013-04-05-Small-ByVal-Structs-PR15293.ll
@@ -0,0 +1,73 @@
+;PR15293: ARM codegen ice - expected larger existing stack allocation
+;RUN: llc -mtriple=arm-linux-gnueabihf < %s | FileCheck %s
+
+;CHECK: foo:
+;CHECK: 	sub	sp, sp, #8
+;CHECK: 	push	{r11, lr}
+;CHECK: 	str	r0, [sp, #12]
+;CHECK: 	add	r0, sp, #12
+;CHECK: 	bl	fooUseParam
+;CHECK: 	pop	{r11, lr}
+;CHECK: 	add	sp, sp, #8
+;CHECK: 	mov	pc, lr
+
+;CHECK: foo2:
+;CHECK: 	sub	sp, sp, #16
+;CHECK: 	push	{r11, lr}
+;CHECK: 	str	r0, [sp, #12]
+;CHECK: 	add	r0, sp, #12
+;CHECK: 	str	r2, [sp, #16]
+;CHECK: 	bl	fooUseParam
+;CHECK: 	add	r0, sp, #16
+;CHECK: 	bl	fooUseParam
+;CHECK: 	pop	{r11, lr}
+;CHECK: 	add	sp, sp, #16
+;CHECK: 	mov	pc, lr
+
+;CHECK: doFoo:
+;CHECK: 	push	{r11, lr}
+;CHECK: 	ldr	r0,
+;CHECK: 	ldr	r0, [r0]
+;CHECK: 	bl	foo
+;CHECK: 	pop	{r11, lr}
+;CHECK: 	mov	pc, lr
+
+
+;CHECK: doFoo2:
+;CHECK: 	push	{r11, lr}
+;CHECK: 	ldr	r0,
+;CHECK: 	mov	r1, #0
+;CHECK: 	ldr	r0, [r0]
+;CHECK: 	mov	r2, r0
+;CHECK: 	bl	foo2
+;CHECK: 	pop	{r11, lr}
+;CHECK: 	mov	pc, lr
+
+
+%artz = type { i32 }
+@static_val = constant %artz { i32 777 }
+
+declare void @fooUseParam(%artz* )
+
+define void @foo(%artz* byval %s) {
+  call void @fooUseParam(%artz* %s)
+  ret void
+}
+
+define void @foo2(%artz* byval %s, i32 %p, %artz* byval %s2) {
+  call void @fooUseParam(%artz* %s)
+  call void @fooUseParam(%artz* %s2)
+  ret void
+}
+
+
+define void @doFoo() {
+  call void @foo(%artz* byval @static_val)
+  ret void
+}
+
+define void @doFoo2() {
+  call void @foo2(%artz* byval @static_val, i32 0, %artz* byval @static_val)
+  ret void
+}
+
diff --git a/test/CodeGen/ARM/2013-04-16-AAPCS-C4-vs-VFP.ll b/test/CodeGen/ARM/2013-04-16-AAPCS-C4-vs-VFP.ll
new file mode 100644
index 0000000..38d515f
--- /dev/null
+++ b/test/CodeGen/ARM/2013-04-16-AAPCS-C4-vs-VFP.ll
@@ -0,0 +1,95 @@
+;Check 5.5 Parameter Passing --> Stage C --> C.4 statement, when NSAA is not
+;equal to SP.
+;
+; Our purpose: make NSAA != SP, and only afterwards start to use GPRs.
+;
+;Co-Processor register candidates may be either in VFP or in stack, so after
+;all VFP are allocated, stack is used. We can use stack without GPR allocation
+;in that case, passing 9 f64 params, for example.
+;The first eight params go to d0-d7; the ninth one goes to the stack.
+;Now, as 10th parameter, we pass i32, and it must go to R0.
+;
+;5.5 Parameter Passing, Stage C:
+;
+;C.2.cp If the argument is a CPRC then any co-processor registers in that class
+;that are unallocated are marked as unavailable. The NSAA is adjusted upwards
+;until it is correctly aligned for the argument and the argument is copied to
+;the memory at the adjusted NSAA. The NSAA is further incremented by the size
+;of the argument. The argument has now been allocated.
+;...
+;C.4 If the size in words of the argument is not more than r4 minus NCRN, the
+;argument is copied into core registers, starting at the NCRN. The NCRN is
+;incremented by the number of registers used. Successive registers hold the
+;parts of the argument they would hold if its value were loaded into those
+;registers from memory using an LDM instruction. The argument has now been
+;allocated.
+;
+;What is actually checked here:
+;Here we check that i32 param goes to r0.
+;
+;Current test-case was produced with command:
+;arm-linux-gnueabihf-clang -mcpu=cortex-a9 params-to-GPR.c -S -O1 -emit-llvm
+;
+;// params-to-GPR.c:
+;
+;void fooUseI32(unsigned);
+;
+;void foo(long double p0,
+;         long double p1,
+;         long double p2,
+;         long double p3,
+;         long double p4,
+;         long double p5,
+;         long double p6,
+;         long double p7,
+;         long double p8,
+;         unsigned p9) {
+;  fooUseI32(p9);
+;}
+;
+;void doFoo() {
+;  foo( 1,2,3,4,5,6,7,8,9, 43 );
+;}
+
+;RUN: llc -mtriple=thumbv7-linux-gnueabihf -float-abi=hard < %s | FileCheck %s
+;
+;CHECK:     foo:
+;CHECK-NOT:     mov r0
+;CHECK-NOT:     ldr r0
+;CHECK:         bl fooUseI32
+;CHECK:     doFoo:
+;CHECK:         movs    r0, #43
+;CHECK:         bl      foo
+
+define void @foo(double %p0, ; --> D0
+                 double %p1, ; --> D1
+		 double %p2, ; --> D2
+		 double %p3, ; --> D3
+		 double %p4, ; --> D4
+		 double %p5, ; --> D5
+		 double %p6, ; --> D6
+		 double %p7, ; --> D7
+		 double %p8, ; --> Stack
+		 i32 %p9) #0 { ; --> R0, not Stack+8
+entry:
+  tail call void @fooUseI32(i32 %p9)
+  ret void
+}
+
+declare void @fooUseI32(i32)
+
+define void @doFoo() {
+entry:
+  tail call void @foo(double 23.0, ; --> D0
+                      double 23.1, ; --> D1
+		      double 23.2, ; --> D2
+                      double 23.3, ; --> D3
+                      double 23.4, ; --> D4
+                      double 23.5, ; --> D5
+                      double 23.6, ; --> D6
+                      double 23.7, ; --> D7
+                      double 23.8, ; --> Stack
+                      i32 43)      ; --> R0, not Stack+8
+  ret void
+}
+
diff --git a/test/CodeGen/ARM/2013-04-16-AAPCS-C5-vs-VFP.ll b/test/CodeGen/ARM/2013-04-16-AAPCS-C5-vs-VFP.ll
new file mode 100644
index 0000000..446403d
--- /dev/null
+++ b/test/CodeGen/ARM/2013-04-16-AAPCS-C5-vs-VFP.ll
@@ -0,0 +1,61 @@
+;Check 5.5 Parameter Passing --> Stage C --> C.5 statement, when NSAA is not
+;equal to SP.
+;
+; Our purpose: make NSAA != SP, and only afterwards start to use GPRs; then pass
+;              a byval parameter and check that it goes to the stack only.
+;
+;Co-Processor register candidates may be either in VFP or in stack, so after
+;all VFP are allocated, stack is used. We can use stack without GPR allocation
+;in that case, passing 9 f64 params, for example.
+;The first eight params go to d0-d7; the ninth one goes to the stack.
+;Now, as 10th parameter, we pass i32, and it must go to R0.
+;
+;For more information,
+;please, read 5.5 Parameter Passing, Stage C, stages C.2.cp, C.4 and C.5
+;
+;
+;RUN: llc -mtriple=thumbv7-linux-gnueabihf -float-abi=hard < %s | FileCheck %s
+
+%struct_t = type { i32, i32, i32, i32 }
+@static_val = constant %struct_t { i32 777, i32 888, i32 999, i32 1000 }
+declare void @fooUseStruct(%struct_t*)
+
+define void @foo2(double %p0, ; --> D0
+                  double %p1, ; --> D1
+		  double %p2, ; --> D2
+		  double %p3, ; --> D3
+		  double %p4, ; --> D4
+		  double %p5, ; --> D5
+		  double %p6, ; --> D6
+		  double %p7, ; --> D7
+		  double %p8, ; --> Stack
+		  i32 %p9,    ; --> R0
+                  %struct_t* byval %p10) ; --> Stack+8
+{
+entry:
+;CHECK:     push.w {r11, lr}
+;CHECK-NOT: stm
+;CHECK:     add r0, sp, #16
+;CHECK:     bl fooUseStruct
+  call void @fooUseStruct(%struct_t* %p10)
+
+  ret void
+}
+
+define void @doFoo2() {
+entry:
+;CHECK-NOT: ldm
+  tail call void @foo2(double 23.0, ; --> D0
+                       double 23.1, ; --> D1
+		       double 23.2, ; --> D2
+                       double 23.3, ; --> D3
+                       double 23.4, ; --> D4
+                       double 23.5, ; --> D5
+                       double 23.6, ; --> D6
+                       double 23.7, ; --> D7
+                       double 23.8, ; --> Stack
+                       i32 43,      ; --> R0, not Stack+8
+                       %struct_t* byval @static_val) ; --> Stack+8, not R1     
+  ret void
+}
+
diff --git a/test/CodeGen/ARM/2013-04-05-overridden-loads-PR14824.ll b/test/CodeGen/ARM/2013-04-18-load-overlap-PR14824.ll
index 2561686..4599928 100644
--- a/test/CodeGen/ARM/2013-04-05-overridden-loads-PR14824.ll
+++ b/test/CodeGen/ARM/2013-04-18-load-overlap-PR14824.ll
@@ -1,18 +1,17 @@
 ; RUN: llc < %s -mtriple=thumbv7-none-linux-gnueabi -mcpu=cortex-a9 -mattr=+neon,+neonfp | FileCheck %s
-; The test is presented by Jiangning Liu.
-;CHECK-NOT: vldmia
+; PR14824. The test is presented by Jiangning Liu. If the ld/st optimization algorithm is changed, this test case may fail.
+; Also, if the machine code for the ld/st optimizer is changed, this test case may fail. If so, remove this test.
 
 define void @sample_test(<8 x i64> * %secondSource, <8 x i64> * %source, <8 x i64> * %dest) nounwind {
+; CHECK: sample_test
+; CHECK-NOT: vldmia
+; CHECK: add
 entry:
+
+; Load %source
   %s0 = load <8 x i64> * %source, align 64
-  %s1 = load <8 x i64> * %secondSource, align 64
-  %s2 = bitcast <8 x i64> %s0 to i512
-  %data.i.i.48.extract.shift = lshr i512 %s2, 384
-  %data.i.i.48.extract.trunc = trunc i512 %data.i.i.48.extract.shift to i64
   %arrayidx64 = getelementptr inbounds <8 x i64> * %source, i32 6
   %s120 = load <8 x i64> * %arrayidx64, align 64
-  %arrayidx67 = getelementptr inbounds <8 x i64> * %secondSource, i32 6
-  %s121 = load <8 x i64> * %arrayidx67, align 64
   %s122 = bitcast <8 x i64> %s120 to i512
   %data.i.i677.48.extract.shift = lshr i512 %s122, 384
   %data.i.i677.48.extract.trunc = trunc i512 %data.i.i677.48.extract.shift to i64
@@ -32,6 +31,11 @@ entry:
   %s128 = insertelement <8 x i64> %s127, i64 %data.i.i677.32.extract.trunc, i32 5
   %s129 = insertelement <8 x i64> %s128, i64 %data.i.i677.16.extract.trunc, i32 6
   %s130 = insertelement <8 x i64> %s129, i64 %data.i.i677.56.extract.trunc, i32 7
+
+; Load %secondSource
+  %s1 = load <8 x i64> * %secondSource, align 64
+  %arrayidx67 = getelementptr inbounds <8 x i64> * %secondSource, i32 6
+  %s121 = load <8 x i64> * %arrayidx67, align 64
   %s131 = bitcast <8 x i64> %s121 to i512
   %data.i1.i676.48.extract.shift = lshr i512 %s131, 384
   %data.i1.i676.48.extract.trunc = trunc i512 %data.i1.i676.48.extract.shift to i64
@@ -51,34 +55,16 @@ entry:
   %s137 = insertelement <8 x i64> %s136, i64 %data.i1.i676.32.extract.trunc, i32 5
   %s138 = insertelement <8 x i64> %s137, i64 %data.i1.i676.16.extract.trunc, i32 6
   %s139 = insertelement <8 x i64> %s138, i64 %data.i1.i676.56.extract.trunc, i32 7
+
+; Operations on %source and %secondSource
   %vecinit28.i.i699 = shufflevector <8 x i64> %s139, <8 x i64> %s130, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 undef, i32 undef, i32 undef>
   %vecinit35.i.i700 = shufflevector <8 x i64> %vecinit28.i.i699, <8 x i64> %s139, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 13, i32 undef, i32 undef>
   %vecinit42.i.i701 = shufflevector <8 x i64> %vecinit35.i.i700, <8 x i64> %s139, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 14, i32 undef>
   %vecinit49.i.i702 = shufflevector <8 x i64> %vecinit42.i.i701, <8 x i64> %s130, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15>
   %arrayidx72 = getelementptr inbounds <8 x i64> * %dest, i32 6
   store <8 x i64> %vecinit49.i.i702, <8 x i64> * %arrayidx72, align 64
-  %arrayidx75 = getelementptr inbounds <8 x i64> * %source, i32 7
-  %s140 = load <8 x i64> * %arrayidx75, align 64
   %arrayidx78 = getelementptr inbounds <8 x i64> * %secondSource, i32 7
   %s141 = load <8 x i64> * %arrayidx78, align 64
-  %s142 = bitcast <8 x i64> %s140 to i512
-  %data.i.i650.32.extract.shift = lshr i512 %s142, 256
-  %data.i.i650.32.extract.trunc = trunc i512 %data.i.i650.32.extract.shift to i64
-  %s143 = insertelement <8 x i64> undef, i64 %data.i.i650.32.extract.trunc, i32 0
-  %s144 = insertelement <8 x i64> %s143, i64 %data.i.i650.32.extract.trunc, i32 1
-  %data.i.i650.16.extract.shift = lshr i512 %s142, 128
-  %data.i.i650.16.extract.trunc = trunc i512 %data.i.i650.16.extract.shift to i64
-  %s145 = insertelement <8 x i64> %s144, i64 %data.i.i650.16.extract.trunc, i32 2
-  %data.i.i650.8.extract.shift = lshr i512 %s142, 64
-  %data.i.i650.8.extract.trunc = trunc i512 %data.i.i650.8.extract.shift to i64
-  %s146 = insertelement <8 x i64> %s145, i64 %data.i.i650.8.extract.trunc, i32 3
-  %s147 = insertelement <8 x i64> %s146, i64 %data.i.i650.8.extract.trunc, i32 4
-  %data.i.i650.48.extract.shift = lshr i512 %s142, 384
-  %data.i.i650.48.extract.trunc = trunc i512 %data.i.i650.48.extract.shift to i64
-  %s148 = insertelement <8 x i64> %s147, i64 %data.i.i650.48.extract.trunc, i32 5
-  %s149 = insertelement <8 x i64> %s148, i64 %data.i.i650.16.extract.trunc, i32 6
-  %data.i.i650.0.extract.trunc = trunc i512 %s142 to i64
-  %s150 = insertelement <8 x i64> %s149, i64 %data.i.i650.0.extract.trunc, i32 7
   %s151 = bitcast <8 x i64> %s141 to i512
   %data.i1.i649.32.extract.shift = lshr i512 %s151, 256
   %data.i1.i649.32.extract.trunc = trunc i512 %data.i1.i649.32.extract.shift to i64
@@ -90,21 +76,7 @@ entry:
   %data.i1.i649.8.extract.shift = lshr i512 %s151, 64
   %data.i1.i649.8.extract.trunc = trunc i512 %data.i1.i649.8.extract.shift to i64
   %s155 = insertelement <8 x i64> %s154, i64 %data.i1.i649.8.extract.trunc, i32 3
-  %s156 = insertelement <8 x i64> %s155, i64 %data.i1.i649.8.extract.trunc, i32 4
-  %data.i1.i649.48.extract.shift = lshr i512 %s151, 384
-  %data.i1.i649.48.extract.trunc = trunc i512 %data.i1.i649.48.extract.shift to i64
-  %s157 = insertelement <8 x i64> %s156, i64 %data.i1.i649.48.extract.trunc, i32 5
-  %s158 = insertelement <8 x i64> %s157, i64 %data.i1.i649.16.extract.trunc, i32 6
-  %data.i1.i649.0.extract.trunc = trunc i512 %s151 to i64
-  %s159 = insertelement <8 x i64> %s158, i64 %data.i1.i649.0.extract.trunc, i32 7
-  %vecinit7.i.i669 = shufflevector <8 x i64> %s159, <8 x i64> %s150, <8 x i32> <i32 0, i32 9, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %vecinit14.i.i670 = shufflevector <8 x i64> %vecinit7.i.i669, <8 x i64> %s150, <8 x i32> <i32 0, i32 1, i32 10, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %vecinit21.i.i671 = shufflevector <8 x i64> %vecinit14.i.i670, <8 x i64> %s150, <8 x i32> <i32 0, i32 1, i32 2, i32 11, i32 undef, i32 undef, i32 undef, i32 undef>
-  %vecinit28.i.i672 = shufflevector <8 x i64> %vecinit21.i.i671, <8 x i64> %s150, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 undef, i32 undef, i32 undef>
-  %vecinit35.i.i673 = shufflevector <8 x i64> %vecinit28.i.i672, <8 x i64> %s159, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 13, i32 undef, i32 undef>
-  %vecinit42.i.i674 = shufflevector <8 x i64> %vecinit35.i.i673, <8 x i64> %s159, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 14, i32 undef>
-  %vecinit49.i.i675 = shufflevector <8 x i64> %vecinit42.i.i674, <8 x i64> %s159, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15>
   %arrayidx83 = getelementptr inbounds <8 x i64> * %dest, i32 7
-  store <8 x i64> %vecinit49.i.i675, <8 x i64> * %arrayidx83, align 64
+  store <8 x i64> %s155, <8 x i64> * %arrayidx83, align 64
   ret void
 }
diff --git a/test/CodeGen/ARM/2013-04-21-AAPCS-VA-C.1.cp.ll b/test/CodeGen/ARM/2013-04-21-AAPCS-VA-C.1.cp.ll
new file mode 100644
index 0000000..de5fd31
--- /dev/null
+++ b/test/CodeGen/ARM/2013-04-21-AAPCS-VA-C.1.cp.ll
@@ -0,0 +1,28 @@
+;Check 5.5 Parameter Passing --> Stage C --> C.1.cp statement for VA functions.
+;Note: There are no VFP CPRCs in a variadic procedure.
+;Check that after %C was sent to stack, we set Next Core Register Number to R4.
+
+;This test is a simplified IR version of
+;test-suite/SingleSource/UnitTests/2002-05-02-ManyArguments.c
+
+;RUN: llc -mtriple=thumbv7-linux-gnueabihf -float-abi=hard < %s | FileCheck %s
+
+@.str = private unnamed_addr constant [13 x i8] c"%d %d %f %i\0A\00", align 1
+
+;CHECK: printfn:
+define void @printfn(i32 %a, i16 signext %b, double %C, i8 signext %E) {
+entry:
+  %conv = sext i16 %b to i32
+  %conv1 = sext i8 %E to i32
+  %call = tail call i32 (i8*, ...)* @printf(
+	i8* getelementptr inbounds ([13 x i8]* @.str, i32 0, i32 0), ; --> R0
+        i32 %a,                                          ; --> R1
+        i32 %conv,                                       ; --> R2
+        double %C,                                       ; --> SP, NCRN := R4
+;CHECK:    str r2, [sp, #8]                                                                     
+        i32 %conv1)                                      ; --> SP+8
+  ret void
+}
+
+declare i32 @printf(i8* nocapture, ...)
+
diff --git a/test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP.ll b/test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP.ll
new file mode 100644
index 0000000..6db71fe
--- /dev/null
+++ b/test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP.ll
@@ -0,0 +1,48 @@
+;Check AAPCS, 5.5 Parameter Passing, C4 and C5 rules.
+;Check case when NSAA != 0, and NCRN < R4, NCRN+ParamSize < R4
+;RUN: llc -mtriple=thumbv7-linux-gnueabihf -float-abi=hard < %s | FileCheck %s
+
+%st_t = type { i32, i32 }
+@static_val = constant %st_t { i32 777, i32 888}
+
+declare void @fooUseStruct(%st_t*)
+
+define void @foo(double %vfp0,     ; --> D0,     NSAA=SP
+                 double %vfp1,     ; --> D1,     NSAA=SP
+		 double %vfp2,     ; --> D2,     NSAA=SP
+		 double %vfp3,     ; --> D3,     NSAA=SP
+		 double %vfp4,     ; --> D4,     NSAA=SP
+		 double %vfp5,     ; --> D5,     NSAA=SP
+		 double %vfp6,     ; --> D6,     NSAA=SP
+		 double %vfp7,     ; --> D7,     NSAA=SP
+		 double %vfp8,     ; --> SP,     NSAA=SP+8 (!)
+                 i32 %p0,          ; --> R0,     NSAA=SP+8 
+		 %st_t* byval %p1, ; --> R1, R2, NSAA=SP+8
+		 i32 %p2,          ; --> R3,     NSAA=SP+8 
+                 i32 %p3) #0 {     ; --> SP+8,   NSAA=SP+12
+entry:
+  ;CHECK: sub sp, #8
+  ;CHECK: push.w {r11, lr}
+  ;CHECK: add r0, sp, #16
+  ;CHECK: str r2, [sp, #20]
+  ;CHECK: str r1, [sp, #16]
+  ;CHECK: bl  fooUseStruct
+  call void @fooUseStruct(%st_t* %p1)
+  ret void
+}
+
+define void @doFoo() {
+entry:
+  call void @foo(double 23.0,
+                 double 23.1,
+                 double 23.2,
+                 double 23.3,
+                 double 23.4,
+                 double 23.5,
+                 double 23.6,
+                 double 23.7,
+                 double 23.8,
+                 i32 0, %st_t* byval @static_val, i32 1, i32 2)
+  ret void
+}
+
diff --git a/test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP2.ll b/test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP2.ll
new file mode 100644
index 0000000..212bbc2
--- /dev/null
+++ b/test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP2.ll
@@ -0,0 +1,45 @@
+;Check AAPCS, 5.5 Parameter Passing, C4 and C5 rules.
+;Check case when NSAA != 0, and NCRN < R4, NCRN+ParamSize > R4
+;RUN: llc -mtriple=thumbv7-linux-gnueabihf -float-abi=hard < %s | FileCheck %s
+
+%st_t = type { i32, i32, i32, i32 }
+@static_val = constant %st_t { i32 777, i32 888, i32 787, i32 878}
+
+define void @foo(double %vfp0,     ; --> D0,              NSAA=SP
+                 double %vfp1,     ; --> D1,              NSAA=SP
+		 double %vfp2,     ; --> D2,              NSAA=SP
+		 double %vfp3,     ; --> D3,              NSAA=SP
+		 double %vfp4,     ; --> D4,              NSAA=SP
+		 double %vfp5,     ; --> D5,              NSAA=SP
+		 double %vfp6,     ; --> D6,              NSAA=SP
+		 double %vfp7,     ; --> D7,              NSAA=SP
+		 double %vfp8,     ; --> SP,              NSAA=SP+8 (!)
+                 i32 %p0,          ; --> R0,              NSAA=SP+8 
+		 %st_t* byval %p1, ; --> SP+8, 4 words    NSAA=SP+24
+		 i32 %p2) #0 {     ; --> SP+24,           NSAA=SP+28 
+                 
+entry:
+  ;CHECK:  push.w {r11, lr}
+  ;CHECK:  ldr    r0, [sp, #32]
+  ;CHECK:  bl     fooUseI32
+  call void @fooUseI32(i32 %p2)
+  ret void
+}
+
+declare void @fooUseI32(i32)
+
+define void @doFoo() {
+entry:
+  call void @foo(double 23.0,
+                 double 23.1,
+                 double 23.2,
+                 double 23.3,
+                 double 23.4,
+                 double 23.5,
+                 double 23.6,
+                 double 23.7,
+                 double 23.8,
+                 i32 0, %st_t* byval @static_val, i32 1)
+  ret void
+}
+
diff --git a/test/CodeGen/ARM/2013-05-05-IfConvertBug.ll b/test/CodeGen/ARM/2013-05-05-IfConvertBug.ll
new file mode 100644
index 0000000..abc6e0d
--- /dev/null
+++ b/test/CodeGen/ARM/2013-05-05-IfConvertBug.ll
@@ -0,0 +1,71 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 | FileCheck %s
+; rdar://13782395
+
+define i32 @t1(i32 %a, i32 %b, i8** %retaddr) {
+; CHECK: t1:
+; CHECK: Block address taken
+; CHECK-NOT: Address of block that was removed by CodeGen
+  store i8* blockaddress(@t1, %cond_true), i8** %retaddr
+  %tmp2 = icmp eq i32 %a, 0
+  br i1 %tmp2, label %cond_false, label %cond_true
+
+cond_true:
+  %tmp5 = add i32 %b, 1
+  ret i32 %tmp5
+
+cond_false:
+  %tmp7 = add i32 %b, -1
+  ret i32 %tmp7
+}
+
+define i32 @t2(i32 %a, i32 %b, i32 %c, i32 %d, i8** %retaddr) {
+; CHECK: t2:
+; CHECK: Block address taken
+; CHECK: %cond_true
+; CHECK: add
+; CHECK: bx lr
+  store i8* blockaddress(@t2, %cond_true), i8** %retaddr
+  %tmp2 = icmp sgt i32 %c, 10
+  %tmp5 = icmp slt i32 %d, 4
+  %tmp8 = and i1 %tmp5, %tmp2
+  %tmp13 = add i32 %b, %a
+  br i1 %tmp8, label %cond_true, label %UnifiedReturnBlock
+
+cond_true:
+  %tmp15 = add i32 %tmp13, %c
+  %tmp1821 = sub i32 %tmp15, %d
+  ret i32 %tmp1821
+
+UnifiedReturnBlock:
+  ret i32 %tmp13
+}
+
+define hidden fastcc void @t3(i8** %retaddr) {
+; CHECK: t3:
+; CHECK: Block address taken
+; CHECK-NOT: Address of block that was removed by CodeGen
+bb:
+  store i8* blockaddress(@t3, %KBBlockZero_return_1), i8** %retaddr
+  br i1 undef, label %bb77, label %bb7.i
+
+bb7.i:                                            ; preds = %bb35
+  br label %bb2.i
+
+KBBlockZero_return_1:                             ; preds = %KBBlockZero.exit
+  unreachable
+
+KBBlockZero_return_0:                             ; preds = %KBBlockZero.exit
+  unreachable
+
+bb77:                                             ; preds = %bb26, %bb12, %bb
+  ret void
+
+bb2.i:                                            ; preds = %bb6.i350, %bb7.i
+  br i1 undef, label %bb6.i350, label %KBBlockZero.exit
+
+bb6.i350:                                         ; preds = %bb2.i
+  br label %bb2.i
+
+KBBlockZero.exit:                                 ; preds = %bb2.i
+  indirectbr i8* undef, [label %KBBlockZero_return_1, label %KBBlockZero_return_0]
+}
diff --git a/test/CodeGen/ARM/avoid-cpsr-rmw.ll b/test/CodeGen/ARM/avoid-cpsr-rmw.ll
index c5d00a0..c14f530 100644
--- a/test/CodeGen/ARM/avoid-cpsr-rmw.ll
+++ b/test/CodeGen/ARM/avoid-cpsr-rmw.ll
@@ -91,7 +91,7 @@ entry:
 ; CHECK: t4
 ; CHECK: vmrs APSR_nzcv, fpscr
 ; CHECK: if.then
-; CHECK-NOT movs
+; CHECK-NOT: movs
   %0 = load double* %q, align 4
   %cmp = fcmp olt double %0, 1.000000e+01
   %incdec.ptr1 = getelementptr inbounds i32* %p, i32 1
diff --git a/test/CodeGen/ARM/commute-movcc.ll b/test/CodeGen/ARM/commute-movcc.ll
index 769ba55..fbc25b4 100644
--- a/test/CodeGen/ARM/commute-movcc.ll
+++ b/test/CodeGen/ARM/commute-movcc.ll
@@ -32,7 +32,7 @@ for.body:                                         ; preds = %entry, %if.end8
   %BestCost.011 = phi i32 [ -1, %entry ], [ %BestCost.1, %if.end8 ]
   %BestIdx.010 = phi i32 [ 0, %entry ], [ %BestIdx.1, %if.end8 ]
   %arrayidx = getelementptr inbounds i32* %a, i32 %i.012
-  %0 = load i32* %arrayidx, align 4, !tbaa !0
+  %0 = load i32* %arrayidx, align 4
   %mul = mul i32 %0, %0
   %sub = add nsw i32 %i.012, -5
   %cmp2 = icmp eq i32 %sub, %Pref
@@ -53,7 +53,7 @@ if.else:                                          ; preds = %for.body
 if.end8:                                          ; preds = %if.else, %if.then
   %BestIdx.1 = phi i32 [ %i.0.BestIdx.0, %if.then ], [ %BestIdx.0.i.0, %if.else ]
   %BestCost.1 = phi i32 [ %mul.BestCost.0, %if.then ], [ %BestCost.0.mul, %if.else ]
-  store i32 %mul, i32* %arrayidx, align 4, !tbaa !0
+  store i32 %mul, i32* %arrayidx, align 4
   %inc = add i32 %i.012, 1
   %cmp = icmp eq i32 %inc, 11
   br i1 %cmp, label %for.end, label %for.body
@@ -61,7 +61,3 @@ if.end8:                                          ; preds = %if.else, %if.then
 for.end:                                          ; preds = %if.end8
   ret i32 %BestIdx.1
 }
-
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/ARM/dagcombine-concatvector.ll b/test/CodeGen/ARM/dagcombine-concatvector.ll
new file mode 100644
index 0000000..e9e0fe3
--- /dev/null
+++ b/test/CodeGen/ARM/dagcombine-concatvector.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -mtriple=thumbv7s-apple-ios3.0.0 | FileCheck %s
+
+; PR15525
+; CHECK: test1:
+; CHECK: ldr.w	[[REG:r[0-9]+]], [sp]
+; CHECK-NEXT: vmov	{{d[0-9]+}}, r1, r2
+; CHECK-NEXT: vmov	{{d[0-9]+}}, r3, [[REG]]
+; CHECK-NEXT: vst1.8	{{{d[0-9]+}}, {{d[0-9]+}}}, [r0]
+; CHECK-NEXT: bx	lr
+define void @test1(i8* %arg, [4 x i64] %vec.coerce) {
+bb:
+  %tmp = extractvalue [4 x i64] %vec.coerce, 0
+  %tmp2 = bitcast i64 %tmp to <8 x i8>
+  %tmp3 = shufflevector <8 x i8> %tmp2, <8 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %tmp4 = extractvalue [4 x i64] %vec.coerce, 1
+  %tmp5 = bitcast i64 %tmp4 to <8 x i8>
+  %tmp6 = shufflevector <8 x i8> %tmp5, <8 x i8> undef, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %tmp7 = shufflevector <16 x i8> %tmp6, <16 x i8> %tmp3, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  tail call void @llvm.arm.neon.vst1.v16i8(i8* %arg, <16 x i8> %tmp7, i32 2)
+  ret void
+}
+
+declare void @llvm.arm.neon.vst1.v16i8(i8*, <16 x i8>, i32)
diff --git a/test/CodeGen/ARM/debug-info-arg.ll b/test/CodeGen/ARM/debug-info-arg.ll
index 33c8e9d..c162260 100644
--- a/test/CodeGen/ARM/debug-info-arg.ll
+++ b/test/CodeGen/ARM/debug-info-arg.ll
@@ -31,7 +31,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, metadata !32, i32 12, metadata !"Apple clang version 3.0 (tags/Apple/clang-211.10.1) (based on LLVM 3.0svn)", i1 true, metadata !"", i32 0, null, null, metadata !30, null, null} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, metadata !32, i32 12, metadata !"Apple clang version 3.0 (tags/Apple/clang-211.10.1) (based on LLVM 3.0svn)", i1 true, metadata !"", i32 0, null, null, metadata !30, null,  null, null} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 786478, metadata !2, metadata !2, metadata !"foo", metadata !"foo", metadata !"", i32 11, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, void (%struct.tag_s*, %struct.tag_s*, i64, i64, %struct.tag_s*, %struct.tag_s*)* @foo, null, null, metadata !31, i32 11} ; [ DW_TAG_subprogram ]
 !2 = metadata !{i32 786473, metadata !32} ; [ DW_TAG_file_type ]
 !3 = metadata !{i32 786453, metadata !32, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
diff --git a/test/CodeGen/ARM/debug-info-branch-folding.ll b/test/CodeGen/ARM/debug-info-branch-folding.ll
index 95e6cf2..38945ac 100644
--- a/test/CodeGen/ARM/debug-info-branch-folding.ll
+++ b/test/CodeGen/ARM/debug-info-branch-folding.ll
@@ -40,7 +40,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !0 = metadata !{i32 786478, i32 0, metadata !1, metadata !"test0001", metadata !"test0001", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, <4 x float> (float)* @test0001, null, null, metadata !51, i32 0} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786473, metadata !54} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, metadata !54, i32 12, metadata !"clang version 3.0 (trunk 129915)", i1 true, metadata !"", i32 0, null, null, metadata !50, null, null} ; [ DW_TAG_compile_unit ]
+!2 = metadata !{i32 786449, metadata !54, i32 12, metadata !"clang version 3.0 (trunk 129915)", i1 true, metadata !"", i32 0, null, null, metadata !50, null,  null, null} ; [ DW_TAG_compile_unit ]
 !3 = metadata !{i32 786453, metadata !54, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !4 = metadata !{metadata !5}
 !5 = metadata !{i32 786454, metadata !54, metadata !2, metadata !"v4f32", i32 14, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_typedef ]
diff --git a/test/CodeGen/ARM/debug-info-d16-reg.ll b/test/CodeGen/ARM/debug-info-d16-reg.ll
index e3e4d06..e4040fa 100644
--- a/test/CodeGen/ARM/debug-info-d16-reg.ll
+++ b/test/CodeGen/ARM/debug-info-d16-reg.ll
@@ -60,7 +60,7 @@ declare i32 @puts(i8* nocapture) nounwind
 
 !0 = metadata !{i32 786478, metadata !1, metadata !"printer", metadata !"printer", metadata !"printer", metadata !1, i32 12, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i8*, double, i8)* @printer, null, null, metadata !43, i32 12} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786473, metadata !46} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, i32 1, metadata !1, metadata !"(LLVM build 00)", i1 true, metadata !"", i32 0, null, null, metadata !42, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!2 = metadata !{i32 786449, i32 1, metadata !1, metadata !"(LLVM build 00)", i1 true, metadata !"", i32 0, null, null, metadata !42, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
 !3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !4 = metadata !{metadata !5, metadata !6, metadata !7, metadata !8}
 !5 = metadata !{i32 786468, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
diff --git a/test/CodeGen/ARM/debug-info-qreg.ll b/test/CodeGen/ARM/debug-info-qreg.ll
index 038c229..1de6ffa 100644
--- a/test/CodeGen/ARM/debug-info-qreg.ll
+++ b/test/CodeGen/ARM/debug-info-qreg.ll
@@ -39,7 +39,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !0 = metadata !{i32 786478, metadata !1, metadata !"test0001", metadata !"test0001", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, <4 x float> (float)* @test0001, null, null, metadata !51, i32 3} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786473, metadata !54} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, metadata !54, i32 12, metadata !"clang version 3.0 (trunk 129915)", i1 true, metadata !"", i32 0, null, null, metadata !50, null, null} ; [ DW_TAG_compile_unit ]
+!2 = metadata !{i32 786449, metadata !54, i32 12, metadata !"clang version 3.0 (trunk 129915)", i1 true, metadata !"", i32 0, null, null, metadata !50, null,  null, null} ; [ DW_TAG_compile_unit ]
 !3 = metadata !{i32 786453, metadata !54, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !4 = metadata !{metadata !5}
 !5 = metadata !{i32 786454, metadata !54, metadata !2, metadata !"v4f32", i32 14, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_typedef ]
diff --git a/test/CodeGen/ARM/debug-info-s16-reg.ll b/test/CodeGen/ARM/debug-info-s16-reg.ll
index f3af0b9..1868942 100644
--- a/test/CodeGen/ARM/debug-info-s16-reg.ll
+++ b/test/CodeGen/ARM/debug-info-s16-reg.ll
@@ -65,7 +65,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !0 = metadata !{i32 786478, metadata !1, metadata !"inlineprinter", metadata !"inlineprinter", metadata !"", metadata !1, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i8*, float, i8)* @inlineprinter, null, null, metadata !48, i32 5} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786473, metadata !51} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, i32 12, metadata !1, metadata !"clang version 3.0 (trunk 129915)", i1 true, metadata !"", i32 0, null, null, metadata !47, null, null} ; [ DW_TAG_compile_unit ]
+!2 = metadata !{i32 786449, i32 12, metadata !1, metadata !"clang version 3.0 (trunk 129915)", i1 true, metadata !"", i32 0, null, null, metadata !47, null,  null, null} ; [ DW_TAG_compile_unit ]
 !3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !4 = metadata !{metadata !5}
 !5 = metadata !{i32 786468, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
diff --git a/test/CodeGen/ARM/debug-info-sreg2.ll b/test/CodeGen/ARM/debug-info-sreg2.ll
index ae02a24..ba83f79 100644
--- a/test/CodeGen/ARM/debug-info-sreg2.ll
+++ b/test/CodeGen/ARM/debug-info-sreg2.ll
@@ -41,7 +41,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 4, metadata !2, metadata !"clang version 3.0 (trunk 130845)", i1 true, metadata !"", i32 0, null, null, metadata !16, null, null} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, i32 4, metadata !2, metadata !"clang version 3.0 (trunk 130845)", i1 true, metadata !"", i32 0, null, null, metadata !16, null,  null, null} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"_Z3foov", metadata !2, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, void ()* @_Z3foov, null, null, metadata !17, i32 5} ; [ DW_TAG_subprogram ]
 !2 = metadata !{i32 786473, metadata !18} ; [ DW_TAG_file_type ]
 !3 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
diff --git a/test/CodeGen/ARM/ehabi-filters.ll b/test/CodeGen/ARM/ehabi-filters.ll
index c42839d..4c92a29 100644
--- a/test/CodeGen/ARM/ehabi-filters.ll
+++ b/test/CodeGen/ARM/ehabi-filters.ll
@@ -19,7 +19,7 @@ define i32 @main() {
 entry:
   %exception.i = tail call i8* @__cxa_allocate_exception(i32 4) nounwind
   %0 = bitcast i8* %exception.i to i32*
-  store i32 42, i32* %0, align 4, !tbaa !0
+  store i32 42, i32* %0, align 4
   invoke void @__cxa_throw(i8* %exception.i, i8* bitcast (i8** @_ZTIi to i8*), i8* null) noreturn
           to label %unreachable.i unwind label %lpad.i
 
@@ -71,7 +71,3 @@ declare i32 @llvm.eh.typeid.for(i8*) nounwind readnone
 declare i8* @__cxa_begin_catch(i8*)
 
 declare void @__cxa_end_catch()
-
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/ARM/ehabi-mc-compact-pr0.ll b/test/CodeGen/ARM/ehabi-mc-compact-pr0.ll
new file mode 100644
index 0000000..11f3e6d
--- /dev/null
+++ b/test/CodeGen/ARM/ehabi-mc-compact-pr0.ll
@@ -0,0 +1,49 @@
+; RUN: llc -mtriple armv7-unknown-linux-gnueabi \
+; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
+; RUN:     -disable-fp-elim -filetype=obj -o - %s \
+; RUN:   | llvm-objdump -s - \
+; RUN:   | FileCheck %s --check-prefix=CHECK
+
+; RUN: llc -mtriple armv7-unknown-linux-gnueabi \
+; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
+; RUN:     -filetype=obj -o - %s \
+; RUN:   | llvm-objdump -s - \
+; RUN:   | FileCheck %s --check-prefix=CHECK-FP-ELIM
+
+; RUN: llc -mtriple armv7-unknown-linux-gnueabi \
+; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
+; RUN:     -disable-fp-elim -filetype=obj -o - %s \
+; RUN:   | llvm-objdump -r - \
+; RUN:   | FileCheck %s --check-prefix=CHECK-RELOC
+
+; RUN: llc -mtriple armv7-unknown-linux-gnueabi \
+; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
+; RUN:     -filetype=obj -o - %s \
+; RUN:   | llvm-objdump -r - \
+; RUN:   | FileCheck %s --check-prefix=CHECK-RELOC
+
+define void @_Z4testv() {
+entry:
+  tail call void @_Z15throw_exceptionv()
+  ret void
+}
+
+declare void @_Z15throw_exceptionv()
+
+; CHECK-NOT: section .ARM.extab
+; CHECK: section .text
+; CHECK-NOT: section .ARM.extab
+; CHECK: section .ARM.exidx
+; CHECK-NEXT: 0000 00000000 80849b80
+; CHECK-NOT: section .ARM.extab
+
+; CHECK-FP-ELIM-NOT: section .ARM.extab
+; CHECK-FP-ELIM: section .text
+; CHECK-FP-ELIM-NOT: section .ARM.extab
+; CHECK-FP-ELIM: section .ARM.exidx
+; CHECK-FP-ELIM-NEXT: 0000 00000000 b0808480
+; CHECK-FP-ELIM-NOT: section .ARM.extab
+
+; CHECK-RELOC: RELOCATION RECORDS FOR [.ARM.exidx]
+; CHECK-RELOC-NEXT: 0 R_ARM_PREL31 .text
+; CHECK-RELOC-NEXT: 0 R_ARM_NONE __aeabi_unwind_cpp_pr0
diff --git a/test/CodeGen/ARM/ehabi-mc-compact-pr1.ll b/test/CodeGen/ARM/ehabi-mc-compact-pr1.ll
new file mode 100644
index 0000000..79dba08
--- /dev/null
+++ b/test/CodeGen/ARM/ehabi-mc-compact-pr1.ll
@@ -0,0 +1,62 @@
+; RUN: llc -mtriple armv7-unknown-linux-gnueabi \
+; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
+; RUN:     -disable-fp-elim -filetype=obj -o - %s \
+; RUN:   | llvm-objdump -s - \
+; RUN:   | FileCheck %s --check-prefix=CHECK
+
+; RUN: llc -mtriple armv7-unknown-linux-gnueabi \
+; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
+; RUN:     -filetype=obj -o - %s \
+; RUN:   | llvm-objdump -s - \
+; RUN:   | FileCheck %s --check-prefix=CHECK-FP-ELIM
+
+; RUN: llc -mtriple armv7-unknown-linux-gnueabi \
+; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
+; RUN:     -disable-fp-elim -filetype=obj -o - %s \
+; RUN:   | llvm-objdump -r - \
+; RUN:   | FileCheck %s --check-prefix=CHECK-RELOC
+
+; RUN: llc -mtriple armv7-unknown-linux-gnueabi \
+; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
+; RUN:     -filetype=obj -o - %s \
+; RUN:   | llvm-objdump -r - \
+; RUN:   | FileCheck %s --check-prefix=CHECK-FP-ELIM-RELOC
+
+define i32 @_Z3addiiiiiiii(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h) {
+entry:
+  %add = add nsw i32 %b, %a
+  %add1 = add nsw i32 %add, %c
+  %add2 = add nsw i32 %add1, %d
+  tail call void @_Z15throw_exceptioni(i32 %add2)
+  %add3 = add nsw i32 %f, %e
+  %add4 = add nsw i32 %add3, %g
+  %add5 = add nsw i32 %add4, %h
+  tail call void @_Z15throw_exceptioni(i32 %add5)
+  %add6 = add nsw i32 %add5, %add2
+  ret i32 %add6
+}
+
+declare void @_Z15throw_exceptioni(i32)
+
+; CHECK-NOT: section .ARM.extab
+; CHECK: section .text
+; CHECK: section .ARM.extab
+; CHECK-NEXT: 0000 419b0181 b0b08384
+; CHECK: section .ARM.exidx
+; CHECK-NEXT: 0000 00000000 00000000
+; CHECK-NOT: section .ARM.extab
+
+; CHECK-FP-ELIM-NOT: section .ARM.extab
+; CHECK-FP-ELIM: section .text
+; CHECK-FP-ELIM-NOT: section .ARM.extab
+; CHECK-FP-ELIM: section .ARM.exidx
+; CHECK-FP-ELIM-NEXT: 0000 00000000 b0838480
+; CHECK-FP-ELIM-NOT: section .ARM.extab
+
+; CHECK-RELOC: RELOCATION RECORDS FOR [.ARM.exidx]
+; CHECK-RELOC-NEXT: 0 R_ARM_PREL31 .text
+; CHECK-RELOC-NEXT: 0 R_ARM_NONE __aeabi_unwind_cpp_pr1
+
+; CHECK-FP-ELIM-RELOC: RELOCATION RECORDS FOR [.ARM.exidx]
+; CHECK-FP-ELIM-RELOC-NEXT: 0 R_ARM_PREL31 .text
+; CHECK-FP-ELIM-RELOC-NEXT: 0 R_ARM_NONE __aeabi_unwind_cpp_pr0
diff --git a/test/CodeGen/ARM/ehabi-mc-section-group.ll b/test/CodeGen/ARM/ehabi-mc-section-group.ll
index 5e4b509..616aa1b 100644
--- a/test/CodeGen/ARM/ehabi-mc-section-group.ll
+++ b/test/CodeGen/ARM/ehabi-mc-section-group.ll
@@ -8,7 +8,7 @@
 ; RUN: llc -mtriple arm-unknown-linux-gnueabi \
 ; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
 ; RUN:     -filetype=obj -o - %s \
-; RUN:   | elf-dump --dump-section-data \
+; RUN:   | llvm-readobj -s -sd \
 ; RUN:   | FileCheck %s
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
@@ -68,12 +68,21 @@ declare void @__cxa_end_catch()
 
 declare void @_ZSt9terminatev()
 
-; CHECK:      # Section 1
-; CHECK-NEXT: (('sh_name', 0x0000002f) # '.group'
-; CHECK:       ('_section_data', '01000000 0a000000 0c000000 0e000000')
-; CHECK:      # Section 10
-; CHECK-NEXT: (('sh_name', 0x000000e1) # '.text._Z4testIidEvT_S0_S0_S0_S0_T0_S1_S1_S1_S1_'
-; CHECK:      # Section 12
-; CHECK-NEXT: (('sh_name', 0x000000d7) # '.ARM.extab.text._Z4testIidEvT_S0_S0_S0_S0_T0_S1_S1_S1_S1_'
-; CHECK:      # Section 14
-; CHECK-NEXT: (('sh_name', 0x00000065) # '.ARM.exidx.text._Z4testIidEvT_S0_S0_S0_S0_T0_S1_S1_S1_S1_'
+; CHECK:        Section {
+; CHECK:          Index: 1
+; CHECK-NEXT:     Name: .group (47)
+; CHECK:          SectionData (
+; CHECK-NEXT:       0000: 01000000 09000000 0B000000 0D000000
+; CHECK-NEXT:     )
+
+; CHECK:        Section {
+; CHECK:          Index: 9
+; CHECK-NEXT:     Name: .text._Z4testIidEvT_S0_S0_S0_S0_T0_S1_S1_S1_S1_ (214)
+
+; CHECK:        Section {
+; CHECK:          Index: 11
+; CHECK-NEXT:     Name: .ARM.extab.text._Z4testIidEvT_S0_S0_S0_S0_T0_S1_S1_S1_S1_ (204)
+
+; CHECK:        Section {
+; CHECK:          Index: 13
+; CHECK-NEXT:     Name: .ARM.exidx.text._Z4testIidEvT_S0_S0_S0_S0_T0_S1_S1_S1_S1_ (90)
diff --git a/test/CodeGen/ARM/ehabi-mc-section.ll b/test/CodeGen/ARM/ehabi-mc-section.ll
index fc51b24..4e6e468 100644
--- a/test/CodeGen/ARM/ehabi-mc-section.ll
+++ b/test/CodeGen/ARM/ehabi-mc-section.ll
@@ -1,8 +1,14 @@
-; RUN: llc -mtriple arm-unknown-linux-gnueabi \
+; RUN: llc -mtriple armv7-unknown-linux-gnueabi \
+; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
+; RUN:     -disable-fp-elim -filetype=obj -o - %s \
+; RUN:   | llvm-objdump -s - \
+; RUN:   | FileCheck %s --check-prefix=CHECK
+
+; RUN: llc -mtriple armv7-unknown-linux-gnueabi \
 ; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
 ; RUN:     -filetype=obj -o - %s \
 ; RUN:   | llvm-objdump -s - \
-; RUN:   | FileCheck %s
+; RUN:   | FileCheck %s --check-prefix=CHECK-FP-ELIM
 
 define void @_Z4testiiiiiddddd(i32 %u1, i32 %u2, i32 %u3, i32 %u4, i32 %u5, double %v1, double %v2, double %v3, double %v4, double %v5) section ".test_section" {
 entry:
@@ -54,6 +60,12 @@ declare void @_ZSt9terminatev()
 
 ; CHECK: section .test_section
 ; CHECK: section .ARM.extab.test_section
-; CHECK-NEXT: 0000 00000000 b0b0b000
+; CHECK-NEXT: 0000 00000000 c9409b01 b0818484
 ; CHECK: section .ARM.exidx.test_section
 ; CHECK-NEXT: 0000 00000000 00000000
+
+; CHECK-FP-ELIM: section .test_section
+; CHECK-FP-ELIM: section .ARM.extab.test_section
+; CHECK-FP-ELIM-NEXT: 0000 00000000 84c90501 b0b0b0a8
+; CHECK-FP-ELIM: section .ARM.exidx.test_section
+; CHECK-FP-ELIM-NEXT: 0000 00000000 00000000
diff --git a/test/CodeGen/ARM/ehabi-mc-sh_link.ll b/test/CodeGen/ARM/ehabi-mc-sh_link.ll
index f90e5f3..ac0a0fc 100644
--- a/test/CodeGen/ARM/ehabi-mc-sh_link.ll
+++ b/test/CodeGen/ARM/ehabi-mc-sh_link.ll
@@ -7,7 +7,7 @@
 ; RUN: llc -mtriple arm-unknown-linux-gnueabi \
 ; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
 ; RUN:     -filetype=obj -o - %s \
-; RUN:   | elf-dump --dump-section-data \
+; RUN:   | llvm-readobj -s \
 ; RUN:   | FileCheck %s
 
 define void @test1() nounwind {
@@ -20,28 +20,39 @@ entry:
   ret void
 }
 
-; CHECK: # Section 1
-; CHECK-NEXT: (('sh_name', 0x00000010) # '.text'
-
-; CHECK:      (('sh_name', 0x00000005) # '.ARM.exidx'
-; CHECK-NEXT:  ('sh_type', 0x70000001)
-; CHECK-NEXT:  ('sh_flags', 0x00000082)
-; CHECK-NEXT:  ('sh_addr', 0x00000000)
-; CHECK-NEXT:  ('sh_offset', 0x0000005c)
-; CHECK-NEXT:  ('sh_size', 0x00000008)
-; CHECK-NEXT:  ('sh_link',  0x00000001)
-; CHECK-NEXT:  ('sh_info',  0x00000000)
-; CHECK-NEXT:  ('sh_addralign',  0x00000004)
-
-; CHECK: # Section 7
-; CHECK-NEXT: (('sh_name', 0x00000039) # '.test_section'
-
-; CHECK:      (('sh_name', 0x0000002f) # '.ARM.exidx.test_section'
-; CHECK-NEXT:  ('sh_type', 0x70000001)
-; CHECK-NEXT:  ('sh_flags', 0x00000082)
-; CHECK-NEXT:  ('sh_addr', 0x00000000)
-; CHECK-NEXT:  ('sh_offset', 0x00000068)
-; CHECK-NEXT:  ('sh_size', 0x00000008)
-; CHECK-NEXT:  ('sh_link',  0x00000007)
-; CHECK-NEXT:  ('sh_info',  0x00000000)
-; CHECK-NEXT:  ('sh_addralign',  0x00000004)
+; CHECK:      Sections [
+; CHECK:        Section {
+; CHECK:          Index: 1
+; CHECK-NEXT:     Name: .text (16)
+
+; CHECK:        Section {
+; CHECK:          Name: .ARM.exidx (5)
+; CHECK-NEXT:     Type: SHT_ARM_EXIDX
+; CHECK-NEXT:     Flags [ (0x82)
+; CHECK-NEXT:       SHF_ALLOC
+; CHECK-NEXT:       SHF_LINK_ORDER
+; CHECK-NEXT:     ]
+; CHECK-NEXT:     Address: 0x0
+; CHECK-NEXT:     Offset: 0x5C
+; CHECK-NEXT:     Size: 8
+; CHECK-NEXT:     Link: 1
+; CHECK-NEXT:     Info: 0
+; CHECK-NEXT:     AddressAlignment: 4
+
+; CHECK:        Section {
+; CHECK:          Index: 7
+; CHECK-NEXT:     Name: .test_section (57)
+
+; CHECK:        Section {
+; CHECK:          Name: .ARM.exidx.test_section (47)
+; CHECK-NEXT:     Type: SHT_ARM_EXIDX
+; CHECK-NEXT:     Flags [ (0x82)
+; CHECK-NEXT:       SHF_ALLOC
+; CHECK-NEXT:       SHF_LINK_ORDER
+; CHECK-NEXT:     ]
+; CHECK-NEXT:     Address: 0x0
+; CHECK-NEXT:     Offset: 0x68
+; CHECK-NEXT:     Size: 8
+; CHECK-NEXT:     Link: 7
+; CHECK-NEXT:     Info: 0
+; CHECK-NEXT:     AddressAlignment: 4
diff --git a/test/CodeGen/ARM/ehabi-mc.ll b/test/CodeGen/ARM/ehabi-mc.ll
index 0dc2ef7..83b8425 100644
--- a/test/CodeGen/ARM/ehabi-mc.ll
+++ b/test/CodeGen/ARM/ehabi-mc.ll
@@ -1,8 +1,14 @@
-; RUN: llc -mtriple arm-unknown-linux-gnueabi \
+; RUN: llc -mtriple armv7-unknown-linux-gnueabi \
+; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
+; RUN:     -disable-fp-elim -filetype=obj -o - %s \
+; RUN:   | llvm-objdump -s - \
+; RUN:   | FileCheck %s --check-prefix=CHECK
+
+; RUN: llc -mtriple armv7-unknown-linux-gnueabi \
 ; RUN:     -arm-enable-ehabi -arm-enable-ehabi-descriptors \
 ; RUN:     -filetype=obj -o - %s \
 ; RUN:   | llvm-objdump -s - \
-; RUN:   | FileCheck %s
+; RUN:   | FileCheck %s --check-prefix=CHECK-FP-ELIM
 
 define void @_Z4testiiiiiddddd(i32 %u1, i32 %u2, i32 %u3, i32 %u4, i32 %u5, double %v1, double %v2, double %v3, double %v4, double %v5) {
 entry:
@@ -54,6 +60,12 @@ declare void @_ZSt9terminatev()
 
 ; CHECK: section .text
 ; CHECK: section .ARM.extab
-; CHECK-NEXT: 0000 00000000 b0b0b000
+; CHECK-NEXT: 0000 00000000 c9409b01 b0818484
 ; CHECK: section .ARM.exidx
 ; CHECK-NEXT: 0000 00000000 00000000
+
+; CHECK-FP-ELIM: section .text
+; CHECK-FP-ELIM: section .ARM.extab
+; CHECK-FP-ELIM-NEXT: 0000 00000000 84c90501 b0b0b0a8
+; CHECK-FP-ELIM: section .ARM.exidx
+; CHECK-FP-ELIM-NEXT: 0000 00000000 00000000
diff --git a/test/CodeGen/ARM/gpr-paired-spill-thumbinst.ll b/test/CodeGen/ARM/gpr-paired-spill-thumbinst.ll
new file mode 100644
index 0000000..0002711
--- /dev/null
+++ b/test/CodeGen/ARM/gpr-paired-spill-thumbinst.ll
@@ -0,0 +1,30 @@
+; REQUIRES: asserts
+; RUN: llc -mtriple=thumbv7-none-linux-gnueabi -debug -o /dev/null < %s 2>&1 | FileCheck %s
+
+; This test makes sure spills of 64-bit pairs in Thumb mode actually
+; generate thumb instructions. Previously we were inserting an ARM
+; STMIA which happened to have the same encoding.
+
+define void @foo(i64* %addr) {
+  %val1 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
+  %val2 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
+  %val3 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
+  %val4 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
+  %val5 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
+  %val6 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
+  %val7 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
+
+  ; Make sure we are actually creating the Thumb versions of the spill
+  ; instructions.
+; CHECK: t2STRDi8
+; CHECK: t2LDRDi8
+
+  store volatile i64 %val1, i64* %addr
+  store volatile i64 %val2, i64* %addr
+  store volatile i64 %val3, i64* %addr
+  store volatile i64 %val4, i64* %addr
+  store volatile i64 %val5, i64* %addr
+  store volatile i64 %val6, i64* %addr
+  store volatile i64 %val7, i64* %addr
+  ret void
+}
diff --git a/test/CodeGen/ARM/gpr-paired-spill.ll b/test/CodeGen/ARM/gpr-paired-spill.ll
new file mode 100644
index 0000000..ef3e5a5
--- /dev/null
+++ b/test/CodeGen/ARM/gpr-paired-spill.ll
@@ -0,0 +1,44 @@
+; RUN: llc -mtriple=armv7-none-linux-gnueabi -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-WITH-LDRD
+; RUN: llc -mtriple=armv4-none-linux-gnueabi -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-WITHOUT-LDRD
+; RUN: llc -mtriple=thumbv7-none-linux-gnueabi -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-WITH-LDRD
+
+define void @foo(i64* %addr) {
+  %val1 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
+  %val2 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
+  %val3 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
+  %val4 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
+  %val5 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
+  %val6 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
+  %val7 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
+
+  ; Key point is that enough 64-bit paired GPR values are live that
+  ; one of them has to be spilled. This used to cause an abort because
+  ; an LDMIA was created with both a FrameIndex and an offset, which
+  ; is not allowed.
+
+; CHECK-WITH-LDRD: strd {{r[0-9]+}}, {{r[0-9]+}}, [sp, #8]
+; CHECK-WITH-LDRD: strd {{r[0-9]+}}, {{r[0-9]+}}, [sp]
+
+; CHECK-WITH-LDRD: ldrd {{r[0-9]+}}, {{r[0-9]+}}, [sp, #8]
+; CHECK-WITH-LDRD: ldrd {{r[0-9]+}}, {{r[0-9]+}}, [sp]
+
+  ; We also want to ensure the register scavenger is working (i.e. an
+  ; offset from sp can be generated), so we need two spills.
+; CHECK-WITHOUT-LDRD: add [[ADDRREG:[a-z0-9]+]], sp, #{{[0-9]+}}
+; CHECK-WITHOUT-LDRD: stm [[ADDRREG]], {r{{[0-9]+}}, r{{[0-9]+}}}
+; CHECK-WITHOUT-LDRD: stm sp, {r{{[0-9]+}}, r{{[0-9]+}}}
+
+  ; In principle LLVM may have to recalculate the offset. At the moment
+  ; it reuses the original though.
+; CHECK-WITHOUT-LDRD: ldm [[ADDRREG]], {r{{[0-9]+}}, r{{[0-9]+}}}
+; CHECK-WITHOUT-LDRD: ldm sp, {r{{[0-9]+}}, r{{[0-9]+}}}
+
+  store volatile i64 %val1, i64* %addr
+  store volatile i64 %val2, i64* %addr
+  store volatile i64 %val3, i64* %addr
+  store volatile i64 %val4, i64* %addr
+  store volatile i64 %val5, i64* %addr
+  store volatile i64 %val6, i64* %addr
+  store volatile i64 %val7, i64* %addr
+  ret void
+}
diff --git a/test/CodeGen/ARM/lsr-unfolded-offset.ll b/test/CodeGen/ARM/lsr-unfolded-offset.ll
index 5b4cf9d..9b0f3e5 100644
--- a/test/CodeGen/ARM/lsr-unfolded-offset.ll
+++ b/test/CodeGen/ARM/lsr-unfolded-offset.ll
@@ -26,8 +26,8 @@ outer.loop:                                 ; preds = %for.inc69, %entry
   %0 = phi i32 [ %inc71, %for.inc69 ], [ 0, %entry ]
   %offset = getelementptr %struct.partition_entry* %part, i32 %0, i32 2
   %len = getelementptr %struct.partition_entry* %part, i32 %0, i32 3
-  %tmp5 = load i64* %offset, align 4, !tbaa !0
-  %tmp15 = load i64* %len, align 4, !tbaa !0
+  %tmp5 = load i64* %offset, align 4
+  %tmp15 = load i64* %len, align 4
   %add = add nsw i64 %tmp15, %tmp5
   br label %inner.loop
 
@@ -40,8 +40,8 @@ inner.loop:                                       ; preds = %for.inc, %outer.loo
 if.end:                                           ; preds = %inner.loop
   %len39 = getelementptr %struct.partition_entry* %part, i32 %1, i32 3
   %offset28 = getelementptr %struct.partition_entry* %part, i32 %1, i32 2
-  %tmp29 = load i64* %offset28, align 4, !tbaa !0
-  %tmp40 = load i64* %len39, align 4, !tbaa !0
+  %tmp29 = load i64* %offset28, align 4
+  %tmp40 = load i64* %len39, align 4
   %add41 = add nsw i64 %tmp40, %tmp29
   %cmp44 = icmp sge i64 %tmp29, %tmp5
   %cmp47 = icmp slt i64 %tmp29, %add
@@ -74,7 +74,3 @@ for.end72:                                        ; preds = %for.inc69, %entry
   %overlap.0.lcssa = phi i32 [ 0, %entry ], [ %overlap.4, %for.inc69 ]
   ret i32 %overlap.0.lcssa
 }
-
-!0 = metadata !{metadata !"long long", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/ARM/misched-copy-arm.ll b/test/CodeGen/ARM/misched-copy-arm.ll
new file mode 100644
index 0000000..4b15326
--- /dev/null
+++ b/test/CodeGen/ARM/misched-copy-arm.ll
@@ -0,0 +1,30 @@
+; REQUIRES: asserts
+; RUN: llc < %s -march=thumb -mcpu=swift -pre-RA-sched=source -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > /dev/null | FileCheck %s
+;
+; Loop counter copies should be eliminated.
+; There is also a MUL here, but we don't care where it is scheduled.
+; CHECK: postinc
+; CHECK: *** Final schedule for BB#2 ***
+; CHECK: t2LDRs
+; CHECK: t2ADDrr
+; CHECK: t2CMPrr
+; CHECK: COPY
+define i32 @postinc(i32 %a, i32* nocapture %d, i32 %s) nounwind {
+entry:
+  %cmp4 = icmp eq i32 %a, 0
+  br i1 %cmp4, label %for.end, label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %s.05 = phi i32 [ %mul, %for.body ], [ 0, %entry ]
+  %indvars.iv.next = add i32 %indvars.iv, %s
+  %arrayidx = getelementptr inbounds i32* %d, i32 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %mul = mul nsw i32 %0, %s.05
+  %exitcond = icmp eq i32 %indvars.iv.next, %a
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  %s.0.lcssa = phi i32 [ 0, %entry ], [ %mul, %for.body ]
+  ret i32 %s.0.lcssa
+}
diff --git a/test/CodeGen/ARM/neon_vabs.ll b/test/CodeGen/ARM/neon_vabs.ll
new file mode 100644
index 0000000..bf2770b
--- /dev/null
+++ b/test/CodeGen/ARM/neon_vabs.ll
@@ -0,0 +1,91 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <4 x i32> @test1(<4 x i32> %a) nounwind {
+; CHECK: test1:
+; CHECK: vabs.s32 q
+        %tmp1neg = sub <4 x i32> zeroinitializer, %a
+        %b = icmp sgt <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
+        %abs = select <4 x i1> %b, <4 x i32> %a, <4 x i32> %tmp1neg
+        ret <4 x i32> %abs
+}
+
+define <4 x i32> @test2(<4 x i32> %a) nounwind {
+; CHECK: test2:
+; CHECK: vabs.s32 q
+        %tmp1neg = sub <4 x i32> zeroinitializer, %a
+        %b = icmp sge <4 x i32> %a, zeroinitializer
+        %abs = select <4 x i1> %b, <4 x i32> %a, <4 x i32> %tmp1neg
+        ret <4 x i32> %abs
+}
+
+define <8 x i16> @test3(<8 x i16> %a) nounwind {
+; CHECK: test3:
+; CHECK: vabs.s16 q
+        %tmp1neg = sub <8 x i16> zeroinitializer, %a
+        %b = icmp sgt <8 x i16> %a, zeroinitializer
+        %abs = select <8 x i1> %b, <8 x i16> %a, <8 x i16> %tmp1neg
+        ret <8 x i16> %abs
+}
+
+define <16 x i8> @test4(<16 x i8> %a) nounwind {
+; CHECK: test4:
+; CHECK: vabs.s8 q
+        %tmp1neg = sub <16 x i8> zeroinitializer, %a
+        %b = icmp slt <16 x i8> %a, zeroinitializer
+        %abs = select <16 x i1> %b, <16 x i8> %tmp1neg, <16 x i8> %a
+        ret <16 x i8> %abs
+}
+
+define <4 x i32> @test5(<4 x i32> %a) nounwind {
+; CHECK: test5:
+; CHECK: vabs.s32 q
+        %tmp1neg = sub <4 x i32> zeroinitializer, %a
+        %b = icmp sle <4 x i32> %a, zeroinitializer
+        %abs = select <4 x i1> %b, <4 x i32> %tmp1neg, <4 x i32> %a
+        ret <4 x i32> %abs
+}
+
+define <2 x i32> @test6(<2 x i32> %a) nounwind {
+; CHECK: test6:
+; CHECK: vabs.s32 d
+        %tmp1neg = sub <2 x i32> zeroinitializer, %a
+        %b = icmp sgt <2 x i32> %a, <i32 -1, i32 -1>
+        %abs = select <2 x i1> %b, <2 x i32> %a, <2 x i32> %tmp1neg
+        ret <2 x i32> %abs
+}
+
+define <2 x i32> @test7(<2 x i32> %a) nounwind {
+; CHECK: test7:
+; CHECK: vabs.s32 d
+        %tmp1neg = sub <2 x i32> zeroinitializer, %a
+        %b = icmp sge <2 x i32> %a, zeroinitializer
+        %abs = select <2 x i1> %b, <2 x i32> %a, <2 x i32> %tmp1neg
+        ret <2 x i32> %abs
+}
+
+define <4 x i16> @test8(<4 x i16> %a) nounwind {
+; CHECK: test8:
+; CHECK: vabs.s16 d
+        %tmp1neg = sub <4 x i16> zeroinitializer, %a
+        %b = icmp sgt <4 x i16> %a, zeroinitializer
+        %abs = select <4 x i1> %b, <4 x i16> %a, <4 x i16> %tmp1neg
+        ret <4 x i16> %abs
+}
+
+define <8 x i8> @test9(<8 x i8> %a) nounwind {
+; CHECK: test9:
+; CHECK: vabs.s8 d
+        %tmp1neg = sub <8 x i8> zeroinitializer, %a
+        %b = icmp slt <8 x i8> %a, zeroinitializer
+        %abs = select <8 x i1> %b, <8 x i8> %tmp1neg, <8 x i8> %a
+        ret <8 x i8> %abs
+}
+
+define <2 x i32> @test10(<2 x i32> %a) nounwind {
+; CHECK: test10:
+; CHECK: vabs.s32 d
+        %tmp1neg = sub <2 x i32> zeroinitializer, %a
+        %b = icmp sle <2 x i32> %a, zeroinitializer
+        %abs = select <2 x i1> %b, <2 x i32> %tmp1neg, <2 x i32> %a
+        ret <2 x i32> %abs
+}
diff --git a/test/CodeGen/ARM/nop_concat_vectors.ll b/test/CodeGen/ARM/nop_concat_vectors.ll
new file mode 100644
index 0000000..c810900
--- /dev/null
+++ b/test/CodeGen/ARM/nop_concat_vectors.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s
+
+;CHECK: _foo
+;CHECK-NOT: vld1.32
+;CHECK-NOT: vst1.32
+;CHECK: bx
+define void @foo(<16 x i8>* %J) {
+  %A = load <16 x i8>* %J
+  %T1 = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %T2 = shufflevector <8 x i8>  %T1, <8 x i8> undef, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  store <16 x i8> %T2, <16 x i8>* %J
+  ret void
+}
diff --git a/test/CodeGen/ARM/private.ll b/test/CodeGen/ARM/private.ll
index f93ffe7..94578d8 100644
--- a/test/CodeGen/ARM/private.ll
+++ b/test/CodeGen/ARM/private.ll
@@ -1,10 +1,11 @@
 ; Test to make sure that the 'private' is used correctly.
 ;
-; RUN: llc < %s -mtriple=arm-linux-gnueabi > %t
-; RUN: grep .Lfoo: %t
-; RUN: egrep bl.*\.Lfoo %t
-; RUN: grep .Lbaz: %t
-; RUN: grep long.*\.Lbaz %t
+; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s
+; CHECK: .Lfoo:
+; CHECK: bar:
+; CHECK: bl .Lfoo
+; CHECK: .long .Lbaz
+; CHECK: .Lbaz:
 
 define private void @foo() {
         ret void
diff --git a/test/CodeGen/ARM/returned-ext.ll b/test/CodeGen/ARM/returned-ext.ll
new file mode 100644
index 0000000..670b12f
--- /dev/null
+++ b/test/CodeGen/ARM/returned-ext.ll
@@ -0,0 +1,178 @@
+; RUN: llc < %s -mtriple=armv6-linux-gnueabi -arm-tail-calls | FileCheck %s -check-prefix=CHECKELF
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -arm-tail-calls | FileCheck %s -check-prefix=CHECKT2D
+
+declare i16 @identity16(i16 returned %x)
+declare i32 @identity32(i32 returned %x)
+declare zeroext i16 @retzext16(i16 returned %x)
+declare i16 @paramzext16(i16 zeroext returned %x)
+declare zeroext i16 @bothzext16(i16 zeroext returned %x)
+
+; The zeroext param attribute below is meant to have no effect
+define i16 @test_identity(i16 zeroext %x) {
+entry:
+; CHECKELF: test_identity:
+; CHECKELF: mov [[SAVEX:r[0-9]+]], r0
+; CHECKELF: bl identity16
+; CHECKELF: uxth r0, r0
+; CHECKELF: bl identity32
+; CHECKELF: mov r0, [[SAVEX]]
+; CHECKT2D: test_identity:
+; CHECKT2D: mov [[SAVEX:r[0-9]+]], r0
+; CHECKT2D: blx _identity16
+; CHECKT2D: uxth r0, r0
+; CHECKT2D: blx _identity32
+; CHECKT2D: mov r0, [[SAVEX]]
+  %call = tail call i16 @identity16(i16 %x)
+  %b = zext i16 %call to i32
+  %call2 = tail call i32 @identity32(i32 %b)
+  ret i16 %x
+}
+
+; FIXME: This ought not to require register saving but currently does because
+; x is not considered equal to %call (see SelectionDAGBuilder.cpp)
+define i16 @test_matched_ret(i16 %x) {
+entry:
+; CHECKELF: test_matched_ret:
+
+; This shouldn't be required
+; CHECKELF: mov [[SAVEX:r[0-9]+]], r0
+
+; CHECKELF: bl retzext16
+; CHECKELF-NOT: uxth r0, {{r[0-9]+}}
+; CHECKELF: bl identity32
+
+; This shouldn't be required
+; CHECKELF: mov r0, [[SAVEX]]
+
+; CHECKT2D: test_matched_ret:
+
+; This shouldn't be required
+; CHECKT2D: mov [[SAVEX:r[0-9]+]], r0
+
+; CHECKT2D: blx _retzext16
+; CHECKT2D-NOT: uxth r0, {{r[0-9]+}}
+; CHECKT2D: blx _identity32
+
+; This shouldn't be required
+; CHECKT2D: mov r0, [[SAVEX]]
+
+  %call = tail call i16 @retzext16(i16 %x)
+  %b = zext i16 %call to i32
+  %call2 = tail call i32 @identity32(i32 %b)
+  ret i16 %x
+}
+
+define i16 @test_mismatched_ret(i16 %x) {
+entry:
+; CHECKELF: test_mismatched_ret:
+; CHECKELF: mov [[SAVEX:r[0-9]+]], r0
+; CHECKELF: bl retzext16
+; CHECKELF: sxth r0, {{r[0-9]+}}
+; CHECKELF: bl identity32
+; CHECKELF: mov r0, [[SAVEX]]
+; CHECKT2D: test_mismatched_ret:
+; CHECKT2D: mov [[SAVEX:r[0-9]+]], r0
+; CHECKT2D: blx _retzext16
+; CHECKT2D: sxth r0, {{r[0-9]+}}
+; CHECKT2D: blx _identity32
+; CHECKT2D: mov r0, [[SAVEX]]
+  %call = tail call i16 @retzext16(i16 %x)
+  %b = sext i16 %call to i32
+  %call2 = tail call i32 @identity32(i32 %b)
+  ret i16 %x
+}
+
+define i16 @test_matched_paramext(i16 %x) {
+entry:
+; CHECKELF: test_matched_paramext:
+; CHECKELF: uxth r0, r0
+; CHECKELF: bl paramzext16
+; CHECKELF: uxth r0, r0
+; CHECKELF: bl identity32
+; CHECKELF: b paramzext16
+; CHECKT2D: test_matched_paramext:
+; CHECKT2D: uxth r0, r0
+; CHECKT2D: blx _paramzext16
+; CHECKT2D: uxth r0, r0
+; CHECKT2D: blx _identity32
+; CHECKT2D: b.w _paramzext16
+  %call = tail call i16 @paramzext16(i16 %x)
+  %b = zext i16 %call to i32
+  %call2 = tail call i32 @identity32(i32 %b)
+  %call3 = tail call i16 @paramzext16(i16 %call)
+  ret i16 %call3
+}
+
+; FIXME: This theoretically ought to optimize to exact same output as the
+; version above, but doesn't currently (see SelectionDAGBuilder.cpp)
+define i16 @test_matched_paramext2(i16 %x) {
+entry:
+
+; Since there doesn't seem to be an unambiguous optimal selection and
+; scheduling of uxth and mov instructions below in the absence of the 'returned'
+; optimization, don't bother checking: just verify that the calls are made
+; in the correct order as a basic sanity check
+
+; CHECKELF: test_matched_paramext2:
+; CHECKELF: bl paramzext16
+; CHECKELF: bl identity32
+; CHECKELF: b paramzext16
+; CHECKT2D: test_matched_paramext2:
+; CHECKT2D: blx _paramzext16
+; CHECKT2D: blx _identity32
+; CHECKT2D: b.w _paramzext16
+  %call = tail call i16 @paramzext16(i16 %x)
+
+; Should make no difference if %x is used below rather than %call, but it does
+  %b = zext i16 %x to i32
+
+  %call2 = tail call i32 @identity32(i32 %b)
+  %call3 = tail call i16 @paramzext16(i16 %call)
+  ret i16 %call3
+}
+
+define i16 @test_matched_bothext(i16 %x) {
+entry:
+; CHECKELF: test_matched_bothext:
+; CHECKELF: uxth r0, r0
+; CHECKELF: bl bothzext16
+; CHECKELF-NOT: uxth r0, r0
+
+; FIXME: Tail call should be OK here
+; CHECKELF: bl identity32
+
+; CHECKT2D: test_matched_bothext:
+; CHECKT2D: uxth r0, r0
+; CHECKT2D: blx _bothzext16
+; CHECKT2D-NOT: uxth r0, r0
+
+; FIXME: Tail call should be OK here
+; CHECKT2D: blx _identity32
+
+  %call = tail call i16 @bothzext16(i16 %x)
+  %b = zext i16 %x to i32
+  %call2 = tail call i32 @identity32(i32 %b)
+  ret i16 %call
+}
+
+define i16 @test_mismatched_bothext(i16 %x) {
+entry:
+; CHECKELF: test_mismatched_bothext:
+; CHECKELF: mov [[SAVEX:r[0-9]+]], r0
+; CHECKELF: uxth r0, {{r[0-9]+}}
+; CHECKELF: bl bothzext16
+; CHECKELF: sxth r0, [[SAVEX]]
+; CHECKELF: bl identity32
+; CHECKELF: mov r0, [[SAVEX]]
+; CHECKT2D: test_mismatched_bothext:
+; CHECKT2D: mov [[SAVEX:r[0-9]+]], r0
+; CHECKT2D: uxth r0, {{r[0-9]+}}
+; CHECKT2D: blx _bothzext16
+; CHECKT2D: sxth r0, [[SAVEX]]
+; CHECKT2D: blx _identity32
+; CHECKT2D: mov r0, [[SAVEX]]
+  %call = tail call i16 @bothzext16(i16 %x)
+  %b = sext i16 %x to i32
+  %call2 = tail call i32 @identity32(i32 %b)
+  ret i16 %x
+}
diff --git a/test/CodeGen/ARM/tail-dup.ll b/test/CodeGen/ARM/tail-dup.ll
index e015bf0..eb4d0ba 100644
--- a/test/CodeGen/ARM/tail-dup.ll
+++ b/test/CodeGen/ARM/tail-dup.ll
@@ -11,19 +11,19 @@
 
 define i32 @fn(i32* nocapture %opcodes) nounwind readonly ssp {
 entry:
-  %0 = load i32* %opcodes, align 4, !tbaa !0
+  %0 = load i32* %opcodes, align 4
   %arrayidx = getelementptr inbounds [3 x i8*]* @fn.codetable, i32 0, i32 %0
   br label %indirectgoto
 
 INCREMENT:                                        ; preds = %indirectgoto
   %inc = add nsw i32 %result.0, 1
-  %1 = load i32* %opcodes.addr.0, align 4, !tbaa !0
+  %1 = load i32* %opcodes.addr.0, align 4
   %arrayidx2 = getelementptr inbounds [3 x i8*]* @fn.codetable, i32 0, i32 %1
   br label %indirectgoto
 
 DECREMENT:                                        ; preds = %indirectgoto
   %dec = add nsw i32 %result.0, -1
-  %2 = load i32* %opcodes.addr.0, align 4, !tbaa !0
+  %2 = load i32* %opcodes.addr.0, align 4
   %arrayidx4 = getelementptr inbounds [3 x i8*]* @fn.codetable, i32 0, i32 %2
   br label %indirectgoto
 
@@ -38,7 +38,3 @@ indirectgoto:                                     ; preds = %DECREMENT, %INCREME
 RETURN:                                           ; preds = %indirectgoto
   ret i32 %result.0
 }
-
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/ARM/this-return.ll b/test/CodeGen/ARM/this-return.ll
new file mode 100644
index 0000000..f06e4a4
--- /dev/null
+++ b/test/CodeGen/ARM/this-return.ll
@@ -0,0 +1,105 @@
+; RUN: llc < %s -mtriple=armv6-linux-gnueabi -arm-tail-calls | FileCheck %s -check-prefix=CHECKELF
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -arm-tail-calls | FileCheck %s -check-prefix=CHECKT2D
+
+%struct.A = type { i8 }
+%struct.B = type { i32 }
+%struct.C = type { %struct.B }
+%struct.D = type { %struct.B }
+%struct.E = type { %struct.B, %struct.B }
+
+declare %struct.A* @A_ctor_base(%struct.A* returned)
+declare %struct.B* @B_ctor_base(%struct.B* returned, i32)
+declare %struct.B* @B_ctor_complete(%struct.B* returned, i32)
+
+declare %struct.A* @A_ctor_base_nothisret(%struct.A*)
+declare %struct.B* @B_ctor_base_nothisret(%struct.B*, i32)
+declare %struct.B* @B_ctor_complete_nothisret(%struct.B*, i32)
+
+define %struct.C* @C_ctor_base(%struct.C* returned %this, i32 %x) {
+entry:
+; CHECKELF: C_ctor_base:
+; CHECKELF-NOT: mov {{r[0-9]+}}, r0
+; CHECKELF: bl A_ctor_base
+; CHECKELF-NOT: mov r0, {{r[0-9]+}}
+; CHECKELF: b B_ctor_base
+; CHECKT2D: C_ctor_base:
+; CHECKT2D-NOT: mov {{r[0-9]+}}, r0
+; CHECKT2D: blx _A_ctor_base
+; CHECKT2D-NOT: mov r0, {{r[0-9]+}}
+; CHECKT2D: b.w _B_ctor_base
+  %0 = bitcast %struct.C* %this to %struct.A*
+  %call = tail call %struct.A* @A_ctor_base(%struct.A* %0)
+  %1 = getelementptr inbounds %struct.C* %this, i32 0, i32 0
+  %call2 = tail call %struct.B* @B_ctor_base(%struct.B* %1, i32 %x)
+  ret %struct.C* %this
+}
+
+define %struct.C* @C_ctor_base_nothisret(%struct.C* %this, i32 %x) {
+entry:
+; CHECKELF: C_ctor_base_nothisret:
+; CHECKELF: mov [[SAVETHIS:r[0-9]+]], r0
+; CHECKELF: bl A_ctor_base_nothisret
+; CHECKELF: mov r0, [[SAVETHIS]]
+; CHECKELF-NOT: b B_ctor_base_nothisret
+; CHECKT2D: C_ctor_base_nothisret:
+; CHECKT2D: mov [[SAVETHIS:r[0-9]+]], r0
+; CHECKT2D: blx _A_ctor_base_nothisret
+; CHECKT2D: mov r0, [[SAVETHIS]]
+; CHECKT2D-NOT: b.w _B_ctor_base_nothisret
+  %0 = bitcast %struct.C* %this to %struct.A*
+  %call = tail call %struct.A* @A_ctor_base_nothisret(%struct.A* %0)
+  %1 = getelementptr inbounds %struct.C* %this, i32 0, i32 0
+  %call2 = tail call %struct.B* @B_ctor_base_nothisret(%struct.B* %1, i32 %x)
+  ret %struct.C* %this
+}
+
+define %struct.C* @C_ctor_complete(%struct.C* %this, i32 %x) {
+entry:
+; CHECKELF: C_ctor_complete:
+; CHECKELF: b C_ctor_base
+; CHECKT2D: C_ctor_complete:
+; CHECKT2D: b.w _C_ctor_base
+  %call = tail call %struct.C* @C_ctor_base(%struct.C* %this, i32 %x)
+  ret %struct.C* %this
+}
+
+define %struct.C* @C_ctor_complete_nothisret(%struct.C* %this, i32 %x) {
+entry:
+; CHECKELF: C_ctor_complete_nothisret:
+; CHECKELF-NOT: b C_ctor_base_nothisret
+; CHECKT2D: C_ctor_complete_nothisret:
+; CHECKT2D-NOT: b.w _C_ctor_base_nothisret
+  %call = tail call %struct.C* @C_ctor_base_nothisret(%struct.C* %this, i32 %x)
+  ret %struct.C* %this
+}
+
+define %struct.D* @D_ctor_base(%struct.D* %this, i32 %x) {
+entry:
+; CHECKELF: D_ctor_base:
+; CHECKELF-NOT: mov {{r[0-9]+}}, r0
+; CHECKELF: bl B_ctor_complete
+; CHECKELF-NOT: mov r0, {{r[0-9]+}}
+; CHECKELF: b B_ctor_complete
+; CHECKT2D: D_ctor_base:
+; CHECKT2D-NOT: mov {{r[0-9]+}}, r0
+; CHECKT2D: blx _B_ctor_complete
+; CHECKT2D-NOT: mov r0, {{r[0-9]+}}
+; CHECKT2D: b.w _B_ctor_complete
+  %b = getelementptr inbounds %struct.D* %this, i32 0, i32 0
+  %call = tail call %struct.B* @B_ctor_complete(%struct.B* %b, i32 %x)
+  %call2 = tail call %struct.B* @B_ctor_complete(%struct.B* %b, i32 %x)
+  ret %struct.D* %this
+}
+
+define %struct.E* @E_ctor_base(%struct.E* %this, i32 %x) {
+entry:
+; CHECKELF: E_ctor_base:
+; CHECKELF-NOT: b B_ctor_complete
+; CHECKT2D: E_ctor_base:
+; CHECKT2D-NOT: b.w _B_ctor_complete
+  %b = getelementptr inbounds %struct.E* %this, i32 0, i32 0
+  %call = tail call %struct.B* @B_ctor_complete(%struct.B* %b, i32 %x)
+  %b2 = getelementptr inbounds %struct.E* %this, i32 0, i32 1
+  %call2 = tail call %struct.B* @B_ctor_complete(%struct.B* %b2, i32 %x)
+  ret %struct.E* %this
+}
diff --git a/test/CodeGen/ARM/v1-constant-fold.ll b/test/CodeGen/ARM/v1-constant-fold.ll
new file mode 100644
index 0000000..b86d5db
--- /dev/null
+++ b/test/CodeGen/ARM/v1-constant-fold.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mattr=+v7,+vfp3,-neon  | FileCheck %s
+
+; PR15611. Check that we don't crash when constant folding v1i32 types.
+
+; CHECK: foo:
+define void @foo(i32 %arg) {
+bb:
+  %tmp = insertelement <4 x i32> undef, i32 %arg, i32 0
+  %tmp1 = insertelement <4 x i32> %tmp, i32 0, i32 1
+  %tmp2 = insertelement <4 x i32> %tmp1, i32 0, i32 2
+  %tmp3 = insertelement <4 x i32> %tmp2, i32 0, i32 3
+  %tmp4 = add <4 x i32> %tmp3, <i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK:  bl bar
+  tail call void @bar(<4 x i32> %tmp4)
+  ret void
+}
+
+declare void @bar(<4 x i32>)
diff --git a/test/CodeGen/ARM/vcvt-cost.ll b/test/CodeGen/ARM/vcvt-cost.ll
new file mode 100644
index 0000000..0d45c40
--- /dev/null
+++ b/test/CodeGen/ARM/vcvt-cost.ll
@@ -0,0 +1,153 @@
+; We currently estimate the cost of sext/zext/trunc v8(v16)i32 <-> v8(v16)i8
+; instructions as expensive. If lowering is improved the cost model needs to
+; change.
+; RUN: opt < %s  -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -march=arm -mcpu=cortex-a8 | FileCheck %s --check-prefix=COST
+%T0_5 = type <8 x i8>
+%T1_5 = type <8 x i32>
+; CHECK: func_cvt5:
+define void @func_cvt5(%T0_5* %loadaddr, %T1_5* %storeaddr) {
+; CHECK: vmovl.s8
+; CHECK: vmovl.s16
+; CHECK: vmovl.s16
+  %v0 = load %T0_5* %loadaddr
+; COST: func_cvt5
+; COST: cost of 3 {{.*}} sext
+  %r = sext %T0_5 %v0 to %T1_5
+  store %T1_5 %r, %T1_5* %storeaddr
+  ret void
+}
+;; We currently estimate the cost of this instruction as expensive. If lowering
+;; is improved the cost needs to change.
+%TA0_5 = type <8 x i8>
+%TA1_5 = type <8 x i32>
+; CHECK: func_cvt1:
+define void @func_cvt1(%TA0_5* %loadaddr, %TA1_5* %storeaddr) {
+; CHECK: vmovl.u8
+; CHECK: vmovl.u16
+; CHECK: vmovl.u16
+  %v0 = load %TA0_5* %loadaddr
+; COST: func_cvt1
+; COST: cost of 3 {{.*}} zext
+  %r = zext %TA0_5 %v0 to %TA1_5
+  store %TA1_5 %r, %TA1_5* %storeaddr
+  ret void
+}
+
+%T0_51 = type <8 x i32>
+%T1_51 = type <8 x i8>
+; CHECK: func_cvt51:
+define void @func_cvt51(%T0_51* %loadaddr, %T1_51* %storeaddr) {
+; CHECK: vmovn.i32
+; CHECK: vmovn.i32
+; CHECK: vmovn.i16
+  %v0 = load %T0_51* %loadaddr
+; COST: func_cvt51
+; COST: cost of 3 {{.*}} trunc
+  %r = trunc %T0_51 %v0 to %T1_51
+  store %T1_51 %r, %T1_51* %storeaddr
+  ret void
+}
+
+%TT0_5 = type <16 x i8>
+%TT1_5 = type <16 x i32>
+; CHECK: func_cvt52:
+define void @func_cvt52(%TT0_5* %loadaddr, %TT1_5* %storeaddr) {
+; CHECK: vmovl.s16
+; CHECK: vmovl.s16
+; CHECK: vmovl.s16
+; CHECK: vmovl.s16
+  %v0 = load %TT0_5* %loadaddr
+; COST: func_cvt52
+; COST: cost of 6 {{.*}} sext
+  %r = sext %TT0_5 %v0 to %TT1_5
+  store %TT1_5 %r, %TT1_5* %storeaddr
+  ret void
+}
+;; We currently estimate the cost of this instruction as expensive. If lowering
+;; is improved the cost needs to change.
+%TTA0_5 = type <16 x i8>
+%TTA1_5 = type <16 x i32>
+; CHECK: func_cvt12:
+define void @func_cvt12(%TTA0_5* %loadaddr, %TTA1_5* %storeaddr) {
+; CHECK: vmovl.u16
+; CHECK: vmovl.u16
+; CHECK: vmovl.u16
+; CHECK: vmovl.u16
+  %v0 = load %TTA0_5* %loadaddr
+; COST: func_cvt12
+; COST: cost of 6 {{.*}} zext
+  %r = zext %TTA0_5 %v0 to %TTA1_5
+  store %TTA1_5 %r, %TTA1_5* %storeaddr
+  ret void
+}
+
+%TT0_51 = type <16 x i32>
+%TT1_51 = type <16 x i8>
+; CHECK: func_cvt512:
+define void @func_cvt512(%TT0_51* %loadaddr, %TT1_51* %storeaddr) {
+; CHECK: vmovn.i32
+; CHECK: vmovn.i32
+; CHECK: vmovn.i32
+; CHECK: vmovn.i32
+; CHECK: vmovn.i16
+; CHECK: vmovn.i16
+  %v0 = load %TT0_51* %loadaddr
+; COST: func_cvt512
+; COST: cost of 6 {{.*}} trunc
+  %r = trunc %TT0_51 %v0 to %TT1_51
+  store %TT1_51 %r, %TT1_51* %storeaddr
+  ret void
+}
+
+; CHECK: sext_v4i16_v4i64:
+define void @sext_v4i16_v4i64(<4 x i16>* %loadaddr, <4 x i64>* %storeaddr) {
+; CHECK: vmovl.s32
+; CHECK: vmovl.s32
+  %v0 = load <4 x i16>* %loadaddr
+; COST: sext_v4i16_v4i64
+; COST: cost of 3 {{.*}} sext
+  %r = sext <4 x i16> %v0 to <4 x i64>
+  store <4 x i64> %r, <4 x i64>* %storeaddr
+  ret void
+}
+
+; CHECK: zext_v4i16_v4i64:
+define void @zext_v4i16_v4i64(<4 x i16>* %loadaddr, <4 x i64>* %storeaddr) {
+; CHECK: vmovl.u32
+; CHECK: vmovl.u32
+  %v0 = load <4 x i16>* %loadaddr
+; COST: zext_v4i16_v4i64
+; COST: cost of 3 {{.*}} zext
+  %r = zext <4 x i16> %v0 to <4 x i64>
+  store <4 x i64> %r, <4 x i64>* %storeaddr
+  ret void
+}
+
+; CHECK: sext_v8i16_v8i64:
+define void @sext_v8i16_v8i64(<8 x i16>* %loadaddr, <8 x i64>* %storeaddr) {
+; CHECK: vmovl.s32
+; CHECK: vmovl.s32
+; CHECK: vmovl.s32
+; CHECK: vmovl.s32
+  %v0 = load <8 x i16>* %loadaddr
+; COST: sext_v8i16_v8i64
+; COST: cost of 6 {{.*}} sext
+  %r = sext <8 x i16> %v0 to <8 x i64>
+  store <8 x i64> %r, <8 x i64>* %storeaddr
+  ret void
+}
+
+; CHECK: zext_v8i16_v8i64:
+define void @zext_v8i16_v8i64(<8 x i16>* %loadaddr, <8 x i64>* %storeaddr) {
+; CHECK: vmovl.u32
+; CHECK: vmovl.u32
+; CHECK: vmovl.u32
+; CHECK: vmovl.u32
+  %v0 = load <8 x i16>* %loadaddr
+; COST: zext_v8i16_v8i64
+; COST: cost of 6 {{.*}} zext
+  %r = zext <8 x i16> %v0 to <8 x i64>
+  store <8 x i64> %r, <8 x i64>* %storeaddr
+  ret void
+}
+
diff --git a/test/CodeGen/ARM/vcvt.ll b/test/CodeGen/ARM/vcvt.ll
index e67b478..c078f49 100644
--- a/test/CodeGen/ARM/vcvt.ll
+++ b/test/CodeGen/ARM/vcvt.ll
@@ -156,175 +156,3 @@ define <4 x i16> @vcvt_f32tof16(<4 x float>* %A) nounwind {
 
 declare <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16>) nounwind readnone
 declare <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float>) nounwind readnone
-
-; We currently estimate the cost of sext/zext/trunc v8(v16)i32 <-> v8(v16)i8
-; instructions as expensive. If lowering is improved the cost model needs to
-; change.
-; RUN: opt < %s  -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -march=arm -mcpu=cortex-a8 | FileCheck %s --check-prefix=COST
-%T0_5 = type <8 x i8>
-%T1_5 = type <8 x i32>
-; CHECK: func_cvt5:
-define void @func_cvt5(%T0_5* %loadaddr, %T1_5* %storeaddr) {
-; CHECK: vmovl.s8
-; CHECK: vmovl.s16
-; CHECK: vmovl.s16
-  %v0 = load %T0_5* %loadaddr
-; COST: func_cvt5
-; COST: cost of 3 {{.*}} sext
-  %r = sext %T0_5 %v0 to %T1_5
-  store %T1_5 %r, %T1_5* %storeaddr
-  ret void
-}
-;; We currently estimate the cost of this instruction as expensive. If lowering
-;; is improved the cost needs to change.
-%TA0_5 = type <8 x i8>
-%TA1_5 = type <8 x i32>
-; CHECK: func_cvt1:
-define void @func_cvt1(%TA0_5* %loadaddr, %TA1_5* %storeaddr) {
-; CHECK: vmovl.u8
-; CHECK: vmovl.u16
-; CHECK: vmovl.u16
-  %v0 = load %TA0_5* %loadaddr
-; COST: func_cvt1
-; COST: cost of 3 {{.*}} zext
-  %r = zext %TA0_5 %v0 to %TA1_5
-  store %TA1_5 %r, %TA1_5* %storeaddr
-  ret void
-}
-;; We currently estimate the cost of this instruction as expensive. If lowering
-;; is improved the cost needs to change.
-%T0_51 = type <8 x i32>
-%T1_51 = type <8 x i8>
-; CHECK: func_cvt51:
-define void @func_cvt51(%T0_51* %loadaddr, %T1_51* %storeaddr) {
-; CHECK: strb
-; CHECK: strb
-; CHECK: strb
-; CHECK: strb
-; CHECK: strb
-; CHECK: strb
-; CHECK: strb
-; CHECK: strb
-  %v0 = load %T0_51* %loadaddr
-; COST: func_cvt51
-; COST: cost of 19 {{.*}} trunc
-  %r = trunc %T0_51 %v0 to %T1_51
-  store %T1_51 %r, %T1_51* %storeaddr
-  ret void
-}
-;; We currently estimate the cost of this instruction as expensive. If lowering
-;; is improved the cost needs to change.
-%TT0_5 = type <16 x i8>
-%TT1_5 = type <16 x i32>
-; CHECK: func_cvt52:
-define void @func_cvt52(%TT0_5* %loadaddr, %TT1_5* %storeaddr) {
-; CHECK: vmovl.s16
-; CHECK: vmovl.s16
-; CHECK: vmovl.s16
-; CHECK: vmovl.s16
-  %v0 = load %TT0_5* %loadaddr
-; COST: func_cvt52
-; COST: cost of 6 {{.*}} sext
-  %r = sext %TT0_5 %v0 to %TT1_5
-  store %TT1_5 %r, %TT1_5* %storeaddr
-  ret void
-}
-;; We currently estimate the cost of this instruction as expensive. If lowering
-;; is improved the cost needs to change.
-%TTA0_5 = type <16 x i8>
-%TTA1_5 = type <16 x i32>
-; CHECK: func_cvt12:
-define void @func_cvt12(%TTA0_5* %loadaddr, %TTA1_5* %storeaddr) {
-; CHECK: vmovl.u16
-; CHECK: vmovl.u16
-; CHECK: vmovl.u16
-; CHECK: vmovl.u16
-  %v0 = load %TTA0_5* %loadaddr
-; COST: func_cvt12
-; COST: cost of 6 {{.*}} zext
-  %r = zext %TTA0_5 %v0 to %TTA1_5
-  store %TTA1_5 %r, %TTA1_5* %storeaddr
-  ret void
-}
-;; We currently estimate the cost of this instruction as expensive. If lowering
-;; is improved the cost needs to change.
-%TT0_51 = type <16 x i32>
-%TT1_51 = type <16 x i8>
-; CHECK: func_cvt512:
-define void @func_cvt512(%TT0_51* %loadaddr, %TT1_51* %storeaddr) {
-; CHECK: strb
-; CHECK: strb
-; CHECK: strb
-; CHECK: strb
-; CHECK: strb
-; CHECK: strb
-; CHECK: strb
-; CHECK: strb
-; CHECK: strb
-; CHECK: strb
-; CHECK: strb
-; CHECK: strb
-; CHECK: strb
-; CHECK: strb
-; CHECK: strb
-; CHECK: strb
-  %v0 = load %TT0_51* %loadaddr
-; COST: func_cvt512
-; COST: cost of 38 {{.*}} trunc
-  %r = trunc %TT0_51 %v0 to %TT1_51
-  store %TT1_51 %r, %TT1_51* %storeaddr
-  ret void
-}
-
-; CHECK: sext_v4i16_v4i64:
-define void @sext_v4i16_v4i64(<4 x i16>* %loadaddr, <4 x i64>* %storeaddr) {
-; CHECK: vmovl.s32
-; CHECK: vmovl.s32
-  %v0 = load <4 x i16>* %loadaddr
-; COST: sext_v4i16_v4i64
-; COST: cost of 3 {{.*}} sext
-  %r = sext <4 x i16> %v0 to <4 x i64>
-  store <4 x i64> %r, <4 x i64>* %storeaddr
-  ret void
-}
-
-; CHECK: zext_v4i16_v4i64:
-define void @zext_v4i16_v4i64(<4 x i16>* %loadaddr, <4 x i64>* %storeaddr) {
-; CHECK: vmovl.u32
-; CHECK: vmovl.u32
-  %v0 = load <4 x i16>* %loadaddr
-; COST: zext_v4i16_v4i64
-; COST: cost of 3 {{.*}} zext
-  %r = zext <4 x i16> %v0 to <4 x i64>
-  store <4 x i64> %r, <4 x i64>* %storeaddr
-  ret void
-}
-
-; CHECK: sext_v8i16_v8i64:
-define void @sext_v8i16_v8i64(<8 x i16>* %loadaddr, <8 x i64>* %storeaddr) {
-; CHECK: vmovl.s32
-; CHECK: vmovl.s32
-; CHECK: vmovl.s32
-; CHECK: vmovl.s32
-  %v0 = load <8 x i16>* %loadaddr
-; COST: sext_v8i16_v8i64
-; COST: cost of 6 {{.*}} sext
-  %r = sext <8 x i16> %v0 to <8 x i64>
-  store <8 x i64> %r, <8 x i64>* %storeaddr
-  ret void
-}
-
-; CHECK: zext_v8i16_v8i64:
-define void @zext_v8i16_v8i64(<8 x i16>* %loadaddr, <8 x i64>* %storeaddr) {
-; CHECK: vmovl.u32
-; CHECK: vmovl.u32
-; CHECK: vmovl.u32
-; CHECK: vmovl.u32
-  %v0 = load <8 x i16>* %loadaddr
-; COST: zext_v8i16_v8i64
-; COST: cost of 6 {{.*}} zext
-  %r = zext <8 x i16> %v0 to <8 x i64>
-  store <8 x i64> %r, <8 x i64>* %storeaddr
-  ret void
-}
-
diff --git a/test/CodeGen/ARM/vcvt_combine.ll b/test/CodeGen/ARM/vcvt_combine.ll
index 3009e50..07ba230 100644
--- a/test/CodeGen/ARM/vcvt_combine.ll
+++ b/test/CodeGen/ARM/vcvt_combine.ll
@@ -7,7 +7,7 @@
 ; CHECK-NOT: vmul
 define void @t0() nounwind {
 entry:
-  %tmp = load float* @in, align 4, !tbaa !0
+  %tmp = load float* @in, align 4
   %vecinit.i = insertelement <2 x float> undef, float %tmp, i32 0
   %vecinit2.i = insertelement <2 x float> %vecinit.i, float %tmp, i32 1
   %mul.i = fmul <2 x float> %vecinit2.i, <float 8.000000e+00, float 8.000000e+00>
@@ -23,7 +23,7 @@ declare void @foo_int32x2_t(<2 x i32>)
 ; CHECK-NOT: vmul
 define void @t1() nounwind {
 entry:
-  %tmp = load float* @in, align 4, !tbaa !0
+  %tmp = load float* @in, align 4
   %vecinit.i = insertelement <2 x float> undef, float %tmp, i32 0
   %vecinit2.i = insertelement <2 x float> %vecinit.i, float %tmp, i32 1
   %mul.i = fmul <2 x float> %vecinit2.i, <float 8.000000e+00, float 8.000000e+00>
@@ -39,7 +39,7 @@ declare void @foo_uint32x2_t(<2 x i32>)
 ; CHECK: vmul
 define void @t2() nounwind {
 entry:
-  %tmp = load float* @in, align 4, !tbaa !0
+  %tmp = load float* @in, align 4
   %vecinit.i = insertelement <2 x float> undef, float %tmp, i32 0
   %vecinit2.i = insertelement <2 x float> %vecinit.i, float %tmp, i32 1
   %mul.i = fmul <2 x float> %vecinit2.i, <float 0x401B333340000000, float 0x401B333340000000>
@@ -53,7 +53,7 @@ entry:
 ; CHECK: vmul
 define void @t3() nounwind {
 entry:
-  %tmp = load float* @in, align 4, !tbaa !0
+  %tmp = load float* @in, align 4
   %vecinit.i = insertelement <2 x float> undef, float %tmp, i32 0
   %vecinit2.i = insertelement <2 x float> %vecinit.i, float %tmp, i32 1
   %mul.i = fmul <2 x float> %vecinit2.i, <float 0x4200000000000000, float 0x4200000000000000>
@@ -67,7 +67,7 @@ entry:
 ; CHECK-NOT: vmul
 define void @t4() nounwind {
 entry:
-  %tmp = load float* @in, align 4, !tbaa !0
+  %tmp = load float* @in, align 4
   %vecinit.i = insertelement <2 x float> undef, float %tmp, i32 0
   %vecinit2.i = insertelement <2 x float> %vecinit.i, float %tmp, i32 1
   %mul.i = fmul <2 x float> %vecinit2.i, <float 0x41F0000000000000, float 0x41F0000000000000>
@@ -81,7 +81,7 @@ entry:
 ; CHECK-NOT: vmul
 define void @t5() nounwind {
 entry:
-  %tmp = load float* @in, align 4, !tbaa !0
+  %tmp = load float* @in, align 4
   %vecinit.i = insertelement <4 x float> undef, float %tmp, i32 0
   %vecinit2.i = insertelement <4 x float> %vecinit.i, float %tmp, i32 1
   %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %tmp, i32 2
@@ -93,7 +93,3 @@ entry:
 }
 
 declare void @foo_int32x4_t(<4 x i32>)
-
-!0 = metadata !{metadata !"float", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/ARM/vdiv_combine.ll b/test/CodeGen/ARM/vdiv_combine.ll
index 7fddbed..e6f1338 100644
--- a/test/CodeGen/ARM/vdiv_combine.ll
+++ b/test/CodeGen/ARM/vdiv_combine.ll
@@ -11,7 +11,7 @@ declare void @foo_int32x4_t(<4 x i32>)
 ; CHECK-NOT: {{vdiv|vmul}}
 define void @t1() nounwind {
 entry:
-  %tmp = load i32* @iin, align 4, !tbaa !3
+  %tmp = load i32* @iin, align 4
   %vecinit.i = insertelement <2 x i32> undef, i32 %tmp, i32 0
   %vecinit2.i = insertelement <2 x i32> %vecinit.i, i32 %tmp, i32 1
   %vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float>
@@ -27,7 +27,7 @@ declare void @foo_float32x2_t(<2 x float>)
 ; CHECK-NOT: {{vdiv|vmul}}
 define void @t2() nounwind {
 entry:
-  %tmp = load i32* @uin, align 4, !tbaa !3
+  %tmp = load i32* @uin, align 4
   %vecinit.i = insertelement <2 x i32> undef, i32 %tmp, i32 0
   %vecinit2.i = insertelement <2 x i32> %vecinit.i, i32 %tmp, i32 1
   %vcvt.i = uitofp <2 x i32> %vecinit2.i to <2 x float>
@@ -41,7 +41,7 @@ entry:
 ; CHECK: {{vdiv|vmul}}
 define void @t3() nounwind {
 entry:
-  %tmp = load i32* @iin, align 4, !tbaa !3
+  %tmp = load i32* @iin, align 4
   %vecinit.i = insertelement <2 x i32> undef, i32 %tmp, i32 0
   %vecinit2.i = insertelement <2 x i32> %vecinit.i, i32 %tmp, i32 1
   %vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float>
@@ -55,7 +55,7 @@ entry:
 ; CHECK: {{vdiv|vmul}}
 define void @t4() nounwind {
 entry:
-  %tmp = load i32* @iin, align 4, !tbaa !3
+  %tmp = load i32* @iin, align 4
   %vecinit.i = insertelement <2 x i32> undef, i32 %tmp, i32 0
   %vecinit2.i = insertelement <2 x i32> %vecinit.i, i32 %tmp, i32 1
   %vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float>
@@ -69,7 +69,7 @@ entry:
 ; CHECK-NOT: {{vdiv|vmul}}
 define void @t5() nounwind {
 entry:
-  %tmp = load i32* @iin, align 4, !tbaa !3
+  %tmp = load i32* @iin, align 4
   %vecinit.i = insertelement <2 x i32> undef, i32 %tmp, i32 0
   %vecinit2.i = insertelement <2 x i32> %vecinit.i, i32 %tmp, i32 1
   %vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float>
@@ -83,7 +83,7 @@ entry:
 ; CHECK-NOT: {{vdiv|vmul}}
 define void @t6() nounwind {
 entry:
-  %tmp = load i32* @iin, align 4, !tbaa !3
+  %tmp = load i32* @iin, align 4
   %vecinit.i = insertelement <4 x i32> undef, i32 %tmp, i32 0
   %vecinit2.i = insertelement <4 x i32> %vecinit.i, i32 %tmp, i32 1
   %vecinit4.i = insertelement <4 x i32> %vecinit2.i, i32 %tmp, i32 2
@@ -95,8 +95,3 @@ entry:
 }
 
 declare void @foo_float32x4_t(<4 x float>)
-
-!0 = metadata !{metadata !"float", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
-!3 = metadata !{metadata !"int", metadata !1}
diff --git a/test/CodeGen/ARM/vmul.ll b/test/CodeGen/ARM/vmul.ll
index 74628f0..eb5ad8f 100644
--- a/test/CodeGen/ARM/vmul.ll
+++ b/test/CodeGen/ARM/vmul.ll
@@ -599,3 +599,27 @@ for.end179:                                       ; preds = %for.cond.loopexit,
 declare <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
 declare <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
 declare <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16>) nounwind readnone
+
+; vmull lowering would create a zext(v4i8 load()) instead of a zextload(v4i8),
+; creating an illegal type during legalization and causing an assert.
+; PR15970
+define void @no_illegal_types_vmull_sext(<4 x i32> %a) {
+entry:
+  %wide.load283.i = load <4 x i8>* undef, align 1
+  %0 = sext <4 x i8> %wide.load283.i to <4 x i32>
+  %1 = sub nsw <4 x i32> %0, %a
+  %2 = mul nsw <4 x i32> %1, %1
+  %predphi290.v.i = select <4 x i1> undef, <4 x i32> undef, <4 x i32> %2
+  store <4 x i32> %predphi290.v.i, <4 x i32>* undef, align 4
+  ret void
+}
+define void @no_illegal_types_vmull_zext(<4 x i32> %a) {
+entry:
+  %wide.load283.i = load <4 x i8>* undef, align 1
+  %0 = zext <4 x i8> %wide.load283.i to <4 x i32>
+  %1 = sub nsw <4 x i32> %0, %a
+  %2 = mul nsw <4 x i32> %1, %1
+  %predphi290.v.i = select <4 x i1> undef, <4 x i32> undef, <4 x i32> %2
+  store <4 x i32> %predphi290.v.i, <4 x i32>* undef, align 4
+  ret void
+}
diff --git a/test/CodeGen/Generic/annotate.ll b/test/CodeGen/Generic/annotate.ll
new file mode 100644
index 0000000..c617eb0
--- /dev/null
+++ b/test/CodeGen/Generic/annotate.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s
+
+; PR15253
+
+@.str = private unnamed_addr constant [4 x i8] c"sth\00", section "llvm.metadata"
+@.str1 = private unnamed_addr constant [4 x i8] c"t.c\00", section "llvm.metadata"
+
+
+define i32 @foo(i32 %a) {
+entry:
+  %0 = call i32 @llvm.annotation.i32(i32 %a, i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([4 x i8]* @.str1, i32 0, i32 0), i32 2)
+  ret i32 %0
+}
+
+declare i32 @llvm.annotation.i32(i32, i8*, i8*, i32) #1
diff --git a/test/CodeGen/Generic/crash.ll b/test/CodeGen/Generic/crash.ll
index d889389..d3fc204 100644
--- a/test/CodeGen/Generic/crash.ll
+++ b/test/CodeGen/Generic/crash.ll
@@ -51,7 +51,7 @@ for.body.i:                                       ; preds = %for.body.i, %entry
 
 func_74.exit.for.cond29.thread_crit_edge:         ; preds = %for.body.i
   %f13576.pre = getelementptr inbounds %struct.S0* undef, i64 0, i32 1
-  store i8 0, i8* %f13576.pre, align 4, !tbaa !0
+  store i8 0, i8* %f13576.pre, align 4
   br label %lbl_468
 
 lbl_468:                                          ; preds = %lbl_468, %func_74.exit.for.cond29.thread_crit_edge
@@ -63,6 +63,3 @@ lbl_468:                                          ; preds = %lbl_468, %func_74.e
 for.end74:                                        ; preds = %lbl_468
   ret void
 }
-
-!0 = metadata !{metadata !"omnipotent char", metadata !1}
-!1 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/Generic/ptr-annotate.ll b/test/CodeGen/Generic/ptr-annotate.ll
new file mode 100644
index 0000000..ac5bd55
--- /dev/null
+++ b/test/CodeGen/Generic/ptr-annotate.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s
+
+; PR15253
+
+%struct.mystruct = type { i32 }
+
+@.str = private unnamed_addr constant [4 x i8] c"sth\00", section "llvm.metadata"
+@.str1 = private unnamed_addr constant [4 x i8] c"t.c\00", section "llvm.metadata"
+
+define void @foo() {
+entry:
+  %m = alloca i8, align 4
+  %0 = call i8* @llvm.ptr.annotation.p0i8(i8* %m, i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([4 x i8]* @.str1, i32 0, i32 0), i32 2)
+  store i8 1, i8* %0, align 4
+  ret void
+}
+
+declare i8* @llvm.ptr.annotation.p0i8(i8*, i8*, i8*, i32) #1
diff --git a/test/CodeGen/Hexagon/absimm.ll b/test/CodeGen/Hexagon/absimm.ll
new file mode 100644
index 0000000..b8f5edc
--- /dev/null
+++ b/test/CodeGen/Hexagon/absimm.ll
@@ -0,0 +1,18 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; Check that we generate absolute addressing mode instructions
+; with an immediate value.
+
+define i32 @f1(i32 %i) nounwind {
+; CHECK: memw(##786432){{ *}}={{ *}}r{{[0-9]+}}
+entry:
+  store volatile i32 %i, i32* inttoptr (i32 786432 to i32*), align 262144
+  ret i32 %i
+}
+
+define i32* @f2(i32* nocapture %i) nounwind {
+entry:
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memw(##786432)
+  %0 = load volatile i32* inttoptr (i32 786432 to i32*), align 262144
+  %1 = inttoptr i32 %0 to i32*
+  ret i32* %1
+  }
diff --git a/test/CodeGen/Hexagon/always-ext.ll b/test/CodeGen/Hexagon/always-ext.ll
new file mode 100644
index 0000000..9c8d708
--- /dev/null
+++ b/test/CodeGen/Hexagon/always-ext.ll
@@ -0,0 +1,45 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+
+; Check that we don't generate an invalid packet with too many instructions
+; due to a store that has a must-extend operand.
+
+; CHECK: CuSuiteAdd.exit.us
+; CHECK: {
+; CHECK-NOT: call abort
+; CHECK: memw(##0)
+; CHECK: memw(r{{[0-9]+}}<<#2+##4)
+; CHECK: }
+
+%struct.CuTest.1.28.31.37.40.43.52.55.67.85.111 = type { i8*, void (%struct.CuTest.1.28.31.37.40.43.52.55.67.85.111*)*, i32, i32, i8*, [23 x i32]* }
+%struct.CuSuite.2.29.32.38.41.44.53.56.68.86.112 = type { i32, [1024 x %struct.CuTest.1.28.31.37.40.43.52.55.67.85.111*], i32 }
+
+@__func__.CuSuiteAdd = external unnamed_addr constant [11 x i8], align 8
+@.str24 = external unnamed_addr constant [140 x i8], align 8
+
+declare void @_Assert()
+
+define void @CuSuiteAddSuite() nounwind {
+entry:
+  br i1 undef, label %for.body.us, label %for.end
+
+for.body.us:                                      ; preds = %entry
+  %0 = load %struct.CuTest.1.28.31.37.40.43.52.55.67.85.111** null, align 4
+  %1 = load i32* undef, align 4
+  %cmp.i.us = icmp slt i32 %1, 1024
+  br i1 %cmp.i.us, label %CuSuiteAdd.exit.us, label %cond.false6.i.us
+
+cond.false6.i.us:                                 ; preds = %for.body.us
+  tail call void @_Assert() nounwind
+  unreachable
+
+CuSuiteAdd.exit.us:                               ; preds = %for.body.us
+  %arrayidx.i.us = getelementptr inbounds %struct.CuSuite.2.29.32.38.41.44.53.56.68.86.112* null, i32 0, i32 1, i32 %1
+  store %struct.CuTest.1.28.31.37.40.43.52.55.67.85.111* %0, %struct.CuTest.1.28.31.37.40.43.52.55.67.85.111** %arrayidx.i.us, align 4
+  call void @llvm.trap()
+  unreachable
+
+for.end:                                          ; preds = %entry
+  ret void
+}
+
+declare void @llvm.trap() noreturn nounwind
diff --git a/test/CodeGen/Hexagon/cmp_pred2.ll b/test/CodeGen/Hexagon/cmp_pred2.ll
new file mode 100644
index 0000000..a20b9f0
--- /dev/null
+++ b/test/CodeGen/Hexagon/cmp_pred2.ll
@@ -0,0 +1,87 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5  < %s | FileCheck %s
+; Make sure that the assembler-mapped compare instructions are generated correctly.
+
+@c = common global i32 0, align 4
+
+define i32 @test1(i32 %a, i32 %b) nounwind {
+; CHECK-NOT: cmp.ge
+; CHECK: cmp.gt
+entry:
+  %cmp = icmp slt i32 %a, 100
+  br i1 %cmp, label %if.then, label %entry.if.end_crit_edge
+
+entry.if.end_crit_edge:
+  %.pre = load i32* @c, align 4
+  br label %if.end
+
+if.then:
+  %sub = add nsw i32 %a, -10
+  store i32 %sub, i32* @c, align 4
+  br label %if.end
+
+if.end:
+  %0 = phi i32 [ %.pre, %entry.if.end_crit_edge ], [ %sub, %if.then ]
+  ret i32 %0
+}
+
+define i32 @test2(i32 %a, i32 %b) nounwind {
+; CHECK-NOT: cmp.lt
+; CHECK: cmp.gt
+entry:
+  %cmp = icmp sge i32 %a, %b
+  br i1 %cmp, label %entry.if.end_crit_edge, label %if.then
+
+entry.if.end_crit_edge:
+  %.pre = load i32* @c, align 4
+  br label %if.end
+
+if.then:
+  %sub = add nsw i32 %a, -10
+  store i32 %sub, i32* @c, align 4
+  br label %if.end
+
+if.end:
+  %0 = phi i32 [ %.pre, %entry.if.end_crit_edge ], [ %sub, %if.then ]
+  ret i32 %0
+}
+
+define i32 @test4(i32 %a, i32 %b) nounwind {
+; CHECK-NOT: cmp.ltu
+; CHECK: cmp.gtu
+entry:
+  %cmp = icmp uge i32 %a, %b
+  br i1 %cmp, label %entry.if.end_crit_edge, label %if.then
+
+entry.if.end_crit_edge:
+  %.pre = load i32* @c, align 4
+  br label %if.end
+
+if.then:
+  %sub = add i32 %a, -10
+  store i32 %sub, i32* @c, align 4
+  br label %if.end
+
+if.end:
+  %0 = phi i32 [ %.pre, %entry.if.end_crit_edge ], [ %sub, %if.then ]
+  ret i32 %0
+}
+
+define i32 @test5(i32 %a, i32 %b) nounwind {
+; CHECK: cmp.gtu
+entry:
+  %cmp = icmp uge i32 %a, 29999
+  br i1 %cmp, label %if.then, label %entry.if.end_crit_edge
+
+entry.if.end_crit_edge:
+  %.pre = load i32* @c, align 4
+  br label %if.end
+
+if.then:
+  %sub = add i32 %a, -10
+  store i32 %sub, i32* @c, align 4
+  br label %if.end
+
+if.end:
+  %0 = phi i32 [ %.pre, %entry.if.end_crit_edge ], [ %sub, %if.then ]
+  ret i32 %0
+}
diff --git a/test/CodeGen/Hexagon/cmpb_pred.ll b/test/CodeGen/Hexagon/cmpb_pred.ll
index 1e61447..0960da1 100644
--- a/test/CodeGen/Hexagon/cmpb_pred.ll
+++ b/test/CodeGen/Hexagon/cmpb_pred.ll
@@ -16,7 +16,7 @@ entry:
 define i32 @Func_3b(i32) nounwind readonly {
 entry:
 ; CHECK-NOT: mux
-  %1 = load i8* @Enum_global, align 1, !tbaa !0
+  %1 = load i8* @Enum_global, align 1
   %2 = trunc i32 %0 to i8
   %cmp = icmp ne i8 %1, %2
   %selv = zext i1 %cmp to i32
@@ -35,7 +35,7 @@ entry:
 define i32 @Func_3d(i32) nounwind readonly {
 entry:
 ; CHECK-NOT: mux
-  %1 = load i8* @Enum_global, align 1, !tbaa !0
+  %1 = load i8* @Enum_global, align 1
   %2 = trunc i32 %0 to i8
   %cmp = icmp eq i8 %1, %2
   %selv = zext i1 %cmp to i32
@@ -45,7 +45,7 @@ entry:
 define i32 @Func_3e(i32) nounwind readonly {
 entry:
 ; CHECK-NOT: mux
-  %1 = load i8* @Enum_global, align 1, !tbaa !0
+  %1 = load i8* @Enum_global, align 1
   %2 = trunc i32 %0 to i8
   %cmp = icmp eq i8 %1, %2
   %selv = zext i1 %cmp to i32
@@ -87,6 +87,3 @@ entry:
   %selv = zext i1 %cmp to i32
   ret i32 %selv
 }
-
-!0 = metadata !{metadata !"omnipotent char", metadata !1}
-!1 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/Hexagon/combine_ir.ll b/test/CodeGen/Hexagon/combine_ir.ll
index 921ce99..8b99ef7 100644
--- a/test/CodeGen/Hexagon/combine_ir.ll
+++ b/test/CodeGen/Hexagon/combine_ir.ll
@@ -6,12 +6,7 @@ define void @word(i32* nocapture %a) nounwind {
 entry:
   %0 = load i32* %a, align 4, !tbaa !0
   %1 = zext i32 %0 to i64
-  %add.ptr = getelementptr inbounds i32* %a, i32 1
-  %2 = load i32* %add.ptr, align 4, !tbaa !0
-  %3 = zext i32 %2 to i64
-  %4 = shl nuw i64 %3, 32
-  %ins = or i64 %4, %1
-  tail call void @bar(i64 %ins) nounwind
+  tail call void @bar(i64 %1) nounwind
   ret void
 }
 
diff --git a/test/CodeGen/Hexagon/hwloop-const.ll b/test/CodeGen/Hexagon/hwloop-const.ll
index a621c58..8204dde 100644
--- a/test/CodeGen/Hexagon/hwloop-const.ll
+++ b/test/CodeGen/Hexagon/hwloop-const.ll
@@ -15,9 +15,9 @@ entry:
 for.body:                                         ; preds = %for.body, %entry
   %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds [25000 x i32]* @b, i32 0, i32 %i.02
-  store i32 %i.02, i32* %arrayidx, align 4, !tbaa !0
+  store i32 %i.02, i32* %arrayidx, align 4
   %arrayidx1 = getelementptr inbounds [25000 x i32]* @a, i32 0, i32 %i.02
-  store i32 %i.02, i32* %arrayidx1, align 4, !tbaa !0
+  store i32 %i.02, i32* %arrayidx1, align 4
   %inc = add nsw i32 %i.02, 1
   %exitcond = icmp eq i32 %inc, 25000
   br i1 %exitcond, label %for.end, label %for.body
@@ -25,7 +25,3 @@ for.body:                                         ; preds = %for.body, %entry
 for.end:                                          ; preds = %for.body
   ret i32 0
 }
-
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/Hexagon/hwloop-dbg.ll b/test/CodeGen/Hexagon/hwloop-dbg.ll
index c2e8153..17fe7b9 100644
--- a/test/CodeGen/Hexagon/hwloop-dbg.ll
+++ b/test/CodeGen/Hexagon/hwloop-dbg.ll
@@ -19,8 +19,8 @@ for.body:                                         ; preds = %for.body, %entry
   %b.addr.01 = phi i32* [ %b, %entry ], [ %incdec.ptr, %for.body ]
   %incdec.ptr = getelementptr inbounds i32* %b.addr.01, i32 1, !dbg !21
   tail call void @llvm.dbg.value(metadata !{i32* %incdec.ptr}, i64 0, metadata !14), !dbg !21
-  %0 = load i32* %b.addr.01, align 4, !dbg !21, !tbaa !23
-  store i32 %0, i32* %arrayidx.phi, align 4, !dbg !21, !tbaa !23
+  %0 = load i32* %b.addr.01, align 4, !dbg !21
+  store i32 %0, i32* %arrayidx.phi, align 4, !dbg !21
   %inc = add nsw i32 %i.02, 1, !dbg !26
   tail call void @llvm.dbg.value(metadata !{i32 %inc}, i64 0, metadata !15), !dbg !26
   %exitcond = icmp eq i32 %inc, 10, !dbg !19
@@ -57,8 +57,5 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !20 = metadata !{i32 786443, metadata !16, i32 3, i32 3, metadata !6, i32 1} ; [ DW_TAG_lexical_block ] [/usr2/kparzysz/s.hex/t/hwloop-dbg.c]
 !21 = metadata !{i32 4, i32 5, metadata !22, null}
 !22 = metadata !{i32 786443, metadata !20, i32 3, i32 28, metadata !6, i32 2} ; [ DW_TAG_lexical_block ] [/usr2/kparzysz/s.hex/t/hwloop-dbg.c]
-!23 = metadata !{metadata !"int", metadata !24}
-!24 = metadata !{metadata !"omnipotent char", metadata !25}
-!25 = metadata !{metadata !"Simple C/C++ TBAA"}
 !26 = metadata !{i32 3, i32 23, metadata !20, null}
 !27 = metadata !{i32 6, i32 1, metadata !16, null}
diff --git a/test/CodeGen/Hexagon/memops2.ll b/test/CodeGen/Hexagon/memops2.ll
index b1b2544..d6d1a50 100644
--- a/test/CodeGen/Hexagon/memops2.ll
+++ b/test/CodeGen/Hexagon/memops2.ll
@@ -6,11 +6,11 @@ define void @f(i16* nocapture %p) nounwind {
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}-={{ *}}#1
   %add.ptr = getelementptr inbounds i16* %p, i32 10
-  %0 = load i16* %add.ptr, align 2, !tbaa !0
+  %0 = load i16* %add.ptr, align 2
   %conv2 = zext i16 %0 to i32
   %sub = add nsw i32 %conv2, 65535
   %conv1 = trunc i32 %sub to i16
-  store i16 %conv1, i16* %add.ptr, align 2, !tbaa !0
+  store i16 %conv1, i16* %add.ptr, align 2
   ret void
 }
 
@@ -19,14 +19,10 @@ entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}-={{ *}}#1
   %add.ptr.sum = add i32 %i, 10
   %add.ptr1 = getelementptr inbounds i16* %p, i32 %add.ptr.sum
-  %0 = load i16* %add.ptr1, align 2, !tbaa !0
+  %0 = load i16* %add.ptr1, align 2
   %conv3 = zext i16 %0 to i32
   %sub = add nsw i32 %conv3, 65535
   %conv2 = trunc i32 %sub to i16
-  store i16 %conv2, i16* %add.ptr1, align 2, !tbaa !0
+  store i16 %conv2, i16* %add.ptr1, align 2
   ret void
 }
-
-!0 = metadata !{metadata !"short", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/Hexagon/memops3.ll b/test/CodeGen/Hexagon/memops3.ll
index 5b8bd6c..d9e4e8f 100644
--- a/test/CodeGen/Hexagon/memops3.ll
+++ b/test/CodeGen/Hexagon/memops3.ll
@@ -6,11 +6,11 @@ define void @f(i8* nocapture %p) nounwind {
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}-={{ *}}#1
   %add.ptr = getelementptr inbounds i8* %p, i32 10
-  %0 = load i8* %add.ptr, align 1, !tbaa !0
+  %0 = load i8* %add.ptr, align 1
   %conv = zext i8 %0 to i32
   %sub = add nsw i32 %conv, 255
   %conv1 = trunc i32 %sub to i8
-  store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0
+  store i8 %conv1, i8* %add.ptr, align 1
   ret void
 }
 
@@ -19,13 +19,10 @@ entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}-={{ *}}#1
   %add.ptr.sum = add i32 %i, 10
   %add.ptr1 = getelementptr inbounds i8* %p, i32 %add.ptr.sum
-  %0 = load i8* %add.ptr1, align 1, !tbaa !0
+  %0 = load i8* %add.ptr1, align 1
   %conv = zext i8 %0 to i32
   %sub = add nsw i32 %conv, 255
   %conv2 = trunc i32 %sub to i8
-  store i8 %conv2, i8* %add.ptr1, align 1, !tbaa !0
+  store i8 %conv2, i8* %add.ptr1, align 1
   ret void
 }
-
-!0 = metadata !{metadata !"omnipotent char", metadata !1}
-!1 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/Hexagon/remove_lsr.ll b/test/CodeGen/Hexagon/remove_lsr.ll
index 79b5f4a..3128dbb 100644
--- a/test/CodeGen/Hexagon/remove_lsr.ll
+++ b/test/CodeGen/Hexagon/remove_lsr.ll
@@ -46,17 +46,17 @@ for.body:                                         ; preds = %for.body, %entry
   %1 = trunc i64 %val.021 to i32
   %2 = trunc i64 %0 to i32
   %3 = tail call i32 @llvm.hexagon.C2.mux(i32 %conv3, i32 %1, i32 %2)
-  store i32 %3, i32* %lsr.iv3335, align 4, !tbaa !0
+  store i32 %3, i32* %lsr.iv3335, align 4
   %conv8 = sext i8 %predicate_1.023 to i32
   %4 = lshr i64 %val.021, 32
   %5 = trunc i64 %4 to i32
   %6 = lshr i64 %0, 32
   %7 = trunc i64 %6 to i32
   %8 = tail call i32 @llvm.hexagon.C2.mux(i32 %conv8, i32 %5, i32 %7)
-  store i32 %8, i32* %lsr.iv2931, align 4, !tbaa !0
+  store i32 %8, i32* %lsr.iv2931, align 4
   %srcval = load i64* %lsr.iv27, align 8
-  %9 = load i8* %lsr.iv40, align 1, !tbaa !1
-  %10 = load i8* %lsr.iv37, align 1, !tbaa !1
+  %9 = load i8* %lsr.iv40, align 1
+  %10 = load i8* %lsr.iv37, align 1
   %lftr.wideiv = trunc i32 %lsr.iv42 to i8
   %exitcond = icmp eq i8 %lftr.wideiv, 32
   %scevgep26 = getelementptr %union.vect64* %lsr.iv, i32 1
@@ -74,7 +74,3 @@ for.end:                                          ; preds = %for.body
 declare i64 @llvm.hexagon.A2.vsubhs(i64, i64) nounwind readnone
 
 declare i32 @llvm.hexagon.C2.mux(i32, i32, i32) nounwind readnone
-
-!0 = metadata !{metadata !"long", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/Hexagon/union-1.ll b/test/CodeGen/Hexagon/union-1.ll
new file mode 100644
index 0000000..7c6da74
--- /dev/null
+++ b/test/CodeGen/Hexagon/union-1.ll
@@ -0,0 +1,23 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; CHECK: word
+; CHECK-NOT: combine(#0
+; CHECK: jump bar
+
+define void @word(i32* nocapture %a) nounwind {
+entry:
+  %0 = load i32* %a, align 4, !tbaa !0
+  %1 = zext i32 %0 to i64
+  %add.ptr = getelementptr inbounds i32* %a, i32 1
+  %2 = load i32* %add.ptr, align 4, !tbaa !0
+  %3 = zext i32 %2 to i64
+  %4 = shl nuw i64 %3, 32
+  %ins = or i64 %4, %1
+  tail call void @bar(i64 %ins) nounwind
+  ret void
+}
+
+declare void @bar(i64)
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/Mips/alloca.ll b/test/CodeGen/Mips/alloca.ll
index d79ea91..fc7ef86 100644
--- a/test/CodeGen/Mips/alloca.ll
+++ b/test/CodeGen/Mips/alloca.ll
@@ -59,23 +59,23 @@ if.end:                                           ; preds = %if.else, %if.then
 ; CHECK: lw  $25, %call16(printf)
 
   %.pre-phi = phi i32* [ %2, %if.else ], [ %.pre, %if.then ]
-  %tmp7 = load i32* %0, align 4, !tbaa !0
+  %tmp7 = load i32* %0, align 4
   %arrayidx9 = getelementptr inbounds i8* %tmp1, i32 4
   %3 = bitcast i8* %arrayidx9 to i32*
-  %tmp10 = load i32* %3, align 4, !tbaa !0
+  %tmp10 = load i32* %3, align 4
   %arrayidx12 = getelementptr inbounds i8* %tmp1, i32 8
   %4 = bitcast i8* %arrayidx12 to i32*
-  %tmp13 = load i32* %4, align 4, !tbaa !0
-  %tmp16 = load i32* %.pre-phi, align 4, !tbaa !0
+  %tmp13 = load i32* %4, align 4
+  %tmp16 = load i32* %.pre-phi, align 4
   %arrayidx18 = getelementptr inbounds i8* %tmp1, i32 16
   %5 = bitcast i8* %arrayidx18 to i32*
-  %tmp19 = load i32* %5, align 4, !tbaa !0
+  %tmp19 = load i32* %5, align 4
   %arrayidx21 = getelementptr inbounds i8* %tmp1, i32 20
   %6 = bitcast i8* %arrayidx21 to i32*
-  %tmp22 = load i32* %6, align 4, !tbaa !0
+  %tmp22 = load i32* %6, align 4
   %arrayidx24 = getelementptr inbounds i8* %tmp1, i32 24
   %7 = bitcast i8* %arrayidx24 to i32*
-  %tmp25 = load i32* %7, align 4, !tbaa !0
+  %tmp25 = load i32* %7, align 4
   %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([22 x i8]* @.str, i32 0, i32 0), i32 %tmp7, i32 %tmp10, i32 %tmp13, i32 %tmp16, i32 %tmp19, i32 %tmp22, i32 %tmp25) nounwind
   ret i32 0
 }
@@ -83,7 +83,3 @@ if.end:                                           ; preds = %if.else, %if.then
 declare void @foo3(i32*)
 
 declare i32 @printf(i8* nocapture, ...) nounwind
-
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/Mips/divrem.ll b/test/CodeGen/Mips/divrem.ll
index 398d1b7..c470d1c 100644
--- a/test/CodeGen/Mips/divrem.ll
+++ b/test/CodeGen/Mips/divrem.ll
@@ -32,7 +32,7 @@ entry:
 define i32 @sdivrem1(i32 %a0, i32 %a1, i32* nocapture %r) nounwind {
 entry:
   %rem = srem i32 %a0, %a1
-  store i32 %rem, i32* %r, align 4, !tbaa !0
+  store i32 %rem, i32* %r, align 4
   %div = sdiv i32 %a0, %a1
   ret i32 %div
 }
@@ -41,11 +41,7 @@ entry:
 define i32 @udivrem1(i32 %a0, i32 %a1, i32* nocapture %r) nounwind {
 entry:
   %rem = urem i32 %a0, %a1
-  store i32 %rem, i32* %r, align 4, !tbaa !0
+  store i32 %rem, i32* %r, align 4
   %div = udiv i32 %a0, %a1
   ret i32 %div
 }
-
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/Mips/dsp-patterns-cmp-vselect.ll b/test/CodeGen/Mips/dsp-patterns-cmp-vselect.ll
new file mode 100644
index 0000000..9f2f066
--- /dev/null
+++ b/test/CodeGen/Mips/dsp-patterns-cmp-vselect.ll
@@ -0,0 +1,641 @@
+; RUN: llc -march=mips -mattr=dsp < %s | FileCheck %s
+
+; CHECK: select_v2q15_eq_:
+; CHECK: cmp.eq.ph ${{[0-9]+}}, ${{[0-9]+}}
+; CHECK: pick.ph ${{[0-9]+}}, $6, $7
+
+define { i32 } @select_v2q15_eq_(i32 %a0.coerce, i32 %a1.coerce, i32 %a2.coerce, i32 %a3.coerce) {
+entry:
+  %0 = bitcast i32 %a0.coerce to <2 x i16>
+  %1 = bitcast i32 %a1.coerce to <2 x i16>
+  %2 = bitcast i32 %a2.coerce to <2 x i16>
+  %3 = bitcast i32 %a3.coerce to <2 x i16>
+  %cmp = icmp eq <2 x i16> %0, %1
+  %or = select <2 x i1> %cmp, <2 x i16> %2, <2 x i16> %3
+  %4 = bitcast <2 x i16> %or to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %4, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; CHECK: select_v2q15_lt_:
+; CHECK: cmp.lt.ph $4, $5
+; CHECK: pick.ph ${{[0-9]+}}, $6, $7
+
+define { i32 } @select_v2q15_lt_(i32 %a0.coerce, i32 %a1.coerce, i32 %a2.coerce, i32 %a3.coerce) {
+entry:
+  %0 = bitcast i32 %a0.coerce to <2 x i16>
+  %1 = bitcast i32 %a1.coerce to <2 x i16>
+  %2 = bitcast i32 %a2.coerce to <2 x i16>
+  %3 = bitcast i32 %a3.coerce to <2 x i16>
+  %cmp = icmp slt <2 x i16> %0, %1
+  %or = select <2 x i1> %cmp, <2 x i16> %2, <2 x i16> %3
+  %4 = bitcast <2 x i16> %or to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %4, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; CHECK: select_v2q15_le_:
+; CHECK: cmp.le.ph $4, $5
+; CHECK: pick.ph ${{[0-9]+}}, $6, $7
+
+define { i32 } @select_v2q15_le_(i32 %a0.coerce, i32 %a1.coerce, i32 %a2.coerce, i32 %a3.coerce) {
+entry:
+  %0 = bitcast i32 %a0.coerce to <2 x i16>
+  %1 = bitcast i32 %a1.coerce to <2 x i16>
+  %2 = bitcast i32 %a2.coerce to <2 x i16>
+  %3 = bitcast i32 %a3.coerce to <2 x i16>
+  %cmp = icmp sle <2 x i16> %0, %1
+  %or = select <2 x i1> %cmp, <2 x i16> %2, <2 x i16> %3
+  %4 = bitcast <2 x i16> %or to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %4, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; CHECK: select_v2q15_ne_:
+; CHECK: cmp.eq.ph ${{[0-9]+}}, ${{[0-9]+}}
+; CHECK: pick.ph ${{[0-9]+}}, $7, $6
+
+define { i32 } @select_v2q15_ne_(i32 %a0.coerce, i32 %a1.coerce, i32 %a2.coerce, i32 %a3.coerce) {
+entry:
+  %0 = bitcast i32 %a0.coerce to <2 x i16>
+  %1 = bitcast i32 %a1.coerce to <2 x i16>
+  %2 = bitcast i32 %a2.coerce to <2 x i16>
+  %3 = bitcast i32 %a3.coerce to <2 x i16>
+  %cmp = icmp ne <2 x i16> %0, %1
+  %or = select <2 x i1> %cmp, <2 x i16> %2, <2 x i16> %3
+  %4 = bitcast <2 x i16> %or to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %4, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; CHECK: select_v2q15_gt_:
+; CHECK: cmp.le.ph $4, $5
+; CHECK: pick.ph ${{[0-9]+}}, $7, $6
+
+define { i32 } @select_v2q15_gt_(i32 %a0.coerce, i32 %a1.coerce, i32 %a2.coerce, i32 %a3.coerce) {
+entry:
+  %0 = bitcast i32 %a0.coerce to <2 x i16>
+  %1 = bitcast i32 %a1.coerce to <2 x i16>
+  %2 = bitcast i32 %a2.coerce to <2 x i16>
+  %3 = bitcast i32 %a3.coerce to <2 x i16>
+  %cmp = icmp sgt <2 x i16> %0, %1
+  %or = select <2 x i1> %cmp, <2 x i16> %2, <2 x i16> %3
+  %4 = bitcast <2 x i16> %or to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %4, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; CHECK: select_v2q15_ge_:
+; CHECK: cmp.lt.ph $4, $5
+; CHECK: pick.ph ${{[0-9]+}}, $7, $6
+
+define { i32 } @select_v2q15_ge_(i32 %a0.coerce, i32 %a1.coerce, i32 %a2.coerce, i32 %a3.coerce) {
+entry:
+  %0 = bitcast i32 %a0.coerce to <2 x i16>
+  %1 = bitcast i32 %a1.coerce to <2 x i16>
+  %2 = bitcast i32 %a2.coerce to <2 x i16>
+  %3 = bitcast i32 %a3.coerce to <2 x i16>
+  %cmp = icmp sge <2 x i16> %0, %1
+  %or = select <2 x i1> %cmp, <2 x i16> %2, <2 x i16> %3
+  %4 = bitcast <2 x i16> %or to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %4, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; CHECK: select_v4ui8_eq_:
+; CHECK: cmpu.eq.qb ${{[0-9]+}}, ${{[0-9]+}}
+; CHECK: pick.qb ${{[0-9]+}}, $6, $7
+
+define { i32 } @select_v4ui8_eq_(i32 %a0.coerce, i32 %a1.coerce, i32 %a2.coerce, i32 %a3.coerce) {
+entry:
+  %0 = bitcast i32 %a0.coerce to <4 x i8>
+  %1 = bitcast i32 %a1.coerce to <4 x i8>
+  %2 = bitcast i32 %a2.coerce to <4 x i8>
+  %3 = bitcast i32 %a3.coerce to <4 x i8>
+  %cmp = icmp eq <4 x i8> %0, %1
+  %or = select <4 x i1> %cmp, <4 x i8> %2, <4 x i8> %3
+  %4 = bitcast <4 x i8> %or to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %4, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; CHECK: select_v4ui8_lt_:
+; CHECK: cmpu.lt.qb $4, $5
+; CHECK: pick.qb ${{[0-9]+}}, $6, $7
+
+define { i32 } @select_v4ui8_lt_(i32 %a0.coerce, i32 %a1.coerce, i32 %a2.coerce, i32 %a3.coerce) {
+entry:
+  %0 = bitcast i32 %a0.coerce to <4 x i8>
+  %1 = bitcast i32 %a1.coerce to <4 x i8>
+  %2 = bitcast i32 %a2.coerce to <4 x i8>
+  %3 = bitcast i32 %a3.coerce to <4 x i8>
+  %cmp = icmp ult <4 x i8> %0, %1
+  %or = select <4 x i1> %cmp, <4 x i8> %2, <4 x i8> %3
+  %4 = bitcast <4 x i8> %or to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %4, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; CHECK: select_v4ui8_le_:
+; CHECK: cmpu.le.qb $4, $5
+; CHECK: pick.qb ${{[0-9]+}}, $6, $7
+
+define { i32 } @select_v4ui8_le_(i32 %a0.coerce, i32 %a1.coerce, i32 %a2.coerce, i32 %a3.coerce) {
+entry:
+  %0 = bitcast i32 %a0.coerce to <4 x i8>
+  %1 = bitcast i32 %a1.coerce to <4 x i8>
+  %2 = bitcast i32 %a2.coerce to <4 x i8>
+  %3 = bitcast i32 %a3.coerce to <4 x i8>
+  %cmp = icmp ule <4 x i8> %0, %1
+  %or = select <4 x i1> %cmp, <4 x i8> %2, <4 x i8> %3
+  %4 = bitcast <4 x i8> %or to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %4, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; CHECK: select_v4ui8_ne_:
+; CHECK: cmpu.eq.qb ${{[0-9]+}}, ${{[0-9]+}}
+; CHECK: pick.qb ${{[0-9]+}}, $7, $6
+
+define { i32 } @select_v4ui8_ne_(i32 %a0.coerce, i32 %a1.coerce, i32 %a2.coerce, i32 %a3.coerce) {
+entry:
+  %0 = bitcast i32 %a0.coerce to <4 x i8>
+  %1 = bitcast i32 %a1.coerce to <4 x i8>
+  %2 = bitcast i32 %a2.coerce to <4 x i8>
+  %3 = bitcast i32 %a3.coerce to <4 x i8>
+  %cmp = icmp ne <4 x i8> %0, %1
+  %or = select <4 x i1> %cmp, <4 x i8> %2, <4 x i8> %3
+  %4 = bitcast <4 x i8> %or to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %4, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; CHECK: select_v4ui8_gt_:
+; CHECK: cmpu.le.qb $4, $5
+; CHECK: pick.qb ${{[0-9]+}}, $7, $6
+
+define { i32 } @select_v4ui8_gt_(i32 %a0.coerce, i32 %a1.coerce, i32 %a2.coerce, i32 %a3.coerce) {
+entry:
+  %0 = bitcast i32 %a0.coerce to <4 x i8>
+  %1 = bitcast i32 %a1.coerce to <4 x i8>
+  %2 = bitcast i32 %a2.coerce to <4 x i8>
+  %3 = bitcast i32 %a3.coerce to <4 x i8>
+  %cmp = icmp ugt <4 x i8> %0, %1
+  %or = select <4 x i1> %cmp, <4 x i8> %2, <4 x i8> %3
+  %4 = bitcast <4 x i8> %or to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %4, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; CHECK: select_v4ui8_ge_:
+; CHECK: cmpu.lt.qb $4, $5
+; CHECK: pick.qb ${{[0-9]+}}, $7, $6
+
+define { i32 } @select_v4ui8_ge_(i32 %a0.coerce, i32 %a1.coerce, i32 %a2.coerce, i32 %a3.coerce) {
+entry:
+  %0 = bitcast i32 %a0.coerce to <4 x i8>
+  %1 = bitcast i32 %a1.coerce to <4 x i8>
+  %2 = bitcast i32 %a2.coerce to <4 x i8>
+  %3 = bitcast i32 %a3.coerce to <4 x i8>
+  %cmp = icmp uge <4 x i8> %0, %1
+  %or = select <4 x i1> %cmp, <4 x i8> %2, <4 x i8> %3
+  %4 = bitcast <4 x i8> %or to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %4, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; CHECK: select_v2ui16_lt_:
+; CHECK-NOT: cmp
+; CHECK-NOT: pick
+
+define { i32 } @select_v2ui16_lt_(i32 %a0.coerce, i32 %a1.coerce, i32 %a2.coerce, i32 %a3.coerce) {
+entry:
+  %0 = bitcast i32 %a0.coerce to <2 x i16>
+  %1 = bitcast i32 %a1.coerce to <2 x i16>
+  %2 = bitcast i32 %a2.coerce to <2 x i16>
+  %3 = bitcast i32 %a3.coerce to <2 x i16>
+  %cmp = icmp ult <2 x i16> %0, %1
+  %or = select <2 x i1> %cmp, <2 x i16> %2, <2 x i16> %3
+  %4 = bitcast <2 x i16> %or to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %4, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; CHECK: select_v2ui16_le_:
+; CHECK-NOT: cmp
+; CHECK-NOT: pick
+
+define { i32 } @select_v2ui16_le_(i32 %a0.coerce, i32 %a1.coerce, i32 %a2.coerce, i32 %a3.coerce) {
+entry:
+  %0 = bitcast i32 %a0.coerce to <2 x i16>
+  %1 = bitcast i32 %a1.coerce to <2 x i16>
+  %2 = bitcast i32 %a2.coerce to <2 x i16>
+  %3 = bitcast i32 %a3.coerce to <2 x i16>
+  %cmp = icmp ule <2 x i16> %0, %1
+  %or = select <2 x i1> %cmp, <2 x i16> %2, <2 x i16> %3
+  %4 = bitcast <2 x i16> %or to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %4, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; CHECK: select_v2ui16_gt_:
+; CHECK-NOT: cmp
+; CHECK-NOT: pick
+
+define { i32 } @select_v2ui16_gt_(i32 %a0.coerce, i32 %a1.coerce, i32 %a2.coerce, i32 %a3.coerce) {
+entry:
+  %0 = bitcast i32 %a0.coerce to <2 x i16>
+  %1 = bitcast i32 %a1.coerce to <2 x i16>
+  %2 = bitcast i32 %a2.coerce to <2 x i16>
+  %3 = bitcast i32 %a3.coerce to <2 x i16>
+  %cmp = icmp ugt <2 x i16> %0, %1
+  %or = select <2 x i1> %cmp, <2 x i16> %2, <2 x i16> %3
+  %4 = bitcast <2 x i16> %or to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %4, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; CHECK: select_v2ui16_ge_:
+; CHECK-NOT: cmp
+; CHECK-NOT: pick
+
+define { i32 } @select_v2ui16_ge_(i32 %a0.coerce, i32 %a1.coerce, i32 %a2.coerce, i32 %a3.coerce) {
+entry:
+  %0 = bitcast i32 %a0.coerce to <2 x i16>
+  %1 = bitcast i32 %a1.coerce to <2 x i16>
+  %2 = bitcast i32 %a2.coerce to <2 x i16>
+  %3 = bitcast i32 %a3.coerce to <2 x i16>
+  %cmp = icmp uge <2 x i16> %0, %1
+  %or = select <2 x i1> %cmp, <2 x i16> %2, <2 x i16> %3
+  %4 = bitcast <2 x i16> %or to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %4, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; CHECK: select_v4i8_lt_:
+; CHECK-NOT: cmp
+; CHECK-NOT: pick
+
+define { i32 } @select_v4i8_lt_(i32 %a0.coerce, i32 %a1.coerce, i32 %a2.coerce, i32 %a3.coerce) {
+entry:
+  %0 = bitcast i32 %a0.coerce to <4 x i8>
+  %1 = bitcast i32 %a1.coerce to <4 x i8>
+  %2 = bitcast i32 %a2.coerce to <4 x i8>
+  %3 = bitcast i32 %a3.coerce to <4 x i8>
+  %cmp = icmp slt <4 x i8> %0, %1
+  %or = select <4 x i1> %cmp, <4 x i8> %2, <4 x i8> %3
+  %4 = bitcast <4 x i8> %or to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %4, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; CHECK: select_v4i8_le_:
+; CHECK-NOT: cmp
+; CHECK-NOT: pick
+
+define { i32 } @select_v4i8_le_(i32 %a0.coerce, i32 %a1.coerce, i32 %a2.coerce, i32 %a3.coerce) {
+entry:
+  %0 = bitcast i32 %a0.coerce to <4 x i8>
+  %1 = bitcast i32 %a1.coerce to <4 x i8>
+  %2 = bitcast i32 %a2.coerce to <4 x i8>
+  %3 = bitcast i32 %a3.coerce to <4 x i8>
+  %cmp = icmp sle <4 x i8> %0, %1
+  %or = select <4 x i1> %cmp, <4 x i8> %2, <4 x i8> %3
+  %4 = bitcast <4 x i8> %or to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %4, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; CHECK: select_v4i8_gt_:
+; CHECK-NOT: cmp
+; CHECK-NOT: pick
+
+define { i32 } @select_v4i8_gt_(i32 %a0.coerce, i32 %a1.coerce, i32 %a2.coerce, i32 %a3.coerce) {
+entry:
+  %0 = bitcast i32 %a0.coerce to <4 x i8>
+  %1 = bitcast i32 %a1.coerce to <4 x i8>
+  %2 = bitcast i32 %a2.coerce to <4 x i8>
+  %3 = bitcast i32 %a3.coerce to <4 x i8>
+  %cmp = icmp sgt <4 x i8> %0, %1
+  %or = select <4 x i1> %cmp, <4 x i8> %2, <4 x i8> %3
+  %4 = bitcast <4 x i8> %or to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %4, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; CHECK: select_v4i8_ge_:
+; CHECK-NOT: cmp
+; CHECK-NOT: pick
+
+define { i32 } @select_v4i8_ge_(i32 %a0.coerce, i32 %a1.coerce, i32 %a2.coerce, i32 %a3.coerce) {
+entry:
+  %0 = bitcast i32 %a0.coerce to <4 x i8>
+  %1 = bitcast i32 %a1.coerce to <4 x i8>
+  %2 = bitcast i32 %a2.coerce to <4 x i8>
+  %3 = bitcast i32 %a3.coerce to <4 x i8>
+  %cmp = icmp sge <4 x i8> %0, %1
+  %or = select <4 x i1> %cmp, <4 x i8> %2, <4 x i8> %3
+  %4 = bitcast <4 x i8> %or to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %4, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; CHECK: compare_v2q15_eq_:
+; CHECK: cmp.eq.ph ${{[0-9]+}}, ${{[0-9]+}}
+; CHECK: pick.ph ${{[0-9]+}}, ${{[a-z0-9]+}}, ${{[a-z0-9]+}}
+
+define { i32 } @compare_v2q15_eq_(i32 %a0.coerce, i32 %a1.coerce) {
+entry:
+  %0 = bitcast i32 %a0.coerce to <2 x i16>
+  %1 = bitcast i32 %a1.coerce to <2 x i16>
+  %cmp = icmp eq <2 x i16> %0, %1
+  %sext = sext <2 x i1> %cmp to <2 x i16>
+  %2 = bitcast <2 x i16> %sext to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; CHECK: compare_v2q15_lt_:
+; CHECK: cmp.lt.ph $4, $5
+; CHECK: pick.ph ${{[0-9]+}}, ${{[a-z0-9]+}}, ${{[a-z0-9]+}}
+
+define { i32 } @compare_v2q15_lt_(i32 %a0.coerce, i32 %a1.coerce) {
+entry:
+  %0 = bitcast i32 %a0.coerce to <2 x i16>
+  %1 = bitcast i32 %a1.coerce to <2 x i16>
+  %cmp = icmp slt <2 x i16> %0, %1
+  %sext = sext <2 x i1> %cmp to <2 x i16>
+  %2 = bitcast <2 x i16> %sext to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; CHECK: compare_v2q15_le_:
+; CHECK: cmp.le.ph $4, $5
+; CHECK: pick.ph ${{[0-9]+}}, ${{[a-z0-9]+}}, ${{[a-z0-9]+}}
+
+define { i32 } @compare_v2q15_le_(i32 %a0.coerce, i32 %a1.coerce) {
+entry:
+  %0 = bitcast i32 %a0.coerce to <2 x i16>
+  %1 = bitcast i32 %a1.coerce to <2 x i16>
+  %cmp = icmp sle <2 x i16> %0, %1
+  %sext = sext <2 x i1> %cmp to <2 x i16>
+  %2 = bitcast <2 x i16> %sext to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; CHECK: compare_v2q15_ne_:
+; CHECK: cmp.eq.ph ${{[0-9]+}}, ${{[0-9]+}}
+; CHECK: pick.ph ${{[0-9]+}}, ${{[a-z0-9]+}}, ${{[a-z0-9]+}}
+
+define { i32 } @compare_v2q15_ne_(i32 %a0.coerce, i32 %a1.coerce) {
+entry:
+  %0 = bitcast i32 %a0.coerce to <2 x i16>
+  %1 = bitcast i32 %a1.coerce to <2 x i16>
+  %cmp = icmp ne <2 x i16> %0, %1
+  %sext = sext <2 x i1> %cmp to <2 x i16>
+  %2 = bitcast <2 x i16> %sext to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; CHECK: compare_v2q15_gt_:
+; CHECK: cmp.le.ph $4, $5
+; CHECK: pick.ph ${{[0-9]+}}, ${{[a-z0-9]+}}, ${{[a-z0-9]+}}
+
+define { i32 } @compare_v2q15_gt_(i32 %a0.coerce, i32 %a1.coerce) {
+entry:
+  %0 = bitcast i32 %a0.coerce to <2 x i16>
+  %1 = bitcast i32 %a1.coerce to <2 x i16>
+  %cmp = icmp sgt <2 x i16> %0, %1
+  %sext = sext <2 x i1> %cmp to <2 x i16>
+  %2 = bitcast <2 x i16> %sext to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; CHECK: compare_v2q15_ge_:
+; CHECK: cmp.lt.ph $4, $5
+; CHECK: pick.ph ${{[0-9]+}}, ${{[a-z0-9]+}}, ${{[a-z0-9]+}}
+
+define { i32 } @compare_v2q15_ge_(i32 %a0.coerce, i32 %a1.coerce) {
+entry:
+  %0 = bitcast i32 %a0.coerce to <2 x i16>
+  %1 = bitcast i32 %a1.coerce to <2 x i16>
+  %cmp = icmp sge <2 x i16> %0, %1
+  %sext = sext <2 x i1> %cmp to <2 x i16>
+  %2 = bitcast <2 x i16> %sext to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; CHECK: compare_v4ui8_eq_:
+; CHECK: cmpu.eq.qb ${{[0-9]+}}, ${{[0-9]+}}
+; CHECK: pick.qb ${{[0-9]+}}, ${{[a-z0-9]+}}, ${{[a-z0-9]+}}
+
+define { i32 } @compare_v4ui8_eq_(i32 %a0.coerce, i32 %a1.coerce) {
+entry:
+  %0 = bitcast i32 %a0.coerce to <4 x i8>
+  %1 = bitcast i32 %a1.coerce to <4 x i8>
+  %cmp = icmp eq <4 x i8> %0, %1
+  %sext = sext <4 x i1> %cmp to <4 x i8>
+  %2 = bitcast <4 x i8> %sext to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; CHECK: compare_v4ui8_lt_:
+; CHECK: cmpu.lt.qb $4, $5
+; CHECK: pick.qb ${{[0-9]+}}, ${{[a-z0-9]+}}, ${{[a-z0-9]+}}
+
+define { i32 } @compare_v4ui8_lt_(i32 %a0.coerce, i32 %a1.coerce) {
+entry:
+  %0 = bitcast i32 %a0.coerce to <4 x i8>
+  %1 = bitcast i32 %a1.coerce to <4 x i8>
+  %cmp = icmp ult <4 x i8> %0, %1
+  %sext = sext <4 x i1> %cmp to <4 x i8>
+  %2 = bitcast <4 x i8> %sext to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; CHECK: compare_v4ui8_le_:
+; CHECK: cmpu.le.qb $4, $5
+; CHECK: pick.qb ${{[0-9]+}}, ${{[a-z0-9]+}}, ${{[a-z0-9]+}}
+
+define { i32 } @compare_v4ui8_le_(i32 %a0.coerce, i32 %a1.coerce) {
+entry:
+  %0 = bitcast i32 %a0.coerce to <4 x i8>
+  %1 = bitcast i32 %a1.coerce to <4 x i8>
+  %cmp = icmp ule <4 x i8> %0, %1
+  %sext = sext <4 x i1> %cmp to <4 x i8>
+  %2 = bitcast <4 x i8> %sext to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; CHECK: compare_v4ui8_ne_:
+; CHECK: cmpu.eq.qb ${{[0-9]+}}, ${{[0-9]+}}
+; CHECK: pick.qb ${{[0-9]+}}, ${{[a-z0-9]+}}, ${{[a-z0-9]+}}
+
+define { i32 } @compare_v4ui8_ne_(i32 %a0.coerce, i32 %a1.coerce) {
+entry:
+  %0 = bitcast i32 %a0.coerce to <4 x i8>
+  %1 = bitcast i32 %a1.coerce to <4 x i8>
+  %cmp = icmp ne <4 x i8> %0, %1
+  %sext = sext <4 x i1> %cmp to <4 x i8>
+  %2 = bitcast <4 x i8> %sext to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; CHECK: compare_v4ui8_gt_:
+; CHECK: cmpu.le.qb $4, $5
+; CHECK: pick.qb ${{[0-9]+}}, ${{[a-z0-9]+}}, ${{[a-z0-9]+}}
+
+define { i32 } @compare_v4ui8_gt_(i32 %a0.coerce, i32 %a1.coerce) {
+entry:
+  %0 = bitcast i32 %a0.coerce to <4 x i8>
+  %1 = bitcast i32 %a1.coerce to <4 x i8>
+  %cmp = icmp ugt <4 x i8> %0, %1
+  %sext = sext <4 x i1> %cmp to <4 x i8>
+  %2 = bitcast <4 x i8> %sext to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; CHECK: compare_v4ui8_ge_:
+; CHECK: cmpu.lt.qb $4, $5
+; CHECK: pick.qb ${{[0-9]+}}, ${{[a-z0-9]+}}, ${{[a-z0-9]+}}
+
+define { i32 } @compare_v4ui8_ge_(i32 %a0.coerce, i32 %a1.coerce) {
+entry:
+  %0 = bitcast i32 %a0.coerce to <4 x i8>
+  %1 = bitcast i32 %a1.coerce to <4 x i8>
+  %cmp = icmp uge <4 x i8> %0, %1
+  %sext = sext <4 x i1> %cmp to <4 x i8>
+  %2 = bitcast <4 x i8> %sext to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; CHECK: compare_v2ui16_lt_:
+; CHECK-NOT: cmp
+; CHECK-NOT: pick
+
+define { i32 } @compare_v2ui16_lt_(i32 %a0.coerce, i32 %a1.coerce) {
+entry:
+  %0 = bitcast i32 %a0.coerce to <2 x i16>
+  %1 = bitcast i32 %a1.coerce to <2 x i16>
+  %cmp = icmp ult <2 x i16> %0, %1
+  %sext = sext <2 x i1> %cmp to <2 x i16>
+  %2 = bitcast <2 x i16> %sext to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; CHECK: compare_v2ui16_le_:
+; CHECK-NOT: cmp
+; CHECK-NOT: pick
+
+define { i32 } @compare_v2ui16_le_(i32 %a0.coerce, i32 %a1.coerce) {
+entry:
+  %0 = bitcast i32 %a0.coerce to <2 x i16>
+  %1 = bitcast i32 %a1.coerce to <2 x i16>
+  %cmp = icmp ule <2 x i16> %0, %1
+  %sext = sext <2 x i1> %cmp to <2 x i16>
+  %2 = bitcast <2 x i16> %sext to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; CHECK: compare_v2ui16_gt_:
+; CHECK-NOT: cmp
+; CHECK-NOT: pick
+
+define { i32 } @compare_v2ui16_gt_(i32 %a0.coerce, i32 %a1.coerce) {
+entry:
+  %0 = bitcast i32 %a0.coerce to <2 x i16>
+  %1 = bitcast i32 %a1.coerce to <2 x i16>
+  %cmp = icmp ugt <2 x i16> %0, %1
+  %sext = sext <2 x i1> %cmp to <2 x i16>
+  %2 = bitcast <2 x i16> %sext to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; CHECK: compare_v2ui16_ge_:
+; CHECK-NOT: cmp
+; CHECK-NOT: pick
+
+define { i32 } @compare_v2ui16_ge_(i32 %a0.coerce, i32 %a1.coerce) {
+entry:
+  %0 = bitcast i32 %a0.coerce to <2 x i16>
+  %1 = bitcast i32 %a1.coerce to <2 x i16>
+  %cmp = icmp uge <2 x i16> %0, %1
+  %sext = sext <2 x i1> %cmp to <2 x i16>
+  %2 = bitcast <2 x i16> %sext to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; CHECK: compare_v4i8_lt_:
+; CHECK-NOT: cmp
+; CHECK-NOT: pick
+
+define { i32 } @compare_v4i8_lt_(i32 %a0.coerce, i32 %a1.coerce) {
+entry:
+  %0 = bitcast i32 %a0.coerce to <4 x i8>
+  %1 = bitcast i32 %a1.coerce to <4 x i8>
+  %cmp = icmp slt <4 x i8> %0, %1
+  %sext = sext <4 x i1> %cmp to <4 x i8>
+  %2 = bitcast <4 x i8> %sext to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; CHECK: compare_v4i8_le_:
+; CHECK-NOT: cmp
+; CHECK-NOT: pick
+
+define { i32 } @compare_v4i8_le_(i32 %a0.coerce, i32 %a1.coerce) {
+entry:
+  %0 = bitcast i32 %a0.coerce to <4 x i8>
+  %1 = bitcast i32 %a1.coerce to <4 x i8>
+  %cmp = icmp sle <4 x i8> %0, %1
+  %sext = sext <4 x i1> %cmp to <4 x i8>
+  %2 = bitcast <4 x i8> %sext to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; CHECK: compare_v4i8_gt_:
+; CHECK-NOT: cmp
+; CHECK-NOT: pick
+
+define { i32 } @compare_v4i8_gt_(i32 %a0.coerce, i32 %a1.coerce) {
+entry:
+  %0 = bitcast i32 %a0.coerce to <4 x i8>
+  %1 = bitcast i32 %a1.coerce to <4 x i8>
+  %cmp = icmp sgt <4 x i8> %0, %1
+  %sext = sext <4 x i1> %cmp to <4 x i8>
+  %2 = bitcast <4 x i8> %sext to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; CHECK: compare_v4i8_ge_:
+; CHECK-NOT: cmp
+; CHECK-NOT: pick
+
+define { i32 } @compare_v4i8_ge_(i32 %a0.coerce, i32 %a1.coerce) {
+entry:
+  %0 = bitcast i32 %a0.coerce to <4 x i8>
+  %1 = bitcast i32 %a1.coerce to <4 x i8>
+  %cmp = icmp sge <4 x i8> %0, %1
+  %sext = sext <4 x i1> %cmp to <4 x i8>
+  %2 = bitcast <4 x i8> %sext to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+  ret { i32 } %.fca.0.insert
+}
diff --git a/test/CodeGen/Mips/dsp-patterns.ll b/test/CodeGen/Mips/dsp-patterns.ll
index 0752f69..eeb7140 100644
--- a/test/CodeGen/Mips/dsp-patterns.ll
+++ b/test/CodeGen/Mips/dsp-patterns.ll
@@ -1,7 +1,8 @@
-; RUN: llc -march=mips -mattr=dsp < %s | FileCheck %s
+; RUN: llc -march=mips -mattr=dsp < %s | FileCheck %s -check-prefix=R1
+; RUN: llc -march=mips -mattr=dspr2 < %s | FileCheck %s -check-prefix=R2
 
-; CHECK: test_lbux:
-; CHECK: lbux ${{[0-9]+}}
+; R1: test_lbux:
+; R1: lbux ${{[0-9]+}}
 
 define zeroext i8 @test_lbux(i8* nocapture %b, i32 %i) {
 entry:
@@ -10,8 +11,8 @@ entry:
   ret i8 %0
 }
 
-; CHECK: test_lhx:
-; CHECK: lhx ${{[0-9]+}}
+; R1: test_lhx:
+; R1: lhx ${{[0-9]+}}
 
 define signext i16 @test_lhx(i16* nocapture %b, i32 %i) {
 entry:
@@ -20,8 +21,8 @@ entry:
   ret i16 %0
 }
 
-; CHECK: test_lwx:
-; CHECK: lwx ${{[0-9]+}}
+; R1: test_lwx:
+; R1: lwx ${{[0-9]+}}
 
 define i32 @test_lwx(i32* nocapture %b, i32 %i) {
 entry:
@@ -29,3 +30,232 @@ entry:
   %0 = load i32* %add.ptr, align 4
   ret i32 %0
 }
+
+; R1: test_add_v2q15_:
+; R1: addq.ph ${{[0-9]+}}
+
+define { i32 } @test_add_v2q15_(i32 %a.coerce, i32 %b.coerce) {
+entry:
+  %0 = bitcast i32 %a.coerce to <2 x i16>
+  %1 = bitcast i32 %b.coerce to <2 x i16>
+  %add = add <2 x i16> %0, %1
+  %2 = bitcast <2 x i16> %add to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; R1: test_sub_v2q15_:
+; R1: subq.ph ${{[0-9]+}}
+
+define { i32 } @test_sub_v2q15_(i32 %a.coerce, i32 %b.coerce) {
+entry:
+  %0 = bitcast i32 %a.coerce to <2 x i16>
+  %1 = bitcast i32 %b.coerce to <2 x i16>
+  %sub = sub <2 x i16> %0, %1
+  %2 = bitcast <2 x i16> %sub to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; R2: test_mul_v2q15_:
+; R2: mul.ph ${{[0-9]+}}
+
+; mul.ph is a DSP-R2 instruction. Check that the multiply node gets expanded when only -mattr=dsp is given.
+; R1: test_mul_v2q15_:
+; R1: mul ${{[0-9]+}}
+; R1: mul ${{[0-9]+}}
+
+define { i32 } @test_mul_v2q15_(i32 %a.coerce, i32 %b.coerce) {
+entry:
+  %0 = bitcast i32 %a.coerce to <2 x i16>
+  %1 = bitcast i32 %b.coerce to <2 x i16>
+  %mul = mul <2 x i16> %0, %1
+  %2 = bitcast <2 x i16> %mul to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; R1: test_add_v4i8_:
+; R1: addu.qb ${{[0-9]+}}
+
+define { i32 } @test_add_v4i8_(i32 %a.coerce, i32 %b.coerce) {
+entry:
+  %0 = bitcast i32 %a.coerce to <4 x i8>
+  %1 = bitcast i32 %b.coerce to <4 x i8>
+  %add = add <4 x i8> %0, %1
+  %2 = bitcast <4 x i8> %add to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; R1: test_sub_v4i8_:
+; R1: subu.qb ${{[0-9]+}}
+
+define { i32 } @test_sub_v4i8_(i32 %a.coerce, i32 %b.coerce) {
+entry:
+  %0 = bitcast i32 %a.coerce to <4 x i8>
+  %1 = bitcast i32 %b.coerce to <4 x i8>
+  %sub = sub <4 x i8> %0, %1
+  %2 = bitcast <4 x i8> %sub to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; The DSP ASE doesn't have a v4i8 multiply instruction. Check that the multiply node gets expanded.
+; R2: test_mul_v4i8_:
+; R2: mul ${{[0-9]+}}
+; R2: mul ${{[0-9]+}}
+; R2: mul ${{[0-9]+}}
+; R2: mul ${{[0-9]+}}
+
+define { i32 } @test_mul_v4i8_(i32 %a.coerce, i32 %b.coerce) {
+entry:
+  %0 = bitcast i32 %a.coerce to <4 x i8>
+  %1 = bitcast i32 %b.coerce to <4 x i8>
+  %mul = mul <4 x i8> %0, %1
+  %2 = bitcast <4 x i8> %mul to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %2, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; R1: test_addsc:
+; R1: addsc ${{[0-9]+}}
+; R1: addwc ${{[0-9]+}}
+
+define i64 @test_addsc(i64 %a, i64 %b) {
+entry:
+  %add = add nsw i64 %b, %a
+  ret i64 %add
+}
+
+; R1: shift1_v2i16_shl_:
+; R1: shll.ph ${{[0-9]+}}, ${{[0-9]+}}, 15
+
+define { i32 } @shift1_v2i16_shl_(i32 %a0.coerce) {
+entry:
+  %0 = bitcast i32 %a0.coerce to <2 x i16>
+  %shl = shl <2 x i16> %0, <i16 15, i16 15>
+  %1 = bitcast <2 x i16> %shl to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %1, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; R1: shift1_v2i16_sra_:
+; R1: shra.ph ${{[0-9]+}}, ${{[0-9]+}}, 15
+
+define { i32 } @shift1_v2i16_sra_(i32 %a0.coerce) {
+entry:
+  %0 = bitcast i32 %a0.coerce to <2 x i16>
+  %shr = ashr <2 x i16> %0, <i16 15, i16 15>
+  %1 = bitcast <2 x i16> %shr to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %1, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; R1: shift1_v2ui16_srl_:
+; R1-NOT: shrl.ph
+; R2: shift1_v2ui16_srl_:
+; R2: shrl.ph ${{[0-9]+}}, ${{[0-9]+}}, 15
+
+define { i32 } @shift1_v2ui16_srl_(i32 %a0.coerce) {
+entry:
+  %0 = bitcast i32 %a0.coerce to <2 x i16>
+  %shr = lshr <2 x i16> %0, <i16 15, i16 15>
+  %1 = bitcast <2 x i16> %shr to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %1, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; R1: shift1_v4i8_shl_:
+; R1: shll.qb ${{[0-9]+}}, ${{[0-9]+}}, 7
+
+define { i32 } @shift1_v4i8_shl_(i32 %a0.coerce) {
+entry:
+  %0 = bitcast i32 %a0.coerce to <4 x i8>
+  %shl = shl <4 x i8> %0, <i8 7, i8 7, i8 7, i8 7>
+  %1 = bitcast <4 x i8> %shl to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %1, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; R1: shift1_v4i8_sra_:
+; R1-NOT: shra.qb
+; R2: shift1_v4i8_sra_:
+; R2: shra.qb ${{[0-9]+}}, ${{[0-9]+}}, 7
+
+define { i32 } @shift1_v4i8_sra_(i32 %a0.coerce) {
+entry:
+  %0 = bitcast i32 %a0.coerce to <4 x i8>
+  %shr = ashr <4 x i8> %0, <i8 7, i8 7, i8 7, i8 7>
+  %1 = bitcast <4 x i8> %shr to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %1, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; R1: shift1_v4ui8_srl_:
+; R1: shrl.qb ${{[0-9]+}}, ${{[0-9]+}}, 7
+
+define { i32 } @shift1_v4ui8_srl_(i32 %a0.coerce) {
+entry:
+  %0 = bitcast i32 %a0.coerce to <4 x i8>
+  %shr = lshr <4 x i8> %0, <i8 7, i8 7, i8 7, i8 7>
+  %1 = bitcast <4 x i8> %shr to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %1, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; Check that the shift node is expanded if the splat element size is not 16 bits.
+;
+; R1: test_vector_splat_imm_v2q15:
+; R1-NOT: shll.ph
+
+define { i32 } @test_vector_splat_imm_v2q15(i32 %a.coerce) {
+entry:
+  %0 = bitcast i32 %a.coerce to <2 x i16>
+  %shl = shl <2 x i16> %0, <i16 0, i16 2>
+  %1 = bitcast <2 x i16> %shl to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %1, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; Check that the shift node is expanded if the splat element size is not 8 bits.
+;
+; R1: test_vector_splat_imm_v4i8:
+; R1-NOT: shll.qb
+
+define { i32 } @test_vector_splat_imm_v4i8(i32 %a.coerce) {
+entry:
+  %0 = bitcast i32 %a.coerce to <4 x i8>
+  %shl = shl <4 x i8> %0, <i8 0, i8 2, i8 0, i8 2>
+  %1 = bitcast <4 x i8> %shl to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %1, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; Check that the shift node is expanded if the shift amount doesn't fit in the 4-bit sa field.
+;
+; R1: test_shift_amount_v2q15:
+; R1-NOT: shll.ph
+
+define { i32 } @test_shift_amount_v2q15(i32 %a.coerce) {
+entry:
+  %0 = bitcast i32 %a.coerce to <2 x i16>
+  %shl = shl <2 x i16> %0, <i16 16, i16 16>
+  %1 = bitcast <2 x i16> %shl to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %1, 0
+  ret { i32 } %.fca.0.insert
+}
+
+; Check that the shift node is expanded if the shift amount doesn't fit in the 3-bit sa field.
+;
+; R1: test_shift_amount_v4i8:
+; R1-NOT: shll.qb
+
+define { i32 } @test_shift_amount_v4i8(i32 %a.coerce) {
+entry:
+  %0 = bitcast i32 %a.coerce to <4 x i8>
+  %shl = shl <4 x i8> %0, <i8 8, i8 8, i8 8, i8 8>
+  %1 = bitcast <4 x i8> %shl to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %1, 0
+  ret { i32 } %.fca.0.insert
+}
diff --git a/test/CodeGen/Mips/dsp-r1.ll b/test/CodeGen/Mips/dsp-r1.ll
index c9dc8cf..acdd17d 100644
--- a/test/CodeGen/Mips/dsp-r1.ll
+++ b/test/CodeGen/Mips/dsp-r1.ll
@@ -772,6 +772,7 @@ entry:
 
   %0 = bitcast i32 %a0.coerce to <4 x i8>
   %1 = bitcast i32 %a1.coerce to <4 x i8>
+  tail call void @llvm.mips.wrdsp(i32 %i0, i32 16)
   %2 = tail call <4 x i8> @llvm.mips.pick.qb(<4 x i8> %0, <4 x i8> %1)
   %3 = bitcast <4 x i8> %2 to i32
   %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
@@ -786,6 +787,7 @@ entry:
 
   %0 = bitcast i32 %a0.coerce to <2 x i16>
   %1 = bitcast i32 %a1.coerce to <2 x i16>
+  tail call void @llvm.mips.wrdsp(i32 %i0, i32 16)
   %2 = tail call <2 x i16> @llvm.mips.pick.ph(<2 x i16> %0, <2 x i16> %1)
   %3 = bitcast <2 x i16> %2 to i32
   %.fca.0.insert = insertvalue { i32 } undef, i32 %3, 0
@@ -808,14 +810,6 @@ entry:
 
 declare <2 x i16> @llvm.mips.packrl.ph(<2 x i16>, <2 x i16>) nounwind readnone
 
-define i32 @test__builtin_mips_rddsp1(i32 %i0) nounwind readonly {
-entry:
-; CHECK: rddsp ${{[0-9]+}}
-
-  %0 = tail call i32 @llvm.mips.rddsp(i32 31)
-  ret i32 %0
-}
-
 define { i32 } @test__builtin_mips_shll_qb1(i32 %i0, i32 %a0.coerce) nounwind {
 entry:
 ; CHECK: shll.qb
@@ -1232,6 +1226,7 @@ declare i32 @llvm.mips.lwx(i8*, i32) nounwind readonly
 define i32 @test__builtin_mips_wrdsp1(i32 %i0, i32 %a0) nounwind {
 entry:
 ; CHECK: wrdsp ${{[0-9]+}}
+; CHECK: rddsp ${{[0-9]+}}
 
   tail call void @llvm.mips.wrdsp(i32 %a0, i32 31)
   %0 = tail call i32 @llvm.mips.rddsp(i32 31)
diff --git a/test/CodeGen/Mips/eh.ll b/test/CodeGen/Mips/eh.ll
index d14150a..fc9e2ef 100644
--- a/test/CodeGen/Mips/eh.ll
+++ b/test/CodeGen/Mips/eh.ll
@@ -18,7 +18,7 @@ entry:
 
   %exception = tail call i8* @__cxa_allocate_exception(i32 8) nounwind
   %0 = bitcast i8* %exception to double*
-  store double 3.200000e+00, double* %0, align 8, !tbaa !0
+  store double 3.200000e+00, double* %0, align 8
   invoke void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTId to i8*), i8* null) noreturn
           to label %unreachable unwind label %lpad
 
@@ -39,7 +39,7 @@ catch:                                            ; preds = %lpad
   %4 = bitcast i8* %3 to double*
   %exn.scalar = load double* %4, align 8
   %add = fadd double %exn.scalar, %i2
-  store double %add, double* @g1, align 8, !tbaa !0
+  store double %add, double* @g1, align 8
   tail call void @__cxa_end_catch() nounwind
   ret void
 
@@ -61,7 +61,3 @@ declare void @__cxa_throw(i8*, i8*, i8*)
 declare i8* @__cxa_begin_catch(i8*)
 
 declare void @__cxa_end_catch()
-
-!0 = metadata !{metadata !"double", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/Mips/fpneeded.ll b/test/CodeGen/Mips/fpneeded.ll
new file mode 100644
index 0000000..623883a
--- /dev/null
+++ b/test/CodeGen/Mips/fpneeded.ll
@@ -0,0 +1,149 @@
+; RUN: llc  -march=mipsel -mcpu=mips32 -relocation-model=static -O3 < %s -mips-os16  | FileCheck %s -check-prefix=32
+
+@x = global float 1.000000e+00, align 4
+@y = global float 2.000000e+00, align 4
+@zz = common global float 0.000000e+00, align 4
+@z = common global float 0.000000e+00, align 4
+
+define float @fv() #0 {
+entry:
+  ret float 1.000000e+00
+}
+
+; 32: 	.set	nomips16                  # @fv
+; 32: 	.ent	fv
+; 32:	.set	noreorder
+; 32:	.set	nomacro
+; 32:	.set	noat
+; 32:	jr	$ra
+; 32:	.set	at
+; 32:	.set	macro
+; 32:	.set	reorder
+; 32:	.end	fv
+
+define double @dv() #0 {
+entry:
+  ret double 2.000000e+00
+}
+
+; 32: 	.set	nomips16                  # @dv
+; 32: 	.ent	dv
+; 32:	.set	noreorder
+; 32:	.set	nomacro
+; 32:	.set	noat
+; 32:	jr	$ra
+; 32:	.set	at
+; 32:	.set	macro
+; 32:	.set	reorder
+; 32:	.end	dv
+
+define void @vf(float %x) #0 {
+entry:
+  %x.addr = alloca float, align 4
+  store float %x, float* %x.addr, align 4
+  ret void
+}
+
+; 32: 	.set	nomips16                  # @vf
+; 32: 	.ent	vf
+; 32:	.set	noreorder
+; 32:	.set	nomacro
+; 32:	.set	noat
+; 32:	jr	$ra
+; 32:	.set	at
+; 32:	.set	macro
+; 32:	.set	reorder
+; 32:	.end	vf
+
+define void @vd(double %x) #0 {
+entry:
+  %x.addr = alloca double, align 8
+  store double %x, double* %x.addr, align 8
+  ret void
+}
+
+; 32: 	.set	nomips16                  # @vd
+; 32: 	.ent	vd
+; 32:	.set	noreorder
+; 32:	.set	nomacro
+; 32:	.set	noat
+; 32:	jr	$ra
+; 32:	.set	at
+; 32:	.set	macro
+; 32:	.set	reorder
+; 32:	.end	vd
+
+define void @foo1() #0 {
+entry:
+  store float 1.000000e+00, float* @zz, align 4
+  %0 = load float* @y, align 4
+  %1 = load float* @x, align 4
+  %add = fadd float %0, %1
+  store float %add, float* @z, align 4
+  ret void
+}
+
+; 32: 	.set	nomips16                  # @foo1
+; 32: 	.ent	foo1
+; 32:	.set	noreorder
+; 32:	.set	nomacro
+; 32:	.set	noat
+; 32:	jr	$ra
+; 32:	.set	at
+; 32:	.set	macro
+; 32:	.set	reorder
+; 32:	.end	foo1
+
+define void @foo2() #0 {
+entry:
+  %0 = load float* @x, align 4
+  call void @vf(float %0)
+  ret void
+}
+
+
+; 32: 	.set	nomips16                  # @foo2
+; 32: 	.ent	foo2
+; 32:	.set	noreorder
+; 32:	.set	nomacro
+; 32:	.set	noat
+; 32:	jr	$ra
+; 32:	.set	at
+; 32:	.set	macro
+; 32:	.set	reorder
+; 32:	.end	foo2
+
+define void @foo3() #0 {
+entry:
+  %call = call float @fv()
+  store float %call, float* @x, align 4
+  ret void
+}
+
+; 32: 	.set	nomips16                  # @foo3
+; 32: 	.ent	foo3
+; 32:	.set	noreorder
+; 32:	.set	nomacro
+; 32:	.set	noat
+; 32:	jr	$ra
+; 32:	.set	at
+; 32:	.set	macro
+; 32:	.set	reorder
+; 32:	.end	foo3
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+define void @vv() #0 {
+entry:
+  ret void
+}
+
+; 32: 	.set	mips16                  # @vv
+; 32: 	.ent	vv
+
+; 32:	save	{{.+}}
+; 32:	restore	{{.+}} 
+; 32:	.end	vv
+
+
+
diff --git a/test/CodeGen/Mips/fpnotneeded.ll b/test/CodeGen/Mips/fpnotneeded.ll
new file mode 100644
index 0000000..dc2ec10
--- /dev/null
+++ b/test/CodeGen/Mips/fpnotneeded.ll
@@ -0,0 +1,77 @@
+; RUN: llc  -march=mipsel -mcpu=mips32 -relocation-model=static -O3 < %s -mips-os16  | FileCheck %s -check-prefix=32
+
+@i = global i32 1, align 4
+@f = global float 1.000000e+00, align 4
+
+define void @vv() #0 {
+entry:
+  ret void
+}
+
+; 32: 	.set	mips16                  # @vv
+; 32: 	.ent	vv
+
+; 32:	save	{{.+}}
+; 32:	restore	{{.+}} 
+; 32:	.end	vv
+
+define i32 @iv() #0 {
+entry:
+  %0 = load i32* @i, align 4
+  ret i32 %0
+}
+
+; 32: 	.set	mips16                  # @iv
+; 32: 	.ent	iv
+
+; 32:	save	{{.+}}
+; 32:	restore	{{.+}} 
+; 32:	.end	iv
+
+define void @vif(i32 %i, float %f) #0 {
+entry:
+  %i.addr = alloca i32, align 4
+  %f.addr = alloca float, align 4
+  store i32 %i, i32* %i.addr, align 4
+  store float %f, float* %f.addr, align 4
+  ret void
+}
+
+; 32: 	.set	mips16                  # @vif
+; 32: 	.ent	vif
+
+; 32:	save	{{.+}}
+; 32:	restore	{{.+}} 
+; 32:	.end	vif
+
+define void @foo() #0 {
+entry:
+  store float 2.000000e+00, float* @f, align 4
+  ret void
+}
+
+; 32: 	.set	mips16                  # @foo
+; 32: 	.ent	foo
+
+; 32:	save	{{.+}}
+; 32:	restore	{{.+}} 
+; 32:	.end	foo
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+
+define float @fv() #0 {
+entry:
+  ret float 1.000000e+00
+}
+
+; 32: 	.set	nomips16                  # @fv
+; 32: 	.ent	fv
+; 32:	.set	noreorder
+; 32:	.set	nomacro
+; 32:	.set	noat
+; 32:	jr	$ra
+; 32:	.set	at
+; 32:	.set	macro
+; 32:	.set	reorder
+; 32:	.end	fv
diff --git a/test/CodeGen/Mips/inlineasmmemop.ll b/test/CodeGen/Mips/inlineasmmemop.ll
index 1c7c443..a08a024 100644
--- a/test/CodeGen/Mips/inlineasmmemop.ll
+++ b/test/CodeGen/Mips/inlineasmmemop.ll
@@ -1,5 +1,6 @@
 ; RUN: llc -march=mipsel < %s | FileCheck %s
 
+; Simple memory
 @g1 = external global i32
 
 define i32 @f1(i32 %x) nounwind {
@@ -21,3 +22,42 @@ entry:
   ret i32 %0
 }
 
+; "D" modifier: second word of a double word. This works for any memory
+; element, double or single.
+; CHECK: #APP
+; CHECK-NEXT: lw ${{[0-9]+}},4(${{[0-9]+}});
+; CHECK-NEXT: #NO_APP
+
+; No "D" modifier: first word of a double word. This works for any memory
+; element, double or single.
+; CHECK: #APP
+; CHECK-NEXT: lw ${{[0-9]+}},0(${{[0-9]+}});
+; CHECK-NEXT: #NO_APP
+
+;int b[8] = {0,1,2,3,4,5,6,7};
+;int main()
+;{
+;  int i;
+; 
+;  // The first word. Notice, no 'D'
+;  { asm (
+;    "lw    %0,%1;\n"
+;    : "=r" (i) : "m" (*(b+4)));}
+; 
+;  // The second word
+;  { asm (
+;    "lw    %0,%D1;\n"
+;    : "=r" (i) : "m" (*(b+4)));}
+;}
+
+@b = common global [20 x i32] zeroinitializer, align 4
+
+define void @main() {
+entry:
+  tail call void asm sideeffect "    lw    $0,${1:D};", "r,*m,~{$11}"(i32 undef, i32* getelementptr inbounds ([20 x i32]* @b, i32 0, i32 3))
+  tail call void asm sideeffect "    lw    $0,${1};", "r,*m,~{$11}"(i32 undef, i32* getelementptr inbounds ([20 x i32]* @b, i32 0, i32 3))
+  ret void
+}
+
+attributes #0 = { nounwind }
+
diff --git a/test/CodeGen/Mips/mips16_32_1.ll b/test/CodeGen/Mips/mips16_32_1.ll
new file mode 100644
index 0000000..6f4826e
--- /dev/null
+++ b/test/CodeGen/Mips/mips16_32_1.ll
@@ -0,0 +1,14 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s -mips-mixed-16-32  | FileCheck %s 
+; RUN: llc  -march=mipsel -mcpu=mips32 -relocation-model=pic -O3 < %s -mips-mixed-16-32  | FileCheck %s 
+
+define void @foo() #0 {
+entry:
+  ret void
+}
+
+; CHECK: 	.set	mips16                  # @foo
+; CHECK:	.ent	foo
+; CHECK:	save	{{.+}}
+; CHECK:	restore	{{.+}} 
+; CHECK:	.end	foo
+attributes #0 = { nounwind "less-precise-fpmad"="false" "mips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/Mips/mips16_32_10.ll b/test/CodeGen/Mips/mips16_32_10.ll
new file mode 100644
index 0000000..330dbfe
--- /dev/null
+++ b/test/CodeGen/Mips/mips16_32_10.ll
@@ -0,0 +1,59 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=static -O3 < %s -mips-mixed-16-32  | FileCheck %s -check-prefix=16
+
+define void @foo() #0 {
+entry:
+  ret void
+}
+; 16: 	.set	nomips16                  # @foo
+; 16: 	.ent	foo
+; 16:	.set	noreorder
+; 16:	.set	nomacro
+; 16:	.set	noat
+; 16:	jr	$ra
+; 16:	nop
+; 16:	.set	at
+; 16:	.set	macro
+; 16:	.set	reorder
+; 16:	.end	foo
+
+define void @nofoo() #1 {
+entry:
+  ret void
+}
+
+; 16: 	.set	mips16                  # @nofoo
+; 16: 	.ent	nofoo
+
+; 16:	save	{{.+}}
+; 16:	restore	{{.+}} 
+; 16:	.end	nofoo
+
+define i32 @main() #2 {
+entry:
+  ret i32 0
+}
+
+; 16: 	.set	nomips16                  # @main
+; 16: 	.ent	main
+; 16:	.set	noreorder
+; 16:	.set	nomacro
+; 16:	.set	noat
+; 16:	jr	$ra
+; 16:	.set	at
+; 16:	.set	macro
+; 16:	.set	reorder
+; 16:	.end	main
+
+
+
+
+
+
+
+
+
+
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "nomips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false"  "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind "less-precise-fpmad"="false" "nomips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/Mips/mips16_32_3.ll b/test/CodeGen/Mips/mips16_32_3.ll
new file mode 100644
index 0000000..8874a88
--- /dev/null
+++ b/test/CodeGen/Mips/mips16_32_3.ll
@@ -0,0 +1,70 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=static -O3 < %s -mips-mixed-16-32  | FileCheck %s -check-prefix=16
+; RUN: llc  -march=mipsel -mcpu=mips32 -relocation-model=static -O3 < %s -mips-mixed-16-32  | FileCheck %s -check-prefix=32
+
+define void @foo() #0 {
+entry:
+  ret void
+}
+
+; 16: 	.set	mips16                  # @foo
+; 16: 	.ent	foo
+; 16:	save	{{.+}}
+; 16:	restore	{{.+}} 
+; 16:	.end	foo
+; 32: 	.set	mips16                  # @foo
+; 32: 	.ent	foo
+; 32:	save	{{.+}}
+; 32:	restore	{{.+}} 
+; 32:	.end	foo
+define void @nofoo() #1 {
+entry:
+  ret void
+}
+
+; 16: 	.set	nomips16                  # @nofoo
+; 16: 	.ent	nofoo
+; 16:	.set	noreorder
+; 16:	.set	nomacro
+; 16:	.set	noat
+; 16:	jr	$ra
+; 16:	nop
+; 16:	.set	at
+; 16:	.set	macro
+; 16:	.set	reorder
+; 16:	.end	nofoo
+; 32: 	.set	nomips16                  # @nofoo
+; 32: 	.ent	nofoo
+; 32:	.set	noreorder
+; 32:	.set	nomacro
+; 32:	.set	noat
+; 32:	jr	$ra
+; 32:	nop
+; 32:	.set	at
+; 32:	.set	macro
+; 32:	.set	reorder
+; 32:	.end	nofoo
+define i32 @main() #2 {
+entry:
+  ret i32 0
+}
+
+; 16: 	.set	mips16                  # @main
+; 16: 	.ent	main
+; 16:	save	{{.+}}
+; 16:	restore	{{.+}} 
+; 16:	.end	main
+; 32: 	.set	nomips16                  # @main
+; 32: 	.ent	main
+; 32:	.set	noreorder
+; 32:	.set	nomacro
+; 32:	.set	noat
+; 32:	jr	$ra
+; 32:	addiu	$2, $zero, 0
+; 32:	.set	at
+; 32:	.set	macro
+; 32:	.set	reorder
+; 32:	.end	main
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "mips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "nomips16" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/Mips/mips16_32_4.ll b/test/CodeGen/Mips/mips16_32_4.ll
new file mode 100644
index 0000000..cdaed6c
--- /dev/null
+++ b/test/CodeGen/Mips/mips16_32_4.ll
@@ -0,0 +1,65 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=static -O3 < %s -mips-mixed-16-32  | FileCheck %s -check-prefix=16
+; RUN: llc  -march=mipsel -mcpu=mips32 -relocation-model=static -O3 < %s -mips-mixed-16-32  | FileCheck %s -check-prefix=32
+
+define void @foo() #0 {
+entry:
+  ret void
+}
+
+; 16: 	.set	mips16                  # @foo
+; 16: 	.ent	foo
+; 16:	save	{{.+}}
+; 16:	restore	{{.+}} 
+; 16:	.end	foo
+; 32: 	.set	mips16                  # @foo
+; 32: 	.ent	foo
+; 32:	save	{{.+}}
+; 32:	restore	{{.+}} 
+; 32:	.end	foo
+define void @nofoo() #1 {
+entry:
+  ret void
+}
+
+; 16: 	.set	nomips16                  # @nofoo
+; 16: 	.ent	nofoo
+; 16:	.set	noreorder
+; 16:	.set	nomacro
+; 16:	.set	noat
+; 16:	jr	$ra
+; 16:	nop
+; 16:	.set	at
+; 16:	.set	macro
+; 16:	.set	reorder
+; 16:	.end	nofoo
+; 32: 	.set	nomips16                  # @nofoo
+; 32: 	.ent	nofoo
+; 32:	.set	noreorder
+; 32:	.set	nomacro
+; 32:	.set	noat
+; 32:	jr	$ra
+; 32:	nop
+; 32:	.set	at
+; 32:	.set	macro
+; 32:	.set	reorder
+; 32:	.end	nofoo
+define i32 @main() #2 {
+entry:
+  ret i32 0
+}
+
+; 16: 	.set	mips16                  # @main
+; 16: 	.ent	main
+; 16:	save	{{.+}}
+; 16:	restore	{{.+}} 
+; 16:	.end	main
+; 32: 	.set	mips16                  # @main
+; 32: 	.ent	main
+; 32:	save	{{.+}}
+; 32:	restore	{{.+}} 
+; 32:	.end	main
+
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "mips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "nomips16" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind "less-precise-fpmad"="false" "mips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/Mips/mips16_32_5.ll b/test/CodeGen/Mips/mips16_32_5.ll
new file mode 100644
index 0000000..45e0bf4
--- /dev/null
+++ b/test/CodeGen/Mips/mips16_32_5.ll
@@ -0,0 +1,80 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=static -O3 < %s -mips-mixed-16-32  | FileCheck %s -check-prefix=16
+; RUN: llc  -march=mipsel -mcpu=mips32 -relocation-model=static -O3 < %s -mips-mixed-16-32  | FileCheck %s -check-prefix=32
+
+define void @foo() #0 {
+entry:
+  ret void
+}
+
+; 16: 	.set	mips16                  # @foo
+; 16: 	.ent	foo
+; 16:	save	{{.+}}
+; 16:	restore	{{.+}} 
+; 16:	.end	foo
+; 32: 	.set	mips16                  # @foo
+; 32: 	.ent	foo
+; 32:	save	{{.+}}
+; 32:	restore	{{.+}} 
+; 32:	.end	foo
+define void @nofoo() #1 {
+entry:
+  ret void
+}
+
+; 16: 	.set	nomips16                  # @nofoo
+; 16: 	.ent	nofoo
+; 16:	.set	noreorder
+; 16:	.set	nomacro
+; 16:	.set	noat
+; 16:	jr	$ra
+; 16:	nop
+; 16:	.set	at
+; 16:	.set	macro
+; 16:	.set	reorder
+; 16:	.end	nofoo
+; 32: 	.set	nomips16                  # @nofoo
+; 32: 	.ent	nofoo
+; 32:	.set	noreorder
+; 32:	.set	nomacro
+; 32:	.set	noat
+; 32:	jr	$ra
+; 32:	nop
+; 32:	.set	at
+; 32:	.set	macro
+; 32:	.set	reorder
+; 32:	.end	nofoo
+define i32 @main() #2 {
+entry:
+  ret i32 0
+}
+
+; 16: 	.set	nomips16                  # @main
+; 16: 	.ent	main
+; 16:	.set	noreorder
+; 16:	.set	nomacro
+; 16:	.set	noat
+; 16:	jr	$ra
+; 16:	addiu	$2, $zero, 0
+; 16:	.set	at
+; 16:	.set	macro
+; 16:	.set	reorder
+; 16:	.end	main
+
+; 32: 	.set	nomips16                  # @main
+; 32: 	.ent	main
+; 32:	.set	noreorder
+; 32:	.set	nomacro
+; 32:	.set	noat
+; 32:	jr	$ra
+; 32:	addiu	$2, $zero, 0
+; 32:	.set	at
+; 32:	.set	macro
+; 32:	.set	reorder
+; 32:	.end	main
+
+
+
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "mips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "nomips16" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind "less-precise-fpmad"="false" "nomips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/Mips/mips16_32_6.ll b/test/CodeGen/Mips/mips16_32_6.ll
new file mode 100644
index 0000000..f4b8e7a
--- /dev/null
+++ b/test/CodeGen/Mips/mips16_32_6.ll
@@ -0,0 +1,86 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=static -O3 < %s -mips-mixed-16-32  | FileCheck %s -check-prefix=16
+; RUN: llc  -march=mipsel -mcpu=mips32 -relocation-model=static -O3 < %s -mips-mixed-16-32  | FileCheck %s -check-prefix=32
+
+define void @foo() #0 {
+entry:
+  ret void
+}
+
+; 16: 	.set	mips16                  # @foo
+; 16: 	.ent	foo
+; 16:	save	{{.+}}
+; 16:	restore	{{.+}} 
+; 16:	.end	foo
+; 32: 	.set	nomips16                  # @foo
+; 32: 	.ent	foo
+; 32:	.set	noreorder
+; 32:	.set	nomacro
+; 32:	.set	noat
+; 32:	jr	$ra
+; 32:	nop
+; 32:	.set	at
+; 32:	.set	macro
+; 32:	.set	reorder
+; 32:	.end    foo
+define void @nofoo() #1 {
+entry:
+  ret void
+}
+
+; 16: 	.set	nomips16                  # @nofoo
+; 16: 	.ent	nofoo
+; 16:	.set	noreorder
+; 16:	.set	nomacro
+; 16:	.set	noat
+; 16:	jr	$ra
+; 16:	nop
+; 16:	.set	at
+; 16:	.set	macro
+; 16:	.set	reorder
+; 16:	.end	nofoo
+; 32: 	.set	nomips16                  # @nofoo
+; 32: 	.ent	nofoo
+; 32:	.set	noreorder
+; 32:	.set	nomacro
+; 32:	.set	noat
+; 32:	jr	$ra
+; 32:	nop
+; 32:	.set	at
+; 32:	.set	macro
+; 32:	.set	reorder
+; 32:	.end	nofoo
+define i32 @main() #2 {
+entry:
+  ret i32 0
+}
+
+; 16: 	.set	nomips16                  # @main
+; 16: 	.ent	main
+; 16:	.set	noreorder
+; 16:	.set	nomacro
+; 16:	.set	noat
+; 16:	jr	$ra
+; 16:	addiu	$2, $zero, 0
+; 16:	.set	at
+; 16:	.set	macro
+; 16:	.set	reorder
+; 16:	.end	main
+
+; 32: 	.set	nomips16                  # @main
+; 32: 	.ent	main
+; 32:	.set	noreorder
+; 32:	.set	nomacro
+; 32:	.set	noat
+; 32:	jr	$ra
+; 32:	addiu	$2, $zero, 0
+; 32:	.set	at
+; 32:	.set	macro
+; 32:	.set	reorder
+; 32:	.end	main
+
+
+
+
+attributes #0 = { nounwind "less-precise-fpmad"="false"  "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "nomips16" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind "less-precise-fpmad"="false" "nomips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/Mips/mips16_32_7.ll b/test/CodeGen/Mips/mips16_32_7.ll
new file mode 100644
index 0000000..f8726ea
--- /dev/null
+++ b/test/CodeGen/Mips/mips16_32_7.ll
@@ -0,0 +1,76 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=static -O3 < %s -mips-mixed-16-32  | FileCheck %s -check-prefix=16
+; RUN: llc  -march=mipsel -mcpu=mips32 -relocation-model=static -O3 < %s -mips-mixed-16-32  | FileCheck %s -check-prefix=32
+
+define void @foo() #0 {
+entry:
+  ret void
+}
+
+; 16: 	.set	mips16                  # @foo
+; 16: 	.ent	foo
+; 16:	save	{{.+}}
+; 16:	restore	{{.+}} 
+; 16:	.end	foo
+; 32: 	.set	nomips16                  # @foo
+; 32: 	.ent	foo
+; 32:	.set	noreorder
+; 32:	.set	nomacro
+; 32:	.set	noat
+; 32:	jr	$ra
+; 32:	nop
+; 32:	.set	at
+; 32:	.set	macro
+; 32:	.set	reorder
+; 32:	.end    foo
+define void @nofoo() #1 {
+entry:
+  ret void
+}
+
+; 16: 	.set	nomips16                  # @nofoo
+; 16: 	.ent	nofoo
+; 16:	.set	noreorder
+; 16:	.set	nomacro
+; 16:	.set	noat
+; 16:	jr	$ra
+; 16:	nop
+; 16:	.set	at
+; 16:	.set	macro
+; 16:	.set	reorder
+; 16:	.end	nofoo
+; 32: 	.set	nomips16                  # @nofoo
+; 32: 	.ent	nofoo
+; 32:	.set	noreorder
+; 32:	.set	nomacro
+; 32:	.set	noat
+; 32:	jr	$ra
+; 32:	nop
+; 32:	.set	at
+; 32:	.set	macro
+; 32:	.set	reorder
+; 32:	.end	nofoo
+define i32 @main() #2 {
+entry:
+  ret i32 0
+}
+
+; 16: 	.set	mips16                  # @main
+; 16: 	.ent	main
+; 16:	save	{{.+}}
+; 16:	restore	{{.+}} 
+; 16:	.end	main
+
+; 32: 	.set	mips16                  # @main
+; 32: 	.ent	main
+; 32:	save	{{.+}}
+; 32:	restore	{{.+}} 
+; 32:	.end	main
+
+
+
+
+
+
+attributes #0 = { nounwind "less-precise-fpmad"="false"  "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "nomips16" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind "less-precise-fpmad"="false" "mips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/Mips/mips16_32_8.ll b/test/CodeGen/Mips/mips16_32_8.ll
new file mode 100644
index 0000000..e51f296
--- /dev/null
+++ b/test/CodeGen/Mips/mips16_32_8.ll
@@ -0,0 +1,74 @@
+; RUN: llc  -march=mipsel -mcpu=mips32 -relocation-model=static -O3 < %s -mips-mixed-16-32  | FileCheck %s -check-prefix=32
+
+@x = global float 1.000000e+00, align 4
+@y = global float 0x4007333340000000, align 4
+@i = common global i32 0, align 4
+@f = common global float 0.000000e+00, align 4
+@.str = private unnamed_addr constant [8 x i8] c"f = %f\0A\00", align 1
+@.str1 = private unnamed_addr constant [11 x i8] c"hello %i \0A\00", align 1
+@.str2 = private unnamed_addr constant [13 x i8] c"goodbye %i \0A\00", align 1
+
+define void @foo() #0 {
+entry:
+  store i32 10, i32* @i, align 4
+  ret void
+}
+
+; 32: 	.set	mips16                  # @foo
+; 32: 	.ent	foo
+; 32:	save	{{.+}}
+; 32:	restore	{{.+}} 
+; 32:	.end	foo
+
+define void @nofoo() #1 {
+entry:
+  store i32 20, i32* @i, align 4
+  %0 = load float* @x, align 4
+  %1 = load float* @y, align 4
+  %add = fadd float %0, %1
+  store float %add, float* @f, align 4
+  %2 = load float* @f, align 4
+  %conv = fpext float %2 to double
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8]* @.str, i32 0, i32 0), double %conv)
+  ret void
+}
+
+; 32: 	.set	nomips16                  # @nofoo
+; 32: 	.ent	nofoo
+; 32:	.set	noreorder
+; 32:	.set	nomacro
+; 32:	.set	noat
+; 32:	add.s	{{.+}}
+; 32:	mfc1    {{.+}}
+; 32:	.set	at
+; 32:	.set	macro
+; 32:	.set	reorder
+; 32:	.end	nofoo
+declare i32 @printf(i8*, ...) #2
+
+define i32 @main() #3 {
+entry:
+  call void @foo()
+  %0 = load i32* @i, align 4
+  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @.str1, i32 0, i32 0), i32 %0)
+  call void @nofoo()
+  %1 = load i32* @i, align 4
+  %call1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([13 x i8]* @.str2, i32 0, i32 0), i32 %1)
+  ret i32 0
+}
+
+; 32: 	.set	nomips16                  # @main
+; 32: 	.ent	main
+; 32:	.set	noreorder
+; 32:	.set	nomacro
+; 32:	.set	noat
+; 32:	jr	$ra
+; 32:	.set	at
+; 32:	.set	macro
+; 32:	.set	reorder
+; 32:	.end	main
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "mips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "nomips16" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/Mips/mips16_32_9.ll b/test/CodeGen/Mips/mips16_32_9.ll
new file mode 100644
index 0000000..f5ff368
--- /dev/null
+++ b/test/CodeGen/Mips/mips16_32_9.ll
@@ -0,0 +1,51 @@
+; RUN: llc  -march=mipsel -mcpu=mips32 -relocation-model=static -O3 < %s -mips-mixed-16-32  | FileCheck %s -check-prefix=32
+
+define void @foo() #0 {
+entry:
+  ret void
+}
+
+; 32: 	.set	mips16                  # @foo
+; 32: 	.ent	foo
+; 32:	save	{{.+}}
+; 32:	restore	{{.+}} 
+; 32:	.end	foo
+define void @nofoo() #1 {
+entry:
+  ret void
+}
+
+; 32: 	.set	nomips16                  # @nofoo
+; 32: 	.ent	nofoo
+; 32:	.set	noreorder
+; 32:	.set	nomacro
+; 32:	.set	noat
+; 32:	jr	$ra
+; 32:	nop
+; 32:	.set	at
+; 32:	.set	macro
+; 32:	.set	reorder
+; 32:	.end	nofoo
+define i32 @main() #2 {
+entry:
+  ret i32 0
+}
+
+; 32: 	.set	mips16                  # @main
+; 32: 	.ent	main
+; 32:	save	{{.+}}
+; 32:	restore	{{.+}} 
+; 32:	.end	main
+
+
+
+
+
+
+
+
+
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "mips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false"  "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind "less-precise-fpmad"="false" "mips16" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/Mips/select.ll b/test/CodeGen/Mips/select.ll
index 40115be..06e2a86 100644
--- a/test/CodeGen/Mips/select.ll
+++ b/test/CodeGen/Mips/select.ll
@@ -130,8 +130,8 @@ define i32 @sel12(i32 %f0, i32 %f1) nounwind readonly {
 entry:
 ; CHECK: c.eq.d
 ; CHECK: movt
-  %tmp = load double* @d2, align 8, !tbaa !0
-  %tmp1 = load double* @d3, align 8, !tbaa !0
+  %tmp = load double* @d2, align 8
+  %tmp1 = load double* @d3, align 8
   %cmp = fcmp oeq double %tmp, %tmp1
   %cond = select i1 %cmp, i32 %f0, i32 %f1
   ret i32 %cond
@@ -141,8 +141,8 @@ define i32 @sel13(i32 %f0, i32 %f1) nounwind readonly {
 entry:
 ; CHECK: c.olt.d
 ; CHECK: movt
-  %tmp = load double* @d2, align 8, !tbaa !0
-  %tmp1 = load double* @d3, align 8, !tbaa !0
+  %tmp = load double* @d2, align 8
+  %tmp1 = load double* @d3, align 8
   %cmp = fcmp olt double %tmp, %tmp1
   %cond = select i1 %cmp, i32 %f0, i32 %f1
   ret i32 %cond
@@ -152,13 +152,9 @@ define i32 @sel14(i32 %f0, i32 %f1) nounwind readonly {
 entry:
 ; CHECK: c.ule.d
 ; CHECK: movf
-  %tmp = load double* @d2, align 8, !tbaa !0
-  %tmp1 = load double* @d3, align 8, !tbaa !0
+  %tmp = load double* @d2, align 8
+  %tmp1 = load double* @d3, align 8
   %cmp = fcmp ogt double %tmp, %tmp1
   %cond = select i1 %cmp, i32 %f0, i32 %f1
   ret i32 %cond
 }
-
-!0 = metadata !{metadata !"double", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/Mips/spill-copy-acreg.ll b/test/CodeGen/Mips/spill-copy-acreg.ll
new file mode 100644
index 0000000..6563a5c
--- /dev/null
+++ b/test/CodeGen/Mips/spill-copy-acreg.ll
@@ -0,0 +1,41 @@
+; RUN: llc -march=mipsel -mattr=+dsp < %s
+
+@g1 = common global i64 0, align 8
+@g2 = common global i64 0, align 8
+@g3 = common global i64 0, align 8
+
+define i64 @test_acreg_copy(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
+entry:
+  %0 = load i64* @g1, align 8
+  %1 = tail call i64 @llvm.mips.maddu(i64 %0, i32 %a0, i32 %a1)
+  %2 = tail call i64 @llvm.mips.maddu(i64 %0, i32 %a2, i32 %a3)
+  store i64 %1, i64* @g1, align 8
+  store i64 %2, i64* @g2, align 8
+  tail call void @foo1()
+  store i64 %2, i64* @g3, align 8
+  ret i64 %1
+}
+
+declare i64 @llvm.mips.maddu(i64, i32, i32)
+
+declare void @foo1()
+
+@g4 = common global <2 x i16> zeroinitializer, align 4
+@g5 = common global <2 x i16> zeroinitializer, align 4
+@g6 = common global <2 x i16> zeroinitializer, align 4
+
+define { i32 } @test_ccond_spill(i32 %a.coerce, i32 %b.coerce) {
+entry:
+  %0 = bitcast i32 %a.coerce to <2 x i16>
+  %1 = bitcast i32 %b.coerce to <2 x i16>
+  %cmp3 = icmp slt <2 x i16> %0, %1
+  %sext = sext <2 x i1> %cmp3 to <2 x i16>
+  store <2 x i16> %sext, <2 x i16>* @g4, align 4
+  tail call void @foo1()
+  %2 = load <2 x i16>* @g5, align 4
+  %3 = load <2 x i16>* @g6, align 4
+  %or = select <2 x i1> %cmp3, <2 x i16> %2, <2 x i16> %3
+  %4 = bitcast <2 x i16> %or to i32
+  %.fca.0.insert = insertvalue { i32 } undef, i32 %4, 0
+  ret { i32 } %.fca.0.insert
+}
diff --git a/test/CodeGen/Mips/tnaked.ll b/test/CodeGen/Mips/tnaked.ll
new file mode 100644
index 0000000..f5bdd91
--- /dev/null
+++ b/test/CodeGen/Mips/tnaked.ll
@@ -0,0 +1,29 @@
+; RUN: llc -march=mipsel < %s | FileCheck %s
+
+
+define void @tnaked() #0 {
+entry:
+  ret void
+}
+
+; CHECK: 	.ent	tnaked
+; CHECK:          tnaked: 
+; CHECK-NOT:	.frame	{{.*}}
+; CHECK-NOT:     .mask 	{{.*}}
+; CHECK-NOT:	.fmask	{{.*}}
+; CHECK-NOT:	 addiu	$sp, $sp, -8
+
+define void @tnonaked() #1 {
+entry:
+  ret void
+}
+
+; CHECK: 	.ent	tnonaked
+; CHECK:         tnonaked: 
+; CHECK:	.frame	$fp,8,$ra
+; CHECK:        .mask 	0x40000000,-4
+; CHECK:	.fmask	0x00000000,0
+; CHECK: 	addiu	$sp, $sp, -8
+
+attributes #0 = { naked noinline nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/Mips/zeroreg.ll b/test/CodeGen/Mips/zeroreg.ll
index 79ed609..e0e93e2 100644
--- a/test/CodeGen/Mips/zeroreg.ll
+++ b/test/CodeGen/Mips/zeroreg.ll
@@ -6,7 +6,7 @@ define i32 @foo0(i32 %s) nounwind readonly {
 entry:
 ; CHECK:     movn ${{[0-9]+}}, $zero
   %tobool = icmp ne i32 %s, 0
-  %0 = load i32* @g1, align 4, !tbaa !0
+  %0 = load i32* @g1, align 4
   %cond = select i1 %tobool, i32 0, i32 %0
   ret i32 %cond
 }
@@ -15,11 +15,7 @@ define i32 @foo1(i32 %s) nounwind readonly {
 entry:
 ; CHECK:     movz ${{[0-9]+}}, $zero
   %tobool = icmp ne i32 %s, 0
-  %0 = load i32* @g1, align 4, !tbaa !0
+  %0 = load i32* @g1, align 4
   %cond = select i1 %tobool, i32 %0, i32 0
   ret i32 %cond
 }
-
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/NVPTX/generic-to-nvvm.ll b/test/CodeGen/NVPTX/generic-to-nvvm.ll
new file mode 100644
index 0000000..c9cb2f7
--- /dev/null
+++ b/test/CodeGen/NVPTX/generic-to-nvvm.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 -drvcuda | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
+
+; Ensure global variables in address space 0 are promoted to address space 1
+
+; CHECK: .global .align 4 .u32 myglobal = 42;
+@myglobal = internal global i32 42, align 4
+; CHECK: .global .align 4 .u32 myconst = 42;
+@myconst = internal constant i32 42, align 4
+
+
+define void @foo(i32* %a, i32* %b) {
+; CHECK: cvta.global.u32
+  %ld1 = load i32* @myglobal
+; CHECK: cvta.global.u32
+  %ld2 = load i32* @myconst
+  store i32 %ld1, i32* %a
+  store i32 %ld2, i32* %b
+  ret void
+}
+
+
+!nvvm.annotations = !{!0}
+!0 = metadata !{void (i32*, i32*)* @foo, metadata !"kernel", i32 1}
diff --git a/test/CodeGen/NVPTX/i1-global.ll b/test/CodeGen/NVPTX/i1-global.ll
new file mode 100644
index 0000000..0595325
--- /dev/null
+++ b/test/CodeGen/NVPTX/i1-global.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 -drvcuda | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
+
+
+; CHECK: .visible .global .align 1 .u8 mypred
+@mypred = addrspace(1) global i1 true, align 1
+
+
+define void @foo(i1 %p, i32* %out) {
+  %ld = load i1 addrspace(1)* @mypred
+  %val = zext i1 %ld to i32
+  store i32 %val, i32* %out
+  ret void
+}
+
+
+!nvvm.annotations = !{!0}
+!0 = metadata !{void (i1, i32*)* @foo, metadata !"kernel", i32 1}
diff --git a/test/CodeGen/NVPTX/i1-param.ll b/test/CodeGen/NVPTX/i1-param.ll
new file mode 100644
index 0000000..fabd61a
--- /dev/null
+++ b/test/CodeGen/NVPTX/i1-param.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 -drvcuda | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
+
+; Make sure predicate (i1) operands to kernels get expanded out to .u8
+
+; CHECK: .entry foo
+; CHECK:   .param .u8 foo_param_0
+; CHECK:   .param .u32 foo_param_1
+define void @foo(i1 %p, i32* %out) {
+  %val = zext i1 %p to i32
+  store i32 %val, i32* %out
+  ret void
+}
+
+
+!nvvm.annotations = !{!0}
+!0 = metadata !{void (i1, i32*)* @foo, metadata !"kernel", i32 1}
diff --git a/test/CodeGen/NVPTX/intrinsics.ll b/test/CodeGen/NVPTX/intrinsics.ll
index 8b0357b..1676f20 100644
--- a/test/CodeGen/NVPTX/intrinsics.ll
+++ b/test/CodeGen/NVPTX/intrinsics.ll
@@ -15,5 +15,12 @@ define ptx_device double @test_fabs(double %d) {
 	ret double %x
 }
 
+define float @test_nvvm_sqrt(float %a) {
+  %val = call float @llvm.nvvm.sqrt.f(float %a)
+  ret float %val
+}
+
+
 declare float @llvm.fabs.f32(float)
 declare double @llvm.fabs.f64(double)
+declare float @llvm.nvvm.sqrt.f(float)
diff --git a/test/CodeGen/NVPTX/refl1.ll b/test/CodeGen/NVPTX/refl1.ll
new file mode 100644
index 0000000..5a9dac1
--- /dev/null
+++ b/test/CodeGen/NVPTX/refl1.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 -drvcuda | FileCheck %s
+
+; Function Attrs: nounwind
+; CHECK: .entry foo
+define void @foo(float* nocapture %a) #0 {
+  %val = load float* %a
+  %tan = tail call fastcc float @__nv_fast_tanf(float %val)
+  store float %tan, float* %a
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare float @llvm.nvvm.sin.approx.ftz.f(float) #1
+
+; Function Attrs: nounwind readnone
+declare float @llvm.nvvm.cos.approx.ftz.f(float) #1
+
+; Function Attrs: nounwind readnone
+declare float @llvm.nvvm.div.approx.ftz.f(float, float) #1
+
+; Function Attrs: alwaysinline inlinehint nounwind readnone
+; CHECK: .func (.param .b32 func_retval0) __nv_fast_tanf
+define internal fastcc float @__nv_fast_tanf(float %a) #2 {
+entry:
+  %0 = tail call float @llvm.nvvm.sin.approx.ftz.f(float %a)
+  %1 = tail call float @llvm.nvvm.cos.approx.ftz.f(float %a)
+  %2 = tail call float @llvm.nvvm.div.approx.ftz.f(float %0, float %1)
+  ret float %2
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
+attributes #2 = { alwaysinline inlinehint nounwind readnone }
+
+!nvvm.annotations = !{!0}
+
+!0 = metadata !{void (float*)* @foo, metadata !"kernel", i32 1}
diff --git a/test/CodeGen/PowerPC/2007-09-07-LoadStoreIdxForms.ll b/test/CodeGen/PowerPC/2007-09-07-LoadStoreIdxForms.ll
index ea7de98..40f46fd 100644
--- a/test/CodeGen/PowerPC/2007-09-07-LoadStoreIdxForms.ll
+++ b/test/CodeGen/PowerPC/2007-09-07-LoadStoreIdxForms.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=ppc64 | grep lwzx
+; RUN: llc < %s -march=ppc64 | FileCheck %s
 
         %struct.__db_region = type { %struct.__mutex_t, [4 x i8], %struct.anon, i32, [1 x i32] }
         %struct.__mutex_t = type { i32 }
@@ -11,6 +11,10 @@ entry:
         %tmp = load i32* %ttype, align 4                ; <i32> [#uses=1]
         %tmp1 = call i32 (...)* @bork( i32 %tmp )               ; <i32> [#uses=0]
         ret void
+
+; CHECK: @foo
+; CHECK: lwzx
+; CHECK: blr
 }
 
 declare i32 @bork(...)
diff --git a/test/CodeGen/PowerPC/2011-12-05-NoSpillDupCR.ll b/test/CodeGen/PowerPC/2011-12-05-NoSpillDupCR.ll
index 47d985c..3acd01d 100644
--- a/test/CodeGen/PowerPC/2011-12-05-NoSpillDupCR.ll
+++ b/test/CodeGen/PowerPC/2011-12-05-NoSpillDupCR.ll
@@ -47,11 +47,11 @@ for.body4.us:                                     ; preds = %for.body4.lr.ph.us,
   %sext = shl i64 %sub5.us, 32
   %idxprom.us = ashr exact i64 %sext, 32
   %arrayidx.us = getelementptr inbounds [32000 x float]* @b, i64 0, i64 %idxprom.us
-  %2 = load float* %arrayidx.us, align 4, !tbaa !5
+  %2 = load float* %arrayidx.us, align 4
   %arrayidx7.us = getelementptr inbounds [32000 x float]* @a, i64 0, i64 %indvars.iv
-  %3 = load float* %arrayidx7.us, align 4, !tbaa !5
+  %3 = load float* %arrayidx7.us, align 4
   %add8.us = fadd float %3, %2
-  store float %add8.us, float* %arrayidx7.us, align 4, !tbaa !5
+  store float %add8.us, float* %arrayidx7.us, align 4
   %indvars.iv.next = add i64 %indvars.iv, %1
   %4 = trunc i64 %indvars.iv.next to i32
   %cmp3.us = icmp slt i32 %4, 32000
@@ -82,11 +82,11 @@ for.body4.us.1:                                   ; preds = %for.body4.us.1, %fo
   %sext23 = shl i64 %sub5.us.1, 32
   %idxprom.us.1 = ashr exact i64 %sext23, 32
   %arrayidx.us.1 = getelementptr inbounds [32000 x float]* @b, i64 0, i64 %idxprom.us.1
-  %5 = load float* %arrayidx.us.1, align 4, !tbaa !5
+  %5 = load float* %arrayidx.us.1, align 4
   %arrayidx7.us.1 = getelementptr inbounds [32000 x float]* @a, i64 0, i64 %indvars.iv.1
-  %6 = load float* %arrayidx7.us.1, align 4, !tbaa !5
+  %6 = load float* %arrayidx7.us.1, align 4
   %add8.us.1 = fadd float %6, %5
-  store float %add8.us.1, float* %arrayidx7.us.1, align 4, !tbaa !5
+  store float %add8.us.1, float* %arrayidx7.us.1, align 4
   %indvars.iv.next.1 = add i64 %indvars.iv.1, %1
   %7 = trunc i64 %indvars.iv.next.1 to i32
   %cmp3.us.1 = icmp slt i32 %7, 32000
@@ -104,11 +104,11 @@ for.body4.us.2:                                   ; preds = %for.body4.us.2, %fo
   %sext24 = shl i64 %sub5.us.2, 32
   %idxprom.us.2 = ashr exact i64 %sext24, 32
   %arrayidx.us.2 = getelementptr inbounds [32000 x float]* @b, i64 0, i64 %idxprom.us.2
-  %8 = load float* %arrayidx.us.2, align 4, !tbaa !5
+  %8 = load float* %arrayidx.us.2, align 4
   %arrayidx7.us.2 = getelementptr inbounds [32000 x float]* @a, i64 0, i64 %indvars.iv.2
-  %9 = load float* %arrayidx7.us.2, align 4, !tbaa !5
+  %9 = load float* %arrayidx7.us.2, align 4
   %add8.us.2 = fadd float %9, %8
-  store float %add8.us.2, float* %arrayidx7.us.2, align 4, !tbaa !5
+  store float %add8.us.2, float* %arrayidx7.us.2, align 4
   %indvars.iv.next.2 = add i64 %indvars.iv.2, %1
   %10 = trunc i64 %indvars.iv.next.2 to i32
   %cmp3.us.2 = icmp slt i32 %10, 32000
@@ -126,11 +126,11 @@ for.body4.us.3:                                   ; preds = %for.body4.us.3, %fo
   %sext25 = shl i64 %sub5.us.3, 32
   %idxprom.us.3 = ashr exact i64 %sext25, 32
   %arrayidx.us.3 = getelementptr inbounds [32000 x float]* @b, i64 0, i64 %idxprom.us.3
-  %11 = load float* %arrayidx.us.3, align 4, !tbaa !5
+  %11 = load float* %arrayidx.us.3, align 4
   %arrayidx7.us.3 = getelementptr inbounds [32000 x float]* @a, i64 0, i64 %indvars.iv.3
-  %12 = load float* %arrayidx7.us.3, align 4, !tbaa !5
+  %12 = load float* %arrayidx7.us.3, align 4
   %add8.us.3 = fadd float %12, %11
-  store float %add8.us.3, float* %arrayidx7.us.3, align 4, !tbaa !5
+  store float %add8.us.3, float* %arrayidx7.us.3, align 4
   %indvars.iv.next.3 = add i64 %indvars.iv.3, %1
   %13 = trunc i64 %indvars.iv.next.3 to i32
   %cmp3.us.3 = icmp slt i32 %13, 32000
@@ -148,11 +148,11 @@ for.body4.us.4:                                   ; preds = %for.body4.us.4, %fo
   %sext26 = shl i64 %sub5.us.4, 32
   %idxprom.us.4 = ashr exact i64 %sext26, 32
   %arrayidx.us.4 = getelementptr inbounds [32000 x float]* @b, i64 0, i64 %idxprom.us.4
-  %14 = load float* %arrayidx.us.4, align 4, !tbaa !5
+  %14 = load float* %arrayidx.us.4, align 4
   %arrayidx7.us.4 = getelementptr inbounds [32000 x float]* @a, i64 0, i64 %indvars.iv.4
-  %15 = load float* %arrayidx7.us.4, align 4, !tbaa !5
+  %15 = load float* %arrayidx7.us.4, align 4
   %add8.us.4 = fadd float %15, %14
-  store float %add8.us.4, float* %arrayidx7.us.4, align 4, !tbaa !5
+  store float %add8.us.4, float* %arrayidx7.us.4, align 4
   %indvars.iv.next.4 = add i64 %indvars.iv.4, %1
   %16 = trunc i64 %indvars.iv.next.4 to i32
   %cmp3.us.4 = icmp slt i32 %16, 32000
@@ -183,9 +183,4 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32,
 
 declare i32 @puts(i8* nocapture) nounwind
 
-!0 = metadata !{metadata !"any pointer", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
 !3 = metadata !{metadata !"branch_weights", i32 64, i32 4}
-!4 = metadata !{metadata !"int", metadata !1}
-!5 = metadata !{metadata !"float", metadata !1}
diff --git a/test/CodeGen/PowerPC/2011-12-06-SpillAndRestoreCR.ll b/test/CodeGen/PowerPC/2011-12-06-SpillAndRestoreCR.ll
index 52bf6c7..4a1a512 100644
--- a/test/CodeGen/PowerPC/2011-12-06-SpillAndRestoreCR.ll
+++ b/test/CodeGen/PowerPC/2011-12-06-SpillAndRestoreCR.ll
@@ -35,7 +35,7 @@ entry:
 
 for.body:                                         ; preds = %for.end17, %entry
   %nl.041 = phi i32 [ 0, %entry ], [ %inc22, %for.end17 ]
-  %0 = load float* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0, i64 0), align 16, !tbaa !5
+  %0 = load float* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0, i64 0), align 16
   br label %for.cond5.preheader
 
 for.cond5.preheader:                              ; preds = %for.inc15, %for.body
@@ -51,7 +51,7 @@ for.body7:                                        ; preds = %for.body7, %for.con
   %xindex.234 = phi i32 [ %xindex.138, %for.cond5.preheader ], [ %xindex.3.15, %for.body7 ]
   %yindex.233 = phi i32 [ %yindex.137, %for.cond5.preheader ], [ %yindex.3.15, %for.body7 ]
   %arrayidx9 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv
-  %1 = load float* %arrayidx9, align 16, !tbaa !5
+  %1 = load float* %arrayidx9, align 16
   %cmp10 = fcmp ogt float %1, %max.235
   %2 = trunc i64 %indvars.iv to i32
   %yindex.3 = select i1 %cmp10, i32 %2, i32 %yindex.233
@@ -60,7 +60,7 @@ for.body7:                                        ; preds = %for.body7, %for.con
   %max.3 = select i1 %cmp10, float %1, float %max.235
   %indvars.iv.next45 = or i64 %indvars.iv, 1
   %arrayidx9.1 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next45
-  %4 = load float* %arrayidx9.1, align 4, !tbaa !5
+  %4 = load float* %arrayidx9.1, align 4
   %cmp10.1 = fcmp ogt float %4, %max.3
   %5 = trunc i64 %indvars.iv.next45 to i32
   %yindex.3.1 = select i1 %cmp10.1, i32 %5, i32 %yindex.3
@@ -68,7 +68,7 @@ for.body7:                                        ; preds = %for.body7, %for.con
   %max.3.1 = select i1 %cmp10.1, float %4, float %max.3
   %indvars.iv.next.146 = or i64 %indvars.iv, 2
   %arrayidx9.2 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.146
-  %6 = load float* %arrayidx9.2, align 8, !tbaa !5
+  %6 = load float* %arrayidx9.2, align 8
   %cmp10.2 = fcmp ogt float %6, %max.3.1
   %7 = trunc i64 %indvars.iv.next.146 to i32
   %yindex.3.2 = select i1 %cmp10.2, i32 %7, i32 %yindex.3.1
@@ -76,7 +76,7 @@ for.body7:                                        ; preds = %for.body7, %for.con
   %max.3.2 = select i1 %cmp10.2, float %6, float %max.3.1
   %indvars.iv.next.247 = or i64 %indvars.iv, 3
   %arrayidx9.3 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.247
-  %8 = load float* %arrayidx9.3, align 4, !tbaa !5
+  %8 = load float* %arrayidx9.3, align 4
   %cmp10.3 = fcmp ogt float %8, %max.3.2
   %9 = trunc i64 %indvars.iv.next.247 to i32
   %yindex.3.3 = select i1 %cmp10.3, i32 %9, i32 %yindex.3.2
@@ -84,7 +84,7 @@ for.body7:                                        ; preds = %for.body7, %for.con
   %max.3.3 = select i1 %cmp10.3, float %8, float %max.3.2
   %indvars.iv.next.348 = or i64 %indvars.iv, 4
   %arrayidx9.4 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.348
-  %10 = load float* %arrayidx9.4, align 16, !tbaa !5
+  %10 = load float* %arrayidx9.4, align 16
   %cmp10.4 = fcmp ogt float %10, %max.3.3
   %11 = trunc i64 %indvars.iv.next.348 to i32
   %yindex.3.4 = select i1 %cmp10.4, i32 %11, i32 %yindex.3.3
@@ -92,7 +92,7 @@ for.body7:                                        ; preds = %for.body7, %for.con
   %max.3.4 = select i1 %cmp10.4, float %10, float %max.3.3
   %indvars.iv.next.449 = or i64 %indvars.iv, 5
   %arrayidx9.5 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.449
-  %12 = load float* %arrayidx9.5, align 4, !tbaa !5
+  %12 = load float* %arrayidx9.5, align 4
   %cmp10.5 = fcmp ogt float %12, %max.3.4
   %13 = trunc i64 %indvars.iv.next.449 to i32
   %yindex.3.5 = select i1 %cmp10.5, i32 %13, i32 %yindex.3.4
@@ -100,7 +100,7 @@ for.body7:                                        ; preds = %for.body7, %for.con
   %max.3.5 = select i1 %cmp10.5, float %12, float %max.3.4
   %indvars.iv.next.550 = or i64 %indvars.iv, 6
   %arrayidx9.6 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.550
-  %14 = load float* %arrayidx9.6, align 8, !tbaa !5
+  %14 = load float* %arrayidx9.6, align 8
   %cmp10.6 = fcmp ogt float %14, %max.3.5
   %15 = trunc i64 %indvars.iv.next.550 to i32
   %yindex.3.6 = select i1 %cmp10.6, i32 %15, i32 %yindex.3.5
@@ -108,7 +108,7 @@ for.body7:                                        ; preds = %for.body7, %for.con
   %max.3.6 = select i1 %cmp10.6, float %14, float %max.3.5
   %indvars.iv.next.651 = or i64 %indvars.iv, 7
   %arrayidx9.7 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.651
-  %16 = load float* %arrayidx9.7, align 4, !tbaa !5
+  %16 = load float* %arrayidx9.7, align 4
   %cmp10.7 = fcmp ogt float %16, %max.3.6
   %17 = trunc i64 %indvars.iv.next.651 to i32
   %yindex.3.7 = select i1 %cmp10.7, i32 %17, i32 %yindex.3.6
@@ -116,7 +116,7 @@ for.body7:                                        ; preds = %for.body7, %for.con
   %max.3.7 = select i1 %cmp10.7, float %16, float %max.3.6
   %indvars.iv.next.752 = or i64 %indvars.iv, 8
   %arrayidx9.8 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.752
-  %18 = load float* %arrayidx9.8, align 16, !tbaa !5
+  %18 = load float* %arrayidx9.8, align 16
   %cmp10.8 = fcmp ogt float %18, %max.3.7
   %19 = trunc i64 %indvars.iv.next.752 to i32
   %yindex.3.8 = select i1 %cmp10.8, i32 %19, i32 %yindex.3.7
@@ -124,7 +124,7 @@ for.body7:                                        ; preds = %for.body7, %for.con
   %max.3.8 = select i1 %cmp10.8, float %18, float %max.3.7
   %indvars.iv.next.853 = or i64 %indvars.iv, 9
   %arrayidx9.9 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.853
-  %20 = load float* %arrayidx9.9, align 4, !tbaa !5
+  %20 = load float* %arrayidx9.9, align 4
   %cmp10.9 = fcmp ogt float %20, %max.3.8
   %21 = trunc i64 %indvars.iv.next.853 to i32
   %yindex.3.9 = select i1 %cmp10.9, i32 %21, i32 %yindex.3.8
@@ -132,7 +132,7 @@ for.body7:                                        ; preds = %for.body7, %for.con
   %max.3.9 = select i1 %cmp10.9, float %20, float %max.3.8
   %indvars.iv.next.954 = or i64 %indvars.iv, 10
   %arrayidx9.10 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.954
-  %22 = load float* %arrayidx9.10, align 8, !tbaa !5
+  %22 = load float* %arrayidx9.10, align 8
   %cmp10.10 = fcmp ogt float %22, %max.3.9
   %23 = trunc i64 %indvars.iv.next.954 to i32
   %yindex.3.10 = select i1 %cmp10.10, i32 %23, i32 %yindex.3.9
@@ -140,7 +140,7 @@ for.body7:                                        ; preds = %for.body7, %for.con
   %max.3.10 = select i1 %cmp10.10, float %22, float %max.3.9
   %indvars.iv.next.1055 = or i64 %indvars.iv, 11
   %arrayidx9.11 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.1055
-  %24 = load float* %arrayidx9.11, align 4, !tbaa !5
+  %24 = load float* %arrayidx9.11, align 4
   %cmp10.11 = fcmp ogt float %24, %max.3.10
   %25 = trunc i64 %indvars.iv.next.1055 to i32
   %yindex.3.11 = select i1 %cmp10.11, i32 %25, i32 %yindex.3.10
@@ -148,7 +148,7 @@ for.body7:                                        ; preds = %for.body7, %for.con
   %max.3.11 = select i1 %cmp10.11, float %24, float %max.3.10
   %indvars.iv.next.1156 = or i64 %indvars.iv, 12
   %arrayidx9.12 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.1156
-  %26 = load float* %arrayidx9.12, align 16, !tbaa !5
+  %26 = load float* %arrayidx9.12, align 16
   %cmp10.12 = fcmp ogt float %26, %max.3.11
   %27 = trunc i64 %indvars.iv.next.1156 to i32
   %yindex.3.12 = select i1 %cmp10.12, i32 %27, i32 %yindex.3.11
@@ -156,7 +156,7 @@ for.body7:                                        ; preds = %for.body7, %for.con
   %max.3.12 = select i1 %cmp10.12, float %26, float %max.3.11
   %indvars.iv.next.1257 = or i64 %indvars.iv, 13
   %arrayidx9.13 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.1257
-  %28 = load float* %arrayidx9.13, align 4, !tbaa !5
+  %28 = load float* %arrayidx9.13, align 4
   %cmp10.13 = fcmp ogt float %28, %max.3.12
   %29 = trunc i64 %indvars.iv.next.1257 to i32
   %yindex.3.13 = select i1 %cmp10.13, i32 %29, i32 %yindex.3.12
@@ -164,7 +164,7 @@ for.body7:                                        ; preds = %for.body7, %for.con
   %max.3.13 = select i1 %cmp10.13, float %28, float %max.3.12
   %indvars.iv.next.1358 = or i64 %indvars.iv, 14
   %arrayidx9.14 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.1358
-  %30 = load float* %arrayidx9.14, align 8, !tbaa !5
+  %30 = load float* %arrayidx9.14, align 8
   %cmp10.14 = fcmp ogt float %30, %max.3.13
   %31 = trunc i64 %indvars.iv.next.1358 to i32
   %yindex.3.14 = select i1 %cmp10.14, i32 %31, i32 %yindex.3.13
@@ -172,7 +172,7 @@ for.body7:                                        ; preds = %for.body7, %for.con
   %max.3.14 = select i1 %cmp10.14, float %30, float %max.3.13
   %indvars.iv.next.1459 = or i64 %indvars.iv, 15
   %arrayidx9.15 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.1459
-  %32 = load float* %arrayidx9.15, align 4, !tbaa !5
+  %32 = load float* %arrayidx9.15, align 4
   %cmp10.15 = fcmp ogt float %32, %max.3.14
   %33 = trunc i64 %indvars.iv.next.1459 to i32
   %yindex.3.15 = select i1 %cmp10.15, i32 %33, i32 %yindex.3.14
@@ -208,7 +208,7 @@ for.end23:                                        ; preds = %for.end17
   %add29 = fadd float %add, 1.000000e+00
   %add31 = fadd float %add29, %conv18
   %add32 = fadd float %add31, 1.000000e+00
-  store float %add32, float* @temp, align 4, !tbaa !5
+  store float %add32, float* @temp, align 4
   tail call void @check(i32 -1)
   ret i32 0
 }
@@ -217,9 +217,4 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32,
 
 declare i32 @puts(i8* nocapture) nounwind
 
-!0 = metadata !{metadata !"any pointer", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
 !3 = metadata !{metadata !"branch_weights", i32 64, i32 4}
-!4 = metadata !{metadata !"int", metadata !1}
-!5 = metadata !{metadata !"float", metadata !1}
diff --git a/test/CodeGen/PowerPC/bdzlr.ll b/test/CodeGen/PowerPC/bdzlr.ll
new file mode 100644
index 0000000..656a858
--- /dev/null
+++ b/test/CodeGen/PowerPC/bdzlr.ll
@@ -0,0 +1,64 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%struct.lua_TValue.17.692 = type { %union.Value.16.691, i32 }
+%union.Value.16.691 = type { %union.GCObject.15.690* }
+%union.GCObject.15.690 = type { %struct.lua_State.14.689 }
+%struct.lua_State.14.689 = type { %union.GCObject.15.690*, i8, i8, i8, %struct.lua_TValue.17.692*, %struct.lua_TValue.17.692*, %struct.global_State.10.685*, %struct.CallInfo.11.686*, i32*, %struct.lua_TValue.17.692*, %struct.lua_TValue.17.692*, %struct.CallInfo.11.686*, %struct.CallInfo.11.686*, i32, i32, i16, i16, i8, i8, i32, i32, void (%struct.lua_State.14.689*, %struct.lua_Debug.12.687*)*, %struct.lua_TValue.17.692, %struct.lua_TValue.17.692, %union.GCObject.15.690*, %union.GCObject.15.690*, %struct.lua_longjmp.13.688*, i64 }
+%struct.global_State.10.685 = type { %struct.stringtable.0.675, i8* (i8*, i8*, i64, i64)*, i8*, i8, i8, i32, %union.GCObject.15.690*, %union.GCObject.15.690**, %union.GCObject.15.690*, %union.GCObject.15.690*, %union.GCObject.15.690*, %union.GCObject.15.690*, %struct.Mbuffer.1.676, i64, i64, i64, i64, i32, i32, i32 (%struct.lua_State.14.689*)*, %struct.lua_TValue.17.692, %struct.lua_State.14.689*, %struct.UpVal.3.678, [9 x %struct.Table.7.682*], [17 x %union.TString.9.684*] }
+%struct.stringtable.0.675 = type { %union.GCObject.15.690**, i32, i32 }
+%struct.Mbuffer.1.676 = type { i8*, i64, i64 }
+%struct.UpVal.3.678 = type { %union.GCObject.15.690*, i8, i8, %struct.lua_TValue.17.692*, %union.anon.2.677 }
+%union.anon.2.677 = type { %struct.lua_TValue.17.692 }
+%struct.Table.7.682 = type { %union.GCObject.15.690*, i8, i8, i8, i8, %struct.Table.7.682*, %struct.lua_TValue.17.692*, %struct.Node.6.681*, %struct.Node.6.681*, %union.GCObject.15.690*, i32 }
+%struct.Node.6.681 = type { %struct.lua_TValue.17.692, %union.TKey.5.680 }
+%union.TKey.5.680 = type { %struct.anon.0.4.679 }
+%struct.anon.0.4.679 = type { %union.Value.16.691, i32, %struct.Node.6.681* }
+%union.TString.9.684 = type { %struct.anon.1.8.683 }
+%struct.anon.1.8.683 = type { %union.GCObject.15.690*, i8, i8, i8, i32, i64 }
+%struct.CallInfo.11.686 = type { %struct.lua_TValue.17.692*, %struct.lua_TValue.17.692*, %struct.lua_TValue.17.692*, i32*, i32, i32 }
+%struct.lua_Debug.12.687 = type { i32, i8*, i8*, i8*, i8*, i32, i32, i32, i32, [60 x i8], i32 }
+%struct.lua_longjmp.13.688 = type opaque
+
+define void @lua_xmove(i32 signext %n) #0 {
+entry:
+  br i1 undef, label %for.end, label %if.end
+
+if.end:                                           ; preds = %entry
+  br i1 undef, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %if.end
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.for.body_crit_edge, %for.body.lr.ph
+  %0 = phi %struct.lua_TValue.17.692* [ undef, %for.body.lr.ph ], [ %.pre, %for.body.for.body_crit_edge ]
+  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body.for.body_crit_edge ]
+  %tt = getelementptr inbounds %struct.lua_TValue.17.692* %0, i64 %indvars.iv, i32 1
+  %1 = load i32* %tt, align 4, !tbaa !0
+  store i32 %1, i32* undef, align 4, !tbaa !0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body.for.body_crit_edge
+
+for.body.for.body_crit_edge:                      ; preds = %for.body
+  %.pre = load %struct.lua_TValue.17.692** undef, align 8, !tbaa !3
+  br label %for.body
+
+for.end:                                          ; preds = %for.body, %if.end, %entry
+  ret void
+
+; CHECK: @lua_xmove
+; CHECK: bnelr
+; CHECK: bnelr
+; CHECK: bdzlr
+; CHECK-NOT: blr
+}
+
+attributes #0 = { nounwind }
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{metadata !"any pointer", metadata !1}
diff --git a/test/CodeGen/PowerPC/crsave.ll b/test/CodeGen/PowerPC/crsave.ll
index 3e98dbd..f1cbc5a 100644
--- a/test/CodeGen/PowerPC/crsave.ll
+++ b/test/CodeGen/PowerPC/crsave.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -O0 -disable-fp-elim -mtriple=powerpc-unknown-linux-gnu < %s | FileCheck %s -check-prefix=PPC32
-; RUN: llc -O0 -disable-fp-elim -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s -check-prefix=PPC64
+; RUN: llc -O0 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s -check-prefix=PPC64
 
 declare void @foo()
 
@@ -13,15 +13,19 @@ entry:
   ret i32 %1
 }
 
+; PPC32: stw 31, -4(1)
+; PPC32: stwu 1, -32(1)
 ; PPC32: mfcr 12
-; PPC32-NEXT: stw 12, {{[0-9]+}}(31)
-; PPC32: lwz 12, {{[0-9]+}}(31)
+; PPC32-NEXT: stw 12, 24(31)
+; PPC32: lwz 12, 24(31)
 ; PPC32-NEXT: mtcrf 32, 12
 
 ; PPC64: mfcr 12
-; PPC64-NEXT: stw 12, 8(1)
+; PPC64: stw 12, 8(1)
+; PPC64: stdu 1, -[[AMT:[0-9]+]](1)
+; PPC64: addi 1, 1, [[AMT]]
 ; PPC64: lwz 12, 8(1)
-; PPC64-NEXT: mtcrf 32, 12
+; PPC64: mtcrf 32, 12
 
 define i32 @test_cr234() nounwind {
 entry:
@@ -33,17 +37,21 @@ entry:
   ret i32 %1
 }
 
+; PPC32: stw 31, -4(1)
+; PPC32: stwu 1, -32(1)
 ; PPC32: mfcr 12
-; PPC32-NEXT: stw 12, {{[0-9]+}}(31)
-; PPC32: lwz 12, {{[0-9]+}}(31)
+; PPC32-NEXT: stw 12, 24(31)
+; PPC32: lwz 12, 24(31)
 ; PPC32-NEXT: mtcrf 32, 12
 ; PPC32-NEXT: mtcrf 16, 12
 ; PPC32-NEXT: mtcrf 8, 12
 
 ; PPC64: mfcr 12
-; PPC64-NEXT: stw 12, 8(1)
+; PPC64: stw 12, 8(1)
+; PPC64: stdu 1, -[[AMT:[0-9]+]](1)
+; PPC64: addi 1, 1, [[AMT]]
 ; PPC64: lwz 12, 8(1)
-; PPC64-NEXT: mtcrf 32, 12
-; PPC64-NEXT: mtcrf 16, 12
-; PPC64-NEXT: mtcrf 8, 12
+; PPC64: mtcrf 32, 12
+; PPC64: mtcrf 16, 12
+; PPC64: mtcrf 8, 12
 
diff --git a/test/CodeGen/PowerPC/ctrloop-s000.ll b/test/CodeGen/PowerPC/ctrloop-s000.ll
index dcea06f..4d8ef50 100644
--- a/test/CodeGen/PowerPC/ctrloop-s000.ll
+++ b/test/CodeGen/PowerPC/ctrloop-s000.ll
@@ -36,100 +36,100 @@ for.cond1.preheader:                              ; preds = %for.end, %entry
 for.body3:                                        ; preds = %for.body3, %for.cond1.preheader
   %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next.15, %for.body3 ]
   %arrayidx = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv
-  %0 = load double* %arrayidx, align 32, !tbaa !0
+  %0 = load double* %arrayidx, align 32
   %add = fadd double %0, 1.000000e+00
   %arrayidx5 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv
-  store double %add, double* %arrayidx5, align 32, !tbaa !0
+  store double %add, double* %arrayidx5, align 32
   %indvars.iv.next11 = or i64 %indvars.iv, 1
   %arrayidx.1 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next11
-  %1 = load double* %arrayidx.1, align 8, !tbaa !0
+  %1 = load double* %arrayidx.1, align 8
   %add.1 = fadd double %1, 1.000000e+00
   %arrayidx5.1 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next11
-  store double %add.1, double* %arrayidx5.1, align 8, !tbaa !0
+  store double %add.1, double* %arrayidx5.1, align 8
   %indvars.iv.next.112 = or i64 %indvars.iv, 2
   %arrayidx.2 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.112
-  %2 = load double* %arrayidx.2, align 16, !tbaa !0
+  %2 = load double* %arrayidx.2, align 16
   %add.2 = fadd double %2, 1.000000e+00
   %arrayidx5.2 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.112
-  store double %add.2, double* %arrayidx5.2, align 16, !tbaa !0
+  store double %add.2, double* %arrayidx5.2, align 16
   %indvars.iv.next.213 = or i64 %indvars.iv, 3
   %arrayidx.3 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.213
-  %3 = load double* %arrayidx.3, align 8, !tbaa !0
+  %3 = load double* %arrayidx.3, align 8
   %add.3 = fadd double %3, 1.000000e+00
   %arrayidx5.3 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.213
-  store double %add.3, double* %arrayidx5.3, align 8, !tbaa !0
+  store double %add.3, double* %arrayidx5.3, align 8
   %indvars.iv.next.314 = or i64 %indvars.iv, 4
   %arrayidx.4 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.314
-  %4 = load double* %arrayidx.4, align 32, !tbaa !0
+  %4 = load double* %arrayidx.4, align 32
   %add.4 = fadd double %4, 1.000000e+00
   %arrayidx5.4 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.314
-  store double %add.4, double* %arrayidx5.4, align 32, !tbaa !0
+  store double %add.4, double* %arrayidx5.4, align 32
   %indvars.iv.next.415 = or i64 %indvars.iv, 5
   %arrayidx.5 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.415
-  %5 = load double* %arrayidx.5, align 8, !tbaa !0
+  %5 = load double* %arrayidx.5, align 8
   %add.5 = fadd double %5, 1.000000e+00
   %arrayidx5.5 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.415
-  store double %add.5, double* %arrayidx5.5, align 8, !tbaa !0
+  store double %add.5, double* %arrayidx5.5, align 8
   %indvars.iv.next.516 = or i64 %indvars.iv, 6
   %arrayidx.6 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.516
-  %6 = load double* %arrayidx.6, align 16, !tbaa !0
+  %6 = load double* %arrayidx.6, align 16
   %add.6 = fadd double %6, 1.000000e+00
   %arrayidx5.6 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.516
-  store double %add.6, double* %arrayidx5.6, align 16, !tbaa !0
+  store double %add.6, double* %arrayidx5.6, align 16
   %indvars.iv.next.617 = or i64 %indvars.iv, 7
   %arrayidx.7 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.617
-  %7 = load double* %arrayidx.7, align 8, !tbaa !0
+  %7 = load double* %arrayidx.7, align 8
   %add.7 = fadd double %7, 1.000000e+00
   %arrayidx5.7 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.617
-  store double %add.7, double* %arrayidx5.7, align 8, !tbaa !0
+  store double %add.7, double* %arrayidx5.7, align 8
   %indvars.iv.next.718 = or i64 %indvars.iv, 8
   %arrayidx.8 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.718
-  %8 = load double* %arrayidx.8, align 32, !tbaa !0
+  %8 = load double* %arrayidx.8, align 32
   %add.8 = fadd double %8, 1.000000e+00
   %arrayidx5.8 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.718
-  store double %add.8, double* %arrayidx5.8, align 32, !tbaa !0
+  store double %add.8, double* %arrayidx5.8, align 32
   %indvars.iv.next.819 = or i64 %indvars.iv, 9
   %arrayidx.9 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.819
-  %9 = load double* %arrayidx.9, align 8, !tbaa !0
+  %9 = load double* %arrayidx.9, align 8
   %add.9 = fadd double %9, 1.000000e+00
   %arrayidx5.9 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.819
-  store double %add.9, double* %arrayidx5.9, align 8, !tbaa !0
+  store double %add.9, double* %arrayidx5.9, align 8
   %indvars.iv.next.920 = or i64 %indvars.iv, 10
   %arrayidx.10 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.920
-  %10 = load double* %arrayidx.10, align 16, !tbaa !0
+  %10 = load double* %arrayidx.10, align 16
   %add.10 = fadd double %10, 1.000000e+00
   %arrayidx5.10 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.920
-  store double %add.10, double* %arrayidx5.10, align 16, !tbaa !0
+  store double %add.10, double* %arrayidx5.10, align 16
   %indvars.iv.next.1021 = or i64 %indvars.iv, 11
   %arrayidx.11 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.1021
-  %11 = load double* %arrayidx.11, align 8, !tbaa !0
+  %11 = load double* %arrayidx.11, align 8
   %add.11 = fadd double %11, 1.000000e+00
   %arrayidx5.11 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.1021
-  store double %add.11, double* %arrayidx5.11, align 8, !tbaa !0
+  store double %add.11, double* %arrayidx5.11, align 8
   %indvars.iv.next.1122 = or i64 %indvars.iv, 12
   %arrayidx.12 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.1122
-  %12 = load double* %arrayidx.12, align 32, !tbaa !0
+  %12 = load double* %arrayidx.12, align 32
   %add.12 = fadd double %12, 1.000000e+00
   %arrayidx5.12 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.1122
-  store double %add.12, double* %arrayidx5.12, align 32, !tbaa !0
+  store double %add.12, double* %arrayidx5.12, align 32
   %indvars.iv.next.1223 = or i64 %indvars.iv, 13
   %arrayidx.13 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.1223
-  %13 = load double* %arrayidx.13, align 8, !tbaa !0
+  %13 = load double* %arrayidx.13, align 8
   %add.13 = fadd double %13, 1.000000e+00
   %arrayidx5.13 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.1223
-  store double %add.13, double* %arrayidx5.13, align 8, !tbaa !0
+  store double %add.13, double* %arrayidx5.13, align 8
   %indvars.iv.next.1324 = or i64 %indvars.iv, 14
   %arrayidx.14 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.1324
-  %14 = load double* %arrayidx.14, align 16, !tbaa !0
+  %14 = load double* %arrayidx.14, align 16
   %add.14 = fadd double %14, 1.000000e+00
   %arrayidx5.14 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.1324
-  store double %add.14, double* %arrayidx5.14, align 16, !tbaa !0
+  store double %add.14, double* %arrayidx5.14, align 16
   %indvars.iv.next.1425 = or i64 %indvars.iv, 15
   %arrayidx.15 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.1425
-  %15 = load double* %arrayidx.15, align 8, !tbaa !0
+  %15 = load double* %arrayidx.15, align 8
   %add.15 = fadd double %15, 1.000000e+00
   %arrayidx5.15 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.1425
-  store double %add.15, double* %arrayidx5.15, align 8, !tbaa !0
+  store double %add.15, double* %arrayidx5.15, align 8
   %indvars.iv.next.15 = add i64 %indvars.iv, 16
   %lftr.wideiv.15 = trunc i64 %indvars.iv.next.15 to i32
   %exitcond.15 = icmp eq i32 %lftr.wideiv.15, 16000
@@ -150,7 +150,3 @@ for.end8:                                         ; preds = %for.end
 }
 
 declare i32 @dummy(double*, double*, double*, double*, double*, [256 x double]*, [256 x double]*, [256 x double]*, double)
-
-!0 = metadata !{metadata !"double", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/PowerPC/ctrloop-sums.ll b/test/CodeGen/PowerPC/ctrloop-sums.ll
index eae8c38..d9965f2 100644
--- a/test/CodeGen/PowerPC/ctrloop-sums.ll
+++ b/test/CodeGen/PowerPC/ctrloop-sums.ll
@@ -24,7 +24,7 @@ for.body3.us:                                     ; preds = %for.body3.us, %for.
   %indvars.iv = phi i64 [ 0, %for.body3.lr.ph.us ], [ %indvars.iv.next, %for.body3.us ]
   %Result.111.us = phi i32 [ %Result.014.us, %for.body3.lr.ph.us ], [ %add.us, %for.body3.us ]
   %arrayidx5.us = getelementptr inbounds [100 x i32]* %Array, i64 %indvars.iv16, i64 %indvars.iv
-  %0 = load i32* %arrayidx5.us, align 4, !tbaa !0
+  %0 = load i32* %arrayidx5.us, align 4
   %add.us = add nsw i32 %0, %Result.111.us
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
@@ -60,7 +60,7 @@ for.body:                                         ; preds = %for.body, %entry
   %0 = trunc i64 %indvars.iv33 to i32
   %sub = sub i32 0, %0
   %arrayidx2 = getelementptr inbounds [100 x [100 x i32]]* %Array, i64 0, i64 %indvars.iv33, i64 %indvars.iv33
-  store i32 %sub, i32* %arrayidx2, align 4, !tbaa !0
+  store i32 %sub, i32* %arrayidx2, align 4
   %indvars.iv.next34 = add i64 %indvars.iv33, 1
   %lftr.wideiv35 = trunc i64 %indvars.iv.next34 to i32
   %exitcond36 = icmp eq i32 %lftr.wideiv35, 100
@@ -81,7 +81,7 @@ if.then:                                          ; preds = %for.body8
   %3 = add i64 %indvars.iv, %indvars.iv29
   %arrayidx13 = getelementptr inbounds [100 x [100 x i32]]* %Array, i64 0, i64 %indvars.iv29, i64 %indvars.iv
   %4 = trunc i64 %3 to i32
-  store i32 %4, i32* %arrayidx13, align 4, !tbaa !0
+  store i32 %4, i32* %arrayidx13, align 4
   br label %for.inc14
 
 for.inc14:                                        ; preds = %for.body8, %if.then
@@ -106,7 +106,7 @@ for.body3.us.i:                                   ; preds = %for.body3.lr.ph.us.
   %indvars.iv.i = phi i64 [ 0, %for.body3.lr.ph.us.i ], [ %indvars.iv.next.i, %for.body3.us.i ]
   %Result.111.us.i = phi i32 [ %Result.014.us.i, %for.body3.lr.ph.us.i ], [ %add.us.i, %for.body3.us.i ]
   %arrayidx5.us.i = getelementptr inbounds [100 x [100 x i32]]* %Array, i64 0, i64 %indvars.iv16.i, i64 %indvars.iv.i
-  %5 = load i32* %arrayidx5.us.i, align 4, !tbaa !0
+  %5 = load i32* %arrayidx5.us.i, align 4
   %add.us.i = add nsw i32 %5, %Result.111.us.i
   %indvars.iv.next.i = add i64 %indvars.iv.i, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next.i to i32
@@ -128,7 +128,3 @@ SumArray.exit:                                    ; preds = %for.inc6.us.i
 }
 
 declare i32 @printf(i8* nocapture, ...) nounwind
-
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/PowerPC/ctrloops.ll b/test/CodeGen/PowerPC/ctrloops.ll
index 4b6f7b9..f11e332 100644
--- a/test/CodeGen/PowerPC/ctrloops.ll
+++ b/test/CodeGen/PowerPC/ctrloops.ll
@@ -10,9 +10,9 @@ entry:
 
 for.body:                                         ; preds = %for.body, %entry
   %i.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %0 = load volatile i32* @a, align 4, !tbaa !0
+  %0 = load volatile i32* @a, align 4
   %add = add nsw i32 %0, %c
-  store volatile i32 %add, i32* @a, align 4, !tbaa !0
+  store volatile i32 %add, i32* @a, align 4
   %inc = add nsw i32 %i.01, 1
   %exitcond = icmp eq i32 %inc, 2048
   br i1 %exitcond, label %for.end, label %for.body
@@ -34,9 +34,9 @@ entry:
 
 for.body:                                         ; preds = %entry, %for.body
   %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
-  %0 = load volatile i32* @a, align 4, !tbaa !0
+  %0 = load volatile i32* @a, align 4
   %add = add nsw i32 %0, %c
-  store volatile i32 %add, i32* @a, align 4, !tbaa !0
+  store volatile i32 %add, i32* @a, align 4
   %inc = add nsw i32 %i.02, 1
   %exitcond = icmp eq i32 %inc, %d
   br i1 %exitcond, label %for.end, label %for.body
@@ -58,9 +58,9 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
   %mul = mul nsw i32 %i.02, %c
-  %0 = load volatile i32* @a, align 4, !tbaa !0
+  %0 = load volatile i32* @a, align 4
   %add = add nsw i32 %0, %mul
-  store volatile i32 %add, i32* @a, align 4, !tbaa !0
+  store volatile i32 %add, i32* @a, align 4
   %inc = add nsw i32 %i.02, 1
   %exitcond = icmp eq i32 %inc, %d
   br i1 %exitcond, label %for.end, label %for.body
@@ -73,7 +73,3 @@ for.end:                                          ; preds = %for.body, %entry
 ; CHECK-NOT: cmplwi
 ; CHECK: bdnz
 }
-
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/PowerPC/early-ret.ll b/test/CodeGen/PowerPC/early-ret.ll
new file mode 100644
index 0000000..7d3e225
--- /dev/null
+++ b/test/CodeGen/PowerPC/early-ret.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define void @foo(i32* %P) #0 {
+entry:
+  %tobool = icmp eq i32* %P, null
+  br i1 %tobool, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  store i32 0, i32* %P, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %entry, %if.then
+  ret void
+
+; CHECK: @foo
+; CHECK: beqlr
+; CHECK: blr
+}
+
+define void @bar(i32* %P, i32* %Q) #0 {
+entry:
+  %tobool = icmp eq i32* %P, null
+  br i1 %tobool, label %if.else, label %if.then
+
+if.then:                                          ; preds = %entry
+  store i32 0, i32* %P, align 4
+  %tobool1 = icmp eq i32* %Q, null
+  br i1 %tobool1, label %if.end3, label %if.then2
+
+if.then2:                                         ; preds = %if.then
+  store i32 1, i32* %Q, align 4
+  br label %if.end3
+
+if.else:                                          ; preds = %entry
+  store i32 0, i32* %Q, align 4
+  br label %if.end3
+
+if.end3:                                          ; preds = %if.then, %if.then2, %if.else
+  ret void
+
+; CHECK: @bar
+; CHECK: beqlr
+; CHECK: blr
+}
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/PowerPC/early-ret2.ll b/test/CodeGen/PowerPC/early-ret2.ll
new file mode 100644
index 0000000..a274e2c
--- /dev/null
+++ b/test/CodeGen/PowerPC/early-ret2.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define void @_Z8example3iPiS_() #0 {
+entry:
+  br i1 undef, label %while.end, label %while.body.lr.ph
+
+while.body.lr.ph:                                 ; preds = %entry
+  br i1 undef, label %while.end, label %while.body
+
+while.body:                                       ; preds = %while.body, %while.body.lr.ph
+  br i1 false, label %while.end, label %while.body, !llvm.vectorizer.already_vectorized !0
+
+while.end:                                        ; preds = %while.body, %while.body.lr.ph, %entry
+  ret void
+
+; CHECK: @_Z8example3iPiS_
+; CHECK: bnelr
+}
+
+attributes #0 = { noinline nounwind }
+
+!0 = metadata !{}
+
diff --git a/test/CodeGen/PowerPC/fma.ll b/test/CodeGen/PowerPC/fma.ll
index 27496f7..a173c91 100644
--- a/test/CodeGen/PowerPC/fma.ll
+++ b/test/CodeGen/PowerPC/fma.ll
@@ -1,22 +1,30 @@
-; RUN: llc < %s -march=ppc32 -fp-contract=fast | \
-; RUN:   egrep "fn?madd|fn?msub" | count 8
+; RUN: llc < %s -march=ppc32 -fp-contract=fast | FileCheck %s
 
 define double @test_FMADD1(double %A, double %B, double %C) {
 	%D = fmul double %A, %B		; <double> [#uses=1]
 	%E = fadd double %D, %C		; <double> [#uses=1]
 	ret double %E
+; CHECK: test_FMADD1:
+; CHECK: fmadd
+; CHECK-NEXT: blr
 }
 
 define double @test_FMADD2(double %A, double %B, double %C) {
 	%D = fmul double %A, %B		; <double> [#uses=1]
 	%E = fadd double %D, %C		; <double> [#uses=1]
 	ret double %E
+; CHECK: test_FMADD2:
+; CHECK: fmadd
+; CHECK-NEXT: blr
 }
 
 define double @test_FMSUB(double %A, double %B, double %C) {
 	%D = fmul double %A, %B		; <double> [#uses=1]
 	%E = fsub double %D, %C		; <double> [#uses=1]
 	ret double %E
+; CHECK: test_FMSUB:
+; CHECK: fmsub
+; CHECK-NEXT: blr
 }
 
 define double @test_FNMADD1(double %A, double %B, double %C) {
@@ -24,6 +32,9 @@ define double @test_FNMADD1(double %A, double %B, double %C) {
 	%E = fadd double %D, %C		; <double> [#uses=1]
 	%F = fsub double -0.000000e+00, %E		; <double> [#uses=1]
 	ret double %F
+; CHECK: test_FNMADD1:
+; CHECK: fnmadd
+; CHECK-NEXT: blr
 }
 
 define double @test_FNMADD2(double %A, double %B, double %C) {
@@ -31,12 +42,18 @@ define double @test_FNMADD2(double %A, double %B, double %C) {
 	%E = fadd double %C, %D		; <double> [#uses=1]
 	%F = fsub double -0.000000e+00, %E		; <double> [#uses=1]
 	ret double %F
+; CHECK: test_FNMADD2:
+; CHECK: fnmadd
+; CHECK-NEXT: blr
 }
 
 define double @test_FNMSUB1(double %A, double %B, double %C) {
 	%D = fmul double %A, %B		; <double> [#uses=1]
 	%E = fsub double %C, %D		; <double> [#uses=1]
 	ret double %E
+; CHECK: test_FNMSUB1:
+; CHECK: fnmsub
+; CHECK-NEXT: blr
 }
 
 define double @test_FNMSUB2(double %A, double %B, double %C) {
@@ -44,6 +61,9 @@ define double @test_FNMSUB2(double %A, double %B, double %C) {
 	%E = fsub double %D, %C		; <double> [#uses=1]
 	%F = fsub double -0.000000e+00, %E		; <double> [#uses=1]
 	ret double %F
+; CHECK: test_FNMSUB2:
+; CHECK: fnmsub
+; CHECK-NEXT: blr
 }
 
 define float @test_FNMSUBS(float %A, float %B, float %C) {
@@ -51,4 +71,7 @@ define float @test_FNMSUBS(float %A, float %B, float %C) {
 	%E = fsub float %D, %C		; <float> [#uses=1]
 	%F = fsub float -0.000000e+00, %E		; <float> [#uses=1]
 	ret float %F
+; CHECK: test_FNMSUBS:
+; CHECK: fnmsubs
+; CHECK-NEXT: blr
 }
diff --git a/test/CodeGen/PowerPC/fold-zero.ll b/test/CodeGen/PowerPC/fold-zero.ll
new file mode 100644
index 0000000..c7ec6fa
--- /dev/null
+++ b/test/CodeGen/PowerPC/fold-zero.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define i32 @test1(i1 %a, i32 %c) nounwind  {
+  %x = select i1 %a, i32 %c, i32 0
+  ret i32 %x
+
+; CHECK: @test1
+; CHECK-NOT: li {{[0-9]+}}, 0
+; CHECK: isel 3, 0,
+; CHECK: blr
+}
+
diff --git a/test/CodeGen/PowerPC/fsel.ll b/test/CodeGen/PowerPC/fsel.ll
new file mode 100644
index 0000000..8cd43e6
--- /dev/null
+++ b/test/CodeGen/PowerPC/fsel.ll
@@ -0,0 +1,137 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -enable-no-infs-fp-math -enable-no-nans-fp-math | FileCheck -check-prefix=CHECK-FM %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define double @zerocmp1(double %a, double %y, double %z) #0 {
+entry:
+  %cmp = fcmp ult double %a, 0.000000e+00
+  %z.y = select i1 %cmp, double %z, double %y
+  ret double %z.y
+
+; CHECK: @zerocmp1
+; CHECK-NOT: fsel
+; CHECK: blr
+
+; CHECK-FM: @zerocmp1
+; CHECK-FM: fsel 1, 1, 2, 3
+; CHECK-FM: blr
+}
+
+define double @zerocmp2(double %a, double %y, double %z) #0 {
+entry:
+  %cmp = fcmp ogt double %a, 0.000000e+00
+  %y.z = select i1 %cmp, double %y, double %z
+  ret double %y.z
+
+; CHECK: @zerocmp2
+; CHECK-NOT: fsel
+; CHECK: blr
+
+; CHECK-FM: @zerocmp2
+; CHECK-FM: fneg [[REG:[0-9]+]], 1
+; CHECK-FM: fsel 1, [[REG]], 3, 2
+; CHECK-FM: blr
+}
+
+define double @zerocmp3(double %a, double %y, double %z) #0 {
+entry:
+  %cmp = fcmp oeq double %a, 0.000000e+00
+  %y.z = select i1 %cmp, double %y, double %z
+  ret double %y.z
+
+; CHECK: @zerocmp3
+; CHECK-NOT: fsel
+; CHECK: blr
+
+; CHECK-FM: @zerocmp3
+; CHECK-FM: fsel [[REG:[0-9]+]], 1, 2, 3
+; CHECK-FM: fneg [[REG2:[0-9]+]], 1
+; CHECK-FM: fsel 1, [[REG2]], [[REG]], 3
+; CHECK-FM: blr
+}
+
+define double @min1(double %a, double %b) #0 {
+entry:
+  %cmp = fcmp ole double %a, %b
+  %cond = select i1 %cmp, double %a, double %b
+  ret double %cond
+
+; CHECK: @min1
+; CHECK-NOT: fsel
+; CHECK: blr
+
+; CHECK-FM: @min1
+; CHECK-FM: fsub [[REG:[0-9]+]], 2, 1
+; CHECK-FM: fsel 1, [[REG]], 1, 2
+; CHECK-FM: blr
+}
+
+define double @max1(double %a, double %b) #0 {
+entry:
+  %cmp = fcmp oge double %a, %b
+  %cond = select i1 %cmp, double %a, double %b
+  ret double %cond
+
+; CHECK: @max1
+; CHECK-NOT: fsel
+; CHECK: blr
+
+; CHECK-FM: @max1
+; CHECK-FM: fsub [[REG:[0-9]+]], 1, 2
+; CHECK-FM: fsel 1, [[REG]], 1, 2
+; CHECK-FM: blr
+}
+
+define double @cmp1(double %a, double %b, double %y, double %z) #0 {
+entry:
+  %cmp = fcmp ult double %a, %b
+  %z.y = select i1 %cmp, double %z, double %y
+  ret double %z.y
+
+; CHECK: @cmp1
+; CHECK-NOT: fsel
+; CHECK: blr
+
+; CHECK-FM: @cmp1
+; CHECK-FM: fsub [[REG:[0-9]+]], 1, 2
+; CHECK-FM: fsel 1, [[REG]], 3, 4
+; CHECK-FM: blr
+}
+
+define double @cmp2(double %a, double %b, double %y, double %z) #0 {
+entry:
+  %cmp = fcmp ogt double %a, %b
+  %y.z = select i1 %cmp, double %y, double %z
+  ret double %y.z
+
+; CHECK: @cmp2
+; CHECK-NOT: fsel
+; CHECK: blr
+
+; CHECK-FM: @cmp2
+; CHECK-FM: fsub [[REG:[0-9]+]], 2, 1
+; CHECK-FM: fsel 1, [[REG]], 4, 3
+; CHECK-FM: blr
+}
+
+define double @cmp3(double %a, double %b, double %y, double %z) #0 {
+entry:
+  %cmp = fcmp oeq double %a, %b
+  %y.z = select i1 %cmp, double %y, double %z
+  ret double %y.z
+
+; CHECK: @cmp3
+; CHECK-NOT: fsel
+; CHECK: blr
+
+; CHECK-FM: @cmp3
+; CHECK-FM: fsub [[REG:[0-9]+]], 1, 2
+; CHECK-FM: fsel [[REG2:[0-9]+]], [[REG]], 3, 4
+; CHECK-FM: fneg [[REG3:[0-9]+]], [[REG]]
+; CHECK-FM: fsel 1, [[REG3]], [[REG2]], 4
+; CHECK-FM: blr
+}
+
+attributes #0 = { nounwind readnone }
+
diff --git a/test/CodeGen/PowerPC/ifcvt.ll b/test/CodeGen/PowerPC/ifcvt.ll
new file mode 100644
index 0000000..9c966c9
--- /dev/null
+++ b/test/CodeGen/PowerPC/ifcvt.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -verify-machineinstrs | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define i32 @test(i32 %a, i32 %b, i32 %c, i32 %d) {
+entry:
+  %sext82 = shl i32 %d, 16
+  %conv29 = ashr exact i32 %sext82, 16
+  %cmp = icmp slt i32 %sext82, 0
+  br i1 %cmp, label %cond.true, label %cond.false
+
+cond.true:                                        ; preds = %sw.epilog
+  %and33 = and i32 %conv29, 32767
+  %sub34 = sub nsw i32 %a, %and33
+  br label %cond.end
+
+cond.false:                                       ; preds = %sw.epilog
+  %add37 = add nsw i32 %conv29, %a
+  br label %cond.end
+
+; CHECK: @test
+; CHECK: add [[REG:[0-9]+]], 
+; CHECK: subf [[REG2:[0-9]+]],
+; CHECK: isel {{[0-9]+}}, [[REG]], [[REG2]],
+
+cond.end:                                         ; preds = %cond.false, %cond.true
+  %cond = phi i32 [ %sub34, %cond.true ], [ %add37, %cond.false ]
+  %sext83 = shl i32 %cond, 16
+  %conv39 = ashr exact i32 %sext83, 16
+  %add41 = sub i32 %b, %a
+  %sub43 = add i32 %add41, %conv39
+  ret i32 %sub43
+}
+
diff --git a/test/CodeGen/PowerPC/lbzux.ll b/test/CodeGen/PowerPC/lbzux.ll
index 9895130..f3158b3 100644
--- a/test/CodeGen/PowerPC/lbzux.ll
+++ b/test/CodeGen/PowerPC/lbzux.ll
@@ -4,7 +4,7 @@ target triple = "powerpc64-unknown-linux-gnu"
 
 define fastcc void @allocateSpace(i1 %cond1, i1 %cond2) nounwind {
 entry:
-  %0 = load i8** undef, align 8, !tbaa !0
+  %0 = load i8** undef, align 8
   br i1 undef, label %return, label %lor.lhs.false
 
 lor.lhs.false:                                    ; preds = %entry
@@ -43,7 +43,3 @@ return:                                           ; preds = %if.then45, %lor.lhs
 ; CHECK: @allocateSpace
 ; CHECK: lbzux
 }
-
-!0 = metadata !{metadata !"any pointer", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/PowerPC/lsa.ll b/test/CodeGen/PowerPC/lsa.ll
new file mode 100644
index 0000000..8a6338e
--- /dev/null
+++ b/test/CodeGen/PowerPC/lsa.ll
@@ -0,0 +1,43 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define signext i32 @foo() #0 {
+entry:
+  %v = alloca [8200 x i32], align 4
+  %w = alloca [8200 x i32], align 4
+  %q = alloca [8200 x i32], align 4
+  %0 = bitcast [8200 x i32]* %v to i8*
+  call void @llvm.lifetime.start(i64 32800, i8* %0) #0
+  %1 = bitcast [8200 x i32]* %w to i8*
+  call void @llvm.lifetime.start(i64 32800, i8* %1) #0
+  %2 = bitcast [8200 x i32]* %q to i8*
+  call void @llvm.lifetime.start(i64 32800, i8* %2) #0
+  %arraydecay = getelementptr inbounds [8200 x i32]* %q, i64 0, i64 0
+  %arraydecay1 = getelementptr inbounds [8200 x i32]* %v, i64 0, i64 0
+  %arraydecay2 = getelementptr inbounds [8200 x i32]* %w, i64 0, i64 0
+  call void @bar(i32* %arraydecay, i32* %arraydecay1, i32* %arraydecay2) #0
+  %3 = load i32* %arraydecay2, align 4
+  %arrayidx3 = getelementptr inbounds [8200 x i32]* %w, i64 0, i64 1
+  %4 = load i32* %arrayidx3, align 4
+
+; CHECK: @foo
+; CHECK-NOT: lwzx
+; CHECK: lwz {{[0-9]+}}, 4([[REG:[0-9]+]])
+; CHECK: lwz {{[0-9]+}}, 0([[REG]])
+; CHECK: blr
+
+  %add = add nsw i32 %4, %3
+  call void @llvm.lifetime.end(i64 32800, i8* %2) #0
+  call void @llvm.lifetime.end(i64 32800, i8* %1) #0
+  call void @llvm.lifetime.end(i64 32800, i8* %0) #0
+  ret i32 %add
+}
+
+declare void @llvm.lifetime.start(i64, i8* nocapture) #0
+
+declare void @bar(i32*, i32*, i32*)
+
+declare void @llvm.lifetime.end(i64, i8* nocapture) #0
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/PowerPC/mcm-obj-2.ll b/test/CodeGen/PowerPC/mcm-obj-2.ll
index 2dd1718..bc60b3b 100644
--- a/test/CodeGen/PowerPC/mcm-obj-2.ll
+++ b/test/CodeGen/PowerPC/mcm-obj-2.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -O1 -mcpu=pwr7 -code-model=medium -filetype=obj %s -o - | \
-; RUN: elf-dump --dump-section-data | FileCheck %s
+; RUN: llvm-readobj -r | FileCheck %s
 
 ; FIXME: When asm-parse is available, could make this an assembly test.
 
@@ -19,18 +19,11 @@ entry:
 ; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO for
 ; accessing function-scoped variable si.
 ;
-; CHECK:       Relocation 0
-; CHECK-NEXT:  'r_offset'
-; CHECK-NEXT:  'r_sym', 0x[[SYM2:[0-9]+]]
-; CHECK-NEXT:  'r_type', 0x00000032
-; CHECK:       Relocation 1
-; CHECK-NEXT:  'r_offset'
-; CHECK-NEXT:  'r_sym', 0x[[SYM2]]
-; CHECK-NEXT:  'r_type', 0x00000030
-; CHECK:       Relocation 2
-; CHECK-NEXT:  'r_offset'
-; CHECK-NEXT:  'r_sym', 0x[[SYM2]]
-; CHECK-NEXT:  'r_type', 0x00000030
+; CHECK: Relocations [
+; CHECK:   Section (1) .text {
+; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM2:[^ ]+]]
+; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO [[SYM2]]
+; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO [[SYM2]]
 
 @gi = global i32 5, align 4
 
@@ -45,18 +38,9 @@ entry:
 ; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO for
 ; accessing file-scope variable gi.
 ;
-; CHECK:       Relocation 3
-; CHECK-NEXT:  'r_offset'
-; CHECK-NEXT:  'r_sym', 0x[[SYM3:[0-9]+]]
-; CHECK-NEXT:  'r_type', 0x00000032
-; CHECK:       Relocation 4
-; CHECK-NEXT:  'r_offset'
-; CHECK-NEXT:  'r_sym', 0x[[SYM3]]
-; CHECK-NEXT:  'r_type', 0x00000030
-; CHECK:       Relocation 5
-; CHECK-NEXT:  'r_offset'
-; CHECK-NEXT:  'r_sym', 0x[[SYM3]]
-; CHECK-NEXT:  'r_type', 0x00000030
+; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM3:[^ ]+]]
+; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO [[SYM3]]
+; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO [[SYM3]]
 
 define double @test_double_const() nounwind {
 entry:
@@ -66,12 +50,5 @@ entry:
 ; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO for
 ; accessing a constant.
 ;
-; CHECK:       Relocation 6
-; CHECK-NEXT:  'r_offset'
-; CHECK-NEXT:  'r_sym', 0x[[SYM4:[0-9]+]]
-; CHECK-NEXT:  'r_type', 0x00000032
-; CHECK:       Relocation 7
-; CHECK-NEXT:  'r_offset'
-; CHECK-NEXT:  'r_sym', 0x[[SYM4]]
-; CHECK-NEXT:  'r_type', 0x00000030
-
+; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM4:[^ ]+]]
+; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO [[SYM4]]
diff --git a/test/CodeGen/PowerPC/mcm-obj.ll b/test/CodeGen/PowerPC/mcm-obj.ll
index 117c3b3..720c5fb 100644
--- a/test/CodeGen/PowerPC/mcm-obj.ll
+++ b/test/CodeGen/PowerPC/mcm-obj.ll
@@ -1,7 +1,7 @@
 ; RUN: llc -O0 -mcpu=pwr7 -code-model=medium -filetype=obj %s -o - | \
-; RUN: elf-dump --dump-section-data | FileCheck -check-prefix=MEDIUM %s
+; RUN: llvm-readobj -r | FileCheck -check-prefix=MEDIUM %s
 ; RUN: llc -O0 -mcpu=pwr7 -code-model=large -filetype=obj %s -o - | \
-; RUN: elf-dump --dump-section-data | FileCheck -check-prefix=LARGE %s
+; RUN: llvm-readobj -r | FileCheck -check-prefix=LARGE %s
 
 ; FIXME: When asm-parse is available, could make this an assembly test.
 
@@ -21,25 +21,15 @@ entry:
 ; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
 ; accessing external variable ei.
 ;
-; MEDIUM:       '.rela.text'
-; MEDIUM:       Relocation 0
-; MEDIUM-NEXT:  'r_offset'
-; MEDIUM-NEXT:  'r_sym', 0x[[SYM1:[0-9]+]]
-; MEDIUM-NEXT:  'r_type', 0x00000032
-; MEDIUM:       Relocation 1
-; MEDIUM-NEXT:  'r_offset'
-; MEDIUM-NEXT:  'r_sym', 0x[[SYM1]]
-; MEDIUM-NEXT:  'r_type', 0x00000040
+; MEDIUM:      Relocations [
+; MEDIUM:        Section (1) .text {
+; MEDIUM-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM1:[^ ]+]]
+; MEDIUM-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM1]]
 ;
-; LARGE:       '.rela.text'
-; LARGE:       Relocation 0
-; LARGE-NEXT:  'r_offset'
-; LARGE-NEXT:  'r_sym', 0x[[SYM1:[0-9]+]]
-; LARGE-NEXT:  'r_type', 0x00000032
-; LARGE:       Relocation 1
-; LARGE-NEXT:  'r_offset'
-; LARGE-NEXT:  'r_sym', 0x[[SYM1]]
-; LARGE-NEXT:  'r_type', 0x00000040
+; LARGE:       Relocations [
+; LARGE:         Section (1) .text {
+; LARGE-NEXT:      0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM1:[^ ]+]]
+; LARGE-NEXT:      0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM1]]
 
 @test_fn_static.si = internal global i32 0, align 4
 
@@ -54,26 +44,14 @@ entry:
 ; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO for
 ; accessing function-scoped variable si.
 ;
-; MEDIUM:       Relocation 2
-; MEDIUM-NEXT:  'r_offset'
-; MEDIUM-NEXT:  'r_sym', 0x[[SYM2:[0-9]+]]
-; MEDIUM-NEXT:  'r_type', 0x00000032
-; MEDIUM:       Relocation 3
-; MEDIUM-NEXT:  'r_offset'
-; MEDIUM-NEXT:  'r_sym', 0x[[SYM2]]
-; MEDIUM-NEXT:  'r_type', 0x00000030
+; MEDIUM-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM2:[^ ]+]]
+; MEDIUM-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO [[SYM2]]
 ;
 ; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
 ; accessing function-scoped variable si.
 ;
-; LARGE:       Relocation 2
-; LARGE-NEXT:  'r_offset'
-; LARGE-NEXT:  'r_sym', 0x[[SYM2:[0-9]+]]
-; LARGE-NEXT:  'r_type', 0x00000032
-; LARGE:       Relocation 3
-; LARGE-NEXT:  'r_offset'
-; LARGE-NEXT:  'r_sym', 0x[[SYM2]]
-; LARGE-NEXT:  'r_type', 0x00000040
+; LARGE-NEXT:      0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM2:[^ ]+]]
+; LARGE-NEXT:      0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM2]]
 
 @gi = global i32 5, align 4
 
@@ -88,26 +66,14 @@ entry:
 ; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO for
 ; accessing file-scope variable gi.
 ;
-; MEDIUM:       Relocation 4
-; MEDIUM-NEXT:  'r_offset'
-; MEDIUM-NEXT:  'r_sym', 0x[[SYM3:[0-9]+]]
-; MEDIUM-NEXT:  'r_type', 0x00000032
-; MEDIUM:       Relocation 5
-; MEDIUM-NEXT:  'r_offset'
-; MEDIUM-NEXT:  'r_sym', 0x[[SYM3]]
-; MEDIUM-NEXT:  'r_type', 0x00000030
+; MEDIUM-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM3:[^ ]+]]
+; MEDIUM-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO [[SYM3]]
 ;
 ; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
 ; accessing file-scope variable gi.
 ;
-; LARGE:       Relocation 4
-; LARGE-NEXT:  'r_offset'
-; LARGE-NEXT:  'r_sym', 0x[[SYM3:[0-9]+]]
-; LARGE-NEXT:  'r_type', 0x00000032
-; LARGE:       Relocation 5
-; LARGE-NEXT:  'r_offset'
-; LARGE-NEXT:  'r_sym', 0x[[SYM3]]
-; LARGE-NEXT:  'r_type', 0x00000040
+; LARGE-NEXT:      0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM3:[^ ]+]]
+; LARGE-NEXT:      0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM3]]
 
 define double @test_double_const() nounwind {
 entry:
@@ -117,26 +83,14 @@ entry:
 ; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO for
 ; accessing a constant.
 ;
-; MEDIUM:       Relocation 6
-; MEDIUM-NEXT:  'r_offset'
-; MEDIUM-NEXT:  'r_sym', 0x[[SYM4:[0-9]+]]
-; MEDIUM-NEXT:  'r_type', 0x00000032
-; MEDIUM:       Relocation 7
-; MEDIUM-NEXT:  'r_offset'
-; MEDIUM-NEXT:  'r_sym', 0x[[SYM4]]
-; MEDIUM-NEXT:  'r_type', 0x00000030
+; MEDIUM-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM4:[^ ]+]]
+; MEDIUM-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO [[SYM4]]
 ;
 ; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
 ; accessing a constant.
 ;
-; LARGE:       Relocation 6
-; LARGE-NEXT:  'r_offset'
-; LARGE-NEXT:  'r_sym', 0x[[SYM4:[0-9]+]]
-; LARGE-NEXT:  'r_type', 0x00000032
-; LARGE:       Relocation 7
-; LARGE-NEXT:  'r_offset'
-; LARGE-NEXT:  'r_sym', 0x[[SYM4]]
-; LARGE-NEXT:  'r_type', 0x00000040
+; LARGE-NEXT:      0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM4:[^ ]+]]
+; LARGE-NEXT:      0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM4]]
 
 define signext i32 @test_jump_table(i32 signext %i) nounwind {
 entry:
@@ -185,23 +139,11 @@ sw.epilog:                                        ; preds = %sw.bb3, %sw.default
 ; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
 ; accessing a jump table address.
 ;
-; MEDIUM:       Relocation 8
-; MEDIUM-NEXT:  'r_offset'
-; MEDIUM-NEXT:  'r_sym', 0x[[SYM5:[0-9]+]]
-; MEDIUM-NEXT:  'r_type', 0x00000032
-; MEDIUM:       Relocation 9
-; MEDIUM-NEXT:  'r_offset'
-; MEDIUM-NEXT:  'r_sym', 0x[[SYM5]]
-; MEDIUM-NEXT:  'r_type', 0x00000040
+; MEDIUM-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM5:[^ ]+]]
+; MEDIUM-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM5]]
 ;
-; LARGE:       Relocation 8
-; LARGE-NEXT:  'r_offset'
-; LARGE-NEXT:  'r_sym', 0x[[SYM5:[0-9]+]]
-; LARGE-NEXT:  'r_type', 0x00000032
-; LARGE:       Relocation 9
-; LARGE-NEXT:  'r_offset'
-; LARGE-NEXT:  'r_sym', 0x[[SYM5]]
-; LARGE-NEXT:  'r_type', 0x00000040
+; LARGE-NEXT:      0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM5:[^ ]+]]
+; LARGE-NEXT:      0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM5]]
 
 @ti = common global i32 0, align 4
 
@@ -216,23 +158,11 @@ entry:
 ; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
 ; accessing tentatively declared variable ti.
 ;
-; MEDIUM:       Relocation 10
-; MEDIUM-NEXT:  'r_offset'
-; MEDIUM-NEXT:  'r_sym', 0x[[SYM6:[0-9]+]]
-; MEDIUM-NEXT:  'r_type', 0x00000032
-; MEDIUM:       Relocation 11
-; MEDIUM-NEXT:  'r_offset'
-; MEDIUM-NEXT:  'r_sym', 0x[[SYM6]]
-; MEDIUM-NEXT:  'r_type', 0x00000040
+; MEDIUM-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM6:[^ ]+]]
+; MEDIUM-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM6]]
 ;
-; LARGE:       Relocation 10
-; LARGE-NEXT:  'r_offset'
-; LARGE-NEXT:  'r_sym', 0x[[SYM6:[0-9]+]]
-; LARGE-NEXT:  'r_type', 0x00000032
-; LARGE:       Relocation 11
-; LARGE-NEXT:  'r_offset'
-; LARGE-NEXT:  'r_sym', 0x[[SYM6]]
-; LARGE-NEXT:  'r_type', 0x00000040
+; LARGE-NEXT:      0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM6:[^ ]+]]
+; LARGE-NEXT:      0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM6]]
 
 define i8* @test_fnaddr() nounwind {
 entry:
@@ -248,21 +178,8 @@ declare signext i32 @foo(i32 signext)
 ; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
 ; accessing function address foo.
 ;
-; MEDIUM:       Relocation 12
-; MEDIUM-NEXT:  'r_offset'
-; MEDIUM-NEXT:  'r_sym', 0x[[SYM7:[0-9]+]]
-; MEDIUM-NEXT:  'r_type', 0x00000032
-; MEDIUM:       Relocation 13
-; MEDIUM-NEXT:  'r_offset'
-; MEDIUM-NEXT:  'r_sym', 0x[[SYM7]]
-; MEDIUM-NEXT:  'r_type', 0x00000040
+; MEDIUM-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM7:[^ ]+]]
+; MEDIUM-NEXT:     0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM7]]
 ;
-; LARGE:       Relocation 12
-; LARGE-NEXT:  'r_offset'
-; LARGE-NEXT:  'r_sym', 0x[[SYM7:[0-9]+]]
-; LARGE-NEXT:  'r_type', 0x00000032
-; LARGE:       Relocation 13
-; LARGE-NEXT:  'r_offset'
-; LARGE-NEXT:  'r_sym', 0x[[SYM7]]
-; LARGE-NEXT:  'r_type', 0x00000040
-
+; LARGE-NEXT:      0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM7:[^ ]+]]
+; LARGE-NEXT:      0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM7]]
diff --git a/test/CodeGen/PowerPC/optcmp.ll b/test/CodeGen/PowerPC/optcmp.ll
new file mode 100644
index 0000000..523f329
--- /dev/null
+++ b/test/CodeGen/PowerPC/optcmp.ll
@@ -0,0 +1,143 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 -disable-ppc-cmp-opt=0 | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define signext i32 @foo(i32 signext %a, i32 signext %b, i32* nocapture %c) #0 {
+entry:
+  %sub = sub nsw i32 %a, %b
+  store i32 %sub, i32* %c, align 4, !tbaa !0
+  %cmp = icmp sgt i32 %a, %b
+  %cond = select i1 %cmp, i32 %a, i32 %b
+  ret i32 %cond
+
+; CHECK: @foo
+; CHECK-NOT: subf.
+}
+
+define signext i32 @foo2(i32 signext %a, i32 signext %b, i32* nocapture %c) #0 {
+entry:
+  %shl = shl i32 %a, %b
+  store i32 %shl, i32* %c, align 4, !tbaa !0
+  %cmp = icmp sgt i32 %shl, 0
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+
+; CHECK: @foo2
+; CHECK-NOT: slw.
+}
+
+define i64 @fool(i64 %a, i64 %b, i64* nocapture %c) #0 {
+entry:
+  %sub = sub nsw i64 %a, %b
+  store i64 %sub, i64* %c, align 8, !tbaa !3
+  %cmp = icmp sgt i64 %a, %b
+  %cond = select i1 %cmp, i64 %a, i64 %b
+  ret i64 %cond
+
+; CHECK: @fool
+; CHECK: subf. [[REG:[0-9]+]], 4, 3
+; CHECK: isel 3, 3, 4, 1
+; CHECK: std [[REG]], 0(5)
+}
+
+define i64 @foolb(i64 %a, i64 %b, i64* nocapture %c) #0 {
+entry:
+  %sub = sub nsw i64 %a, %b
+  store i64 %sub, i64* %c, align 8, !tbaa !3
+  %cmp = icmp sle i64 %a, %b
+  %cond = select i1 %cmp, i64 %a, i64 %b
+  ret i64 %cond
+
+; CHECK: @foolb
+; CHECK: subf. [[REG:[0-9]+]], 4, 3
+; CHECK: isel 3, 4, 3, 1
+; CHECK: std [[REG]], 0(5)
+}
+
+define i64 @foolc(i64 %a, i64 %b, i64* nocapture %c) #0 {
+entry:
+  %sub = sub nsw i64 %b, %a
+  store i64 %sub, i64* %c, align 8, !tbaa !3
+  %cmp = icmp sgt i64 %a, %b
+  %cond = select i1 %cmp, i64 %a, i64 %b
+  ret i64 %cond
+
+; CHECK: @foolc
+; CHECK: subf. [[REG:[0-9]+]], 3, 4
+; CHECK: isel 3, 3, 4, 0
+; CHECK: std [[REG]], 0(5)
+}
+
+define i64 @foold(i64 %a, i64 %b, i64* nocapture %c) #0 {
+entry:
+  %sub = sub nsw i64 %b, %a
+  store i64 %sub, i64* %c, align 8, !tbaa !3
+  %cmp = icmp eq i64 %a, %b
+  %cond = select i1 %cmp, i64 %a, i64 %b
+  ret i64 %cond
+
+; CHECK: @foold
+; CHECK: subf. [[REG:[0-9]+]], 3, 4
+; CHECK: isel 3, 3, 4, 2
+; CHECK: std [[REG]], 0(5)
+}
+
+define i64 @foold2(i64 %a, i64 %b, i64* nocapture %c) #0 {
+entry:
+  %sub = sub nsw i64 %a, %b
+  store i64 %sub, i64* %c, align 8, !tbaa !3
+  %cmp = icmp eq i64 %a, %b
+  %cond = select i1 %cmp, i64 %a, i64 %b
+  ret i64 %cond
+
+; CHECK: @foold2
+; CHECK: subf. [[REG:[0-9]+]], 4, 3
+; CHECK: isel 3, 3, 4, 2
+; CHECK: std [[REG]], 0(5)
+}
+
+define i64 @foo2l(i64 %a, i64 %b, i64* nocapture %c) #0 {
+entry:
+  %shl = shl i64 %a, %b
+  store i64 %shl, i64* %c, align 8, !tbaa !3
+  %cmp = icmp sgt i64 %shl, 0
+  %conv1 = zext i1 %cmp to i64
+  ret i64 %conv1
+
+; CHECK: @foo2l
+; CHECK: sld. 4, 3, 4
+; CHECK: std 4, 0(5)
+}
+
+define double @food(double %a, double %b, double* nocapture %c) #0 {
+entry:
+  %sub = fsub double %a, %b
+  store double %sub, double* %c, align 8, !tbaa !3
+  %cmp = fcmp ogt double %a, %b
+  %cond = select i1 %cmp, double %a, double %b
+  ret double %cond
+
+; CHECK: @food
+; CHECK-NOT: fsub. 0, 1, 2
+; CHECK: stfd 0, 0(5)
+}
+
+define float @foof(float %a, float %b, float* nocapture %c) #0 {
+entry:
+  %sub = fsub float %a, %b
+  store float %sub, float* %c, align 4, !tbaa !3
+  %cmp = fcmp ogt float %a, %b
+  %cond = select i1 %cmp, float %a, float %b
+  ret float %cond
+
+; CHECK: @foof
+; CHECK-NOT: fsubs. 0, 1, 2
+; CHECK: stfs 0, 0(5)
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{metadata !"long", metadata !1}
+!4 = metadata !{metadata !"any pointer", metadata !1}
+
diff --git a/test/CodeGen/PowerPC/pr15359.ll b/test/CodeGen/PowerPC/pr15359.ll
index 12fa3e5..df02dfc 100644
--- a/test/CodeGen/PowerPC/pr15359.ll
+++ b/test/CodeGen/PowerPC/pr15359.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -O0 -mcpu=pwr7 -filetype=obj %s -o - | \
-; RUN: elf-dump --dump-section-data | FileCheck %s
+; RUN: llvm-readobj -t | FileCheck %s
 
 target datalayout = "E-p:64:64:64-S0-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
@@ -14,7 +14,9 @@ entry:
 
 ; Verify that nextIdx has symbol type TLS.
 ;
-; CHECK:    '.symtab'
-; CHECK:    'nextIdx'
-; CHECK:    'st_type', 0x6
-
+; CHECK:      Symbol {
+; CHECK:        Name: nextIdx
+; CHECK-NEXT:   Value:
+; CHECK-NEXT:   Size:
+; CHECK-NEXT:   Binding:
+; CHECK-NEXT:   Type: TLS
diff --git a/test/CodeGen/PowerPC/rounding-ops.ll b/test/CodeGen/PowerPC/rounding-ops.ll
index b210a6b..2b5e1c9 100644
--- a/test/CodeGen/PowerPC/rounding-ops.ll
+++ b/test/CodeGen/PowerPC/rounding-ops.ll
@@ -107,9 +107,10 @@ define double @test10(double %x) nounwind  {
 
 declare double @trunc(double) nounwind readnone
 
-define float @test11(float %x) nounwind  {
+define void @test11(float %x, float* %y) nounwind  {
   %call = tail call float @rintf(float %x) nounwind readnone
-  ret float %call
+  store float %call, float* %y
+  ret void
 
 ; CHECK: test11:
 ; CHECK-NOT: frin
@@ -125,9 +126,10 @@ define float @test11(float %x) nounwind  {
 
 declare float @rintf(float) nounwind readnone
 
-define double @test12(double %x) nounwind  {
+define void @test12(double %x, double* %y) nounwind  {
   %call = tail call double @rint(double %x) nounwind readnone
-  ret double %call
+  store double %call, double* %y
+  ret void
 
 ; CHECK: test12:
 ; CHECK-NOT: frin
diff --git a/test/CodeGen/PowerPC/s000-alias-misched.ll b/test/CodeGen/PowerPC/s000-alias-misched.ll
index d03ee87..3570a11 100644
--- a/test/CodeGen/PowerPC/s000-alias-misched.ll
+++ b/test/CodeGen/PowerPC/s000-alias-misched.ll
@@ -37,34 +37,34 @@ for.body4:                                        ; preds = %for.body4, %for.con
   %arrayidx = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv
   %arrayidx6 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv
   %0 = bitcast double* %arrayidx to <1 x double>*
-  %1 = load <1 x double>* %0, align 32, !tbaa !0
+  %1 = load <1 x double>* %0, align 32
   %add = fadd <1 x double> %1, <double 1.000000e+00>
   %2 = bitcast double* %arrayidx6 to <1 x double>*
-  store <1 x double> %add, <1 x double>* %2, align 32, !tbaa !0
+  store <1 x double> %add, <1 x double>* %2, align 32
   %indvars.iv.next.322 = or i64 %indvars.iv, 4
   %arrayidx.4 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.322
   %arrayidx6.4 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.322
   %3 = bitcast double* %arrayidx.4 to <1 x double>*
-  %4 = load <1 x double>* %3, align 32, !tbaa !0
+  %4 = load <1 x double>* %3, align 32
   %add.4 = fadd <1 x double> %4, <double 1.000000e+00>
   %5 = bitcast double* %arrayidx6.4 to <1 x double>*
-  store <1 x double> %add.4, <1 x double>* %5, align 32, !tbaa !0
+  store <1 x double> %add.4, <1 x double>* %5, align 32
   %indvars.iv.next.726 = or i64 %indvars.iv, 8
   %arrayidx.8 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.726
   %arrayidx6.8 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.726
   %6 = bitcast double* %arrayidx.8 to <1 x double>*
-  %7 = load <1 x double>* %6, align 32, !tbaa !0
+  %7 = load <1 x double>* %6, align 32
   %add.8 = fadd <1 x double> %7, <double 1.000000e+00>
   %8 = bitcast double* %arrayidx6.8 to <1 x double>*
-  store <1 x double> %add.8, <1 x double>* %8, align 32, !tbaa !0
+  store <1 x double> %add.8, <1 x double>* %8, align 32
   %indvars.iv.next.1130 = or i64 %indvars.iv, 12
   %arrayidx.12 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.1130
   %arrayidx6.12 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.1130
   %9 = bitcast double* %arrayidx.12 to <1 x double>*
-  %10 = load <1 x double>* %9, align 32, !tbaa !0
+  %10 = load <1 x double>* %9, align 32
   %add.12 = fadd <1 x double> %10, <double 1.000000e+00>
   %11 = bitcast double* %arrayidx6.12 to <1 x double>*
-  store <1 x double> %add.12, <1 x double>* %11, align 32, !tbaa !0
+  store <1 x double> %add.12, <1 x double>* %11, align 32
   %indvars.iv.next.15 = add i64 %indvars.iv, 16
   %lftr.wideiv.15 = trunc i64 %indvars.iv.next.15 to i32
   %exitcond.15 = icmp eq i32 %lftr.wideiv.15, 16000
@@ -95,7 +95,3 @@ for.end10:                                        ; preds = %for.end
 declare i64 @clock() nounwind
 
 declare signext i32 @dummy(double*, double*, double*, double*, double*, [256 x double]*, [256 x double]*, [256 x double]*, double)
-
-!0 = metadata !{metadata !"double", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/PowerPC/stubs.ll b/test/CodeGen/PowerPC/stubs.ll
index cfcc50b..694f208 100644
--- a/test/CodeGen/PowerPC/stubs.ll
+++ b/test/CodeGen/PowerPC/stubs.ll
@@ -6,16 +6,16 @@ entry:
 }
 
 ; CHECK: _test1:
-; CHECK: bl ___floatditf$stub
+; CHECK: bl L___floatditf$stub
 ; CHECK: 	.section	__TEXT,__symbol_stub1,symbol_stubs,pure_instructions,16
-; CHECK: ___floatditf$stub:
+; CHECK: L___floatditf$stub:
 ; CHECK: 	.indirect_symbol ___floatditf
-; CHECK: 	lis r11, ha16(___floatditf$lazy_ptr)
-; CHECK: 	lwzu r12, lo16(___floatditf$lazy_ptr)(r11)
+; CHECK: 	lis r11, ha16(L___floatditf$lazy_ptr)
+; CHECK: 	lwzu r12, lo16(L___floatditf$lazy_ptr)(r11)
 ; CHECK: 	mtctr r12
 ; CHECK: 	bctr
 ; CHECK: 	.section	__DATA,__la_symbol_ptr,lazy_symbol_pointers
-; CHECK: ___floatditf$lazy_ptr:
+; CHECK: L___floatditf$lazy_ptr:
 ; CHECK: 	.indirect_symbol ___floatditf
 ; CHECK: 	.long dyld_stub_binding_helper
 
diff --git a/test/CodeGen/PowerPC/stwu-gta.ll b/test/CodeGen/PowerPC/stwu-gta.ll
index 4febe7e..980c1d5 100644
--- a/test/CodeGen/PowerPC/stwu-gta.ll
+++ b/test/CodeGen/PowerPC/stwu-gta.ll
@@ -8,15 +8,11 @@ target triple = "powerpc-unknown-linux"
 
 define void @_GLOBAL__I_a() nounwind section ".text.startup" {
 entry:
-  store i32 5, i32* getelementptr inbounds (%class.Two.0.5* @foo, i32 0, i32 0), align 4, !tbaa !0
-  store i32 6, i32* getelementptr inbounds (%class.Two.0.5* @foo, i32 0, i32 1), align 4, !tbaa !0
+  store i32 5, i32* getelementptr inbounds (%class.Two.0.5* @foo, i32 0, i32 0), align 4
+  store i32 6, i32* getelementptr inbounds (%class.Two.0.5* @foo, i32 0, i32 1), align 4
   ret void
 }
 
 ; CHECK: @_GLOBAL__I_a
 ; CHECK-NOT: stwux
 ; CHECK: stwu
-
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/PowerPC/stwu8.ll b/test/CodeGen/PowerPC/stwu8.ll
index e0bd043..b220af2 100644
--- a/test/CodeGen/PowerPC/stwu8.ll
+++ b/test/CodeGen/PowerPC/stwu8.ll
@@ -14,7 +14,7 @@ entry:
   %_M_header.i.i.i.i.i.i = getelementptr inbounds %class.spell_checker.21.103.513.538* %this, i64 0, i32 0, i32 0, i32 0, i32 1
   %0 = bitcast %"struct.std::_Rb_tree_node_base.17.99.509.534"* %_M_header.i.i.i.i.i.i to i8*
   call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 40, i32 4, i1 false) nounwind
-  store %"struct.std::_Rb_tree_node_base.17.99.509.534"* %_M_header.i.i.i.i.i.i, %"struct.std::_Rb_tree_node_base.17.99.509.534"** undef, align 8, !tbaa !0
+  store %"struct.std::_Rb_tree_node_base.17.99.509.534"* %_M_header.i.i.i.i.i.i, %"struct.std::_Rb_tree_node_base.17.99.509.534"** undef, align 8
   unreachable
 }
 
@@ -22,7 +22,3 @@ entry:
 ; CHECK: stwu
 
 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
-
-!0 = metadata !{metadata !"any pointer", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/PowerPC/tls-gd-obj.ll b/test/CodeGen/PowerPC/tls-gd-obj.ll
index 00b537d..ffc0db0 100644
--- a/test/CodeGen/PowerPC/tls-gd-obj.ll
+++ b/test/CodeGen/PowerPC/tls-gd-obj.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -mcpu=pwr7 -O0 -filetype=obj -relocation-model=pic %s -o - | \
-; RUN: elf-dump --dump-section-data | FileCheck %s
+; RUN: llvm-readobj -r | FileCheck %s
 
 ; Test correct relocation generation for thread-local storage using
 ; the general dynamic model and integrated assembly.
@@ -21,21 +21,11 @@ entry:
 ; and R_PPC64_TLSGD for accessing external variable a, and R_PPC64_REL24
 ; for the call to __tls_get_addr.
 ;
-; CHECK:       '.rela.text'
-; CHECK:       Relocation 0
-; CHECK-NEXT:  'r_offset'
-; CHECK-NEXT:  'r_sym', 0x[[SYM1:[0-9a-f]+]]
-; CHECK-NEXT:  'r_type', 0x00000052
-; CHECK:       Relocation 1
-; CHECK-NEXT:  'r_offset'
-; CHECK-NEXT:  'r_sym', 0x[[SYM1]]
-; CHECK-NEXT:  'r_type', 0x00000050
-; CHECK:       Relocation 2
-; CHECK-NEXT:  'r_offset'
-; CHECK-NEXT:  'r_sym', 0x[[SYM1]]
-; CHECK-NEXT:  'r_type', 0x0000006b
-; CHECK:       Relocation 3
-; CHECK-NEXT:  'r_offset'
-; CHECK-NEXT:  'r_sym', 0x{{[0-9a-f]+}}
-; CHECK-NEXT:  'r_type', 0x0000000a
-
+; CHECK: Relocations [
+; CHECK:   Section (1) .text {
+; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_GOT_TLSGD16_HA a
+; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_GOT_TLSGD16_LO a
+; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_TLSGD          a
+; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_REL24          __tls_get_addr
+; CHECK:   }
+; CHECK: ]
diff --git a/test/CodeGen/PowerPC/tls-ie-obj.ll b/test/CodeGen/PowerPC/tls-ie-obj.ll
index 3600cc5..0f7a352 100644
--- a/test/CodeGen/PowerPC/tls-ie-obj.ll
+++ b/test/CodeGen/PowerPC/tls-ie-obj.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -mcpu=pwr7 -O0 -filetype=obj %s -o - | \
-; RUN: elf-dump --dump-section-data | FileCheck %s
+; RUN: llvm-readobj -r | FileCheck %s
 
 ; Test correct relocation generation for thread-local storage
 ; using the initial-exec model and integrated assembly.
@@ -20,17 +20,10 @@ entry:
 ; Verify generation of R_PPC64_GOT_TPREL16_DS and R_PPC64_TLS for
 ; accessing external variable a.
 ;
-; CHECK:       '.rela.text'
-; CHECK:       Relocation 0
-; CHECK-NEXT:  'r_offset'
-; CHECK-NEXT:  'r_sym', 0x[[SYM1:[0-9a-f]+]]
-; CHECK-NEXT:  'r_type', 0x0000005a
-; CHECK:       Relocation 1
-; CHECK-NEXT:  'r_offset'
-; CHECK-NEXT:  'r_sym', 0x[[SYM1]]
-; CHECK-NEXT:  'r_type', 0x00000058
-; CHECK:       Relocation 2
-; CHECK-NEXT:  'r_offset'
-; CHECK-NEXT:  'r_sym', 0x[[SYM1]]
-; CHECK-NEXT:  'r_type', 0x00000043
-
+; CHECK: Relocations [
+; CHECK:   Section (1) .text {
+; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_GOT_TPREL16_HA    a
+; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_GOT_TPREL16_LO_DS a
+; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_TLS               a
+; CHECK:   }
+; CHECK: ]
diff --git a/test/CodeGen/PowerPC/tls-ld-obj.ll b/test/CodeGen/PowerPC/tls-ld-obj.ll
index c521ae4..29ee87684 100644
--- a/test/CodeGen/PowerPC/tls-ld-obj.ll
+++ b/test/CodeGen/PowerPC/tls-ld-obj.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -mcpu=pwr7 -O0 -filetype=obj -relocation-model=pic %s -o - | \
-; RUN: elf-dump --dump-section-data | FileCheck %s
+; RUN: llvm-readobj -r | FileCheck %s
 
 ; Test correct relocation generation for thread-local storage using
 ; the local dynamic model.
@@ -22,29 +22,13 @@ entry:
 ; accessing external variable a, and R_PPC64_REL24 for the call to
 ; __tls_get_addr.
 ;
-; CHECK:       '.rela.text'
-; CHECK:       Relocation 0
-; CHECK-NEXT:  'r_offset'
-; CHECK-NEXT:  'r_sym', 0x[[SYM1:[0-9a-f]+]]
-; CHECK-NEXT:  'r_type', 0x00000056
-; CHECK:       Relocation 1
-; CHECK-NEXT:  'r_offset'
-; CHECK-NEXT:  'r_sym', 0x[[SYM1]]
-; CHECK-NEXT:  'r_type', 0x00000054
-; CHECK:       Relocation 2
-; CHECK-NEXT:  'r_offset'
-; CHECK-NEXT:  'r_sym', 0x[[SYM1]]
-; CHECK-NEXT:  'r_type', 0x0000006c
-; CHECK:       Relocation 3
-; CHECK-NEXT:  'r_offset'
-; CHECK-NEXT:  'r_sym', 0x{{[0-9a-f]+}}
-; CHECK-NEXT:  'r_type', 0x0000000a
-; CHECK:       Relocation 4
-; CHECK-NEXT:  'r_offset'
-; CHECK-NEXT:  'r_sym', 0x[[SYM1]]
-; CHECK-NEXT:  'r_type', 0x0000004d
-; CHECK:       Relocation 5
-; CHECK-NEXT:  'r_offset'
-; CHECK-NEXT:  'r_sym', 0x[[SYM1]]
-; CHECK-NEXT:  'r_type', 0x0000004b
-
+; CHECK: Relocations [
+; CHECK:   Section (1) .text {
+; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_GOT_TLSLD16_HA a
+; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_GOT_TLSLD16_LO a
+; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_TLSLD          a
+; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_REL24          __tls_get_addr
+; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_DTPREL16_HA    a
+; CHECK:     0x{{[0-9,A-F]+}} R_PPC64_DTPREL16_LO    a
+; CHECK:   }
+; CHECK: ]
diff --git a/test/CodeGen/R600/README b/test/CodeGen/R600/README
new file mode 100644
index 0000000..96998bb
--- /dev/null
+++ b/test/CodeGen/R600/README
@@ -0,0 +1,21 @@
++==============================================================================+
+| How to organize the lit tests                                                |
++==============================================================================+
+
+- If you write a test for matching a single DAG opcode or intrinsic, it should
+  go in a file called {opcode_name,intrinsic_name}.ll (e.g. fadd.ll)
+
+- If you write a test that matches several DAG opcodes and checks for a single
+  ISA instruction, then that test should go in a file called {ISA_name}.ll (e.g.
+  bfi_int.ll)
+
+- For all other tests, use your best judgement for organizing tests and naming
+  the files.
+
++==============================================================================+
+| Naming conventions                                                           |
++==============================================================================+
+
+- Use dash '-' and not underscore '_' to separate words in file names, unless
+  the file is named after a DAG opcode or ISA instruction that has an
+  underscore '_' in its name.
diff --git a/test/CodeGen/R600/add.v4i32.ll b/test/CodeGen/R600/add.ll
index ac4a874..185998b 100644
--- a/test/CodeGen/R600/add.v4i32.ll
+++ b/test/CodeGen/R600/add.ll
@@ -1,9 +1,9 @@
 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
 
 ;CHECK: ADD_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: ADD_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: ADD_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: ADD_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: ADD_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: ADD_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: ADD_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
 define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
diff --git a/test/CodeGen/R600/alu-split.ll b/test/CodeGen/R600/alu-split.ll
index afefcd9..48496f6 100644
--- a/test/CodeGen/R600/alu-split.ll
+++ b/test/CodeGen/R600/alu-split.ll
@@ -4,6 +4,7 @@
 ;CHECK: ALU
 ;CHECK: ALU
 ;CHECK-NOT: ALU
+;CHECK: CF_END
 
 define void @main() #0 {
 main_body:
diff --git a/test/CodeGen/R600/and.v4i32.ll b/test/CodeGen/R600/and.ll
index 662085e..166af2d 100644
--- a/test/CodeGen/R600/and.v4i32.ll
+++ b/test/CodeGen/R600/and.ll
@@ -1,9 +1,9 @@
 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
 
 ;CHECK: AND_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: AND_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: AND_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: AND_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: AND_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: AND_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: AND_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
 define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
diff --git a/test/CodeGen/R600/bfe_uint.ll b/test/CodeGen/R600/bfe_uint.ll
new file mode 100644
index 0000000..92570c3
--- /dev/null
+++ b/test/CodeGen/R600/bfe_uint.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; CHECK: @bfe_def
+; CHECK: BFE_UINT
+define void @bfe_def(i32 addrspace(1)* %out, i32 %x) {
+entry:
+  %0 = lshr i32 %x, 5
+  %1 = and i32 %0, 15 ; 0xf
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; This program could be implemented using a BFE_UINT instruction, however
+; since the lshr constant + number of bits in the mask is >= 32, it can also be
+; implemented with a LSHR instruction, which is better, because LSHR has fewer
+; operands and requires fewer constants.
+
+; CHECK: @bfe_shift
+; CHECK-NOT: BFE_UINT
+define void @bfe_shift(i32 addrspace(1)* %out, i32 %x) {
+entry:
+  %0 = lshr i32 %x, 16
+  %1 = and i32 %0, 65535 ; 0xffff
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/bfi_int.ll b/test/CodeGen/R600/bfi_int.ll
new file mode 100644
index 0000000..4244dcf
--- /dev/null
+++ b/test/CodeGen/R600/bfi_int.ll
@@ -0,0 +1,52 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600-CHECK %s
+; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI-CHECK %s
+
+; BFI_INT Definition pattern from ISA docs
+; (y & x) | (z & ~x)
+;
+; R600-CHECK: @bfi_def
+; R600-CHECK: BFI_INT
+; SI-CHECK:   @bfi_def
+; SI-CHECK:   V_BFI_B32
+define void @bfi_def(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
+entry:
+  %0 = xor i32 %x, -1
+  %1 = and i32 %z, %0
+  %2 = and i32 %y, %x
+  %3 = or i32 %1, %2
+  store i32 %3, i32 addrspace(1)* %out
+  ret void
+}
+
+; SHA-256 Ch function
+; z ^ (x & (y ^ z))
+; R600-CHECK: @bfi_sha256_ch
+; R600-CHECK: BFI_INT
+; SI-CHECK:   @bfi_sha256_ch
+; SI-CHECK:   V_BFI_B32
+define void @bfi_sha256_ch(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
+entry:
+  %0 = xor i32 %y, %z
+  %1 = and i32 %x, %0
+  %2 = xor i32 %z, %1
+  store i32 %2, i32 addrspace(1)* %out
+  ret void
+}
+
+; SHA-256 Ma function
+; ((x & z) | (y & (x | z)))
+; R600-CHECK: @bfi_sha256_ma
+; R600-CHECK: XOR_INT * [[DST:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600-CHECK: BFI_INT * {{T[0-9]+\.[XYZW]}}, {{[[DST]]|PV.x}}, {{T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; SI-CHECK: V_XOR_B32_e32 [[DST:VGPR[0-9]+]], {{VGPR[0-9]+, VGPR[0-9]+}}
+; SI-CHECK: V_BFI_B32 {{VGPR[0-9]+}}, [[DST]], {{VGPR[0-9]+, VGPR[0-9]+}}
+
+define void @bfi_sha256_ma(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
+entry:
+  %0 = and i32 %x, %z
+  %1 = or i32 %x, %z
+  %2 = and i32 %y, %1
+  %3 = or i32 %0, %2
+  store i32 %3, i32 addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/call_fs.ll b/test/CodeGen/R600/call_fs.ll
new file mode 100644
index 0000000..e152bf6
--- /dev/null
+++ b/test/CodeGen/R600/call_fs.ll
@@ -0,0 +1,15 @@
+
+; RUN: llc < %s -march=r600 -mcpu=redwood -show-mc-encoding -o - | FileCheck --check-prefix=EG-CHECK %s
+; RUN: llc < %s -march=r600 -mcpu=rv710 -show-mc-encoding -o - | FileCheck --check-prefix=R600-CHECK %s
+
+; EG-CHECK: @call_fs
+; EG-CHECK: CALL_FS  ; encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0xc0,0x84]
+; R600-CHECK: @call_fs
+; R600-CHECK:CALL_FS ; encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0x80,0x89]
+
+
+define void @call_fs() #0 {
+  ret void
+}
+
+attributes #0 = { "ShaderType"="1" } ; Vertex Shader
diff --git a/test/CodeGen/R600/cf_end.ll b/test/CodeGen/R600/cf_end.ll
new file mode 100644
index 0000000..138004d
--- /dev/null
+++ b/test/CodeGen/R600/cf_end.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood --show-mc-encoding | FileCheck --check-prefix=EG-CHECK %s
+; RUN: llc < %s -march=r600 -mcpu=caicos --show-mc-encoding | FileCheck --check-prefix=EG-CHECK %s
+; RUN: llc < %s -march=r600 -mcpu=cayman --show-mc-encoding | FileCheck --check-prefix=CM-CHECK %s
+
+; EG-CHECK: CF_END ; encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0x20,0x80]
+; CM-CHECK: CF_END ; encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x88]
+define void @eop() {
+  ret void
+}
diff --git a/test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll b/test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll
index fd958b3..6607c12 100644
--- a/test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll
+++ b/test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll
@@ -8,7 +8,7 @@
 
 
 ; CHECK: @sint
-; CHECK: INT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
 define void @sint(<4 x float> addrspace(1)* %out, i32 addrspace(1)* %in) {
 entry:
@@ -22,7 +22,7 @@ entry:
 }
 
 ;CHECK: @uint
-;CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
 define void @uint(<4 x float> addrspace(1)* %out, i32 addrspace(1)* %in) {
 entry:
diff --git a/test/CodeGen/R600/disconnected-predset-break-bug.ll b/test/CodeGen/R600/disconnected-predset-break-bug.ll
index 09baee7..012c17b 100644
--- a/test/CodeGen/R600/disconnected-predset-break-bug.ll
+++ b/test/CodeGen/R600/disconnected-predset-break-bug.ll
@@ -6,7 +6,7 @@
 
 ; CHECK: @loop_ge
 ; CHECK: LOOP_START_DX10
-; CHECK: PRED_SET
+; CHECK: ALU_PUSH_BEFORE
 ; CHECK-NEXT: JUMP
 ; CHECK-NEXT: LOOP_BREAK
 define void @loop_ge(i32 addrspace(1)* nocapture %out, i32 %iterations) nounwind {
diff --git a/test/CodeGen/R600/elf.ll b/test/CodeGen/R600/elf.ll
new file mode 100644
index 0000000..f460f13
--- /dev/null
+++ b/test/CodeGen/R600/elf.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=r600 -mcpu=SI -filetype=obj | llvm-readobj -s - | FileCheck --check-prefix=ELF-CHECK %s
+; RUN: llc < %s -march=r600 -mcpu=SI -o - | FileCheck --check-prefix=CONFIG-CHECK %s
+
+; ELF-CHECK: Format: ELF32
+; ELF-CHECK: Name: .AMDGPU.config
+; ELF-CHECK: Type: SHT_PROGBITS
+
+; CONFIG-CHECK: .section .AMDGPU.config
+; CONFIG-CHECK-NEXT: .long   45096
+; CONFIG-CHECK-NEXT: .long   0
+define void @test(i32 %p) #0 {
+   %i = add i32 %p, 2
+   %r = bitcast i32 %i to float
+   call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %r, float %r, float %r, float %r)
+   ret void
+}
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
+
+attributes #0 = { "ShaderType"="0" } ; Pixel Shader
diff --git a/test/CodeGen/R600/elf.r600.ll b/test/CodeGen/R600/elf.r600.ll
new file mode 100644
index 0000000..0590efb
--- /dev/null
+++ b/test/CodeGen/R600/elf.r600.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood -filetype=obj | llvm-readobj -s - | FileCheck --check-prefix=ELF-CHECK %s
+; RUN: llc < %s -march=r600 -mcpu=redwood -o - | FileCheck --check-prefix=CONFIG-CHECK %s
+
+; ELF-CHECK: Format: ELF32
+; ELF-CHECK: Name: .AMDGPU.config
+
+; CONFIG-CHECK: .section .AMDGPU.config
+; CONFIG-CHECK-NEXT: .long   166100
+; CONFIG-CHECK-NEXT: .long   258
+; CONFIG-CHECK-NEXT: .long   165900
+; CONFIG-CHECK-NEXT: .long   0
+define void @test(float addrspace(1)* %out, i32 %p) {
+   %i = add i32 %p, 2
+   %r = bitcast i32 %i to float
+   store float %r, float addrspace(1)* %out
+   ret void
+}
diff --git a/test/CodeGen/R600/fabs.ll b/test/CodeGen/R600/fabs.ll
index 0407533..85f2882 100644
--- a/test/CodeGen/R600/fabs.ll
+++ b/test/CodeGen/R600/fabs.ll
@@ -1,6 +1,6 @@
 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
 
-;CHECK: MOV T{{[0-9]+\.[XYZW], \|T[0-9]+\.[XYZW]\|}}
+;CHECK: MOV * T{{[0-9]+\.[XYZW], \|T[0-9]+\.[XYZW]\|}}
 
 define void @test() {
    %r0 = call float @llvm.R600.load.input(i32 0)
diff --git a/test/CodeGen/R600/fadd.ll b/test/CodeGen/R600/fadd.ll
index d7d1b65..9a67232 100644
--- a/test/CodeGen/R600/fadd.ll
+++ b/test/CodeGen/R600/fadd.ll
@@ -1,8 +1,9 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
 
-; CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: @fadd_f32
+; CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
-define void @test() {
+define void @fadd_f32() {
    %r0 = call float @llvm.R600.load.input(i32 0)
    %r1 = call float @llvm.R600.load.input(i32 1)
    %r2 = fadd float %r0, %r1
@@ -14,3 +15,17 @@ declare float @llvm.R600.load.input(i32) readnone
 
 declare void @llvm.AMDGPU.store.output(float, i32)
 
+; CHECK: @fadd_v4f32
+; CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @fadd_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
+  %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
+  %a = load <4 x float> addrspace(1) * %in
+  %b = load <4 x float> addrspace(1) * %b_ptr
+  %result = fadd <4 x float> %a, %b
+  store <4 x float> %result, <4 x float> addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/fadd.v4f32.ll b/test/CodeGen/R600/fadd.v4f32.ll
deleted file mode 100644
index 85dbfd5..0000000
--- a/test/CodeGen/R600/fadd.v4f32.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-
-;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-
-define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
-  %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
-  %a = load <4 x float> addrspace(1) * %in
-  %b = load <4 x float> addrspace(1) * %b_ptr
-  %result = fadd <4 x float> %a, %b
-  store <4 x float> %result, <4 x float> addrspace(1)* %out
-  ret void
-}
diff --git a/test/CodeGen/R600/fcmp-cnd.ll b/test/CodeGen/R600/fcmp-cnd.ll
index a94cfb5..7373a21 100644
--- a/test/CodeGen/R600/fcmp-cnd.ll
+++ b/test/CodeGen/R600/fcmp-cnd.ll
@@ -2,7 +2,7 @@
 
 ;Not checking arguments 2 and 3 to CNDE, because they may change between
 ;registers and literal.x depending on what the optimizer does.
-;CHECK: CNDE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: CNDE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
 define void @test(i32 addrspace(1)* %out, float addrspace(1)* %in) {
 entry:
diff --git a/test/CodeGen/R600/fcmp.ll b/test/CodeGen/R600/fcmp.ll
index 37f621d..dc3a779 100644
--- a/test/CodeGen/R600/fcmp.ll
+++ b/test/CodeGen/R600/fcmp.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
 
 ; CHECK: @fcmp_sext
-; CHECK: SETE_DX10 T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: SETE_DX10 * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
 define void @fcmp_sext(i32 addrspace(1)* %out, float addrspace(1)* %in) {
 entry:
@@ -19,7 +19,8 @@ entry:
 ; SET* + FP_TO_SINT
 
 ; CHECK: @fcmp_br
-; CHECK: SET{{[N]*}}E_DX10 T{{[0-9]+\.[XYZW], [a-zA-Z0-9, .]+}}(5.0
+; CHECK: SET{{[N]*}}E_DX10 * T{{[0-9]+\.[XYZW],}}
+; CHECK-NEXT {{[0-9]+(5.0}}
 
 define void @fcmp_br(i32 addrspace(1)* %out, float %in) {
 entry:
diff --git a/test/CodeGen/R600/fdiv.v4f32.ll b/test/CodeGen/R600/fdiv.ll
index 79e677f..2e68e36 100644
--- a/test/CodeGen/R600/fdiv.v4f32.ll
+++ b/test/CodeGen/R600/fdiv.ll
@@ -1,13 +1,13 @@
 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
 
-;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
 define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
diff --git a/test/CodeGen/R600/floor.ll b/test/CodeGen/R600/floor.ll
index 845330f..877d69a 100644
--- a/test/CodeGen/R600/floor.ll
+++ b/test/CodeGen/R600/floor.ll
@@ -1,6 +1,6 @@
 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
 
-;CHECK: FLOOR T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: FLOOR * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
 define void @test() {
    %r0 = call float @llvm.R600.load.input(i32 0)
diff --git a/test/CodeGen/R600/fmad.ll b/test/CodeGen/R600/fmad.ll
index a3d4d0f..62001ed 100644
--- a/test/CodeGen/R600/fmad.ll
+++ b/test/CodeGen/R600/fmad.ll
@@ -1,6 +1,6 @@
 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
 
-;CHECK: MULADD_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: MULADD_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
 define void @test() {
    %r0 = call float @llvm.R600.load.input(i32 0)
diff --git a/test/CodeGen/R600/fmax.ll b/test/CodeGen/R600/fmax.ll
index 3708f0b..8b704e5 100644
--- a/test/CodeGen/R600/fmax.ll
+++ b/test/CodeGen/R600/fmax.ll
@@ -1,6 +1,6 @@
 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
 
-;CHECK: MAX T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: MAX * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
 define void @test() {
    %r0 = call float @llvm.R600.load.input(i32 0)
diff --git a/test/CodeGen/R600/fmin.ll b/test/CodeGen/R600/fmin.ll
index 19d59ab..5e34b7c 100644
--- a/test/CodeGen/R600/fmin.ll
+++ b/test/CodeGen/R600/fmin.ll
@@ -1,6 +1,6 @@
 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
 
-;CHECK: MIN T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: MIN * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
 define void @test() {
    %r0 = call float @llvm.R600.load.input(i32 0)
diff --git a/test/CodeGen/R600/fmul.ll b/test/CodeGen/R600/fmul.ll
index eb1d523..c292946 100644
--- a/test/CodeGen/R600/fmul.ll
+++ b/test/CodeGen/R600/fmul.ll
@@ -1,8 +1,9 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
 
-; CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: @fmul_f32
+; CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
-define void @test() {
+define void @fmul_f32() {
    %r0 = call float @llvm.R600.load.input(i32 0)
    %r1 = call float @llvm.R600.load.input(i32 1)
    %r2 = fmul float %r0, %r1
@@ -14,3 +15,17 @@ declare float @llvm.R600.load.input(i32) readnone
 
 declare void @llvm.AMDGPU.store.output(float, i32)
 
+; CHECK: @fmul_v4f32
+; CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @fmul_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
+  %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
+  %a = load <4 x float> addrspace(1) * %in
+  %b = load <4 x float> addrspace(1) * %b_ptr
+  %result = fmul <4 x float> %a, %b
+  store <4 x float> %result, <4 x float> addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/fmul.v4f32.ll b/test/CodeGen/R600/fmul.v4f32.ll
index 6d44a0c..74a58f7 100644
--- a/test/CodeGen/R600/fmul.v4f32.ll
+++ b/test/CodeGen/R600/fmul.v4f32.ll
@@ -1,9 +1,9 @@
 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
 
 ;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
 define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
diff --git a/test/CodeGen/R600/fp_to_sint.ll b/test/CodeGen/R600/fp_to_sint.ll
new file mode 100644
index 0000000..f5716e1
--- /dev/null
+++ b/test/CodeGen/R600/fp_to_sint.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; CHECK: @fp_to_sint_v4i32
+; CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @fp_to_sint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
+  %value = load <4 x float> addrspace(1) * %in
+  %result = fptosi <4 x float> %value to <4 x i32>
+  store <4 x i32> %result, <4 x i32> addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/fp_to_uint.ll b/test/CodeGen/R600/fp_to_uint.ll
new file mode 100644
index 0000000..1c3c0c6
--- /dev/null
+++ b/test/CodeGen/R600/fp_to_uint.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; CHECK: @fp_to_uint_v4i32
+; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @fp_to_uint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
+  %value = load <4 x float> addrspace(1) * %in
+  %result = fptoui <4 x float> %value to <4 x i32>
+  store <4 x i32> %result, <4 x i32> addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/fsub.ll b/test/CodeGen/R600/fsub.ll
index 591aa52..f784cde 100644
--- a/test/CodeGen/R600/fsub.ll
+++ b/test/CodeGen/R600/fsub.ll
@@ -1,8 +1,9 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
 
-; CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}
+; CHECK: @fsub_f32
+; CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}
 
-define void @test() {
+define void @fsub_f32() {
    %r0 = call float @llvm.R600.load.input(i32 0)
    %r1 = call float @llvm.R600.load.input(i32 1)
    %r2 = fsub float %r0, %r1
@@ -14,3 +15,17 @@ declare float @llvm.R600.load.input(i32) readnone
 
 declare void @llvm.AMDGPU.store.output(float, i32)
 
+; CHECK: @fsub_v4f32
+; CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @fsub_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
+  %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
+  %a = load <4 x float> addrspace(1) * %in
+  %b = load <4 x float> addrspace(1) * %b_ptr
+  %result = fsub <4 x float> %a, %b
+  store <4 x float> %result, <4 x float> addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/fsub.v4f32.ll b/test/CodeGen/R600/fsub.v4f32.ll
deleted file mode 100644
index 612a57e..0000000
--- a/test/CodeGen/R600/fsub.v4f32.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-
-;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-
-define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
-  %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
-  %a = load <4 x float> addrspace(1) * %in
-  %b = load <4 x float> addrspace(1) * %b_ptr
-  %result = fsub <4 x float> %a, %b
-  store <4 x float> %result, <4 x float> addrspace(1)* %out
-  ret void
-}
diff --git a/test/CodeGen/R600/i8_to_double_to_float.ll b/test/CodeGen/R600/i8-to-double-to-float.ll
index 39f3322..6047466 100644
--- a/test/CodeGen/R600/i8_to_double_to_float.ll
+++ b/test/CodeGen/R600/i8-to-double-to-float.ll
@@ -1,6 +1,6 @@
 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
 
-;CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
 define void @test(float addrspace(1)* %out, i8 addrspace(1)* %in) {
   %1 = load i8 addrspace(1)* %in
diff --git a/test/CodeGen/R600/icmp-select-sete-reverse-args.ll b/test/CodeGen/R600/icmp-select-sete-reverse-args.ll
index 71705a6..e3005fe8 100644
--- a/test/CodeGen/R600/icmp-select-sete-reverse-args.ll
+++ b/test/CodeGen/R600/icmp-select-sete-reverse-args.ll
@@ -3,7 +3,7 @@
 ;Test that a select with reversed True/False values is correctly lowered
 ;to a SETNE_INT.  There should only be one SETNE_INT instruction.
 
-;CHECK: SETNE_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: SETNE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ;CHECK-NOT: SETNE_INT
 
 define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
diff --git a/test/CodeGen/R600/imm.ll b/test/CodeGen/R600/imm.ll
new file mode 100644
index 0000000..979efb0
--- /dev/null
+++ b/test/CodeGen/R600/imm.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
+
+; Use a 64-bit value whose low 32 bits can be represented as an inline constant
+; CHECK: @i64_imm_inline_lo
+; CHECK: S_MOV_B32 [[LO:SGPR[0-9]+]], 5
+; CHECK: V_MOV_B32_e32 [[LO_VGPR:VGPR[0-9]+]], [[LO]]
+; CHECK: BUFFER_STORE_DWORDX2 [[LO_VGPR]]_
+define void @i64_imm_inline_lo(i64 addrspace(1) *%out) {
+entry:
+  store i64 1311768464867721221, i64 addrspace(1) *%out ; 0x1234567800000005
+  ret void
+}
+
+; Use a 64-bit value whose high 32 bits can be represented as an inline constant
+; CHECK: @i64_imm_inline_hi
+; CHECK: S_MOV_B32 [[HI:SGPR[0-9]+]], 5
+; CHECK: V_MOV_B32_e32 [[HI_VGPR:VGPR[0-9]+]], [[HI]]
+; CHECK: BUFFER_STORE_DWORDX2 {{VGPR[0-9]+}}_[[HI_VGPR]]
+define void @i64_imm_inline_hi(i64 addrspace(1) *%out) {
+entry:
+  store i64 21780256376, i64 addrspace(1) *%out ; 0x0000000512345678
+  ret void
+}
diff --git a/test/CodeGen/R600/jump_address.ll b/test/CodeGen/R600/jump-address.ll
index cd35bff..ae9c8bb 100644
--- a/test/CodeGen/R600/jump_address.ll
+++ b/test/CodeGen/R600/jump-address.ll
@@ -1,6 +1,8 @@
 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
 
-; CHECK: JUMP @4
+; CHECK: JUMP @3
+; CHECK: EXPORT
+; CHECK-NOT: EXPORT
 
 define void @main() #0 {
 main_body:
diff --git a/test/CodeGen/R600/literals.ll b/test/CodeGen/R600/literals.ll
index e69f64e..21e5d4c 100644
--- a/test/CodeGen/R600/literals.ll
+++ b/test/CodeGen/R600/literals.ll
@@ -7,7 +7,8 @@
 ; ADD_INT literal.x REG, 5
 
 ; CHECK: @i32_literal
-; CHECK: ADD_INT {{[A-Z0-9,. ]*}}literal.x,{{[A-Z0-9,. ]*}} 5
+; CHECK: ADD_INT * {{[A-Z0-9,. ]*}}literal.x
+; CHECK-NEXT: 5
 define void @i32_literal(i32 addrspace(1)* %out, i32 %in) {
 entry:
   %0 = add i32 5, %in
@@ -22,7 +23,8 @@ entry:
 ; ADD literal.x REG, 5.0
 
 ; CHECK: @float_literal
-; CHECK: ADD {{[A-Z0-9,. ]*}}literal.x,{{[A-Z0-9,. ]*}} {{[0-9]+}}(5.0
+; CHECK: ADD * {{[A-Z0-9,. ]*}}literal.x
+; CHECK-NEXT: 1084227584(5.0
 define void @float_literal(float addrspace(1)* %out, float %in) {
 entry:
   %0 = fadd float 5.0, %in
@@ -30,3 +32,168 @@ entry:
   ret void
 }
 
+; CHECK: @main
+; CHECK: -2147483648
+; CHECK-NEXT-NOT: -2147483648
+
+define void @main() #0 {
+main_body:
+  %0 = call float @llvm.R600.load.input(i32 4)
+  %1 = call float @llvm.R600.load.input(i32 5)
+  %2 = call float @llvm.R600.load.input(i32 6)
+  %3 = call float @llvm.R600.load.input(i32 7)
+  %4 = call float @llvm.R600.load.input(i32 8)
+  %5 = call float @llvm.R600.load.input(i32 9)
+  %6 = call float @llvm.R600.load.input(i32 10)
+  %7 = call float @llvm.R600.load.input(i32 11)
+  %8 = call float @llvm.R600.load.input(i32 12)
+  %9 = call float @llvm.R600.load.input(i32 13)
+  %10 = call float @llvm.R600.load.input(i32 14)
+  %11 = call float @llvm.R600.load.input(i32 15)
+  %12 = load <4 x float> addrspace(8)* null
+  %13 = extractelement <4 x float> %12, i32 0
+  %14 = fsub float -0.000000e+00, %13
+  %15 = fadd float %0, %14
+  %16 = load <4 x float> addrspace(8)* null
+  %17 = extractelement <4 x float> %16, i32 1
+  %18 = fsub float -0.000000e+00, %17
+  %19 = fadd float %1, %18
+  %20 = load <4 x float> addrspace(8)* null
+  %21 = extractelement <4 x float> %20, i32 2
+  %22 = fsub float -0.000000e+00, %21
+  %23 = fadd float %2, %22
+  %24 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %25 = extractelement <4 x float> %24, i32 0
+  %26 = fmul float %25, %0
+  %27 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %28 = extractelement <4 x float> %27, i32 1
+  %29 = fmul float %28, %0
+  %30 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %31 = extractelement <4 x float> %30, i32 2
+  %32 = fmul float %31, %0
+  %33 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %34 = extractelement <4 x float> %33, i32 3
+  %35 = fmul float %34, %0
+  %36 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %37 = extractelement <4 x float> %36, i32 0
+  %38 = fmul float %37, %1
+  %39 = fadd float %38, %26
+  %40 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %41 = extractelement <4 x float> %40, i32 1
+  %42 = fmul float %41, %1
+  %43 = fadd float %42, %29
+  %44 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %45 = extractelement <4 x float> %44, i32 2
+  %46 = fmul float %45, %1
+  %47 = fadd float %46, %32
+  %48 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %49 = extractelement <4 x float> %48, i32 3
+  %50 = fmul float %49, %1
+  %51 = fadd float %50, %35
+  %52 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+  %53 = extractelement <4 x float> %52, i32 0
+  %54 = fmul float %53, %2
+  %55 = fadd float %54, %39
+  %56 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+  %57 = extractelement <4 x float> %56, i32 1
+  %58 = fmul float %57, %2
+  %59 = fadd float %58, %43
+  %60 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+  %61 = extractelement <4 x float> %60, i32 2
+  %62 = fmul float %61, %2
+  %63 = fadd float %62, %47
+  %64 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+  %65 = extractelement <4 x float> %64, i32 3
+  %66 = fmul float %65, %2
+  %67 = fadd float %66, %51
+  %68 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+  %69 = extractelement <4 x float> %68, i32 0
+  %70 = fmul float %69, %3
+  %71 = fadd float %70, %55
+  %72 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+  %73 = extractelement <4 x float> %72, i32 1
+  %74 = fmul float %73, %3
+  %75 = fadd float %74, %59
+  %76 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+  %77 = extractelement <4 x float> %76, i32 2
+  %78 = fmul float %77, %3
+  %79 = fadd float %78, %63
+  %80 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+  %81 = extractelement <4 x float> %80, i32 3
+  %82 = fmul float %81, %3
+  %83 = fadd float %82, %67
+  %84 = insertelement <4 x float> undef, float %15, i32 0
+  %85 = insertelement <4 x float> %84, float %19, i32 1
+  %86 = insertelement <4 x float> %85, float %23, i32 2
+  %87 = insertelement <4 x float> %86, float 0.000000e+00, i32 3
+  %88 = insertelement <4 x float> undef, float %15, i32 0
+  %89 = insertelement <4 x float> %88, float %19, i32 1
+  %90 = insertelement <4 x float> %89, float %23, i32 2
+  %91 = insertelement <4 x float> %90, float 0.000000e+00, i32 3
+  %92 = call float @llvm.AMDGPU.dp4(<4 x float> %87, <4 x float> %91)
+  %93 = call float @fabs(float %92)
+  %94 = call float @llvm.AMDGPU.rsq(float %93)
+  %95 = fmul float %15, %94
+  %96 = fmul float %19, %94
+  %97 = fmul float %23, %94
+  %98 = insertelement <4 x float> undef, float %4, i32 0
+  %99 = insertelement <4 x float> %98, float %5, i32 1
+  %100 = insertelement <4 x float> %99, float %6, i32 2
+  %101 = insertelement <4 x float> %100, float 0.000000e+00, i32 3
+  %102 = insertelement <4 x float> undef, float %4, i32 0
+  %103 = insertelement <4 x float> %102, float %5, i32 1
+  %104 = insertelement <4 x float> %103, float %6, i32 2
+  %105 = insertelement <4 x float> %104, float 0.000000e+00, i32 3
+  %106 = call float @llvm.AMDGPU.dp4(<4 x float> %101, <4 x float> %105)
+  %107 = call float @fabs(float %106)
+  %108 = call float @llvm.AMDGPU.rsq(float %107)
+  %109 = fmul float %4, %108
+  %110 = fmul float %5, %108
+  %111 = fmul float %6, %108
+  %112 = insertelement <4 x float> undef, float %95, i32 0
+  %113 = insertelement <4 x float> %112, float %96, i32 1
+  %114 = insertelement <4 x float> %113, float %97, i32 2
+  %115 = insertelement <4 x float> %114, float 0.000000e+00, i32 3
+  %116 = insertelement <4 x float> undef, float %109, i32 0
+  %117 = insertelement <4 x float> %116, float %110, i32 1
+  %118 = insertelement <4 x float> %117, float %111, i32 2
+  %119 = insertelement <4 x float> %118, float 0.000000e+00, i32 3
+  %120 = call float @llvm.AMDGPU.dp4(<4 x float> %115, <4 x float> %119)
+  %121 = fsub float -0.000000e+00, %120
+  %122 = fcmp uge float 0.000000e+00, %121
+  %123 = select i1 %122, float 0.000000e+00, float %121
+  %124 = insertelement <4 x float> undef, float %8, i32 0
+  %125 = insertelement <4 x float> %124, float %9, i32 1
+  %126 = insertelement <4 x float> %125, float 5.000000e-01, i32 2
+  %127 = insertelement <4 x float> %126, float 1.000000e+00, i32 3
+  call void @llvm.R600.store.swizzle(<4 x float> %127, i32 60, i32 1)
+  %128 = insertelement <4 x float> undef, float %71, i32 0
+  %129 = insertelement <4 x float> %128, float %75, i32 1
+  %130 = insertelement <4 x float> %129, float %79, i32 2
+  %131 = insertelement <4 x float> %130, float %83, i32 3
+  call void @llvm.R600.store.swizzle(<4 x float> %131, i32 0, i32 2)
+  %132 = insertelement <4 x float> undef, float %123, i32 0
+  %133 = insertelement <4 x float> %132, float %96, i32 1
+  %134 = insertelement <4 x float> %133, float %97, i32 2
+  %135 = insertelement <4 x float> %134, float 0.000000e+00, i32 3
+  call void @llvm.R600.store.swizzle(<4 x float> %135, i32 1, i32 2)
+  ret void
+}
+
+; Function Attrs: readnone
+declare float @llvm.R600.load.input(i32) #1
+
+; Function Attrs: readnone
+declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1
+
+; Function Attrs: readonly
+declare float @fabs(float) #2
+
+; Function Attrs: readnone
+declare float @llvm.AMDGPU.rsq(float) #1
+
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
+
+attributes #0 = { "ShaderType"="1" }
+attributes #1 = { readnone }
+attributes #2 = { readonly }
diff --git a/test/CodeGen/R600/llvm.AMDGPU.mul.ll b/test/CodeGen/R600/llvm.AMDGPU.mul.ll
index 693eb27..cc0732b 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.mul.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.mul.ll
@@ -1,6 +1,6 @@
 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
 
-;CHECK: MUL NON-IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: MUL NON-IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
 define void @test() {
    %r0 = call float @llvm.R600.load.input(i32 0)
diff --git a/test/CodeGen/R600/llvm.AMDGPU.trunc.ll b/test/CodeGen/R600/llvm.AMDGPU.trunc.ll
index fac957f..ff22a69 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.trunc.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.trunc.ll
@@ -1,6 +1,6 @@
 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
 
-;CHECK: TRUNC T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: TRUNC * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
 define void @test() {
    %r0 = call float @llvm.R600.load.input(i32 0)
diff --git a/test/CodeGen/R600/llvm.SI.fs.interp.constant.ll b/test/CodeGen/R600/llvm.SI.fs.interp.constant.ll
index bf0cdaa..e45722c 100644
--- a/test/CodeGen/R600/llvm.SI.fs.interp.constant.ll
+++ b/test/CodeGen/R600/llvm.SI.fs.interp.constant.ll
@@ -1,4 +1,4 @@
-;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s
+;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
 
 ;CHECK: S_MOV_B32
 ;CHECK-NEXT: V_INTERP_MOV_F32
diff --git a/test/CodeGen/R600/llvm.SI.sample.ll b/test/CodeGen/R600/llvm.SI.sample.ll
index c724395..de06354 100644
--- a/test/CodeGen/R600/llvm.SI.sample.ll
+++ b/test/CodeGen/R600/llvm.SI.sample.ll
@@ -1,21 +1,21 @@
-;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s
+;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
 
-;CHECK: IMAGE_SAMPLE
-;CHECK: IMAGE_SAMPLE
-;CHECK: IMAGE_SAMPLE
-;CHECK: IMAGE_SAMPLE
-;CHECK: IMAGE_SAMPLE
-;CHECK: IMAGE_SAMPLE_C
-;CHECK: IMAGE_SAMPLE_C
-;CHECK: IMAGE_SAMPLE_C
-;CHECK: IMAGE_SAMPLE
-;CHECK: IMAGE_SAMPLE
-;CHECK: IMAGE_SAMPLE_C
-;CHECK: IMAGE_SAMPLE_C
-;CHECK: IMAGE_SAMPLE_C
-;CHECK: IMAGE_SAMPLE
-;CHECK: IMAGE_SAMPLE
-;CHECK: IMAGE_SAMPLE
+;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 15
+;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+}}, 3
+;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+}}, 2
+;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+}}, 1
+;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+}}, 4
+;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+}}, 8
+;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+}}, 5
+;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+}}, 9
+;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+}}, 6
+;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+}}, 10
+;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+}}, 12
+;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 7
+;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 11
+;CHECK: IMAGE_SAMPLE_C {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 13
+;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 14
+;CHECK: IMAGE_SAMPLE {{VGPR[0-9]+}}, 8
 
 define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
    %v1 = insertelement <4 x i32> undef, i32 %a1, i32 0
@@ -34,54 +34,88 @@ define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
    %v14 = insertelement <4 x i32> undef, i32 %a4, i32 1
    %v15 = insertelement <4 x i32> undef, i32 %a4, i32 2
    %v16 = insertelement <4 x i32> undef, i32 %a4, i32 3
-   %res1 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v1,
+   %res1 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v1,
       <8 x i32> undef, <4 x i32> undef, i32 1)
-   %res2 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v2,
+   %res2 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v2,
       <8 x i32> undef, <4 x i32> undef, i32 2)
-   %res3 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v3,
+   %res3 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v3,
       <8 x i32> undef, <4 x i32> undef, i32 3)
-   %res4 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v4,
+   %res4 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v4,
       <8 x i32> undef, <4 x i32> undef, i32 4)
-   %res5 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v5,
+   %res5 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v5,
       <8 x i32> undef, <4 x i32> undef, i32 5)
-   %res6 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v6,
+   %res6 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v6,
       <8 x i32> undef, <4 x i32> undef, i32 6)
-   %res7 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v7,
+   %res7 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v7,
       <8 x i32> undef, <4 x i32> undef, i32 7)
-   %res8 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v8,
+   %res8 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v8,
       <8 x i32> undef, <4 x i32> undef, i32 8)
-   %res9 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v9,
+   %res9 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v9,
       <8 x i32> undef, <4 x i32> undef, i32 9)
-   %res10 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v10,
+   %res10 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v10,
       <8 x i32> undef, <4 x i32> undef, i32 10)
-   %res11 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v11,
+   %res11 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v11,
       <8 x i32> undef, <4 x i32> undef, i32 11)
-   %res12 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v12,
+   %res12 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v12,
       <8 x i32> undef, <4 x i32> undef, i32 12)
-   %res13 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v13,
+   %res13 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v13,
       <8 x i32> undef, <4 x i32> undef, i32 13)
-   %res14 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v14,
+   %res14 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v14,
       <8 x i32> undef, <4 x i32> undef, i32 14)
-   %res15 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v15,
+   %res15 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v15,
       <8 x i32> undef, <4 x i32> undef, i32 15)
-   %res16 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v16,
+   %res16 = call <4 x float> @llvm.SI.sample.(<4 x i32> %v16,
       <8 x i32> undef, <4 x i32> undef, i32 16)
    %e1 = extractelement <4 x float> %res1, i32 0
-   %e2 = extractelement <4 x float> %res2, i32 0
-   %e3 = extractelement <4 x float> %res3, i32 0
-   %e4 = extractelement <4 x float> %res4, i32 0
-   %e5 = extractelement <4 x float> %res5, i32 0
-   %e6 = extractelement <4 x float> %res6, i32 0
-   %e7 = extractelement <4 x float> %res7, i32 0
-   %e8 = extractelement <4 x float> %res8, i32 0
-   %e9 = extractelement <4 x float> %res9, i32 0
-   %e10 = extractelement <4 x float> %res10, i32 0
-   %e11 = extractelement <4 x float> %res11, i32 0
-   %e12 = extractelement <4 x float> %res12, i32 0
-   %e13 = extractelement <4 x float> %res13, i32 0
-   %e14 = extractelement <4 x float> %res14, i32 0
-   %e15 = extractelement <4 x float> %res15, i32 0
-   %e16 = extractelement <4 x float> %res16, i32 0
+   %e2 = extractelement <4 x float> %res2, i32 1
+   %e3 = extractelement <4 x float> %res3, i32 2
+   %e4 = extractelement <4 x float> %res4, i32 3
+   %t0 = extractelement <4 x float> %res5, i32 0
+   %t1 = extractelement <4 x float> %res5, i32 1
+   %e5 = fadd float %t0, %t1
+   %t2 = extractelement <4 x float> %res6, i32 0
+   %t3 = extractelement <4 x float> %res6, i32 2
+   %e6 = fadd float %t2, %t3
+   %t4 = extractelement <4 x float> %res7, i32 0
+   %t5 = extractelement <4 x float> %res7, i32 3
+   %e7 = fadd float %t4, %t5
+   %t6 = extractelement <4 x float> %res8, i32 1
+   %t7 = extractelement <4 x float> %res8, i32 2
+   %e8 = fadd float %t6, %t7
+   %t8 = extractelement <4 x float> %res9, i32 1
+   %t9 = extractelement <4 x float> %res9, i32 3
+   %e9 = fadd float %t8, %t9
+   %t10 = extractelement <4 x float> %res10, i32 2
+   %t11 = extractelement <4 x float> %res10, i32 3
+   %e10 = fadd float %t10, %t11
+   %t12 = extractelement <4 x float> %res11, i32 0
+   %t13 = extractelement <4 x float> %res11, i32 1
+   %t14 = extractelement <4 x float> %res11, i32 2
+   %t15 = fadd float %t12, %t13
+   %e11 = fadd float %t14, %t15
+   %t16 = extractelement <4 x float> %res12, i32 0
+   %t17 = extractelement <4 x float> %res12, i32 1
+   %t18 = extractelement <4 x float> %res12, i32 3
+   %t19 = fadd float %t16, %t17
+   %e12 = fadd float %t18, %t19
+   %t20 = extractelement <4 x float> %res13, i32 0
+   %t21 = extractelement <4 x float> %res13, i32 2
+   %t22 = extractelement <4 x float> %res13, i32 3
+   %t23 = fadd float %t20, %t21
+   %e13 = fadd float %t22, %t23
+   %t24 = extractelement <4 x float> %res14, i32 1
+   %t25 = extractelement <4 x float> %res14, i32 2
+   %t26 = extractelement <4 x float> %res14, i32 3
+   %t27 = fadd float %t24, %t25
+   %e14 = fadd float %t26, %t27
+   %t28 = extractelement <4 x float> %res15, i32 0
+   %t29 = extractelement <4 x float> %res15, i32 1
+   %t30 = extractelement <4 x float> %res15, i32 2
+   %t31 = extractelement <4 x float> %res15, i32 3
+   %t32 = fadd float %t28, %t29
+   %t33 = fadd float %t30, %t31
+   %e15 = fadd float %t32, %t33
+   %e16 = extractelement <4 x float> %res16, i32 3
    %s1 = fadd float %e1, %e2
    %s2 = fadd float %s1, %e3
    %s3 = fadd float %s2, %e4
@@ -101,6 +135,6 @@ define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
    ret void
 }
 
-declare <4 x float> @llvm.SI.sample.(i32, <4 x i32>, <8 x i32>, <4 x i32>, i32) readnone
+declare <4 x float> @llvm.SI.sample.(<4 x i32>, <8 x i32>, <4 x i32>, i32) readnone
 
 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
diff --git a/test/CodeGen/R600/llvm.cos.ll b/test/CodeGen/R600/llvm.cos.ll
index dc120bf..9b28167 100644
--- a/test/CodeGen/R600/llvm.cos.ll
+++ b/test/CodeGen/R600/llvm.cos.ll
@@ -1,6 +1,6 @@
 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
 
-;CHECK: COS T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: COS * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
 define void @test() {
    %r0 = call float @llvm.R600.load.input(i32 0)
diff --git a/test/CodeGen/R600/llvm.pow.ll b/test/CodeGen/R600/llvm.pow.ll
index b4ce9f4..91b77428 100644
--- a/test/CodeGen/R600/llvm.pow.ll
+++ b/test/CodeGen/R600/llvm.pow.ll
@@ -1,8 +1,8 @@
 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
 
-;CHECK: LOG_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK-NEXT: MUL NON-IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;CHECK-NEXT: EXP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: LOG_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK-NEXT: MUL NON-IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK-NEXT: EXP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
 define void @test() {
    %r0 = call float @llvm.R600.load.input(i32 0)
diff --git a/test/CodeGen/R600/llvm.sin.ll b/test/CodeGen/R600/llvm.sin.ll
index 5cd6998..803dc2d 100644
--- a/test/CodeGen/R600/llvm.sin.ll
+++ b/test/CodeGen/R600/llvm.sin.ll
@@ -1,6 +1,6 @@
 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
 
-;CHECK: SIN T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: SIN * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
 define void @test() {
    %r0 = call float @llvm.R600.load.input(i32 0)
diff --git a/test/CodeGen/R600/load.constant_addrspace.f32.ll b/test/CodeGen/R600/load.constant_addrspace.f32.ll
deleted file mode 100644
index 9362728..0000000
--- a/test/CodeGen/R600/load.constant_addrspace.f32.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-
-;CHECK: VTX_READ_32 T{{[0-9]+\.X, T[0-9]+\.X}}
-
-define void @test(float addrspace(1)* %out, float addrspace(2)* %in) {
-  %1 = load float addrspace(2)* %in
-  store float %1, float addrspace(1)* %out
-  ret void
-}
diff --git a/test/CodeGen/R600/load.i8.ll b/test/CodeGen/R600/load.i8.ll
deleted file mode 100644
index b070dcd..0000000
--- a/test/CodeGen/R600/load.i8.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-
-;CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
-
-define void @test(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
-  %1 = load i8 addrspace(1)* %in
-  %2 = zext i8 %1 to i32
-  store i32 %2, i32 addrspace(1)* %out
-  ret void
-}
diff --git a/test/CodeGen/R600/load.ll b/test/CodeGen/R600/load.ll
new file mode 100644
index 0000000..b03245a
--- /dev/null
+++ b/test/CodeGen/R600/load.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; Load an i8 value from the global address space.
+; CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
+
+define void @load_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
+  %1 = load i8 addrspace(1)* %in
+  %2 = zext i8 %1 to i32
+  store i32 %2, i32 addrspace(1)* %out
+  ret void
+}
+
+; Load an f32 value from the constant address space.
+; CHECK: VTX_READ_32 T{{[0-9]+\.X, T[0-9]+\.X}}
+
+define void @load_const_addrspace_f32(float addrspace(1)* %out, float addrspace(2)* %in) {
+  %1 = load float addrspace(2)* %in
+  store float %1, float addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/loop-address.ll b/test/CodeGen/R600/loop-address.ll
new file mode 100644
index 0000000..8a5458b
--- /dev/null
+++ b/test/CodeGen/R600/loop-address.ll
@@ -0,0 +1,41 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK: TEX
+;CHECK: ALU_PUSH
+;CHECK: JUMP @4
+;CHECK: ELSE @16
+;CHECK: TEX
+;CHECK: LOOP_START_DX10 @15
+;CHECK: LOOP_BREAK @14
+;CHECK: POP @16
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64"
+target triple = "r600--"
+
+define void @loop_ge(i32 addrspace(1)* nocapture %out, i32 %iterations) #0 {
+entry:
+  %cmp5 = icmp sgt i32 %iterations, 0
+  br i1 %cmp5, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.07.in = phi i32 [ %i.07, %for.body ], [ %iterations, %entry ]
+  %ai.06 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+  %i.07 = add nsw i32 %i.07.in, -1
+  %arrayidx = getelementptr inbounds i32 addrspace(1)* %out, i32 %ai.06
+  store i32 %i.07, i32 addrspace(1)* %arrayidx, align 4
+  %add = add nsw i32 %ai.06, 1
+  %exitcond = icmp eq i32 %add, %iterations
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+attributes #0 = { nounwind "fp-contract-model"="standard" "relocation-model"="pic" "ssp-buffers-size"="8" }
+
+!opencl.kernels = !{!0, !1, !2, !3}
+
+!0 = metadata !{void (i32 addrspace(1)*, i32)* @loop_ge}
+!1 = metadata !{null}
+!2 = metadata !{null}
+!3 = metadata !{null}
diff --git a/test/CodeGen/R600/lshl.ll b/test/CodeGen/R600/lshl.ll
index 423adb9..fb698da 100644
--- a/test/CodeGen/R600/lshl.ll
+++ b/test/CodeGen/R600/lshl.ll
@@ -1,4 +1,4 @@
-;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s
+;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
 
 ;CHECK: V_LSHLREV_B32_e32 VGPR0, 1, VGPR0
 
diff --git a/test/CodeGen/R600/lshr.ll b/test/CodeGen/R600/lshr.ll
index 551eac1..e0ed3ac 100644
--- a/test/CodeGen/R600/lshr.ll
+++ b/test/CodeGen/R600/lshr.ll
@@ -1,4 +1,4 @@
-;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s
+;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
 
 ;CHECK: V_LSHRREV_B32_e32 VGPR0, 1, VGPR0
 
diff --git a/test/CodeGen/R600/mul.ll b/test/CodeGen/R600/mul.ll
new file mode 100644
index 0000000..7278e90
--- /dev/null
+++ b/test/CodeGen/R600/mul.ll
@@ -0,0 +1,16 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; mul24 and mad24 are affected
+;CHECK: MULLO_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: MULLO_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: MULLO_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: MULLO_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+  %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
+  %a = load <4 x i32> addrspace(1) * %in
+  %b = load <4 x i32> addrspace(1) * %b_ptr
+  %result = mul <4 x i32> %a, %b
+  store <4 x i32> %result, <4 x i32> addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/mulhu.ll b/test/CodeGen/R600/mulhu.ll
index 28744e0..bc17a59 100644
--- a/test/CodeGen/R600/mulhu.ll
+++ b/test/CodeGen/R600/mulhu.ll
@@ -1,4 +1,4 @@
-;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s
+;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
 
 ;CHECK: V_MOV_B32_e32 VGPR1, -1431655765
 ;CHECK-NEXT: V_MUL_HI_U32 VGPR0, VGPR0, VGPR1, 0, 0, 0, 0, 0
diff --git a/test/CodeGen/R600/or.ll b/test/CodeGen/R600/or.ll
new file mode 100644
index 0000000..b0dbb02
--- /dev/null
+++ b/test/CodeGen/R600/or.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; CHECK: @or_v4i32
+; CHECK: OR_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: OR_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: OR_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: OR_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @or_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) {
+  %result = or <4 x i32> %a, %b
+  store <4 x i32> %result, <4 x i32> addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/predicates.ll b/test/CodeGen/R600/predicates.ll
index eb8b052..0d3eeef 100644
--- a/test/CodeGen/R600/predicates.ll
+++ b/test/CodeGen/R600/predicates.ll
@@ -4,8 +4,8 @@
 ; when it is legal to do so.
 
 ; CHECK: @simple_if
-; CHECK: PRED_SET{{[EGN][ET]*}}_INT Pred,
-; CHECK: LSHL T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel
+; CHECK: PRED_SET{{[EGN][ET]*}}_INT * Pred,
+; CHECK: LSHL * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, Pred_sel
 define void @simple_if(i32 addrspace(1)* %out, i32 %in) {
 entry:
   %0 = icmp sgt i32 %in, 0
@@ -22,9 +22,9 @@ ENDIF:
 }
 
 ; CHECK: @simple_if_else
-; CHECK: PRED_SET{{[EGN][ET]*}}_INT Pred,
-; CHECK: LSH{{[LR] T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel
-; CHECK: LSH{{[LR] T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel
+; CHECK: PRED_SET{{[EGN][ET]*}}_INT * Pred,
+; CHECK: LSH{{[LR] \* T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, Pred_sel
+; CHECK: LSH{{[LR] \* T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, Pred_sel
 define void @simple_if_else(i32 addrspace(1)* %out, i32 %in) {
 entry:
   %0 = icmp sgt i32 %in, 0
@@ -46,11 +46,11 @@ ENDIF:
 
 ; CHECK: @nested_if
 ; CHECK: ALU_PUSH_BEFORE
-; CHECK: PRED_SET{{[EGN][ET]*}}_INT Exec
 ; CHECK: JUMP
-; CHECK: PRED_SET{{[EGN][ET]*}}_INT Pred,
-; CHECK: LSHL T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel
 ; CHECK: POP
+; CHECK: PRED_SET{{[EGN][ET]*}}_INT * Exec
+; CHECK: PRED_SET{{[EGN][ET]*}}_INT * Pred,
+; CHECK: LSHL * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, Pred_sel
 define void @nested_if(i32 addrspace(1)* %out, i32 %in) {
 entry:
   %0 = icmp sgt i32 %in, 0
@@ -73,12 +73,12 @@ ENDIF:
 
 ; CHECK: @nested_if_else
 ; CHECK: ALU_PUSH_BEFORE
-; CHECK: PRED_SET{{[EGN][ET]*}}_INT Exec
 ; CHECK: JUMP
-; CHECK: PRED_SET{{[EGN][ET]*}}_INT Pred,
-; CHECK: LSH{{[LR] T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel
-; CHECK: LSH{{[LR] T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel
 ; CHECK: POP
+; CHECK: PRED_SET{{[EGN][ET]*}}_INT * Exec
+; CHECK: PRED_SET{{[EGN][ET]*}}_INT * Pred,
+; CHECK: LSH{{[LR] \* T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, Pred_sel
+; CHECK: LSH{{[LR] \* T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, Pred_sel
 define void @nested_if_else(i32 addrspace(1)* %out, i32 %in) {
 entry:
   %0 = icmp sgt i32 %in, 0
diff --git a/test/CodeGen/R600/pv.ll b/test/CodeGen/R600/pv.ll
new file mode 100644
index 0000000..37c3d9d
--- /dev/null
+++ b/test/CodeGen/R600/pv.ll
@@ -0,0 +1,244 @@
+; RUN: llc < %s -march=r600 | FileCheck %s
+
+;CHECK: DOT4 * T{{[0-9]+\.W}} (MASKED)
+;CHECK-NEXT: CNDGE T{{[0-9]+\.[XYZW]}}, PV.x
+
+define void @main() #0 {
+main_body:
+  %0 = call float @llvm.R600.load.input(i32 4)
+  %1 = call float @llvm.R600.load.input(i32 5)
+  %2 = call float @llvm.R600.load.input(i32 6)
+  %3 = call float @llvm.R600.load.input(i32 7)
+  %4 = call float @llvm.R600.load.input(i32 8)
+  %5 = call float @llvm.R600.load.input(i32 9)
+  %6 = call float @llvm.R600.load.input(i32 10)
+  %7 = call float @llvm.R600.load.input(i32 11)
+  %8 = call float @llvm.R600.load.input(i32 12)
+  %9 = call float @llvm.R600.load.input(i32 13)
+  %10 = call float @llvm.R600.load.input(i32 14)
+  %11 = call float @llvm.R600.load.input(i32 15)
+  %12 = call float @llvm.R600.load.input(i32 16)
+  %13 = call float @llvm.R600.load.input(i32 17)
+  %14 = call float @llvm.R600.load.input(i32 18)
+  %15 = call float @llvm.R600.load.input(i32 19)
+  %16 = call float @llvm.R600.load.input(i32 20)
+  %17 = call float @llvm.R600.load.input(i32 21)
+  %18 = call float @llvm.R600.load.input(i32 22)
+  %19 = call float @llvm.R600.load.input(i32 23)
+  %20 = call float @llvm.R600.load.input(i32 24)
+  %21 = call float @llvm.R600.load.input(i32 25)
+  %22 = call float @llvm.R600.load.input(i32 26)
+  %23 = call float @llvm.R600.load.input(i32 27)
+  %24 = call float @llvm.R600.load.input(i32 28)
+  %25 = call float @llvm.R600.load.input(i32 29)
+  %26 = call float @llvm.R600.load.input(i32 30)
+  %27 = call float @llvm.R600.load.input(i32 31)
+  %28 = load <4 x float> addrspace(8)* null
+  %29 = extractelement <4 x float> %28, i32 0
+  %30 = fmul float %0, %29
+  %31 = load <4 x float> addrspace(8)* null
+  %32 = extractelement <4 x float> %31, i32 1
+  %33 = fmul float %0, %32
+  %34 = load <4 x float> addrspace(8)* null
+  %35 = extractelement <4 x float> %34, i32 2
+  %36 = fmul float %0, %35
+  %37 = load <4 x float> addrspace(8)* null
+  %38 = extractelement <4 x float> %37, i32 3
+  %39 = fmul float %0, %38
+  %40 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %41 = extractelement <4 x float> %40, i32 0
+  %42 = fmul float %1, %41
+  %43 = fadd float %42, %30
+  %44 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %45 = extractelement <4 x float> %44, i32 1
+  %46 = fmul float %1, %45
+  %47 = fadd float %46, %33
+  %48 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %49 = extractelement <4 x float> %48, i32 2
+  %50 = fmul float %1, %49
+  %51 = fadd float %50, %36
+  %52 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %53 = extractelement <4 x float> %52, i32 3
+  %54 = fmul float %1, %53
+  %55 = fadd float %54, %39
+  %56 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %57 = extractelement <4 x float> %56, i32 0
+  %58 = fmul float %2, %57
+  %59 = fadd float %58, %43
+  %60 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %61 = extractelement <4 x float> %60, i32 1
+  %62 = fmul float %2, %61
+  %63 = fadd float %62, %47
+  %64 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %65 = extractelement <4 x float> %64, i32 2
+  %66 = fmul float %2, %65
+  %67 = fadd float %66, %51
+  %68 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %69 = extractelement <4 x float> %68, i32 3
+  %70 = fmul float %2, %69
+  %71 = fadd float %70, %55
+  %72 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+  %73 = extractelement <4 x float> %72, i32 0
+  %74 = fmul float %3, %73
+  %75 = fadd float %74, %59
+  %76 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+  %77 = extractelement <4 x float> %76, i32 1
+  %78 = fmul float %3, %77
+  %79 = fadd float %78, %63
+  %80 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+  %81 = extractelement <4 x float> %80, i32 2
+  %82 = fmul float %3, %81
+  %83 = fadd float %82, %67
+  %84 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+  %85 = extractelement <4 x float> %84, i32 3
+  %86 = fmul float %3, %85
+  %87 = fadd float %86, %71
+  %88 = insertelement <4 x float> undef, float %4, i32 0
+  %89 = insertelement <4 x float> %88, float %5, i32 1
+  %90 = insertelement <4 x float> %89, float %6, i32 2
+  %91 = insertelement <4 x float> %90, float 0.000000e+00, i32 3
+  %92 = insertelement <4 x float> undef, float %4, i32 0
+  %93 = insertelement <4 x float> %92, float %5, i32 1
+  %94 = insertelement <4 x float> %93, float %6, i32 2
+  %95 = insertelement <4 x float> %94, float 0.000000e+00, i32 3
+  %96 = call float @llvm.AMDGPU.dp4(<4 x float> %91, <4 x float> %95)
+  %97 = call float @fabs(float %96)
+  %98 = call float @llvm.AMDGPU.rsq(float %97)
+  %99 = fmul float %4, %98
+  %100 = fmul float %5, %98
+  %101 = fmul float %6, %98
+  %102 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+  %103 = extractelement <4 x float> %102, i32 0
+  %104 = fmul float %103, %8
+  %105 = fadd float %104, %20
+  %106 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+  %107 = extractelement <4 x float> %106, i32 1
+  %108 = fmul float %107, %9
+  %109 = fadd float %108, %21
+  %110 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+  %111 = extractelement <4 x float> %110, i32 2
+  %112 = fmul float %111, %10
+  %113 = fadd float %112, %22
+  %114 = call float @llvm.AMDIL.clamp.(float %105, float 0.000000e+00, float 1.000000e+00)
+  %115 = call float @llvm.AMDIL.clamp.(float %109, float 0.000000e+00, float 1.000000e+00)
+  %116 = call float @llvm.AMDIL.clamp.(float %113, float 0.000000e+00, float 1.000000e+00)
+  %117 = call float @llvm.AMDIL.clamp.(float %15, float 0.000000e+00, float 1.000000e+00)
+  %118 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+  %119 = extractelement <4 x float> %118, i32 0
+  %120 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+  %121 = extractelement <4 x float> %120, i32 1
+  %122 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+  %123 = extractelement <4 x float> %122, i32 2
+  %124 = insertelement <4 x float> undef, float %99, i32 0
+  %125 = insertelement <4 x float> %124, float %100, i32 1
+  %126 = insertelement <4 x float> %125, float %101, i32 2
+  %127 = insertelement <4 x float> %126, float 0.000000e+00, i32 3
+  %128 = insertelement <4 x float> undef, float %119, i32 0
+  %129 = insertelement <4 x float> %128, float %121, i32 1
+  %130 = insertelement <4 x float> %129, float %123, i32 2
+  %131 = insertelement <4 x float> %130, float 0.000000e+00, i32 3
+  %132 = call float @llvm.AMDGPU.dp4(<4 x float> %127, <4 x float> %131)
+  %133 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+  %134 = extractelement <4 x float> %133, i32 0
+  %135 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+  %136 = extractelement <4 x float> %135, i32 1
+  %137 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+  %138 = extractelement <4 x float> %137, i32 2
+  %139 = insertelement <4 x float> undef, float %99, i32 0
+  %140 = insertelement <4 x float> %139, float %100, i32 1
+  %141 = insertelement <4 x float> %140, float %101, i32 2
+  %142 = insertelement <4 x float> %141, float 0.000000e+00, i32 3
+  %143 = insertelement <4 x float> undef, float %134, i32 0
+  %144 = insertelement <4 x float> %143, float %136, i32 1
+  %145 = insertelement <4 x float> %144, float %138, i32 2
+  %146 = insertelement <4 x float> %145, float 0.000000e+00, i32 3
+  %147 = call float @llvm.AMDGPU.dp4(<4 x float> %142, <4 x float> %146)
+  %148 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
+  %149 = extractelement <4 x float> %148, i32 0
+  %150 = fmul float %149, %8
+  %151 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
+  %152 = extractelement <4 x float> %151, i32 1
+  %153 = fmul float %152, %9
+  %154 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
+  %155 = extractelement <4 x float> %154, i32 2
+  %156 = fmul float %155, %10
+  %157 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+  %158 = extractelement <4 x float> %157, i32 0
+  %159 = fmul float %158, %12
+  %160 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+  %161 = extractelement <4 x float> %160, i32 1
+  %162 = fmul float %161, %13
+  %163 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+  %164 = extractelement <4 x float> %163, i32 2
+  %165 = fmul float %164, %14
+  %166 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
+  %167 = extractelement <4 x float> %166, i32 0
+  %168 = fmul float %167, %16
+  %169 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
+  %170 = extractelement <4 x float> %169, i32 1
+  %171 = fmul float %170, %17
+  %172 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
+  %173 = extractelement <4 x float> %172, i32 2
+  %174 = fmul float %173, %18
+  %175 = fcmp uge float %132, 0.000000e+00
+  %176 = select i1 %175, float %132, float 0.000000e+00
+  %177 = fcmp uge float %147, 0.000000e+00
+  %178 = select i1 %177, float %147, float 0.000000e+00
+  %179 = call float @llvm.pow.f32(float %178, float %24)
+  %180 = fcmp ult float %132, 0.000000e+00
+  %181 = select i1 %180, float 0.000000e+00, float %179
+  %182 = fadd float %150, %105
+  %183 = fadd float %153, %109
+  %184 = fadd float %156, %113
+  %185 = fmul float %176, %159
+  %186 = fadd float %185, %182
+  %187 = fmul float %176, %162
+  %188 = fadd float %187, %183
+  %189 = fmul float %176, %165
+  %190 = fadd float %189, %184
+  %191 = fmul float %181, %168
+  %192 = fadd float %191, %186
+  %193 = fmul float %181, %171
+  %194 = fadd float %193, %188
+  %195 = fmul float %181, %174
+  %196 = fadd float %195, %190
+  %197 = call float @llvm.AMDIL.clamp.(float %192, float 0.000000e+00, float 1.000000e+00)
+  %198 = call float @llvm.AMDIL.clamp.(float %194, float 0.000000e+00, float 1.000000e+00)
+  %199 = call float @llvm.AMDIL.clamp.(float %196, float 0.000000e+00, float 1.000000e+00)
+  %200 = insertelement <4 x float> undef, float %75, i32 0
+  %201 = insertelement <4 x float> %200, float %79, i32 1
+  %202 = insertelement <4 x float> %201, float %83, i32 2
+  %203 = insertelement <4 x float> %202, float %87, i32 3
+  call void @llvm.R600.store.swizzle(<4 x float> %203, i32 60, i32 1)
+  %204 = insertelement <4 x float> undef, float %197, i32 0
+  %205 = insertelement <4 x float> %204, float %198, i32 1
+  %206 = insertelement <4 x float> %205, float %199, i32 2
+  %207 = insertelement <4 x float> %206, float %117, i32 3
+  call void @llvm.R600.store.swizzle(<4 x float> %207, i32 0, i32 2)
+  ret void
+}
+
+; Function Attrs: readnone
+declare float @llvm.R600.load.input(i32) #1
+
+; Function Attrs: readnone
+declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1
+
+; Function Attrs: readonly
+declare float @fabs(float) #2
+
+; Function Attrs: readnone
+declare float @llvm.AMDGPU.rsq(float) #1
+
+; Function Attrs: readnone
+declare float @llvm.AMDIL.clamp.(float, float, float) #1
+
+; Function Attrs: nounwind readonly
+declare float @llvm.pow.f32(float, float) #3
+
+declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
+
+attributes #0 = { "ShaderType"="1" }
+attributes #1 = { readnone }
+attributes #2 = { readonly }
+attributes #3 = { nounwind readonly }
diff --git a/test/CodeGen/R600/r600-encoding.ll b/test/CodeGen/R600/r600-encoding.ll
new file mode 100644
index 0000000..c8040a1
--- /dev/null
+++ b/test/CodeGen/R600/r600-encoding.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=r600 -show-mc-encoding -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
+; RUN: llc < %s -march=r600 -show-mc-encoding -mcpu=rs880 | FileCheck --check-prefix=R600-CHECK %s
+
+; The earliest R600 GPUs have a slightly different encoding than the rest of
+; the VLIW4/5 GPUs.
+
+; EG-CHECK: @test
+; EG-CHECK: MUL_IEEE {{[ *TXYZW.,0-9]+}} ; encoding: [{{0x[0-9a-f]+,0x[0-9a-f]+,0x[0-9a-f]+,0x[0-9a-f]+,0x10,0x01,0x[0-9a-f]+,0x[0-9a-f]+}}]
+
+; R600-CHECK: @test
+; R600-CHECK: MUL_IEEE {{[ *TXYZW.,0-9]+}} ; encoding: [{{0x[0-9a-f]+,0x[0-9a-f]+,0x[0-9a-f]+,0x[0-9a-f]+,0x10,0x02,0x[0-9a-f]+,0x[0-9a-f]+}}]
+
+define void @test() {
+entry:
+  %0 = call float @llvm.R600.load.input(i32 0)
+  %1 = call float @llvm.R600.load.input(i32 1)
+  %2 = fmul float %0, %1
+  call void @llvm.AMDGPU.store.output(float %2, i32 0)
+  ret void
+}
+
+declare float @llvm.R600.load.input(i32) readnone
+
+declare void @llvm.AMDGPU.store.output(float, i32)
diff --git a/test/CodeGen/R600/reciprocal.ll b/test/CodeGen/R600/reciprocal.ll
index 6838c1a..2783929 100644
--- a/test/CodeGen/R600/reciprocal.ll
+++ b/test/CodeGen/R600/reciprocal.ll
@@ -1,6 +1,6 @@
 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
 
-;CHECK: RECIP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
 define void @test() {
    %r0 = call float @llvm.R600.load.input(i32 0)
diff --git a/test/CodeGen/R600/sdiv.ll b/test/CodeGen/R600/sdiv.ll
index 3556fac..3dd10c8 100644
--- a/test/CodeGen/R600/sdiv.ll
+++ b/test/CodeGen/R600/sdiv.ll
@@ -9,7 +9,7 @@
 ; This was fixed by adding an additional pattern in R600Instructions.td to
 ; match this pattern with a CNDGE_INT.
 
-; CHECK: RETURN
+; CHECK: CF_END
 
 define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
diff --git a/test/CodeGen/R600/selectcc_cnde.ll b/test/CodeGen/R600/selectcc-cnd.ll
index f0a0f51..d7287b4 100644
--- a/test/CodeGen/R600/selectcc_cnde.ll
+++ b/test/CodeGen/R600/selectcc-cnd.ll
@@ -1,7 +1,8 @@
 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
 
 ;CHECK-NOT: SETE
-;CHECK: CNDE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], 1.0, literal.x, [-0-9]+\(2.0}}
+;CHECK: CNDE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1.0, literal.x,
+;CHECK-NEXT: {{[-0-9]+\(2.0}}
 define void @test(float addrspace(1)* %out, float addrspace(1)* %in) {
   %1 = load float addrspace(1)* %in
   %2 = fcmp oeq float %1, 0.0
diff --git a/test/CodeGen/R600/selectcc_cnde_int.ll b/test/CodeGen/R600/selectcc-cnde-int.ll
index b38078e..768dc7d 100644
--- a/test/CodeGen/R600/selectcc_cnde_int.ll
+++ b/test/CodeGen/R600/selectcc-cnde-int.ll
@@ -1,7 +1,8 @@
 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
 
 ;CHECK-NOT: SETE_INT
-;CHECK: CNDE_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], 1, literal.x, 2}}
+;CHECK: CNDE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, literal.x,
+;CHECK-NEXT: 2
 define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %1 = load i32 addrspace(1)* %in
   %2 = icmp eq i32 %1, 0
diff --git a/test/CodeGen/R600/selectcc-icmp-select-float.ll b/test/CodeGen/R600/selectcc-icmp-select-float.ll
index 359ca1e..6743800 100644
--- a/test/CodeGen/R600/selectcc-icmp-select-float.ll
+++ b/test/CodeGen/R600/selectcc-icmp-select-float.ll
@@ -2,7 +2,8 @@
 
 ; Note additional optimizations may cause this SGT to be replaced with a
 ; CND* instruction.
-; CHECK: SETGT_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], literal.x, -1}}
+; CHECK: SETGT_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, literal.x,
+; CHECK-NEXT: -1
 ; Test a selectcc with i32 LHS/RHS and float True/False
 
 define void @test(float addrspace(1)* %out, i32 addrspace(1)* %in) {
diff --git a/test/CodeGen/R600/set-dx10.ll b/test/CodeGen/R600/set-dx10.ll
index 54febcf..eb6e9d2 100644
--- a/test/CodeGen/R600/set-dx10.ll
+++ b/test/CodeGen/R600/set-dx10.ll
@@ -5,7 +5,8 @@
 ; SET*DX10 instructions.
 
 ; CHECK: @fcmp_une_select_fptosi
-; CHECK: SETNE_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00)
+; CHECK: SETNE_DX10 * T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x,
+; CHECK-NEXT: 1084227584(5.000000e+00)
 define void @fcmp_une_select_fptosi(i32 addrspace(1)* %out, float %in) {
 entry:
   %0 = fcmp une float %in, 5.0
@@ -17,7 +18,8 @@ entry:
 }
 
 ; CHECK: @fcmp_une_select_i32
-; CHECK: SETNE_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00)
+; CHECK: SETNE_DX10 * T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x,
+; CHECK-NEXT: 1084227584(5.000000e+00)
 define void @fcmp_une_select_i32(i32 addrspace(1)* %out, float %in) {
 entry:
   %0 = fcmp une float %in, 5.0
@@ -27,7 +29,8 @@ entry:
 }
 
 ; CHECK: @fcmp_ueq_select_fptosi
-; CHECK: SETE_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00)
+; CHECK: SETE_DX10 * T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x,
+; CHECK-NEXT: 1084227584(5.000000e+00)
 define void @fcmp_ueq_select_fptosi(i32 addrspace(1)* %out, float %in) {
 entry:
   %0 = fcmp ueq float %in, 5.0
@@ -39,7 +42,8 @@ entry:
 }
 
 ; CHECK: @fcmp_ueq_select_i32
-; CHECK: SETE_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00)
+; CHECK: SETE_DX10 * T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x,
+; CHECK-NEXT: 1084227584(5.000000e+00)
 define void @fcmp_ueq_select_i32(i32 addrspace(1)* %out, float %in) {
 entry:
   %0 = fcmp ueq float %in, 5.0
@@ -49,7 +53,8 @@ entry:
 }
 
 ; CHECK: @fcmp_ugt_select_fptosi
-; CHECK: SETGT_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00)
+; CHECK: SETGT_DX10 * T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x,
+; CHECK-NEXT: 1084227584(5.000000e+00)
 define void @fcmp_ugt_select_fptosi(i32 addrspace(1)* %out, float %in) {
 entry:
   %0 = fcmp ugt float %in, 5.0
@@ -61,7 +66,8 @@ entry:
 }
 
 ; CHECK: @fcmp_ugt_select_i32
-; CHECK: SETGT_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00)
+; CHECK: SETGT_DX10 * T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x,
+; CHECK-NEXT: 1084227584(5.000000e+00)
 define void @fcmp_ugt_select_i32(i32 addrspace(1)* %out, float %in) {
 entry:
   %0 = fcmp ugt float %in, 5.0
@@ -71,7 +77,8 @@ entry:
 }
 
 ; CHECK: @fcmp_uge_select_fptosi
-; CHECK: SETGE_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00)
+; CHECK: SETGE_DX10 * T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x,
+; CHECK-NEXT: 1084227584(5.000000e+00)
 define void @fcmp_uge_select_fptosi(i32 addrspace(1)* %out, float %in) {
 entry:
   %0 = fcmp uge float %in, 5.0
@@ -83,7 +90,8 @@ entry:
 }
 
 ; CHECK: @fcmp_uge_select_i32
-; CHECK: SETGE_DX10 T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x, 1084227584(5.000000e+00)
+; CHECK: SETGE_DX10 * T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x,
+; CHECK-NEXT: 1084227584(5.000000e+00)
 define void @fcmp_uge_select_i32(i32 addrspace(1)* %out, float %in) {
 entry:
   %0 = fcmp uge float %in, 5.0
@@ -93,7 +101,8 @@ entry:
 }
 
 ; CHECK: @fcmp_ule_select_fptosi
-; CHECK: SETGE_DX10 T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00)
+; CHECK: SETGE_DX10 * T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}},
+; CHECK-NEXT: 1084227584(5.000000e+00)
 define void @fcmp_ule_select_fptosi(i32 addrspace(1)* %out, float %in) {
 entry:
   %0 = fcmp ule float %in, 5.0
@@ -105,7 +114,8 @@ entry:
 }
 
 ; CHECK: @fcmp_ule_select_i32
-; CHECK: SETGE_DX10 T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00)
+; CHECK: SETGE_DX10 * T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}},
+; CHECK-NEXT: 1084227584(5.000000e+00)
 define void @fcmp_ule_select_i32(i32 addrspace(1)* %out, float %in) {
 entry:
   %0 = fcmp ule float %in, 5.0
@@ -115,7 +125,8 @@ entry:
 }
 
 ; CHECK: @fcmp_ult_select_fptosi
-; CHECK: SETGT_DX10 T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00)
+; CHECK: SETGT_DX10 * T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}},
+; CHECK-NEXT: 1084227584(5.000000e+00)
 define void @fcmp_ult_select_fptosi(i32 addrspace(1)* %out, float %in) {
 entry:
   %0 = fcmp ult float %in, 5.0
@@ -127,7 +138,8 @@ entry:
 }
 
 ; CHECK: @fcmp_ult_select_i32
-; CHECK: SETGT_DX10 T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00)
+; CHECK: SETGT_DX10 * T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}},
+; CHECK-NEXT: 1084227584(5.000000e+00)
 define void @fcmp_ult_select_i32(i32 addrspace(1)* %out, float %in) {
 entry:
   %0 = fcmp ult float %in, 5.0
diff --git a/test/CodeGen/R600/setcc.v4i32.ll b/test/CodeGen/R600/setcc.ll
index 0752f2e..0752f2e 100644
--- a/test/CodeGen/R600/setcc.v4i32.ll
+++ b/test/CodeGen/R600/setcc.ll
diff --git a/test/CodeGen/R600/seto.ll b/test/CodeGen/R600/seto.ll
index 5ab4b87..4622203 100644
--- a/test/CodeGen/R600/seto.ll
+++ b/test/CodeGen/R600/seto.ll
@@ -1,4 +1,4 @@
-;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s
+;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
 
 ;CHECK: V_CMP_O_F32_e64 SGPR0_SGPR1, VGPR0, VGPR0, 0, 0, 0, 0
 
diff --git a/test/CodeGen/R600/setuo.ll b/test/CodeGen/R600/setuo.ll
index 3208355..0bf5801 100644
--- a/test/CodeGen/R600/setuo.ll
+++ b/test/CodeGen/R600/setuo.ll
@@ -1,4 +1,4 @@
-;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s
+;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
 
 ;CHECK: V_CMP_U_F32_e64 SGPR0_SGPR1, VGPR0, VGPR0, 0, 0, 0, 0
 
diff --git a/test/CodeGen/R600/shl.ll b/test/CodeGen/R600/shl.ll
new file mode 100644
index 0000000..43cc1e2
--- /dev/null
+++ b/test/CodeGen/R600/shl.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; CHECK: @shl_v4i32
+; CHECK: LSHL * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: LSHL * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: LSHL * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: LSHL * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @shl_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) {
+  %result = shl <4 x i32> %a, %b
+  store <4 x i32> %result, <4 x i32> addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/sint_to_fp.ll b/test/CodeGen/R600/sint_to_fp.ll
new file mode 100644
index 0000000..91a8eb7
--- /dev/null
+++ b/test/CodeGen/R600/sint_to_fp.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; CHECK: @sint_to_fp_v4i32
+; CHECK: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @sint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+  %value = load <4 x i32> addrspace(1) * %in
+  %result = sitofp <4 x i32> %value to <4 x float>
+  store <4 x float> %result, <4 x float> addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/sra.ll b/test/CodeGen/R600/sra.ll
new file mode 100644
index 0000000..972542d
--- /dev/null
+++ b/test/CodeGen/R600/sra.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; CHECK: @ashr_v4i32
+; CHECK: ASHR * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: ASHR * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: ASHR * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: ASHR * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @ashr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) {
+  %result = ashr <4 x i32> %a, %b
+  store <4 x i32> %result, <4 x i32> addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/srl.ll b/test/CodeGen/R600/srl.ll
new file mode 100644
index 0000000..5f63600
--- /dev/null
+++ b/test/CodeGen/R600/srl.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; CHECK: @lshr_v4i32
+; CHECK: LSHR * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: LSHR * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: LSHR * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: LSHR * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @lshr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) {
+  %result = lshr <4 x i32> %a, %b
+  store <4 x i32> %result, <4 x i32> addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/store.ll b/test/CodeGen/R600/store.ll
new file mode 100644
index 0000000..4d673f3
--- /dev/null
+++ b/test/CodeGen/R600/store.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
+; RUN: llc < %s -march=r600 -mcpu=verde | FileCheck --check-prefix=SI-CHECK %s
+
+; floating-point store
+; EG-CHECK: @store_f32
+; EG-CHECK: RAT_WRITE_CACHELESS_32_eg T{{[0-9]+\.X, T[0-9]+\.X}}, 1
+; SI-CHECK: @store_f32
+; SI-CHECK: BUFFER_STORE_DWORD
+
+define void @store_f32(float addrspace(1)* %out, float %in) {
+  store float %in, float addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/store.r600.ll b/test/CodeGen/R600/store.r600.ll
new file mode 100644
index 0000000..5ffb7f1
--- /dev/null
+++ b/test/CodeGen/R600/store.r600.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
+
+; XXX: Merge this test into store.ll once v4i32 and v4f32 stores are supported on SI
+
+; v4i32 store
+; EG-CHECK: @store_v4i32
+; EG-CHECK: RAT_WRITE_CACHELESS_128 T{{[0-9]+\.XYZW, T[0-9]+\.X}}, 1
+
+define void @store_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+  %1 = load <4 x i32> addrspace(1) * %in
+  store <4 x i32> %1, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+; v4f32 store
+; EG-CHECK: @store_v4f32
+; EG-CHECK: RAT_WRITE_CACHELESS_128 T{{[0-9]+\.XYZW, T[0-9]+\.X}}, 1
+define void @store_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
+  %1 = load <4 x float> addrspace(1) * %in
+  store <4 x float> %1, <4 x float> addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/store.v4f32.ll b/test/CodeGen/R600/store.v4f32.ll
deleted file mode 100644
index 8b0d244..0000000
--- a/test/CodeGen/R600/store.v4f32.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-
-;CHECK: RAT_WRITE_CACHELESS_128 T{{[0-9]+\.XYZW, T[0-9]+\.X}}, 1
-
-define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
-  %1 = load <4 x float> addrspace(1) * %in
-  store <4 x float> %1, <4 x float> addrspace(1)* %out
-  ret void
-}
diff --git a/test/CodeGen/R600/store.v4i32.ll b/test/CodeGen/R600/store.v4i32.ll
deleted file mode 100644
index a659815..0000000
--- a/test/CodeGen/R600/store.v4i32.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-
-;CHECK: RAT_WRITE_CACHELESS_128 T{{[0-9]+\.XYZW, T[0-9]+\.X}}, 1
-
-define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
-  %1 = load <4 x i32> addrspace(1) * %in
-  store <4 x i32> %1, <4 x i32> addrspace(1)* %out
-  ret void
-}
diff --git a/test/CodeGen/R600/sub.ll b/test/CodeGen/R600/sub.ll
new file mode 100644
index 0000000..12bfba3
--- /dev/null
+++ b/test/CodeGen/R600/sub.ll
@@ -0,0 +1,15 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+;CHECK: SUB_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: SUB_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: SUB_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+;CHECK: SUB_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+  %rhs_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 ; second vector of %in
+  %lhs = load <4 x i32> addrspace(1)* %in
+  %rhs = load <4 x i32> addrspace(1)* %rhs_ptr
+  %diff = sub <4 x i32> %lhs, %rhs ; element-wise %in[0] - %in[1]
+  store <4 x i32> %diff, <4 x i32> addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/udiv.v4i32.ll b/test/CodeGen/R600/udiv.ll
index 47657a6..b81e366 100644
--- a/test/CodeGen/R600/udiv.v4i32.ll
+++ b/test/CodeGen/R600/udiv.ll
@@ -3,7 +3,7 @@
 ;The code generated by udiv is long and complex and may frequently change.
 ;The goal of this test is to make sure the ISel doesn't fail when it gets
 ;a v4i32 udiv
-;CHECK: RETURN
+;CHECK: CF_END
 
 define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
diff --git a/test/CodeGen/R600/uint_to_fp.ll b/test/CodeGen/R600/uint_to_fp.ll
new file mode 100644
index 0000000..9054fc4
--- /dev/null
+++ b/test/CodeGen/R600/uint_to_fp.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; CHECK: @uint_to_fp_v4i32
+; CHECK: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @uint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+  %ints = load <4 x i32> addrspace(1)* %in
+  %floats = uitofp <4 x i32> %ints to <4 x float> ; one unsigned conversion per lane
+  store <4 x float> %floats, <4 x float> addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/unsupported-cc.ll b/test/CodeGen/R600/unsupported-cc.ll
index b48c591..b311f4c 100644
--- a/test/CodeGen/R600/unsupported-cc.ll
+++ b/test/CodeGen/R600/unsupported-cc.ll
@@ -3,7 +3,8 @@
 ; These tests are for condition codes that are not supported by the hardware
 
 ; CHECK: @slt
-; CHECK: SETGT_INT T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 5(7.006492e-45)
+; CHECK: SETGT_INT * T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}},
+; CHECK-NEXT: 5(7.006492e-45)
 define void @slt(i32 addrspace(1)* %out, i32 %in) {
 entry:
   %0 = icmp slt i32 %in, 5
@@ -13,7 +14,8 @@ entry:
 }
 
 ; CHECK: @ult_i32
-; CHECK: SETGT_UINT T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 5(7.006492e-45)
+; CHECK: SETGT_UINT * T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}},
+; CHECK-NEXT: 5(7.006492e-45)
 define void @ult_i32(i32 addrspace(1)* %out, i32 %in) {
 entry:
   %0 = icmp ult i32 %in, 5
@@ -23,7 +25,8 @@ entry:
 }
 
 ; CHECK: @ult_float
-; CHECK: SETGT T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00)
+; CHECK: SETGT * T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}},
+; CHECK-NEXT: 1084227584(5.000000e+00)
 define void @ult_float(float addrspace(1)* %out, float %in) {
 entry:
   %0 = fcmp ult float %in, 5.0
@@ -33,7 +36,8 @@ entry:
 }
 
 ; CHECK: @olt
-; CHECK: SETGT T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00)
+; CHECK: SETGT * T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}},
+; CHECK-NEXT: 1084227584(5.000000e+00)
 define void @olt(float addrspace(1)* %out, float %in) {
 entry:
   %0 = fcmp olt float %in, 5.0
@@ -43,7 +47,8 @@ entry:
 }
 
 ; CHECK: @sle
-; CHECK: SETGT_INT T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 6(8.407791e-45)
+; CHECK: SETGT_INT * T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}},
+; CHECK-NEXT: 6(8.407791e-45)
 define void @sle(i32 addrspace(1)* %out, i32 %in) {
 entry:
   %0 = icmp sle i32 %in, 5
@@ -53,7 +58,8 @@ entry:
 }
 
 ; CHECK: @ule_i32
-; CHECK: SETGT_UINT T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 6(8.407791e-45)
+; CHECK: SETGT_UINT * T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}},
+; CHECK-NEXT: 6(8.407791e-45)
 define void @ule_i32(i32 addrspace(1)* %out, i32 %in) {
 entry:
   %0 = icmp ule i32 %in, 5
@@ -63,7 +69,8 @@ entry:
 }
 
 ; CHECK: @ule_float
-; CHECK: SETGE T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00)
+; CHECK: SETGE * T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}},
+; CHECK-NEXT: 1084227584(5.000000e+00)
 define void @ule_float(float addrspace(1)* %out, float %in) {
 entry:
   %0 = fcmp ule float %in, 5.0
@@ -73,7 +80,8 @@ entry:
 }
 
 ; CHECK: @ole
-; CHECK: SETGE T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}}, 1084227584(5.000000e+00)
+; CHECK: SETGE * T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}},
+; CHECK-NEXT: 1084227584(5.000000e+00)
 define void @ole(float addrspace(1)* %out, float %in) {
 entry:
   %0 = fcmp ole float %in, 5.0
diff --git a/test/CodeGen/R600/urecip.ll b/test/CodeGen/R600/urecip.ll
new file mode 100644
index 0000000..dad02dd
--- /dev/null
+++ b/test/CodeGen/R600/urecip.ll
@@ -0,0 +1,12 @@
+;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
+
+;CHECK: V_RCP_IFLAG_F32_e32
+
+define void @test(i32 %p, i32 %q) {
+  %quot = udiv i32 %p, %q
+  %bits = bitcast i32 %quot to float ; reinterpret the quotient so it can be exported
+  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %bits, float %bits, float %bits, float %bits)
+  ret void
+}
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
diff --git a/test/CodeGen/R600/urem.v4i32.ll b/test/CodeGen/R600/urem.ll
index 2e7388c..a2cc0bd 100644
--- a/test/CodeGen/R600/urem.v4i32.ll
+++ b/test/CodeGen/R600/urem.ll
@@ -3,7 +3,7 @@
 ;The code generated by urem is long and complex and may frequently change.
 ;The goal of this test is to make sure the ISel doesn't fail when it gets
 ;a v4i32 urem
-;CHECK: RETURN
+;CHECK: CF_END
 
 define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
diff --git a/test/CodeGen/R600/vec4-expand.ll b/test/CodeGen/R600/vec4-expand.ll
deleted file mode 100644
index 8f62bc6..0000000
--- a/test/CodeGen/R600/vec4-expand.ll
+++ /dev/null
@@ -1,53 +0,0 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
-
-; CHECK: @fp_to_sint
-; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-
-define void @fp_to_sint(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
-  %value = load <4 x float> addrspace(1) * %in
-  %result = fptosi <4 x float> %value to <4 x i32>
-  store <4 x i32> %result, <4 x i32> addrspace(1)* %out
-  ret void
-}
-
-; CHECK: @fp_to_uint
-; CHECK: FLT_TO_UINT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: FLT_TO_UINT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: FLT_TO_UINT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: FLT_TO_UINT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-
-define void @fp_to_uint(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
-  %value = load <4 x float> addrspace(1) * %in
-  %result = fptoui <4 x float> %value to <4 x i32>
-  store <4 x i32> %result, <4 x i32> addrspace(1)* %out
-  ret void
-}
-
-; CHECK: @sint_to_fp
-; CHECK: INT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: INT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: INT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: INT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-
-define void @sint_to_fp(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
-  %value = load <4 x i32> addrspace(1) * %in
-  %result = sitofp <4 x i32> %value to <4 x float>
-  store <4 x float> %result, <4 x float> addrspace(1)* %out
-  ret void
-}
-
-; CHECK: @uint_to_fp
-; CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; CHECK: UINT_TO_FLT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-
-define void @uint_to_fp(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
-  %value = load <4 x i32> addrspace(1) * %in
-  %result = uitofp <4 x i32> %value to <4 x float>
-  store <4 x float> %result, <4 x float> addrspace(1)* %out
-  ret void
-}
diff --git a/test/CodeGen/R600/vselect.ll b/test/CodeGen/R600/vselect.ll
new file mode 100644
index 0000000..6e459df
--- /dev/null
+++ b/test/CodeGen/R600/vselect.ll
@@ -0,0 +1,17 @@
+;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; CHECK: @test_select_v4i32
+; CHECK: CNDE_INT T{{[0-9]+\.[XYZW], PV\.[xyzw], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: CNDE_INT * T{{[0-9]+\.[XYZW], PV\.[xyzw], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: CNDE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: CNDE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @test_select_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in0, <4 x i32> addrspace(1)* %in1) {
+entry:
+  %lhs = load <4 x i32> addrspace(1)* %in0
+  %rhs = load <4 x i32> addrspace(1)* %in1
+  %differs = icmp ne <4 x i32> %lhs, %rhs
+  %picked = select <4 x i1> %differs, <4 x i32> %lhs, <4 x i32> %rhs ; per-lane select
+  store <4 x i32> %picked, <4 x i32> addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/xor.ll b/test/CodeGen/R600/xor.ll
new file mode 100644
index 0000000..cf612e0
--- /dev/null
+++ b/test/CodeGen/R600/xor.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
+
+; CHECK: @xor_v4i32
+; CHECK: XOR_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: XOR_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: XOR_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; CHECK: XOR_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+define void @xor_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) {
+  %bits = xor <4 x i32> %a, %b ; element-wise xor of the two vector arguments
+  store <4 x i32> %bits, <4 x i32> addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/SPARC/64abi.ll b/test/CodeGen/SPARC/64abi.ll
new file mode 100644
index 0000000..ec97135
--- /dev/null
+++ b/test/CodeGen/SPARC/64abi.ll
@@ -0,0 +1,378 @@
+; RUN: llc < %s -march=sparcv9 -disable-sparc-delay-filler | FileCheck %s
+
+; CHECK: intarg
+; The save/restore frame is not strictly necessary here, but we would need to
+; refer to %o registers instead.
+; CHECK: save %sp, -128, %sp
+; CHECK: stb %i0, [%i4]
+; CHECK: stb %i1, [%i4]
+; CHECK: sth %i2, [%i4]
+; CHECK: st  %i3, [%i4]
+; CHECK: stx %i4, [%i4]
+; CHECK: st  %i5, [%i4]
+; CHECK: ld [%fp+2227], [[R:%[gilo][0-7]]]
+; CHECK: st  [[R]], [%i4]
+; CHECK: ldx [%fp+2231], [[R:%[gilo][0-7]]]
+; CHECK: stx [[R]], [%i4]
+; CHECK: restore
+define void @intarg(i8  %a0,   ; %i0
+                    i8  %a1,   ; %i1
+                    i16 %a2,   ; %i2
+                    i32 %a3,   ; %i3
+                    i8* %a4,   ; %i4
+                    i32 %a5,   ; %i5
+                    i32 signext %a6,   ; [%fp+BIAS+176]
+                    i8* %a7) { ; [%fp+BIAS+184]
+  store i8 %a0, i8* %a4
+  store i8 %a1, i8* %a4
+  %p16 = bitcast i8* %a4 to i16*
+  store i16 %a2, i16* %p16
+  %p32 = bitcast i8* %a4 to i32*
+  store i32 %a3, i32* %p32
+  %pp = bitcast i8* %a4 to i8**
+  store i8* %a4, i8** %pp
+  store i32 %a5, i32* %p32
+  store i32 %a6, i32* %p32
+  store i8* %a7, i8** %pp
+  ret void
+}
+
+; CHECK: call_intarg
+; 16 saved + 8 args.
+; CHECK: save %sp, -192, %sp
+; Sign-extend and store the full 64 bits.
+; CHECK: sra %i0, 0, [[R:%[gilo][0-7]]]
+; CHECK: stx [[R]], [%sp+2223]
+; Use %o0-%o5 for outgoing arguments
+; CHECK: or %g0, 5, %o5
+; CHECK: call intarg
+; CHECK-NOT: add %sp
+; CHECK: restore
+define void @call_intarg(i32 %i0, i8* %i1) {
+  call void @intarg(i8 0, i8 1, i16 2, i32 3, i8* undef, i32 5, i32 %i0, i8* %i1)
+  ret void
+}
+
+; CHECK: floatarg
+; CHECK: save %sp, -128, %sp
+; CHECK: fstod %f1,
+; CHECK: faddd %f2,
+; CHECK: faddd %f4,
+; CHECK: faddd %f6,
+; CHECK: ld [%fp+2307], [[F:%f[0-9]+]]
+; CHECK: fadds %f31, [[F]]
+define double @floatarg(float %a0,    ; %f1
+                        double %a1,   ; %d2
+                        double %a2,   ; %d4
+                        double %a3,   ; %d6
+                        float %a4,    ; %f9
+                        float %a5,    ; %f11
+                        float %a6,    ; %f13
+                        float %a7,    ; %f15
+                        float %a8,    ; %f17
+                        float %a9,    ; %f19
+                        float %a10,   ; %f21
+                        float %a11,   ; %f23
+                        float %a12,   ; %f25
+                        float %a13,   ; %f27
+                        float %a14,   ; %f29
+                        float %a15,   ; %f31
+                        float %a16,   ; [%fp+BIAS+256] (using 8 bytes)
+                        double %a17) { ; [%fp+BIAS+264] (using 8 bytes)
+  %d0 = fpext float %a0 to double
+  %s1 = fadd double %a1, %d0
+  %s2 = fadd double %a2, %s1
+  %s3 = fadd double %a3, %s2
+  %s16 = fadd float %a15, %a16
+  %d16 = fpext float %s16 to double
+  %s17 = fadd double %d16, %s3
+  ret double %s17
+}
+
+; CHECK: call_floatarg
+; CHECK: save %sp, -272, %sp
+; Store 4 bytes, right-aligned in slot.
+; CHECK: st %f1, [%sp+2307]
+; Store 8 bytes in full slot.
+; CHECK: std %f2, [%sp+2311]
+; CHECK: fmovd %f2, %f4
+; CHECK: call floatarg
+; CHECK-NOT: add %sp
+; CHECK: restore
+define void @call_floatarg(float %f1, double %d2, float %f5, double *%p) {
+  %r = call double @floatarg(float %f5, double %d2, double %d2, double %d2,
+                             float %f5, float %f5,  float %f5,  float %f5,
+                             float %f5, float %f5,  float %f5,  float %f5,
+                             float %f5, float %f5,  float %f5,  float %f5,
+                             float %f1, double %d2)
+  store double %r, double* %p
+  ret void
+}
+
+; CHECK: mixedarg
+; CHECK: fstod %f3
+; CHECK: faddd %f6
+; CHECK: faddd %f16
+; CHECK: ldx [%fp+2231]
+; CHECK: ldx [%fp+2247]
+define void @mixedarg(i8 %a0,      ; %i0
+                      float %a1,   ; %f3
+                      i16 %a2,     ; %i2
+                      double %a3,  ; %d6
+                      i13 %a4,     ; %i4
+                      float %a5,   ; %f11
+                      i64 %a6,     ; [%fp+BIAS+176]
+                      double *%a7, ; [%fp+BIAS+184]
+                      double %a8,  ; %d16
+                      i16* %a9) {  ; [%fp+BIAS+200]
+  %d1 = fpext float %a1 to double
+  %s3 = fadd double %a3, %d1
+  %s8 = fadd double %a8, %s3
+  store double %s8, double* %a7
+  store i16 %a2, i16* %a9
+  ret void
+}
+
+; CHECK: call_mixedarg
+; CHECK: stx %i2, [%sp+2247]
+; CHECK: stx %i0, [%sp+2223]
+; CHECK: fmovd %f2, %f6
+; CHECK: fmovd %f2, %f16
+; CHECK: call mixedarg
+; CHECK-NOT: add %sp
+; CHECK: restore
+define void @call_mixedarg(i64 %i0, double %f2, i16* %i2) {
+  call void @mixedarg(i8 undef,
+                      float undef,
+                      i16 undef,
+                      double %f2,
+                      i13 undef,
+                      float undef,
+                      i64 %i0,
+                      double* undef,
+                      double %f2,
+                      i16* %i2)
+  ret void
+}
+
+; The inreg attribute is used to indicate 32-bit sized struct elements that
+; share an 8-byte slot.
+; CHECK: inreg_fi
+; CHECK: fstoi %f1
+; CHECK: srlx %i0, 32, [[R:%[gilo][0-7]]]
+; CHECK: sub [[R]],
+define i32 @inreg_fi(i32 inreg %a0,     ; high bits of %i0
+                     float inreg %a1) { ; %f1
+  %b1 = fptosi float %a1 to i32
+  %rv = sub i32 %a0, %b1
+  ret i32 %rv
+}
+
+; CHECK: call_inreg_fi
+; Allocate space for 6 arguments, even when only 2 are used.
+; CHECK: save %sp, -176, %sp
+; CHECK: sllx %i1, 32, %o0
+; CHECK: fmovs %f5, %f1
+; CHECK: call inreg_fi
+define void @call_inreg_fi(i32* %p, i32 %i1, float %f5) {
+  %x = call i32 @inreg_fi(i32 %i1, float %f5)
+  ret void
+}
+
+; CHECK: inreg_ff
+; CHECK: fsubs %f0, %f1, %f1
+define float @inreg_ff(float inreg %a0,   ; %f0
+                       float inreg %a1) { ; %f1
+  %rv = fsub float %a0, %a1
+  ret float %rv
+}
+
+; CHECK: call_inreg_ff
+; CHECK: fmovs %f3, %f0
+; CHECK: fmovs %f5, %f1
+; CHECK: call inreg_ff
+define void @call_inreg_ff(i32* %p, float %f3, float %f5) {
+  %x = call float @inreg_ff(float %f3, float %f5)
+  ret void
+}
+
+; CHECK: inreg_if
+; CHECK: fstoi %f0
+; CHECK: sub %i0
+define i32 @inreg_if(float inreg %a0, ; %f0
+                     i32 inreg %a1) { ; low bits of %i0
+  %b0 = fptosi float %a0 to i32
+  %rv = sub i32 %a1, %b0
+  ret i32 %rv
+}
+
+; CHECK: call_inreg_if
+; CHECK: fmovs %f3, %f0
+; CHECK: or %g0, %i2, %o0
+; CHECK: call inreg_if
+define void @call_inreg_if(i32* %p, float %f3, i32 %i2) {
+  %x = call i32 @inreg_if(float %f3, i32 %i2)
+  ret void
+}
+
+; The frontend shouldn't do this. Just pass i64 instead.
+; CHECK: inreg_ii
+; CHECK: srlx %i0, 32, [[R:%[gilo][0-7]]]
+; CHECK: sub %i0, [[R]], %i0
+define i32 @inreg_ii(i32 inreg %a0,   ; high bits of %i0
+                     i32 inreg %a1) { ; low bits of %i0
+  %rv = sub i32 %a1, %a0
+  ret i32 %rv
+}
+
+; CHECK: call_inreg_ii
+; CHECK: srl %i2, 0, [[R2:%[gilo][0-7]]]
+; CHECK: sllx %i1, 32, [[R1:%[gilo][0-7]]]
+; CHECK: or [[R1]], [[R2]], %o0
+; CHECK: call inreg_ii
+define void @call_inreg_ii(i32* %p, i32 %i1, i32 %i2) {
+  %x = call i32 @inreg_ii(i32 %i1, i32 %i2)
+  ret void
+}
+
+; A { i64, i64 } aggregate (structs of up to 32 bytes) is returned in registers.
+; CHECK: ret_i64_pair
+; CHECK: ldx [%i2], %i0
+; CHECK: ldx [%i3], %i1
+define { i64, i64 } @ret_i64_pair(i32 %a0, i32 %a1, i64* %p, i64* %q) {
+  %first = load i64* %p
+  %agg0 = insertvalue { i64, i64 } undef, i64 %first, 0
+  store i64 0, i64* %p ; store sits between the two loads
+  %second = load i64* %q
+  %agg1 = insertvalue { i64, i64 } %agg0, i64 %second, 1
+  ret { i64, i64 } %agg1
+}
+
+; CHECK: call_ret_i64_pair
+; CHECK: call ret_i64_pair
+; CHECK: stx %o0, [%i0]
+; CHECK: stx %o1, [%i0]
+define void @call_ret_i64_pair(i64* %i0) {
+  %rv = call { i64, i64 } @ret_i64_pair(i32 undef, i32 undef,
+                                        i64* undef, i64* undef)
+  %e0 = extractvalue { i64, i64 } %rv, 0
+  store i64 %e0, i64* %i0
+  %e1 = extractvalue { i64, i64 } %rv, 1
+  store i64 %e1, i64* %i0
+  ret void
+}
+
+; This is not a C struct, each member uses 8 bytes.
+; CHECK: ret_i32_float_pair
+; CHECK: ld [%i2], %i0
+; CHECK: ld [%i3], %f3
+define { i32, float } @ret_i32_float_pair(i32 %a0, i32 %a1,
+                                          i32* %p, float* %q) {
+  %r1 = load i32* %p
+  %rv1 = insertvalue { i32, float } undef, i32 %r1, 0
+  store i32 0, i32* %p
+  %r2 = load float* %q
+  %rv2 = insertvalue { i32, float } %rv1, float %r2, 1
+  ret { i32, float } %rv2
+}
+
+; CHECK: call_ret_i32_float_pair
+; CHECK: call ret_i32_float_pair
+; CHECK: st %o0, [%i0]
+; CHECK: st %f3, [%i1]
+define void @call_ret_i32_float_pair(i32* %i0, float* %i1) {
+  %rv = call { i32, float } @ret_i32_float_pair(i32 undef, i32 undef,
+                                                i32* undef, float* undef)
+  %e0 = extractvalue { i32, float } %rv, 0
+  store i32 %e0, i32* %i0
+  %e1 = extractvalue { i32, float } %rv, 1
+  store float %e1, float* %i1
+  ret void
+}
+
+; This is a C struct, each member uses 4 bytes.
+; CHECK: ret_i32_float_packed
+; CHECK: ld [%i2], [[R:%[gilo][0-7]]]
+; CHECK: sllx [[R]], 32, %i0
+; CHECK: ld [%i3], %f1
+define inreg { i32, float } @ret_i32_float_packed(i32 %a0, i32 %a1,
+                                                  i32* %p, float* %q) {
+  %r1 = load i32* %p
+  %rv1 = insertvalue { i32, float } undef, i32 %r1, 0
+  store i32 0, i32* %p
+  %r2 = load float* %q
+  %rv2 = insertvalue { i32, float } %rv1, float %r2, 1
+  ret { i32, float } %rv2
+}
+
+; CHECK: call_ret_i32_float_packed
+; CHECK: call ret_i32_float_packed
+; CHECK: srlx %o0, 32, [[R:%[gilo][0-7]]]
+; CHECK: st [[R]], [%i0]
+; CHECK: st %f1, [%i1]
+define void @call_ret_i32_float_packed(i32* %i0, float* %i1) {
+  %rv = call { i32, float } @ret_i32_float_packed(i32 undef, i32 undef,
+                                                  i32* undef, float* undef)
+  %e0 = extractvalue { i32, float } %rv, 0
+  store i32 %e0, i32* %i0
+  %e1 = extractvalue { i32, float } %rv, 1
+  store float %e1, float* %i1
+  ret void
+}
+
+; The C frontend should use i64 to return { i32, i32 } structs, but verify that
+; we don't miscompile this case where both struct elements are placed in %i0.
+; CHECK: ret_i32_packed
+; CHECK: ld [%i2], [[R1:%[gilo][0-7]]]
+; CHECK: ld [%i3], [[R2:%[gilo][0-7]]]
+; CHECK: sllx [[R2]], 32, [[R3:%[gilo][0-7]]]
+; CHECK: or [[R3]], [[R1]], %i0
+define inreg { i32, i32 } @ret_i32_packed(i32 %a0, i32 %a1,
+                                          i32* %p, i32* %q) {
+  %r1 = load i32* %p
+  %rv1 = insertvalue { i32, i32 } undef, i32 %r1, 1
+  store i32 0, i32* %p
+  %r2 = load i32* %q
+  %rv2 = insertvalue { i32, i32 } %rv1, i32 %r2, 0
+  ret { i32, i32 } %rv2
+}
+
+; CHECK: call_ret_i32_packed
+; CHECK: call ret_i32_packed
+; CHECK: srlx %o0, 32, [[R:%[gilo][0-7]]]
+; CHECK: st [[R]], [%i0]
+; CHECK: st %o0, [%i1]
+define void @call_ret_i32_packed(i32* %i0, i32* %i1) {
+  %rv = call { i32, i32 } @ret_i32_packed(i32 undef, i32 undef,
+                                          i32* undef, i32* undef)
+  %e0 = extractvalue { i32, i32 } %rv, 0
+  store i32 %e0, i32* %i0
+  %e1 = extractvalue { i32, i32 } %rv, 1
+  store i32 %e1, i32* %i1
+  ret void
+}
+
+; The return value must be sign-extended to 64 bits.
+; CHECK: ret_sext
+; CHECK: sra %i0, 0, %i0
+define signext i32 @ret_sext(i32 %a0) {
+  ret i32 %a0
+}
+
+; CHECK: ret_zext
+; CHECK: srl %i0, 0, %i0
+define zeroext i32 @ret_zext(i32 %a0) {
+  ret i32 %a0
+}
+
+; CHECK: ret_nosext
+; CHECK-NOT: sra
+define signext i32 @ret_nosext(i32 signext %a0) {
+  ret i32 %a0
+}
+
+; CHECK: ret_nozext
+; CHECK-NOT: srl
+define zeroext i32 @ret_nozext(i32 zeroext %a0) { ; %a0 arrives zero-extended, so no srl is needed
+  ret i32 %a0
+}
diff --git a/test/CodeGen/SPARC/64bit.ll b/test/CodeGen/SPARC/64bit.ll
index 0d4e191..2bbf7de 100644
--- a/test/CodeGen/SPARC/64bit.ll
+++ b/test/CodeGen/SPARC/64bit.ll
@@ -66,6 +66,12 @@ define i64 @ret_bigimm() {
   ret i64 6800754272627607872
 }
 
+; CHECK: ret_bigimm2
+; CHECK: sethi 1048576
+define i64 @ret_bigimm2() {
+  ret i64 4611686018427387904 ; 0x4000000000000000
+}
+
 ; CHECK: reg_reg_alu
 ; CHECK: add %i0, %i1, [[R0:%[goli][0-7]]]
 ; CHECK: sub [[R0]], %i2, [[R1:%[goli][0-7]]]
@@ -144,3 +150,34 @@ define void @stores(i64* %p, i32* %q, i16* %r, i8* %s) {
 
   ret void
 }
+
+; CHECK: promote_shifts
+; CHECK: ldub [%i0], [[R:%[goli][0-7]]]
+; CHECK: sll [[R]], [[R]], %i0
+define i8 @promote_shifts(i8* %p) {
+  %val = load i8* %p
+  %amt = load i8* %p ; same address as %val; both operands come from one register
+  %shifted = shl i8 %val, %amt
+  ret i8 %shifted
+}
+
+; CHECK: multiply
+; CHECK: mulx %i0, %i1, %i0
+define i64 @multiply(i64 %a, i64 %b) {
+  %product = mul i64 %a, %b ; 64-bit multiply
+  ret i64 %product
+}
+
+; CHECK: signed_divide
+; CHECK: sdivx %i0, %i1, %i0
+define i64 @signed_divide(i64 %a, i64 %b) {
+  %quotient = sdiv i64 %a, %b ; 64-bit signed division
+  ret i64 %quotient
+}
+
+; CHECK: unsigned_divide
+; CHECK: udivx %i0, %i1, %i0
+define i64 @unsigned_divide(i64 %a, i64 %b) {
+  %quotient = udiv i64 %a, %b ; 64-bit unsigned division
+  ret i64 %quotient
+}
diff --git a/test/CodeGen/SPARC/constpool.ll b/test/CodeGen/SPARC/constpool.ll
new file mode 100644
index 0000000..d93a53b
--- /dev/null
+++ b/test/CodeGen/SPARC/constpool.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -march=sparc   -relocation-model=static -code-model=small  | FileCheck --check-prefix=abs32 %s
+; RUN: llc < %s -march=sparcv9 -relocation-model=static -code-model=small  | FileCheck --check-prefix=abs32 %s
+; RUN: llc < %s -march=sparcv9 -relocation-model=static -code-model=medium | FileCheck --check-prefix=abs44 %s
+; RUN: llc < %s -march=sparcv9 -relocation-model=static -code-model=large  | FileCheck --check-prefix=abs64 %s
+; RUN: llc < %s -march=sparc   -relocation-model=pic    -code-model=medium | FileCheck --check-prefix=v8pic32 %s
+; RUN: llc < %s -march=sparcv9 -relocation-model=pic    -code-model=medium | FileCheck --check-prefix=v9pic32 %s
+
+define float @floatCP() {
+entry:
+  ret float 1.000000e+00
+}
+
+; abs32: floatCP
+; abs32: sethi %hi(.LCPI0_0), %[[R:[gilo][0-7]]]
+; abs32: ld [%[[R]]+%lo(.LCPI0_0)], %f
+; abs32: jmp %i7+8
+
+; abs44: floatCP
+; abs44: sethi %h44(.LCPI0_0), %[[R1:[gilo][0-7]]]
+; abs44: add %[[R1]], %m44(.LCPI0_0), %[[R2:[gilo][0-7]]]
+; abs44: sllx %[[R2]], 12, %[[R3:[gilo][0-7]]]
+; abs44: ld [%[[R3]]+%l44(.LCPI0_0)], %f1
+; abs44: jmp %i7+8
+
+; abs64: floatCP
+; abs64: sethi %hi(.LCPI0_0), %[[R1:[gilo][0-7]]]
+; abs64: add %[[R1]], %lo(.LCPI0_0), %[[R2:[gilo][0-7]]]
+; abs64: sethi %hh(.LCPI0_0), %[[R3:[gilo][0-7]]]
+; abs64: add %[[R3]], %hm(.LCPI0_0), %[[R4:[gilo][0-7]]]
+; abs64: sllx %[[R4]], 32, %[[R5:[gilo][0-7]]]
+; abs64: ld [%[[R5]]+%[[R2]]], %f1
+; abs64: jmp %i7+8
+
+; v8pic32: floatCP
+; v8pic32: _GLOBAL_OFFSET_TABLE_
+; v8pic32: sethi %hi(.LCPI0_0), %[[R1:[gilo][0-7]]]
+; v8pic32: add %[[R1]], %lo(.LCPI0_0), %[[Goffs:[gilo][0-7]]]
+; v8pic32: ld [%[[GOT:[gilo][0-7]]]+%[[Goffs]]], %[[Gaddr:[gilo][0-7]]]
+; v8pic32: ld [%[[Gaddr]]], %f0
+; v8pic32: jmp %i7+8
+
+; v9pic32: floatCP
+; v9pic32: _GLOBAL_OFFSET_TABLE_
+; v9pic32: sethi %hi(.LCPI0_0), %[[R1:[gilo][0-7]]]
+; v9pic32: add %[[R1]], %lo(.LCPI0_0), %[[Goffs:[gilo][0-7]]]
+; v9pic32: ldx [%[[GOT:[gilo][0-7]]]+%[[Goffs]]], %[[Gaddr:[gilo][0-7]]]
+; v9pic32: ld [%[[Gaddr]]], %f1
+; v9pic32: jmp %i7+8
diff --git a/test/CodeGen/SPARC/globals.ll b/test/CodeGen/SPARC/globals.ll
new file mode 100644
index 0000000..8d8de58
--- /dev/null
+++ b/test/CodeGen/SPARC/globals.ll
@@ -0,0 +1,50 @@
+; RUN: llc < %s -march=sparc   -relocation-model=static -code-model=small  | FileCheck --check-prefix=abs32 %s
+; RUN: llc < %s -march=sparcv9 -relocation-model=static -code-model=small  | FileCheck --check-prefix=abs32 %s
+; RUN: llc < %s -march=sparcv9 -relocation-model=static -code-model=medium | FileCheck --check-prefix=abs44 %s
+; RUN: llc < %s -march=sparcv9 -relocation-model=static -code-model=large  | FileCheck --check-prefix=abs64 %s
+; RUN: llc < %s -march=sparc   -relocation-model=pic    -code-model=medium | FileCheck --check-prefix=v8pic32 %s
+; RUN: llc < %s -march=sparcv9 -relocation-model=pic    -code-model=medium | FileCheck --check-prefix=v9pic32 %s
+
+@G = external global i8
+
+define zeroext i8 @loadG() {
+  %byte = load i8* @G ; read the external global; its addressing is what the prefixes check
+  ret i8 %byte
+}
+
+; abs32: loadG
+; abs32: sethi %hi(G), %[[R:[gilo][0-7]]]
+; abs32: ldub [%[[R]]+%lo(G)], %i0
+; abs32: jmp %i7+8
+
+; abs44: loadG
+; abs44: sethi %h44(G), %[[R1:[gilo][0-7]]]
+; abs44: add %[[R1]], %m44(G), %[[R2:[gilo][0-7]]]
+; abs44: sllx %[[R2]], 12, %[[R3:[gilo][0-7]]]
+; abs44: ldub [%[[R3]]+%l44(G)], %i0
+; abs44: jmp %i7+8
+
+; abs64: loadG
+; abs64: sethi %hi(G), %[[R1:[gilo][0-7]]]
+; abs64: add %[[R1]], %lo(G), %[[R2:[gilo][0-7]]]
+; abs64: sethi %hh(G), %[[R3:[gilo][0-7]]]
+; abs64: add %[[R3]], %hm(G), %[[R4:[gilo][0-7]]]
+; abs64: sllx %[[R4]], 32, %[[R5:[gilo][0-7]]]
+; abs64: ldub [%[[R5]]+%[[R2]]], %i0
+; abs64: jmp %i7+8
+
+; v8pic32: loadG
+; v8pic32: _GLOBAL_OFFSET_TABLE_
+; v8pic32: sethi %hi(G), %[[R1:[gilo][0-7]]]
+; v8pic32: add %[[R1]], %lo(G), %[[Goffs:[gilo][0-7]]]
+; v8pic32: ld [%[[GOT:[gilo][0-7]]]+%[[Goffs]]], %[[Gaddr:[gilo][0-7]]]
+; v8pic32: ldub [%[[Gaddr]]], %i0
+; v8pic32: jmp %i7+8
+
+; v9pic32: loadG
+; v9pic32: _GLOBAL_OFFSET_TABLE_
+; v9pic32: sethi %hi(G), %[[R1:[gilo][0-7]]]
+; v9pic32: add %[[R1]], %lo(G), %[[Goffs:[gilo][0-7]]]
+; v9pic32: ldx [%[[GOT:[gilo][0-7]]]+%[[Goffs]]], %[[Gaddr:[gilo][0-7]]]
+; v9pic32: ldub [%[[Gaddr]]], %i0
+; v9pic32: jmp %i7+8
diff --git a/test/CodeGen/SPARC/varargs.ll b/test/CodeGen/SPARC/varargs.ll
new file mode 100644
index 0000000..b13f90e
--- /dev/null
+++ b/test/CodeGen/SPARC/varargs.ll
@@ -0,0 +1,75 @@
+; RUN: llc < %s -disable-block-placement | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-n32:64-S128"
+target triple = "sparcv9-sun-solaris"
+
+; CHECK: varargsfunc
+; 128-byte save area + 1 alloca, rounded up to 16-byte alignment.
+; CHECK: save %sp, -144, %sp
+; Store the ... arguments to the argument array. The order is not important.
+; CHECK: stx %i5, [%fp+2215]
+; CHECK: stx %i4, [%fp+2207]
+; CHECK: stx %i3, [%fp+2199]
+; CHECK: stx %i2, [%fp+2191]
+; Store the address of the ... args to %ap at %fp+BIAS+128-8
+; add %fp, 2191, [[R:[gilo][0-7]]]
+; stx [[R]], [%fp+2039]
+define double @varargsfunc(i8* nocapture %fmt, double %sum, ...) {
+entry:
+  %ap = alloca i8*, align 4
+  %ap1 = bitcast i8** %ap to i8*
+  call void @llvm.va_start(i8* %ap1)
+  br label %for.cond
+
+for.cond:
+  %fmt.addr.0 = phi i8* [ %fmt, %entry ], [ %incdec.ptr, %for.cond.backedge ]
+  %sum.addr.0 = phi double [ %sum, %entry ], [ %sum.addr.0.be, %for.cond.backedge ]
+  %incdec.ptr = getelementptr inbounds i8* %fmt.addr.0, i64 1
+  %0 = load i8* %fmt.addr.0, align 1
+  %conv = sext i8 %0 to i32
+  switch i32 %conv, label %sw.default [
+    i32 105, label %sw.bb
+    i32 102, label %sw.bb3
+  ]
+
+; CHECK: sw.bb
+; ldx [%fp+2039], %[[AP:[gilo][0-7]]]
+; add %[[AP]], 4, %[[AP2:[gilo][0-7]]]
+; stx %[[AP2]], [%fp+2039]
+; ld [%[[AP]]]
+sw.bb:
+  %1 = va_arg i8** %ap, i32
+  %conv2 = sitofp i32 %1 to double
+  br label %for.cond.backedge
+
+; CHECK: sw.bb3
+; ldx [%fp+2039], %[[AP:[gilo][0-7]]]
+; add %[[AP]], 8, %[[AP2:[gilo][0-7]]]
+; stx %[[AP2]], [%fp+2039]
+; ldd [%[[AP]]]
+sw.bb3:
+  %2 = va_arg i8** %ap, double
+  br label %for.cond.backedge
+
+for.cond.backedge:
+  %.pn = phi double [ %2, %sw.bb3 ], [ %conv2, %sw.bb ]
+  %sum.addr.0.be = fadd double %.pn, %sum.addr.0
+  br label %for.cond
+
+sw.default:
+  ret double %sum.addr.0
+}
+
+declare void @llvm.va_start(i8*)
+
+@.str = private unnamed_addr constant [4 x i8] c"abc\00", align 1
+
+; CHECK: call_1d
+; The fixed double argument goes in %d2 (matched as %f2); the variadic double goes in %o2.
+; CHECK: sethi 1048576
+; CHECK: , %o2
+; CHECK: , %f2
+define i32 @call_1d() {
+entry:
+  %call = call double (i8*, double, ...)* @varargsfunc(i8* undef, double 1.000000e+00, double 2.000000e+00)
+  ret i32 1
+}
diff --git a/test/CodeGen/SystemZ/addr-01.ll b/test/CodeGen/SystemZ/addr-01.ll
new file mode 100644
index 0000000..c125ffa
--- /dev/null
+++ b/test/CodeGen/SystemZ/addr-01.ll
@@ -0,0 +1,107 @@
+; Test selection of addresses with indices in cases where the address
+; is used once.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Base plus index, with no displacement.
+define void @f1(i64 %addr, i64 %index) {
+; CHECK: f1:
+; CHECK: lb %r0, 0(%r3,%r2)
+; CHECK: br %r14
+  %full = add i64 %addr, %index
+  %p = inttoptr i64 %full to i8 *
+  %byte = load volatile i8 *%p
+  ret void
+}
+
+; Base, index and displacement, with the index added first (order 1).
+define void @f2(i64 %addr, i64 %index) {
+; CHECK: f2:
+; CHECK: lb %r0, 100(%r3,%r2)
+; CHECK: br %r14
+  %indexed = add i64 %addr, %index
+  %full = add i64 %indexed, 100
+  %p = inttoptr i64 %full to i8 *
+  %byte = load volatile i8 *%p
+  ret void
+}
+
+; Base, index and displacement, with the displacement added first (order 2).
+define void @f3(i64 %addr, i64 %index) {
+; CHECK: f3:
+; CHECK: lb %r0, 100(%r3,%r2)
+; CHECK: br %r14
+  %shifted = add i64 %addr, 100
+  %full = add i64 %shifted, %index
+  %p = inttoptr i64 %full to i8 *
+  %byte = load volatile i8 *%p
+  ret void
+}
+
+; Base, index and a subtracted displacement, index added first (order 1).
+define void @f4(i64 %addr, i64 %index) {
+; CHECK: f4:
+; CHECK: lb %r0, -100(%r3,%r2)
+; CHECK: br %r14
+  %indexed = add i64 %addr, %index
+  %full = sub i64 %indexed, 100
+  %p = inttoptr i64 %full to i8 *
+  %byte = load volatile i8 *%p
+  ret void
+}
+
+; Base, index and a subtracted displacement, subtraction first (order 2).
+define void @f5(i64 %addr, i64 %index) {
+; CHECK: f5:
+; CHECK: lb %r0, -100(%r3,%r2)
+; CHECK: br %r14
+  %shifted = sub i64 %addr, 100
+  %full = add i64 %shifted, %index
+  %p = inttoptr i64 %full to i8 *
+  %byte = load volatile i8 *%p
+  ret void
+}
+
+; Base and index, with the displacement supplied by an OR of a masked base.
+define void @f6(i64 %addr, i64 %index) {
+; CHECK: f6:
+; CHECK: nill %r2, 65528
+; CHECK: lb %r0, 6(%r3,%r2)
+; CHECK: br %r14
+  %masked = and i64 %addr, -8
+  %tagged = or i64 %masked, 6
+  %full = add i64 %tagged, %index
+  %p = inttoptr i64 %full to i8 *
+  %byte = load volatile i8 *%p
+  ret void
+}
+
+; As f6, but with no masking, so the OR cannot be used as a displacement.
+define void @f7(i64 %addr, i64 %index) {
+; CHECK: f7:
+; CHECK: oill %r2, 6
+; CHECK: lb %r0, 0(%r3,%r2)
+; CHECK: br %r14
+  %tagged = or i64 %addr, 6
+  %full = add i64 %tagged, %index
+  %p = inttoptr i64 %full to i8 *
+  %byte = load volatile i8 *%p
+  ret void
+}
+
+; As f6, but the OR comes after the index has been added.  Nothing is known
+; about the alignment of %indexed at that point.
+define void @f8(i64 %addr, i64 %index) {
+; CHECK: f8:
+; CHECK: nill %r2, 65528
+; CHECK: agr %r2, %r3
+; CHECK: oill %r2, 6
+; CHECK: lb %r0, 0(%r2)
+; CHECK: br %r14
+  %masked = and i64 %addr, -8
+  %indexed = add i64 %masked, %index
+  %tagged = or i64 %indexed, 6
+  %p = inttoptr i64 %tagged to i8 *
+  %byte = load volatile i8 *%p
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/addr-02.ll b/test/CodeGen/SystemZ/addr-02.ll
new file mode 100644
index 0000000..6772c1d
--- /dev/null
+++ b/test/CodeGen/SystemZ/addr-02.ll
@@ -0,0 +1,116 @@
+; Like addr-01.ll, but the address is also used in a non-address context.
+; The assumption here is that we should match complex addresses where
+; possible, but this might well need to change in future.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Base plus index, with no displacement.
+define void @f1(i64 %addr, i64 %index, i8 **%dst) {
+; CHECK: f1:
+; CHECK: lb %r0, 0(%r3,%r2)
+; CHECK: br %r14
+  %full = add i64 %addr, %index
+  %p = inttoptr i64 %full to i8 *
+  %byte = load volatile i8 *%p
+  store volatile i8 *%p, i8 **%dst
+  ret void
+}
+
+; Base, index and displacement, with the index added first (order 1).
+define void @f2(i64 %addr, i64 %index, i8 **%dst) {
+; CHECK: f2:
+; CHECK: lb %r0, 100(%r3,%r2)
+; CHECK: br %r14
+  %indexed = add i64 %addr, %index
+  %full = add i64 %indexed, 100
+  %p = inttoptr i64 %full to i8 *
+  %byte = load volatile i8 *%p
+  store volatile i8 *%p, i8 **%dst
+  ret void
+}
+
+; Base, index and displacement, with the displacement added first (order 2).
+define void @f3(i64 %addr, i64 %index, i8 **%dst) {
+; CHECK: f3:
+; CHECK: lb %r0, 100(%r3,%r2)
+; CHECK: br %r14
+  %shifted = add i64 %addr, 100
+  %full = add i64 %shifted, %index
+  %p = inttoptr i64 %full to i8 *
+  %byte = load volatile i8 *%p
+  store volatile i8 *%p, i8 **%dst
+  ret void
+}
+
+; Base, index and a subtracted displacement, index added first (order 1).
+define void @f4(i64 %addr, i64 %index, i8 **%dst) {
+; CHECK: f4:
+; CHECK: lb %r0, -100(%r3,%r2)
+; CHECK: br %r14
+  %indexed = add i64 %addr, %index
+  %full = sub i64 %indexed, 100
+  %p = inttoptr i64 %full to i8 *
+  %byte = load volatile i8 *%p
+  store volatile i8 *%p, i8 **%dst
+  ret void
+}
+
+; Base, index and a subtracted displacement, subtraction first (order 2).
+define void @f5(i64 %addr, i64 %index, i8 **%dst) {
+; CHECK: f5:
+; CHECK: lb %r0, -100(%r3,%r2)
+; CHECK: br %r14
+  %shifted = sub i64 %addr, 100
+  %full = add i64 %shifted, %index
+  %p = inttoptr i64 %full to i8 *
+  %byte = load volatile i8 *%p
+  store volatile i8 *%p, i8 **%dst
+  ret void
+}
+
+; Base and index, with the displacement supplied by an OR of a masked base.
+define void @f6(i64 %addr, i64 %index, i8 **%dst) {
+; CHECK: f6:
+; CHECK: nill %r2, 65528
+; CHECK: lb %r0, 6(%r3,%r2)
+; CHECK: br %r14
+  %masked = and i64 %addr, -8
+  %tagged = or i64 %masked, 6
+  %full = add i64 %tagged, %index
+  %p = inttoptr i64 %full to i8 *
+  %byte = load volatile i8 *%p
+  store volatile i8 *%p, i8 **%dst
+  ret void
+}
+
+; As f6, but with no masking, so the OR cannot be used as a displacement.
+define void @f7(i64 %addr, i64 %index, i8 **%dst) {
+; CHECK: f7:
+; CHECK: oill %r2, 6
+; CHECK: lb %r0, 0(%r3,%r2)
+; CHECK: br %r14
+  %tagged = or i64 %addr, 6
+  %full = add i64 %tagged, %index
+  %p = inttoptr i64 %full to i8 *
+  %byte = load volatile i8 *%p
+  store volatile i8 *%p, i8 **%dst
+  ret void
+}
+
+; As f6, but the OR comes after the index has been added.  Nothing is known
+; about the alignment of %indexed at that point.
+define void @f8(i64 %addr, i64 %index, i8 **%dst) {
+; CHECK: f8:
+; CHECK: nill %r2, 65528
+; CHECK: agr %r2, %r3
+; CHECK: oill %r2, 6
+; CHECK: lb %r0, 0(%r2)
+; CHECK: br %r14
+  %masked = and i64 %addr, -8
+  %indexed = add i64 %masked, %index
+  %tagged = or i64 %indexed, 6
+  %p = inttoptr i64 %tagged to i8 *
+  %byte = load volatile i8 *%p
+  store volatile i8 *%p, i8 **%dst
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/addr-03.ll b/test/CodeGen/SystemZ/addr-03.ll
new file mode 100644
index 0000000..dbdb9f1
--- /dev/null
+++ b/test/CodeGen/SystemZ/addr-03.ll
@@ -0,0 +1,48 @@
+; Test constant addresses, unlikely as they are.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+define void @f1() {
+; CHECK: f1:
+; CHECK: lb %r0, 0
+; CHECK: br %r14
+  %p = inttoptr i64 0 to i8 *
+  %byte = load volatile i8 *%p
+  ret void
+}
+
+define void @f2() {
+; CHECK: f2:
+; CHECK: lb %r0, -524288
+; CHECK: br %r14
+  %p = inttoptr i64 -524288 to i8 *
+  %byte = load volatile i8 *%p
+  ret void
+}
+
+define void @f3() {
+; CHECK: f3:
+; CHECK-NOT: lb %r0, -524289
+; CHECK: br %r14
+  %p = inttoptr i64 -524289 to i8 *
+  %byte = load volatile i8 *%p
+  ret void
+}
+
+define void @f4() {
+; CHECK: f4:
+; CHECK: lb %r0, 524287
+; CHECK: br %r14
+  %p = inttoptr i64 524287 to i8 *
+  %byte = load volatile i8 *%p
+  ret void
+}
+
+define void @f5() {
+; CHECK: f5:
+; CHECK-NOT: lb %r0, 524288
+; CHECK: br %r14
+  %p = inttoptr i64 524288 to i8 *
+  %byte = load volatile i8 *%p
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/alloca-01.ll b/test/CodeGen/SystemZ/alloca-01.ll
new file mode 100644
index 0000000..1852c91
--- /dev/null
+++ b/test/CodeGen/SystemZ/alloca-01.ll
@@ -0,0 +1,82 @@
+; Test variable-sized allocas and addresses based on them in cases where
+; stack arguments are needed.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK1
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK2
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-A
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-B
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-C
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-D
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-E
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-FP
+
+declare i64 @bar(i8 *%a, i8 *%b, i8 *%c, i8 *%d, i8 *%e, i64 %f, i64 %g)
+
+; Allocate %length bytes and take addresses based on the result.
+; There are two stack arguments, so an offset of 160 + 2 * 8 == 176
+; is added to the copy of %r15.
+define i64 @f1(i64 %length, i64 %index) {
+; The full allocation sequence is:
+;
+;    la %r0, 7(%r2)      1
+;    nill %r0, 0xfff8    1
+;    lgr %r1, %r15         2
+;    sgr %r1, %r0        1 2
+;    lgr %r15, %r1         2
+;
+; The third instruction does not depend on the first two, so check for
+; two fully-ordered sequences.
+;
+; FIXME: a better sequence would be:
+;
+;    lgr %r1, %r15
+;    sgr %r1, %r2
+;    nill %r1, 0xfff8
+;    lgr %r15, %r1
+;
+; CHECK1: f1:
+; CHECK1: la %r0, 7(%r2)
+; CHECK1: nill %r0, 65528
+; CHECK1: sgr %r1, %r0
+; CHECK1: lgr %r15, %r1
+;
+; CHECK2: f1:
+; CHECK2: lgr %r1, %r15
+; CHECK2: sgr %r1, %r0
+; CHECK2: lgr %r15, %r1
+;
+; CHECK-A: f1:
+; CHECK-A: lgr %r15, %r1
+; CHECK-A: la %r2, 176(%r1)
+;
+; CHECK-B: f1:
+; CHECK-B: lgr %r15, %r1
+; CHECK-B: la %r3, 177(%r1)
+;
+; CHECK-C: f1:
+; CHECK-C: lgr %r15, %r1
+; CHECK-C: la %r4, 4095({{%r3,%r1|%r1,%r3}})
+;
+; CHECK-D: f1:
+; CHECK-D: lgr %r15, %r1
+; CHECK-D: lay %r5, 4096({{%r3,%r1|%r1,%r3}})
+;
+; CHECK-E: f1:
+; CHECK-E: lgr %r15, %r1
+; CHECK-E: lay %r6, 4271({{%r3,%r1|%r1,%r3}})
+;
+; CHECK-FP: f1:
+; CHECK-FP: lgr %r11, %r15
+; CHECK-FP: lmg %r6, %r15, 224(%r11)
+  %a = alloca i8, i64 %length
+  %b = getelementptr i8 *%a, i64 1
+  %cindex = add i64 %index, 3919
+  %c = getelementptr i8 *%a, i64 %cindex
+  %dindex = add i64 %index, 3920
+  %d = getelementptr i8 *%a, i64 %dindex
+  %eindex = add i64 %index, 4095
+  %e = getelementptr i8 *%a, i64 %eindex
+  %count = call i64 @bar(i8 *%a, i8 *%b, i8 *%c, i8 *%d, i8 *%e, i64 0, i64 0)
+  %res = add i64 %count, 1
+  ret i64 %res
+}
diff --git a/test/CodeGen/SystemZ/alloca-02.ll b/test/CodeGen/SystemZ/alloca-02.ll
new file mode 100644
index 0000000..fbb095f
--- /dev/null
+++ b/test/CodeGen/SystemZ/alloca-02.ll
@@ -0,0 +1,50 @@
+; Make sure that the alloca offset isn't lost when the alloca result is
+; used directly in a load or store.  There must always be an LA or LAY.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-A
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-B
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-C
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-D
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-E
+
+declare i64 @bar(i8 *%a)
+
+define i64 @f1(i64 %length, i64 %index) {
+; CHECK-A: f1:
+; CHECK-A: lgr %r15, [[ADDR:%r[1-5]]]
+; CHECK-A: la %r2, 160([[ADDR]])
+; CHECK-A: mvi 0(%r2), 0
+;
+; CHECK-B: f1:
+; CHECK-B: lgr %r15, [[ADDR:%r[1-5]]]
+; CHECK-B: la %r2, 160([[ADDR]])
+; CHECK-B: mvi 4095(%r2), 1
+;
+; CHECK-C: f1:
+; CHECK-C: lgr %r15, [[ADDR:%r[1-5]]]
+; CHECK-C: la [[TMP:%r[1-5]]], 160(%r3,[[ADDR]])
+; CHECK-C: mvi 0([[TMP]]), 2
+;
+; CHECK-D: f1:
+; CHECK-D: lgr %r15, [[ADDR:%r[1-5]]]
+; CHECK-D: la [[TMP:%r[1-5]]], 160(%r3,[[ADDR]])
+; CHECK-D: mvi 4095([[TMP]]), 3
+;
+; CHECK-E: f1:
+; CHECK-E: lgr %r15, [[ADDR:%r[1-5]]]
+; CHECK-E: la [[TMP:%r[1-5]]], 160(%r3,[[ADDR]])
+; CHECK-E: mviy 4096([[TMP]]), 4
+  %a = alloca i8, i64 %length
+  store i8 0, i8 *%a
+  %b = getelementptr i8 *%a, i64 4095
+  store i8 1, i8 *%b
+  %c = getelementptr i8 *%a, i64 %index
+  store i8 2, i8 *%c
+  %d = getelementptr i8 *%c, i64 4095
+  store i8 3, i8 *%d
+  %e = getelementptr i8 *%d, i64 1
+  store i8 4, i8 *%e
+  %count = call i64 @bar(i8 *%a)
+  %res = add i64 %count, 1
+  ret i64 %res
+}
diff --git a/test/CodeGen/SystemZ/and-01.ll b/test/CodeGen/SystemZ/and-01.ll
new file mode 100644
index 0000000..8dd106b
--- /dev/null
+++ b/test/CodeGen/SystemZ/and-01.ll
@@ -0,0 +1,129 @@
+; Test 32-bit ANDs in which the second operand is variable.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Register-register form: NR.
+define i32 @f1(i32 %a, i32 %b) {
+; CHECK: f1:
+; CHECK: nr %r2, %r3
+; CHECK: br %r14
+  %res = and i32 %a, %b
+  ret i32 %res
+}
+
+; N at the low end of its range (displacement 0).
+define i32 @f2(i32 %a, i32 *%src) {
+; CHECK: f2:
+; CHECK: n %r2, 0(%r3)
+; CHECK: br %r14
+  %rhs = load i32 *%src
+  %res = and i32 %a, %rhs
+  ret i32 %res
+}
+
+; N at the high end of its aligned range (displacement 4092).
+define i32 @f3(i32 %a, i32 *%src) {
+; CHECK: f3:
+; CHECK: n %r2, 4092(%r3)
+; CHECK: br %r14
+  %p = getelementptr i32 *%src, i64 1023
+  %rhs = load i32 *%p
+  %res = and i32 %a, %rhs
+  ret i32 %res
+}
+
+; One word further up: NY should be chosen in place of N.
+define i32 @f4(i32 %a, i32 *%src) {
+; CHECK: f4:
+; CHECK: ny %r2, 4096(%r3)
+; CHECK: br %r14
+  %p = getelementptr i32 *%src, i64 1024
+  %rhs = load i32 *%p
+  %res = and i32 %a, %rhs
+  ret i32 %res
+}
+
+; NY at the high end of its aligned range (displacement 524284).
+define i32 @f5(i32 %a, i32 *%src) {
+; CHECK: f5:
+; CHECK: ny %r2, 524284(%r3)
+; CHECK: br %r14
+  %p = getelementptr i32 *%src, i64 131071
+  %rhs = load i32 *%p
+  %res = and i32 %a, %rhs
+  ret i32 %res
+}
+
+; One word further up: the address now has to be computed separately.
+; This exact sequence is not required; alternatives would be acceptable.
+define i32 @f6(i32 %a, i32 *%src) {
+; CHECK: f6:
+; CHECK: agfi %r3, 524288
+; CHECK: n %r2, 0(%r3)
+; CHECK: br %r14
+  %p = getelementptr i32 *%src, i64 131072
+  %rhs = load i32 *%p
+  %res = and i32 %a, %rhs
+  ret i32 %res
+}
+
+; NY at the high end of its negative aligned range (displacement -4).
+define i32 @f7(i32 %a, i32 *%src) {
+; CHECK: f7:
+; CHECK: ny %r2, -4(%r3)
+; CHECK: br %r14
+  %p = getelementptr i32 *%src, i64 -1
+  %rhs = load i32 *%p
+  %res = and i32 %a, %rhs
+  ret i32 %res
+}
+
+; NY at the low end of its range (displacement -524288).
+define i32 @f8(i32 %a, i32 *%src) {
+; CHECK: f8:
+; CHECK: ny %r2, -524288(%r3)
+; CHECK: br %r14
+  %p = getelementptr i32 *%src, i64 -131072
+  %rhs = load i32 *%p
+  %res = and i32 %a, %rhs
+  ret i32 %res
+}
+
+; One word further down: the address now has to be computed separately.
+; This exact sequence is not required; alternatives would be acceptable.
+define i32 @f9(i32 %a, i32 *%src) {
+; CHECK: f9:
+; CHECK: agfi %r3, -524292
+; CHECK: n %r2, 0(%r3)
+; CHECK: br %r14
+  %p = getelementptr i32 *%src, i64 -131073
+  %rhs = load i32 *%p
+  %res = and i32 %a, %rhs
+  ret i32 %res
+}
+
+; N should accept an index register.
+define i32 @f10(i32 %a, i64 %src, i64 %index) {
+; CHECK: f10:
+; CHECK: n %r2, 4092({{%r4,%r3|%r3,%r4}})
+; CHECK: br %r14
+  %indexed = add i64 %src, %index
+  %full = add i64 %indexed, 4092
+  %p = inttoptr i64 %full to i32 *
+  %rhs = load i32 *%p
+  %res = and i32 %a, %rhs
+  ret i32 %res
+}
+
+; NY should accept an index register.
+define i32 @f11(i32 %a, i64 %src, i64 %index) {
+; CHECK: f11:
+; CHECK: ny %r2, 4096({{%r4,%r3|%r3,%r4}})
+; CHECK: br %r14
+  %indexed = add i64 %src, %index
+  %full = add i64 %indexed, 4096
+  %p = inttoptr i64 %full to i32 *
+  %rhs = load i32 *%p
+  %res = and i32 %a, %rhs
+  ret i32 %res
+}
diff --git a/test/CodeGen/SystemZ/and-02.ll b/test/CodeGen/SystemZ/and-02.ll
new file mode 100644
index 0000000..a0fff81
--- /dev/null
+++ b/test/CodeGen/SystemZ/and-02.ll
@@ -0,0 +1,93 @@
+; Test 32-bit ANDs in which the second operand is constant.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Lowest useful NILF value.
+define i32 @f1(i32 %a) {
+; CHECK: f1:
+; CHECK: nilf %r2, 1
+; CHECK: br %r14
+  %res = and i32 %a, 1
+  ret i32 %res
+}
+
+; Highest 16-bit constant that still has to be handled by NILF.
+define i32 @f2(i32 %a) {
+; CHECK: f2:
+; CHECK: nilf %r2, 65534
+; CHECK: br %r14
+  %res = and i32 %a, 65534
+  ret i32 %res
+}
+
+; An AND with 0xffff is a zero extension from i16.
+define i32 @f3(i32 %a) {
+; CHECK: f3:
+; CHECK: llhr %r2, %r2
+; CHECK: br %r14
+  %res = and i32 %a, 65535
+  ret i32 %res
+}
+
+; The next value up has to use NILF again.
+define i32 @f4(i32 %a) {
+; CHECK: f4:
+; CHECK: nilf %r2, 65536
+; CHECK: br %r14
+  %res = and i32 %a, 65536
+  ret i32 %res
+}
+
+; Lowest useful NILH value.  (An NILH of 0 is handled by LLHR instead.)
+define i32 @f5(i32 %a) {
+; CHECK: f5:
+; CHECK: nilh %r2, 1
+; CHECK: br %r14
+  %res = and i32 %a, 131071
+  ret i32 %res
+}
+
+; Highest useful NILF value.
+define i32 @f6(i32 %a) {
+; CHECK: f6:
+; CHECK: nilf %r2, 4294901758
+; CHECK: br %r14
+  %res = and i32 %a, -65538
+  ret i32 %res
+}
+
+; Highest useful NILH value, one above the constant used in f6.
+define i32 @f7(i32 %a) {
+; CHECK: f7:
+; CHECK: nilh %r2, 65534
+; CHECK: br %r14
+  %res = and i32 %a, -65537
+  ret i32 %res
+}
+
+; Low end of the NILL range, one above the constant used in f7.
+define i32 @f8(i32 %a) {
+; CHECK: f8:
+; CHECK: nill %r2, 0
+; CHECK: br %r14
+  %res = and i32 %a, -65536
+  ret i32 %res
+}
+
+; The next value up, which also uses NILL.
+define i32 @f9(i32 %a) {
+; CHECK: f9:
+; CHECK: nill %r2, 1
+; CHECK: br %r14
+  %res = and i32 %a, -65535
+  ret i32 %res
+}
+
+; Highest useful NILL value.
+define i32 @f10(i32 %a) {
+; CHECK: f10:
+; CHECK: nill %r2, 65534
+; CHECK: br %r14
+  %res = and i32 %a, -2
+  ret i32 %res
+}
diff --git a/test/CodeGen/SystemZ/and-03.ll b/test/CodeGen/SystemZ/and-03.ll
new file mode 100644
index 0000000..3fe8d3c
--- /dev/null
+++ b/test/CodeGen/SystemZ/and-03.ll
@@ -0,0 +1,94 @@
+; Test 64-bit ANDs in which the second operand is variable.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Register-register form: NGR.
+define i64 @f1(i64 %a, i64 %b) {
+; CHECK: f1:
+; CHECK: ngr %r2, %r3
+; CHECK: br %r14
+  %res = and i64 %a, %b
+  ret i64 %res
+}
+
+; NG with a zero displacement.
+define i64 @f2(i64 %a, i64 *%src) {
+; CHECK: f2:
+; CHECK: ng %r2, 0(%r3)
+; CHECK: br %r14
+  %rhs = load i64 *%src
+  %res = and i64 %a, %rhs
+  ret i64 %res
+}
+
+; NG at the high end of its aligned range (displacement 524280).
+define i64 @f3(i64 %a, i64 *%src) {
+; CHECK: f3:
+; CHECK: ng %r2, 524280(%r3)
+; CHECK: br %r14
+  %p = getelementptr i64 *%src, i64 65535
+  %rhs = load i64 *%p
+  %res = and i64 %a, %rhs
+  ret i64 %res
+}
+
+; One doubleword further up: the address has to be computed separately.
+; This exact sequence is not required; alternatives would be acceptable.
+define i64 @f4(i64 %a, i64 *%src) {
+; CHECK: f4:
+; CHECK: agfi %r3, 524288
+; CHECK: ng %r2, 0(%r3)
+; CHECK: br %r14
+  %p = getelementptr i64 *%src, i64 65536
+  %rhs = load i64 *%p
+  %res = and i64 %a, %rhs
+  ret i64 %res
+}
+
+; NG at the high end of its negative aligned range (displacement -8).
+define i64 @f5(i64 %a, i64 *%src) {
+; CHECK: f5:
+; CHECK: ng %r2, -8(%r3)
+; CHECK: br %r14
+  %p = getelementptr i64 *%src, i64 -1
+  %rhs = load i64 *%p
+  %res = and i64 %a, %rhs
+  ret i64 %res
+}
+
+; NG at the low end of its range (displacement -524288).
+define i64 @f6(i64 %a, i64 *%src) {
+; CHECK: f6:
+; CHECK: ng %r2, -524288(%r3)
+; CHECK: br %r14
+  %p = getelementptr i64 *%src, i64 -65536
+  %rhs = load i64 *%p
+  %res = and i64 %a, %rhs
+  ret i64 %res
+}
+
+; One doubleword further down: the address has to be computed separately.
+; This exact sequence is not required; alternatives would be acceptable.
+define i64 @f7(i64 %a, i64 *%src) {
+; CHECK: f7:
+; CHECK: agfi %r3, -524296
+; CHECK: ng %r2, 0(%r3)
+; CHECK: br %r14
+  %p = getelementptr i64 *%src, i64 -65537
+  %rhs = load i64 *%p
+  %res = and i64 %a, %rhs
+  ret i64 %res
+}
+
+; NG should accept an index register.
+define i64 @f8(i64 %a, i64 %src, i64 %index) {
+; CHECK: f8:
+; CHECK: ng %r2, 524280({{%r4,%r3|%r3,%r4}})
+; CHECK: br %r14
+  %indexed = add i64 %src, %index
+  %full = add i64 %indexed, 524280
+  %p = inttoptr i64 %full to i64 *
+  %rhs = load i64 *%p
+  %res = and i64 %a, %rhs
+  ret i64 %res
+}
diff --git a/test/CodeGen/SystemZ/and-04.ll b/test/CodeGen/SystemZ/and-04.ll
new file mode 100644
index 0000000..62def60
--- /dev/null
+++ b/test/CodeGen/SystemZ/and-04.ll
@@ -0,0 +1,180 @@
+; Test 64-bit ANDs in which the second operand is constant.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; A mask of 1 has no dedicated 64-bit AND instruction.
+; FIXME: "ngr %r2, %r0" ought to be required here, but two-address
+; optimisations currently force "ngr %r0, %r2; lgr %r2, %r0" instead.
+define i64 @f1(i64 %a) {
+; CHECK: f1:
+; CHECK: lghi %r0, 1
+; CHECK: ngr
+; CHECK: br %r14
+  %res = and i64 %a, 1
+  ret i64 %res
+}
+
+; The same applies to 0xfffe.
+define i64 @f2(i64 %a) {
+; CHECK: f2:
+; CHECK: llill %r0, 65534
+; CHECK: ngr
+; CHECK: br %r14
+  %res = and i64 %a, 65534
+  ret i64 %res
+}
+
+; ...whereas 0xffff is a 16-bit zero extension.
+define i64 @f3(i64 %a) {
+; CHECK: f3:
+; CHECK: llghr %r2, %r2
+; CHECK: br %r14
+  %res = and i64 %a, 65535
+  ret i64 %res
+}
+
+; The next value up, which once more has no dedicated instruction.
+define i64 @f4(i64 %a) {
+; CHECK: f4:
+; CHECK: llilh %r0, 1
+; CHECK: ngr
+; CHECK: br %r14
+  %res = and i64 %a, 65536
+  ret i64 %res
+}
+
+; A mask of 0xfffffffe.
+define i64 @f5(i64 %a) {
+; CHECK: f5:
+; CHECK: lilf %r0, 4294967294
+; CHECK: ngr
+; CHECK: br %r14
+  %res = and i64 %a, 4294967294
+  ret i64 %res
+}
+
+; The next value up is a 32-bit zero extension.
+define i64 @f6(i64 %a) {
+; CHECK: f6:
+; CHECK: llgfr %r2, %r2
+; CHECK: br %r14
+  %res = and i64 %a, 4294967295
+  ret i64 %res
+}
+
+; Lowest useful NIHF value (0x00000001_ffffffff).
+define i64 @f7(i64 %a) {
+; CHECK: f7:
+; CHECK: nihf %r2, 1
+; CHECK: br %r14
+  %res = and i64 %a, 8589934591
+  ret i64 %res
+}
+
+; Low end of the NIHH range (0x0000ffff_ffffffff).
+define i64 @f8(i64 %a) {
+; CHECK: f8:
+; CHECK: nihh %r2, 0
+; CHECK: br %r14
+  %res = and i64 %a, 281474976710655
+  ret i64 %res
+}
+
+; Highest useful NIHH value (0xfffeffff_ffffffff).
+define i64 @f9(i64 %a) {
+; CHECK: f9:
+; CHECK: nihh %r2, 65534
+; CHECK: br %r14
+  %res = and i64 %a, -281474976710657
+  ret i64 %res
+}
+
+; Highest useful NIHF value (0xfffefffe_ffffffff).
+define i64 @f10(i64 %a) {
+; CHECK: f10:
+; CHECK: nihf %r2, 4294901758
+; CHECK: br %r14
+  %res = and i64 %a, -281479271677953
+  ret i64 %res
+}
+
+; Low end of the NIHL range (0xffff0000_ffffffff).
+define i64 @f11(i64 %a) {
+; CHECK: f11:
+; CHECK: nihl %r2, 0
+; CHECK: br %r14
+  %res = and i64 %a, -281470681743361
+  ret i64 %res
+}
+
+; Highest useful NIHL value (0xfffffffe_ffffffff).
+define i64 @f12(i64 %a) {
+; CHECK: f12:
+; CHECK: nihl %r2, 65534
+; CHECK: br %r14
+  %res = and i64 %a, -4294967297
+  ret i64 %res
+}
+
+; Low end of the NILF range (0xffffffff_00000000).
+define i64 @f13(i64 %a) {
+; CHECK: f13:
+; CHECK: nilf %r2, 0
+; CHECK: br %r14
+  %res = and i64 %a, -4294967296
+  ret i64 %res
+}
+
+; Low end of the NILH range (0xffffffff_0000ffff).
+define i64 @f14(i64 %a) {
+; CHECK: f14:
+; CHECK: nilh %r2, 0
+; CHECK: br %r14
+  %res = and i64 %a, -4294901761
+  ret i64 %res
+}
+
+; The next value up has to use NILF.
+define i64 @f15(i64 %a) {
+; CHECK: f15:
+; CHECK: nilf %r2, 65536
+; CHECK: br %r14
+  %res = and i64 %a, -4294901760
+  ret i64 %res
+}
+
+; Maximum useful NILF value (0xffffffff_fffefffe).
+define i64 @f16(i64 %a) {
+; CHECK: f16:
+; CHECK: nilf %r2, 4294901758
+; CHECK: br %r14
+  %res = and i64 %a, -65538
+  ret i64 %res
+}
+
+; Highest useful NILH value, one greater than the constant used in f16.
+define i64 @f17(i64 %a) {
+; CHECK: f17:
+; CHECK: nilh %r2, 65534
+; CHECK: br %r14
+  %res = and i64 %a, -65537
+  ret i64 %res
+}
+
+; Low end of the NILL range, one greater than the constant used in f17.
+define i64 @f18(i64 %a) {
+; CHECK: f18:
+; CHECK: nill %r2, 0
+; CHECK: br %r14
+  %res = and i64 %a, -65536
+  ret i64 %res
+}
+
+; Highest useful NILL value.
+define i64 @f19(i64 %a) {
+; CHECK: f19:
+; CHECK: nill %r2, 65534
+; CHECK: br %r14
+  %res = and i64 %a, -2
+  ret i64 %res
+}
diff --git a/test/CodeGen/SystemZ/and-05.ll b/test/CodeGen/SystemZ/and-05.ll
new file mode 100644
index 0000000..4573911
--- /dev/null
+++ b/test/CodeGen/SystemZ/and-05.ll
@@ -0,0 +1,165 @@
+; Test ANDs of a constant into a byte of memory.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Lowest useful constant, written as a signed integer.
+define void @f1(i8 *%ptr) {
+; CHECK: f1:
+; CHECK: ni 0(%r2), 1
+; CHECK: br %r14
+  %old = load i8 *%ptr
+  %new = and i8 %old, -255
+  store i8 %new, i8 *%ptr
+  ret void
+}
+
+; Highest useful constant, written as a signed integer.
+define void @f2(i8 *%ptr) {
+; CHECK: f2:
+; CHECK: ni 0(%r2), 254
+; CHECK: br %r14
+  %old = load i8 *%ptr
+  %new = and i8 %old, -2
+  store i8 %new, i8 *%ptr
+  ret void
+}
+
+; Lowest useful constant, written as an unsigned integer.
+define void @f3(i8 *%ptr) {
+; CHECK: f3:
+; CHECK: ni 0(%r2), 1
+; CHECK: br %r14
+  %old = load i8 *%ptr
+  %new = and i8 %old, 1
+  store i8 %new, i8 *%ptr
+  ret void
+}
+
+; Highest useful constant, written as an unsigned integer.
+define void @f4(i8 *%ptr) {
+; CHECK: f4:
+; CHECK: ni 0(%r2), 254
+; CHECK: br %r14
+  %old = load i8 *%ptr
+  %new = and i8 %old, 254
+  store i8 %new, i8 *%ptr
+  ret void
+}
+
+; NI at the high end of its range (displacement 4095).
+define void @f5(i8 *%src) {
+; CHECK: f5:
+; CHECK: ni 4095(%r2), 127
+; CHECK: br %r14
+  %p = getelementptr i8 *%src, i64 4095
+  %old = load i8 *%p
+  %new = and i8 %old, 127
+  store i8 %new, i8 *%p
+  ret void
+}
+
+; One byte further up: NIY should be chosen in place of NI.
+define void @f6(i8 *%src) {
+; CHECK: f6:
+; CHECK: niy 4096(%r2), 127
+; CHECK: br %r14
+  %p = getelementptr i8 *%src, i64 4096
+  %old = load i8 *%p
+  %new = and i8 %old, 127
+  store i8 %new, i8 *%p
+  ret void
+}
+
+; NIY at the high end of its range (displacement 524287).
+define void @f7(i8 *%src) {
+; CHECK: f7:
+; CHECK: niy 524287(%r2), 127
+; CHECK: br %r14
+  %p = getelementptr i8 *%src, i64 524287
+  %old = load i8 *%p
+  %new = and i8 %old, 127
+  store i8 %new, i8 *%p
+  ret void
+}
+
+; One byte further up: the address has to be computed separately.
+; This exact sequence is not required; alternatives would be acceptable.
+define void @f8(i8 *%src) {
+; CHECK: f8:
+; CHECK: agfi %r2, 524288
+; CHECK: ni 0(%r2), 127
+; CHECK: br %r14
+  %p = getelementptr i8 *%src, i64 524288
+  %old = load i8 *%p
+  %new = and i8 %old, 127
+  store i8 %new, i8 *%p
+  ret void
+}
+
+; NIY at the high end of its negative range (displacement -1).
+define void @f9(i8 *%src) {
+; CHECK: f9:
+; CHECK: niy -1(%r2), 127
+; CHECK: br %r14
+  %p = getelementptr i8 *%src, i64 -1
+  %old = load i8 *%p
+  %new = and i8 %old, 127
+  store i8 %new, i8 *%p
+  ret void
+}
+
+; NIY at the low end of its range (displacement -524288).
+define void @f10(i8 *%src) {
+; CHECK: f10:
+; CHECK: niy -524288(%r2), 127
+; CHECK: br %r14
+  %p = getelementptr i8 *%src, i64 -524288
+  %old = load i8 *%p
+  %new = and i8 %old, 127
+  store i8 %new, i8 *%p
+  ret void
+}
+
+; One byte further down: the address has to be computed separately.
+; This exact sequence is not required; alternatives would be acceptable.
+define void @f11(i8 *%src) {
+; CHECK: f11:
+; CHECK: agfi %r2, -524289
+; CHECK: ni 0(%r2), 127
+; CHECK: br %r14
+  %p = getelementptr i8 *%src, i64 -524289
+  %old = load i8 *%p
+  %new = and i8 %old, 127
+  store i8 %new, i8 *%p
+  ret void
+}
+
+; NI must not be given an index register.
+define void @f12(i64 %src, i64 %index) {
+; CHECK: f12:
+; CHECK: agr %r2, %r3
+; CHECK: ni 4095(%r2), 127
+; CHECK: br %r14
+  %indexed = add i64 %src, %index
+  %full = add i64 %indexed, 4095
+  %p = inttoptr i64 %full to i8 *
+  %old = load i8 *%p
+  %new = and i8 %old, 127
+  store i8 %new, i8 *%p
+  ret void
+}
+
+; NIY must not be given an index register.
+define void @f13(i64 %src, i64 %index) {
+; CHECK: f13:
+; CHECK: agr %r2, %r3
+; CHECK: niy 4096(%r2), 127
+; CHECK: br %r14
+  %indexed = add i64 %src, %index
+  %full = add i64 %indexed, 4096
+  %p = inttoptr i64 %full to i8 *
+  %old = load i8 *%p
+  %new = and i8 %old, 127
+  store i8 %new, i8 *%p
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/and-06.ll b/test/CodeGen/SystemZ/and-06.ll
new file mode 100644
index 0000000..bbb5e7b
--- /dev/null
+++ b/test/CodeGen/SystemZ/and-06.ll
@@ -0,0 +1,108 @@
+; Test that we can use NI for byte operations that are expressed as i32
+; or i64 operations.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Byte zero-extended to 32 bits, ANDed with a negative constant.
+define void @f1(i8 *%ptr) {
+; CHECK: f1:
+; CHECK: ni 0(%r2), 254
+; CHECK: br %r14
+  %old = load i8 *%ptr
+  %wide = zext i8 %old to i32
+  %masked = and i32 %wide, -2
+  %new = trunc i32 %masked to i8
+  store i8 %new, i8 *%ptr
+  ret void
+}
+
+; Byte zero-extended to 64 bits, ANDed with a negative constant.
+define void @f2(i8 *%ptr) {
+; CHECK: f2:
+; CHECK: ni 0(%r2), 254
+; CHECK: br %r14
+  %old = load i8 *%ptr
+  %wide = zext i8 %old to i64
+  %masked = and i64 %wide, -2
+  %new = trunc i64 %masked to i8
+  store i8 %new, i8 *%ptr
+  ret void
+}
+
+; Byte zero-extended to 32 bits, ANDed with a positive constant.
+define void @f3(i8 *%ptr) {
+; CHECK: f3:
+; CHECK: ni 0(%r2), 254
+; CHECK: br %r14
+  %old = load i8 *%ptr
+  %wide = zext i8 %old to i32
+  %masked = and i32 %wide, 254
+  %new = trunc i32 %masked to i8
+  store i8 %new, i8 *%ptr
+  ret void
+}
+
+; Byte zero-extended to 64 bits, ANDed with a positive constant.
+define void @f4(i8 *%ptr) {
+; CHECK: f4:
+; CHECK: ni 0(%r2), 254
+; CHECK: br %r14
+  %old = load i8 *%ptr
+  %wide = zext i8 %old to i64
+  %masked = and i64 %wide, 254
+  %new = trunc i64 %masked to i8
+  store i8 %new, i8 *%ptr
+  ret void
+}
+
+; Byte sign-extended to 32 bits, ANDed with a negative constant.
+define void @f5(i8 *%ptr) {
+; CHECK: f5:
+; CHECK: ni 0(%r2), 254
+; CHECK: br %r14
+  %old = load i8 *%ptr
+  %wide = sext i8 %old to i32
+  %masked = and i32 %wide, -2
+  %new = trunc i32 %masked to i8
+  store i8 %new, i8 *%ptr
+  ret void
+}
+
+; Byte sign-extended to 64 bits, ANDed with a negative constant.
+define void @f6(i8 *%ptr) {
+; CHECK: f6:
+; CHECK: ni 0(%r2), 254
+; CHECK: br %r14
+  %old = load i8 *%ptr
+  %wide = sext i8 %old to i64
+  %masked = and i64 %wide, -2
+  %new = trunc i64 %masked to i8
+  store i8 %new, i8 *%ptr
+  ret void
+}
+
+; Byte sign-extended to 32 bits, ANDed with a positive constant.
+define void @f7(i8 *%ptr) {
+; CHECK: f7:
+; CHECK: ni 0(%r2), 254
+; CHECK: br %r14
+  %old = load i8 *%ptr
+  %wide = sext i8 %old to i32
+  %masked = and i32 %wide, 254
+  %new = trunc i32 %masked to i8
+  store i8 %new, i8 *%ptr
+  ret void
+}
+
+; Byte sign-extended to 64 bits, ANDed with a positive constant.
+define void @f8(i8 *%ptr) {
+; CHECK: f8:
+; CHECK: ni 0(%r2), 254
+; CHECK: br %r14
+  %old = load i8 *%ptr
+  %wide = sext i8 %old to i64
+  %masked = and i64 %wide, 254
+  %new = trunc i64 %masked to i8
+  store i8 %new, i8 *%ptr
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/args-01.ll b/test/CodeGen/SystemZ/args-01.ll
new file mode 100644
index 0000000..a6b80c5
--- /dev/null
+++ b/test/CodeGen/SystemZ/args-01.ll
@@ -0,0 +1,74 @@
+; Test the handling of GPR, FPR and stack arguments when no extension
+; type is given.  This type of argument is used for passing structures, etc.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-INT
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-FLOAT
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-DOUBLE
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-FP128-1
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-FP128-2
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-STACK
+
+declare void @bar(i8, i16, i32, i64, float, double, fp128, i64,
+                  float, double, i8, i16, i32, i64, float, double, fp128)
+
+; There are two indirect fp128 slots, one at offset 224 (the first available
+; byte after the outgoing arguments) and one immediately after it at 240.
+; These slots should be set up outside the glued call sequence, so would
+; normally use %f0/%f2 as the first available 128-bit pair.  This choice
+; is hard-coded in the FP128 tests.
+;
+; The order of the CHECK-INT loads doesn't matter.  The same goes for the
+; CHECK-FP128-* stores and the CHECK-STACK stores.  It would be OK to reorder
+; them in response to future code changes.
+define void @foo() {
+; CHECK-INT: foo:
+; CHECK-INT: lhi %r2, 1
+; CHECK-INT: lhi %r3, 2
+; CHECK-INT: lhi %r4, 3
+; CHECK-INT: lghi %r5, 4
+; CHECK-INT: la %r6, {{224|240}}(%r15)
+; CHECK-INT: brasl %r14, bar@PLT
+;
+; CHECK-FLOAT: foo:
+; CHECK-FLOAT: lzer %f0
+; CHECK-FLOAT: lcebr %f4, %f0
+; CHECK-FLOAT: brasl %r14, bar@PLT
+;
+; CHECK-DOUBLE: foo:
+; CHECK-DOUBLE: lzdr %f2
+; CHECK-DOUBLE: lcdbr %f6, %f2
+; CHECK-DOUBLE: brasl %r14, bar@PLT
+;
+; CHECK-FP128-1: foo:
+; CHECK-FP128-1: aghi %r15, -256
+; CHECK-FP128-1: lzxr %f0
+; CHECK-FP128-1: std %f0, 224(%r15)
+; CHECK-FP128-1: std %f2, 232(%r15)
+; CHECK-FP128-1: brasl %r14, bar@PLT
+;
+; CHECK-FP128-2: foo:
+; CHECK-FP128-2: aghi %r15, -256
+; CHECK-FP128-2: lzxr %f0
+; CHECK-FP128-2: std %f0, 240(%r15)
+; CHECK-FP128-2: std %f2, 248(%r15)
+; CHECK-FP128-2: brasl %r14, bar@PLT
+;
+; CHECK-STACK: foo:
+; CHECK-STACK: aghi %r15, -256
+; CHECK-STACK: la [[REGISTER:%r[0-5]+]], {{224|240}}(%r15)
+; CHECK-STACK: stg [[REGISTER]], 216(%r15)
+; CHECK-STACK: mvghi 208(%r15), 0
+; CHECK-STACK: mvhi 204(%r15), 0
+; CHECK-STACK: mvghi 192(%r15), 9
+; CHECK-STACK: mvhi 188(%r15), 8
+; CHECK-STACK: mvhi 180(%r15), 7
+; CHECK-STACK: mvhi 172(%r15), 6
+; CHECK-STACK: mvghi 160(%r15), 5
+; CHECK-STACK: brasl %r14, bar@PLT
+
+  call void @bar (i8 1, i16 2, i32 3, i64 4, float 0.0, double 0.0,
+                  fp128 0xL00000000000000000000000000000000, i64 5,
+                  float -0.0, double -0.0, i8 6, i16 7, i32 8, i64 9, float 0.0,
+                  double 0.0, fp128 0xL00000000000000000000000000000000)
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/args-02.ll b/test/CodeGen/SystemZ/args-02.ll
new file mode 100644
index 0000000..9ea111c
--- /dev/null
+++ b/test/CodeGen/SystemZ/args-02.ll
@@ -0,0 +1,76 @@
+; Test the handling of GPR, FPR and stack arguments when integers are
+; sign-extended.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-INT
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-FLOAT
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-DOUBLE
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-FP128-1
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-FP128-2
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-STACK
+
+declare void @bar(i8 signext, i16 signext, i32 signext, i64, float, double,
+                  fp128, i64, float, double, i8 signext, i16 signext,
+                  i32 signext, i64, float, double, fp128)
+
+; There are two indirect fp128 slots, one at offset 224 (the first available
+; byte after the outgoing arguments) and one immediately after it at 240.
+; These slots should be set up outside the glued call sequence, so would
+; normally use %f0/%f2 as the first available 128-bit pair.  This choice
+; is hard-coded in the FP128 tests.
+;
+; The order of the CHECK-INT loads doesn't matter.  The same goes for the
+; CHECK-FP128-* stores and the CHECK-STACK stores.  It would be OK to reorder
+; them in response to future code changes.
+define void @foo() {
+; CHECK-INT: foo:
+; CHECK-INT: lghi %r2, -1
+; CHECK-INT: lghi %r3, -2
+; CHECK-INT: lghi %r4, -3
+; CHECK-INT: lghi %r5, -4
+; CHECK-INT: la %r6, {{224|240}}(%r15)
+; CHECK-INT: brasl %r14, bar@PLT
+;
+; CHECK-FLOAT: foo:
+; CHECK-FLOAT: lzer %f0
+; CHECK-FLOAT: lcebr %f4, %f0
+; CHECK-FLOAT: brasl %r14, bar@PLT
+;
+; CHECK-DOUBLE: foo:
+; CHECK-DOUBLE: lzdr %f2
+; CHECK-DOUBLE: lcdbr %f6, %f2
+; CHECK-DOUBLE: brasl %r14, bar@PLT
+;
+; CHECK-FP128-1: foo:
+; CHECK-FP128-1: aghi %r15, -256
+; CHECK-FP128-1: lzxr %f0
+; CHECK-FP128-1: std %f0, 224(%r15)
+; CHECK-FP128-1: std %f2, 232(%r15)
+; CHECK-FP128-1: brasl %r14, bar@PLT
+;
+; CHECK-FP128-2: foo:
+; CHECK-FP128-2: aghi %r15, -256
+; CHECK-FP128-2: lzxr %f0
+; CHECK-FP128-2: std %f0, 240(%r15)
+; CHECK-FP128-2: std %f2, 248(%r15)
+; CHECK-FP128-2: brasl %r14, bar@PLT
+;
+; CHECK-STACK: foo:
+; CHECK-STACK: aghi %r15, -256
+; CHECK-STACK: la [[REGISTER:%r[0-5]+]], {{224|240}}(%r15)
+; CHECK-STACK: stg [[REGISTER]], 216(%r15)
+; CHECK-STACK: mvghi 208(%r15), 0
+; CHECK-STACK: mvhi 204(%r15), 0
+; CHECK-STACK: mvghi 192(%r15), -9
+; CHECK-STACK: mvghi 184(%r15), -8
+; CHECK-STACK: mvghi 176(%r15), -7
+; CHECK-STACK: mvghi 168(%r15), -6
+; CHECK-STACK: mvghi 160(%r15), -5
+; CHECK-STACK: brasl %r14, bar@PLT
+
+  call void @bar (i8 -1, i16 -2, i32 -3, i64 -4, float 0.0, double 0.0,
+                  fp128 0xL00000000000000000000000000000000, i64 -5,
+                  float -0.0, double -0.0, i8 -6, i16 -7, i32 -8, i64 -9,
+                  float 0.0, double 0.0,
+                  fp128 0xL00000000000000000000000000000000)
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/args-03.ll b/test/CodeGen/SystemZ/args-03.ll
new file mode 100644
index 0000000..f954d58
--- /dev/null
+++ b/test/CodeGen/SystemZ/args-03.ll
@@ -0,0 +1,78 @@
+; Test the handling of GPR, FPR and stack arguments when integers are
+; zero-extended.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-INT
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-FLOAT
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-DOUBLE
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-FP128-1
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-FP128-2
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-STACK
+
+declare void @bar(i8 zeroext, i16 zeroext, i32 zeroext, i64, float, double,
+                  fp128, i64, float, double, i8 zeroext, i16 zeroext,
+                  i32 zeroext, i64, float, double, fp128)
+
+; There are two indirect fp128 slots, one at offset 224 (the first available
+; byte after the outgoing arguments) and one immediately after it at 240.
+; These slots should be set up outside the glued call sequence, so would
+; normally use %f0/%f2 as the first available 128-bit pair.  This choice
+; is hard-coded in the FP128 tests.
+;
+; The order of the CHECK-INT loads doesn't matter.  The same goes for the
+; CHECK-FP128-* stores and the CHECK-STACK stores.  It would be OK to reorder
+; them in response to future code changes.
+define void @foo() {
+; CHECK-INT: foo:
+; CHECK-INT: lghi %r2, 255
+; CHECK-INT: llill %r3, 65534
+; CHECK-INT: llilf %r4, 4294967293
+; CHECK-INT: lghi %r5, -4
+; CHECK-INT: la %r6, {{224|240}}(%r15)
+; CHECK-INT: brasl %r14, bar@PLT
+;
+; CHECK-FLOAT: foo:
+; CHECK-FLOAT: lzer %f0
+; CHECK-FLOAT: lcebr %f4, %f0
+; CHECK-FLOAT: brasl %r14, bar@PLT
+;
+; CHECK-DOUBLE: foo:
+; CHECK-DOUBLE: lzdr %f2
+; CHECK-DOUBLE: lcdbr %f6, %f2
+; CHECK-DOUBLE: brasl %r14, bar@PLT
+;
+; CHECK-FP128-1: foo:
+; CHECK-FP128-1: aghi %r15, -256
+; CHECK-FP128-1: lzxr %f0
+; CHECK-FP128-1: std %f0, 224(%r15)
+; CHECK-FP128-1: std %f2, 232(%r15)
+; CHECK-FP128-1: brasl %r14, bar@PLT
+;
+; CHECK-FP128-2: foo:
+; CHECK-FP128-2: aghi %r15, -256
+; CHECK-FP128-2: lzxr %f0
+; CHECK-FP128-2: std %f0, 240(%r15)
+; CHECK-FP128-2: std %f2, 248(%r15)
+; CHECK-FP128-2: brasl %r14, bar@PLT
+;
+; CHECK-STACK: foo:
+; CHECK-STACK: aghi %r15, -256
+; CHECK-STACK: la [[REGISTER:%r[0-5]+]], {{224|240}}(%r15)
+; CHECK-STACK: stg [[REGISTER]], 216(%r15)
+; CHECK-STACK: llilf [[AT184:%r[0-5]+]], 4294967288
+; CHECK-STACK: stg [[AT184]], 184(%r15)
+; CHECK-STACK: llill [[AT176:%r[0-5]+]], 65529
+; CHECK-STACK: stg [[AT176]], 176(%r15)
+; CHECK-STACK: mvghi 208(%r15), 0
+; CHECK-STACK: mvhi 204(%r15), 0
+; CHECK-STACK: mvghi 192(%r15), -9
+; CHECK-STACK: mvghi 168(%r15), 250
+; CHECK-STACK: mvghi 160(%r15), -5
+; CHECK-STACK: brasl %r14, bar@PLT
+
+  call void @bar (i8 -1, i16 -2, i32 -3, i64 -4, float 0.0, double 0.0,
+                  fp128 0xL00000000000000000000000000000000, i64 -5,
+                  float -0.0, double -0.0, i8 -6, i16 -7, i32 -8, i64 -9,
+                  float 0.0, double 0.0,
+                  fp128 0xL00000000000000000000000000000000)
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/args-04.ll b/test/CodeGen/SystemZ/args-04.ll
new file mode 100644
index 0000000..8340494
--- /dev/null
+++ b/test/CodeGen/SystemZ/args-04.ll
@@ -0,0 +1,126 @@
+; Test incoming GPR, FPR and stack arguments when no extension type is given.
+; This type of argument is used for passing structures, etc.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Do some arithmetic so that we can see the register being used.
+define i8 @f1(i8 %r2) {
+; CHECK: f1:
+; CHECK: ahi %r2, 1
+; CHECK: br %r14
+  %y = add i8 %r2, 1
+  ret i8 %y
+}
+
+define i16 @f2(i8 %r2, i16 %r3) {
+; CHECK: f2:
+; CHECK: {{lr|lgr}} %r2, %r3
+; CHECK: br %r14
+  ret i16 %r3
+}
+
+define i32 @f3(i8 %r2, i16 %r3, i32 %r4) {
+; CHECK: f3:
+; CHECK: {{lr|lgr}} %r2, %r4
+; CHECK: br %r14
+  ret i32 %r4
+}
+
+define i64 @f4(i8 %r2, i16 %r3, i32 %r4, i64 %r5) {
+; CHECK: f4:
+; CHECK: {{lr|lgr}} %r2, %r5
+; CHECK: br %r14
+  ret i64 %r5
+}
+
+; Do some arithmetic so that we can see the register being used.
+define float @f5(i8 %r2, i16 %r3, i32 %r4, i64 %r5, float %f0) {
+; CHECK: f5:
+; CHECK: aebr %f0, %f0
+; CHECK: br %r14
+  %y = fadd float %f0, %f0
+  ret float %y
+}
+
+define double @f6(i8 %r2, i16 %r3, i32 %r4, i64 %r5, float %f0, double %f2) {
+; CHECK: f6:
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  ret double %f2
+}
+
+; fp128s are passed indirectly.  Do some arithmetic so that the value
+; must be interpreted as a float, rather than as a block of memory to
+; be copied.
+define void @f7(fp128 *%r2, i16 %r3, i32 %r4, i64 %r5, float %f0, double %f2,
+                fp128 %r6) {
+; CHECK: f7:
+; CHECK: ld %f0, 0(%r6)
+; CHECK: ld %f2, 8(%r6)
+; CHECK: axbr %f0, %f0
+; CHECK: std %f0, 0(%r2)
+; CHECK: std %f2, 8(%r2)
+; CHECK: br %r14
+  %y = fadd fp128 %r6, %r6
+  store fp128 %y, fp128 *%r2
+  ret void
+}
+
+define i64 @f8(i8 %r2, i16 %r3, i32 %r4, i64 %r5, float %f0, double %f2,
+               fp128 %r6, i64 %s1) {
+; CHECK: f8:
+; CHECK: lg %r2, 160(%r15)
+; CHECK: br %r14
+  ret i64 %s1
+}
+
+define float @f9(i8 %r2, i16 %r3, i32 %r4, i64 %r5, float %f0, double %f2,
+                 fp128 %r6, i64 %s1, float %f4) {
+; CHECK: f9:
+; CHECK: ler %f0, %f4
+; CHECK: br %r14
+  ret float %f4
+}
+
+define double @f10(i8 %r2, i16 %r3, i32 %r4, i64 %r5, float %f0, double %f2,
+                   fp128 %r6, i64 %s1, float %f4, double %f6) {
+; CHECK: f10:
+; CHECK: ldr %f0, %f6
+; CHECK: br %r14
+  ret double %f6
+}
+
+define i64 @f11(i8 %r2, i16 %r3, i32 %r4, i64 %r5, float %f0, double %f2,
+                fp128 %r6, i64 %s1, float %f4, double %f6, i64 %s2) {
+; CHECK: f11:
+; CHECK: lg %r2, 168(%r15)
+; CHECK: br %r14
+  ret i64 %s2
+}
+
+; Floats are passed right-justified.
+define float @f12(i8 %r2, i16 %r3, i32 %r4, i64 %r5, float %f0, double %f2,
+                  fp128 %r6, i64 %s1, float %f4, double %f6, i64 %s2,
+                  float %s3) {
+; CHECK: f12:
+; CHECK: le %f0, 180(%r15)
+; CHECK: br %r14
+  ret float %s3
+}
+
+; Test a case where the fp128 address is passed on the stack.
+define void @f13(fp128 *%r2, i16 %r3, i32 %r4, i64 %r5, float %f0, double %f2,
+                 fp128 %r6, i64 %s1, float %f4, double %f6, i64 %s2,
+                 float %s3, fp128 %s4) {
+; CHECK: f13:
+; CHECK: lg [[REGISTER:%r[1-5]+]], 184(%r15)
+; CHECK: ld %f0, 0([[REGISTER]])
+; CHECK: ld %f2, 8([[REGISTER]])
+; CHECK: axbr %f0, %f0
+; CHECK: std %f0, 0(%r2)
+; CHECK: std %f2, 8(%r2)
+; CHECK: br %r14
+  %y = fadd fp128 %s4, %s4
+  store fp128 %y, fp128 *%r2
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/args-05.ll b/test/CodeGen/SystemZ/args-05.ll
new file mode 100644
index 0000000..9fa193a
--- /dev/null
+++ b/test/CodeGen/SystemZ/args-05.ll
@@ -0,0 +1,47 @@
+; Test that we take advantage of signext and zeroext annotations.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Zero extension of something that is already zero-extended.
+define void @f1(i32 zeroext %r2, i64 *%r3) {
+; CHECK: f1:
+; CHECK-NOT: %r2
+; CHECK: stg %r2, 0(%r3)
+; CHECK: br %r14
+  %conv = zext i32 %r2 to i64
+  store i64 %conv, i64* %r3
+  ret void
+}
+
+; Sign extension of something that is already sign-extended.
+define void @f2(i32 signext %r2, i64 *%r3) {
+; CHECK: f2:
+; CHECK-NOT: %r2
+; CHECK: stg %r2, 0(%r3)
+; CHECK: br %r14
+  %conv = sext i32 %r2 to i64
+  store i64 %conv, i64* %r3
+  ret void
+}
+
+; Sign extension of something that is already zero-extended.
+define void @f3(i32 zeroext %r2, i64 *%r3) {
+; CHECK: f3:
+; CHECK: lgfr [[REGISTER:%r[0-5]+]], %r2
+; CHECK: stg [[REGISTER]], 0(%r3)
+; CHECK: br %r14
+  %conv = sext i32 %r2 to i64
+  store i64 %conv, i64* %r3
+  ret void
+}
+
+; Zero extension of something that is already sign-extended.
+define void @f4(i32 signext %r2, i64 *%r3) {
+; CHECK: f4:
+; CHECK: llgfr [[REGISTER:%r[0-5]+]], %r2
+; CHECK: stg [[REGISTER]], 0(%r3)
+; CHECK: br %r14
+  %conv = zext i32 %r2 to i64
+  store i64 %conv, i64* %r3
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/args-06.ll b/test/CodeGen/SystemZ/args-06.ll
new file mode 100644
index 0000000..b2f8bee
--- /dev/null
+++ b/test/CodeGen/SystemZ/args-06.ll
@@ -0,0 +1,76 @@
+; Test the padding of unextended integer stack parameters.  These are used
+; to pass structures.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+define i8 @f1(i8 %a, i8 %b, i8 %c, i8 %d, i8 %e, i8 %f, i8 %g) {
+; CHECK: f1:
+; CHECK: ar %r2, %r3
+; CHECK: ar %r2, %r4
+; CHECK: ar %r2, %r5
+; CHECK: ar %r2, %r6
+; CHECK: lb {{%r[0-5]}}, 167(%r15)
+; CHECK: lb {{%r[0-5]}}, 175(%r15)
+; CHECK: br %r14
+  %addb = add i8 %a, %b
+  %addc = add i8 %addb, %c
+  %addd = add i8 %addc, %d
+  %adde = add i8 %addd, %e
+  %addf = add i8 %adde, %f
+  %addg = add i8 %addf, %g
+  ret i8 %addg
+}
+
+define i16 @f2(i16 %a, i16 %b, i16 %c, i16 %d, i16 %e, i16 %f, i16 %g) {
+; CHECK: f2:
+; CHECK: ar %r2, %r3
+; CHECK: ar %r2, %r4
+; CHECK: ar %r2, %r5
+; CHECK: ar %r2, %r6
+; CHECK: lh {{%r[0-5]}}, 166(%r15)
+; CHECK: lh {{%r[0-5]}}, 174(%r15)
+; CHECK: br %r14
+  %addb = add i16 %a, %b
+  %addc = add i16 %addb, %c
+  %addd = add i16 %addc, %d
+  %adde = add i16 %addd, %e
+  %addf = add i16 %adde, %f
+  %addg = add i16 %addf, %g
+  ret i16 %addg
+}
+
+define i32 @f3(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g) {
+; CHECK: f3:
+; CHECK: ar %r2, %r3
+; CHECK: ar %r2, %r4
+; CHECK: ar %r2, %r5
+; CHECK: ar %r2, %r6
+; CHECK: a %r2, 164(%r15)
+; CHECK: a %r2, 172(%r15)
+; CHECK: br %r14
+  %addb = add i32 %a, %b
+  %addc = add i32 %addb, %c
+  %addd = add i32 %addc, %d
+  %adde = add i32 %addd, %e
+  %addf = add i32 %adde, %f
+  %addg = add i32 %addf, %g
+  ret i32 %addg
+}
+
+define i64 @f4(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64 %g) {
+; CHECK: f4:
+; CHECK: agr %r2, %r3
+; CHECK: agr %r2, %r4
+; CHECK: agr %r2, %r5
+; CHECK: agr %r2, %r6
+; CHECK: ag %r2, 160(%r15)
+; CHECK: ag %r2, 168(%r15)
+; CHECK: br %r14
+  %addb = add i64 %a, %b
+  %addc = add i64 %addb, %c
+  %addd = add i64 %addc, %d
+  %adde = add i64 %addd, %e
+  %addf = add i64 %adde, %f
+  %addg = add i64 %addf, %g
+  ret i64 %addg
+}
diff --git a/test/CodeGen/SystemZ/asm-01.ll b/test/CodeGen/SystemZ/asm-01.ll
new file mode 100644
index 0000000..016d04c
--- /dev/null
+++ b/test/CodeGen/SystemZ/asm-01.ll
@@ -0,0 +1,61 @@
+; Test the "Q" asm constraint, which accepts addresses that have a base
+; and a 12-bit displacement.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check the lowest range.
+define void @f1(i64 %base) {
+; CHECK: f1:
+; CHECK: blah 0(%r2)
+; CHECK: br %r14
+  %addr = inttoptr i64 %base to i64 *
+  call void asm "blah $0", "=*Q" (i64 *%addr)
+  ret void
+}
+
+; Check the next lowest byte.
+define void @f2(i64 %base) {
+; CHECK: f2:
+; CHECK: aghi %r2, -1
+; CHECK: blah 0(%r2)
+; CHECK: br %r14
+  %add = add i64 %base, -1
+  %addr = inttoptr i64 %add to i64 *
+  call void asm "blah $0", "=*Q" (i64 *%addr)
+  ret void
+}
+
+; Check the highest range.
+define void @f3(i64 %base) {
+; CHECK: f3:
+; CHECK: blah 4095(%r2)
+; CHECK: br %r14
+  %add = add i64 %base, 4095
+  %addr = inttoptr i64 %add to i64 *
+  call void asm "blah $0", "=*Q" (i64 *%addr)
+  ret void
+}
+
+; Check the next highest byte.
+define void @f4(i64 %base) {
+; CHECK: f4:
+; CHECK: aghi %r2, 4096
+; CHECK: blah 0(%r2)
+; CHECK: br %r14
+  %add = add i64 %base, 4096
+  %addr = inttoptr i64 %add to i64 *
+  call void asm "blah $0", "=*Q" (i64 *%addr)
+  ret void
+}
+
+; Check that indices aren't allowed.
+define void @f5(i64 %base, i64 %index) {
+; CHECK: f5:
+; CHECK: agr %r2, %r3
+; CHECK: blah 0(%r2)
+; CHECK: br %r14
+  %add = add i64 %base, %index
+  %addr = inttoptr i64 %add to i64 *
+  call void asm "blah $0", "=*Q" (i64 *%addr)
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/asm-02.ll b/test/CodeGen/SystemZ/asm-02.ll
new file mode 100644
index 0000000..12d8bec
--- /dev/null
+++ b/test/CodeGen/SystemZ/asm-02.ll
@@ -0,0 +1,52 @@
+; Test the "R" asm constraint, which accepts addresses that have a base,
+; an index and a 12-bit displacement.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check the lowest range.
+define void @f1(i64 %base) {
+; CHECK: f1:
+; CHECK: blah 0(%r2)
+; CHECK: br %r14
+  %addr = inttoptr i64 %base to i64 *
+  call void asm "blah $0", "=*R" (i64 *%addr)
+  ret void
+}
+
+; Check the next lowest byte.
+define void @f2(i64 %base) {
+; CHECK: f2:
+; CHECK: aghi %r2, -1
+; CHECK: blah 0(%r2)
+; CHECK: br %r14
+  %add = add i64 %base, -1
+  %addr = inttoptr i64 %add to i64 *
+  call void asm "blah $0", "=*R" (i64 *%addr)
+  ret void
+}
+
+; Check the highest range.
+define void @f3(i64 %base) {
+; CHECK: f3:
+; CHECK: blah 4095(%r2)
+; CHECK: br %r14
+  %add = add i64 %base, 4095
+  %addr = inttoptr i64 %add to i64 *
+  call void asm "blah $0", "=*R" (i64 *%addr)
+  ret void
+}
+
+; Check the next highest byte.
+define void @f4(i64 %base) {
+; CHECK: f4:
+; CHECK: aghi %r2, 4096
+; CHECK: blah 0(%r2)
+; CHECK: br %r14
+  %add = add i64 %base, 4096
+  %addr = inttoptr i64 %add to i64 *
+  call void asm "blah $0", "=*R" (i64 *%addr)
+  ret void
+}
+
+; FIXME: at the moment the precise constraint is not passed down to
+; target code, so we must conservatively treat "R" as "Q".
diff --git a/test/CodeGen/SystemZ/asm-03.ll b/test/CodeGen/SystemZ/asm-03.ll
new file mode 100644
index 0000000..a6f3f2a
--- /dev/null
+++ b/test/CodeGen/SystemZ/asm-03.ll
@@ -0,0 +1,16 @@
+; Test the "S" asm constraint, which accepts addresses that have a base
+; and a 20-bit displacement.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+define void @f1(i64 %base) {
+; CHECK: f1:
+; CHECK: blah 0(%r2)
+; CHECK: br %r14
+  %addr = inttoptr i64 %base to i64 *
+  call void asm "blah $0", "=*S" (i64 *%addr)
+  ret void
+}
+
+; FIXME: at the moment the precise constraint is not passed down to
+; target code, so we must conservatively treat "S" as "Q".
diff --git a/test/CodeGen/SystemZ/asm-04.ll b/test/CodeGen/SystemZ/asm-04.ll
new file mode 100644
index 0000000..0560949
--- /dev/null
+++ b/test/CodeGen/SystemZ/asm-04.ll
@@ -0,0 +1,16 @@
+; Test the "T" asm constraint, which accepts addresses that have a base,
+; an index and a 20-bit displacement.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+define void @f1(i64 %base) {
+; CHECK: f1:
+; CHECK: blah 0(%r2)
+; CHECK: br %r14
+  %addr = inttoptr i64 %base to i64 *
+  call void asm "blah $0", "=*T" (i64 *%addr)
+  ret void
+}
+
+; FIXME: at the moment the precise constraint is not passed down to
+; target code, so we must conservatively treat "T" as "Q".
diff --git a/test/CodeGen/SystemZ/asm-05.ll b/test/CodeGen/SystemZ/asm-05.ll
new file mode 100644
index 0000000..dae90b0
--- /dev/null
+++ b/test/CodeGen/SystemZ/asm-05.ll
@@ -0,0 +1,15 @@
+; Test the "m" asm constraint, which is equivalent to "T".
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+define void @f1(i64 %base) {
+; CHECK: f1:
+; CHECK: blah 0(%r2)
+; CHECK: br %r14
+  %addr = inttoptr i64 %base to i64 *
+  call void asm "blah $0", "=*m" (i64 *%addr)
+  ret void
+}
+
+; FIXME: at the moment the precise constraint is not passed down to
+; target code, so we must conservatively treat "m" as "Q".
diff --git a/test/CodeGen/SystemZ/asm-06.ll b/test/CodeGen/SystemZ/asm-06.ll
new file mode 100644
index 0000000..c0e24a36
--- /dev/null
+++ b/test/CodeGen/SystemZ/asm-06.ll
@@ -0,0 +1,39 @@
+; Test the GPR constraint "a", which forbids %r0.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+define i64 @f1() {
+; CHECK: f1:
+; CHECK: lhi %r1, 1
+; CHECK: blah %r2 %r1
+; CHECK: br %r14
+  %val = call i64 asm "blah $0 $1", "=r,a" (i8 1)
+  ret i64 %val
+}
+
+define i64 @f2() {
+; CHECK: f2:
+; CHECK: lhi %r1, 2
+; CHECK: blah %r2 %r1
+; CHECK: br %r14
+  %val = call i64 asm "blah $0 $1", "=r,a" (i16 2)
+  ret i64 %val
+}
+
+define i64 @f3() {
+; CHECK: f3:
+; CHECK: lhi %r1, 3
+; CHECK: blah %r2 %r1
+; CHECK: br %r14
+  %val = call i64 asm "blah $0 $1", "=r,a" (i32 3)
+  ret i64 %val
+}
+
+define i64 @f4() {
+; CHECK: f4:
+; CHECK: lghi %r1, 4
+; CHECK: blah %r2 %r1
+; CHECK: br %r14
+  %val = call i64 asm "blah $0 $1", "=r,a" (i64 4)
+  ret i64 %val
+}
diff --git a/test/CodeGen/SystemZ/asm-07.ll b/test/CodeGen/SystemZ/asm-07.ll
new file mode 100644
index 0000000..e07286d
--- /dev/null
+++ b/test/CodeGen/SystemZ/asm-07.ll
@@ -0,0 +1,39 @@
+; Test the GPR constraint "r".
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+define i64 @f1() {
+; CHECK: f1:
+; CHECK: lhi %r0, 1
+; CHECK: blah %r2 %r0
+; CHECK: br %r14
+  %val = call i64 asm "blah $0 $1", "=r,r" (i8 1)
+  ret i64 %val
+}
+
+define i64 @f2() {
+; CHECK: f2:
+; CHECK: lhi %r0, 2
+; CHECK: blah %r2 %r0
+; CHECK: br %r14
+  %val = call i64 asm "blah $0 $1", "=r,r" (i16 2)
+  ret i64 %val
+}
+
+define i64 @f3() {
+; CHECK: f3:
+; CHECK: lhi %r0, 3
+; CHECK: blah %r2 %r0
+; CHECK: br %r14
+  %val = call i64 asm "blah $0 $1", "=r,r" (i32 3)
+  ret i64 %val
+}
+
+define i64 @f4() {
+; CHECK: f4:
+; CHECK: lghi %r0, 4
+; CHECK: blah %r2 %r0
+; CHECK: br %r14
+  %val = call i64 asm "blah $0 $1", "=r,r" (i64 4)
+  ret i64 %val
+}
diff --git a/test/CodeGen/SystemZ/asm-08.ll b/test/CodeGen/SystemZ/asm-08.ll
new file mode 100644
index 0000000..15abc4d
--- /dev/null
+++ b/test/CodeGen/SystemZ/asm-08.ll
@@ -0,0 +1,39 @@
+; Test the GPR constraint "d", which is equivalent to "r".
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+define i64 @f1() {
+; CHECK: f1:
+; CHECK: lhi %r0, 1
+; CHECK: blah %r2 %r0
+; CHECK: br %r14
+  %val = call i64 asm "blah $0 $1", "=d,d" (i8 1)
+  ret i64 %val
+}
+
+define i64 @f2() {
+; CHECK: f2:
+; CHECK: lhi %r0, 2
+; CHECK: blah %r2 %r0
+; CHECK: br %r14
+  %val = call i64 asm "blah $0 $1", "=d,d" (i16 2)
+  ret i64 %val
+}
+
+define i64 @f3() {
+; CHECK: f3:
+; CHECK: lhi %r0, 3
+; CHECK: blah %r2 %r0
+; CHECK: br %r14
+  %val = call i64 asm "blah $0 $1", "=d,d" (i32 3)
+  ret i64 %val
+}
+
+define i64 @f4() {
+; CHECK: f4:
+; CHECK: lghi %r0, 4
+; CHECK: blah %r2 %r0
+; CHECK: br %r14
+  %val = call i64 asm "blah $0 $1", "=d,d" (i64 4)
+  ret i64 %val
+}
diff --git a/test/CodeGen/SystemZ/asm-09.ll b/test/CodeGen/SystemZ/asm-09.ll
new file mode 100644
index 0000000..1541170
--- /dev/null
+++ b/test/CodeGen/SystemZ/asm-09.ll
@@ -0,0 +1,83 @@
+; Test matching operands with the GPR constraint "r".
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+define void @f1(i32 *%dst) {
+; CHECK: f1:
+; CHECK: lhi %r0, 100
+; CHECK: blah %r0
+; CHECK: st %r0, 0(%r2)
+; CHECK: br %r14
+  %val = call i32 asm "blah $0", "=r,0" (i8 100)
+  store i32 %val, i32 *%dst
+  ret void
+}
+
+define void @f2(i32 *%dst) {
+; CHECK: f2:
+; CHECK: lhi %r0, 101
+; CHECK: blah %r0
+; CHECK: st %r0, 0(%r2)
+; CHECK: br %r14
+  %val = call i32 asm "blah $0", "=r,0" (i16 101)
+  store i32 %val, i32 *%dst
+  ret void
+}
+
+define void @f3(i32 *%dst) {
+; CHECK: f3:
+; CHECK: lhi %r0, 102
+; CHECK: blah %r0
+; CHECK: st %r0, 0(%r2)
+; CHECK: br %r14
+  %val = call i32 asm "blah $0", "=r,0" (i32 102)
+  store i32 %val, i32 *%dst
+  ret void
+}
+
+; FIXME: this uses "lhi %r0, 103", but should use "lghi %r0, 103".
+define void @f4(i32 *%dst) {
+; CHECK: f4:
+; CHECK: blah %r0
+; CHECK: st %r0, 0(%r2)
+; CHECK: br %r14
+  %val = call i32 asm "blah $0", "=r,0" (i64 103)
+  store i32 %val, i32 *%dst
+  ret void
+}
+
+define i64 @f5() {
+; CHECK: f5:
+; CHECK: lghi %r2, 104
+; CHECK: blah %r2
+; CHECK: br %r14
+  %val = call i64 asm "blah $0", "=r,0" (i8 104)
+  ret i64 %val
+}
+
+define i64 @f6() {
+; CHECK: f6:
+; CHECK: lghi %r2, 105
+; CHECK: blah %r2
+; CHECK: br %r14
+  %val = call i64 asm "blah $0", "=r,0" (i16 105)
+  ret i64 %val
+}
+
+define i64 @f7() {
+; CHECK: f7:
+; CHECK: lghi %r2, 106
+; CHECK: blah %r2
+; CHECK: br %r14
+  %val = call i64 asm "blah $0", "=r,0" (i32 106)
+  ret i64 %val
+}
+
+define i64 @f8() {
+; CHECK: f8:
+; CHECK: lghi %r2, 107
+; CHECK: blah %r2
+; CHECK: br %r14
+  %val = call i64 asm "blah $0", "=r,0" (i64 107)
+  ret i64 %val
+}
diff --git a/test/CodeGen/SystemZ/asm-10.ll b/test/CodeGen/SystemZ/asm-10.ll
new file mode 100644
index 0000000..676c2028
--- /dev/null
+++ b/test/CodeGen/SystemZ/asm-10.ll
@@ -0,0 +1,30 @@
+; Test the FPR constraint "f".
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+define float @f1() {
+; CHECK: f1:
+; CHECK: lzer %f1
+; CHECK: blah %f0 %f1
+; CHECK: br %r14
+  %val = call float asm "blah $0 $1", "=&f,f" (float 0.0)
+  ret float %val
+}
+
+define double @f2() {
+; CHECK: f2:
+; CHECK: lzdr %f1
+; CHECK: blah %f0 %f1
+; CHECK: br %r14
+  %val = call double asm "blah $0 $1", "=&f,f" (double 0.0)
+  ret double %val
+}
+
+define double @f3() {
+; CHECK: f3:
+; CHECK: lzxr %f1
+; CHECK: blah %f0 %f1
+; CHECK: br %r14
+  %val = call double asm "blah $0 $1", "=&f,f" (fp128 0xL00000000000000000000000000000000)
+  ret double %val
+}
diff --git a/test/CodeGen/SystemZ/asm-11.ll b/test/CodeGen/SystemZ/asm-11.ll
new file mode 100644
index 0000000..9bd8d7c
--- /dev/null
+++ b/test/CodeGen/SystemZ/asm-11.ll
@@ -0,0 +1,41 @@
+; Test the "I" constraint (8-bit unsigned constants).
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test 1 below the first valid value.
+define i32 @f1() {
+; CHECK: f1:
+; CHECK: lhi [[REG:%r[0-5]]], -1
+; CHECK: blah %r2 [[REG]]
+; CHECK: br %r14
+  %val = call i32 asm "blah $0 $1", "=&r,rI" (i32 -1)
+  ret i32 %val
+}
+
+; Test the first valid value.
+define i32 @f2() {
+; CHECK: f2:
+; CHECK: blah %r2 0
+; CHECK: br %r14
+  %val = call i32 asm "blah $0 $1", "=&r,rI" (i32 0)
+  ret i32 %val
+}
+
+; Test the last valid value.
+define i32 @f3() {
+; CHECK: f3:
+; CHECK: blah %r2 255
+; CHECK: br %r14
+  %val = call i32 asm "blah $0 $1", "=&r,rI" (i32 255)
+  ret i32 %val
+}
+
+; Test 1 above the last valid value.
+define i32 @f4() {
+; CHECK: f4:
+; CHECK: lhi [[REG:%r[0-5]]], 256
+; CHECK: blah %r2 [[REG]]
+; CHECK: br %r14
+  %val = call i32 asm "blah $0 $1", "=&r,rI" (i32 256)
+  ret i32 %val
+}
diff --git a/test/CodeGen/SystemZ/asm-12.ll b/test/CodeGen/SystemZ/asm-12.ll
new file mode 100644
index 0000000..dd920f1
--- /dev/null
+++ b/test/CodeGen/SystemZ/asm-12.ll
@@ -0,0 +1,41 @@
+; Test the "J" constraint (12-bit unsigned constants).
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test 1 below the first valid value.
+define i32 @f1() {
+; CHECK: f1:
+; CHECK: lhi [[REG:%r[0-5]]], -1
+; CHECK: blah %r2 [[REG]]
+; CHECK: br %r14
+  %val = call i32 asm "blah $0 $1", "=&r,rJ" (i32 -1)
+  ret i32 %val
+}
+
+; Test the first valid value.
+define i32 @f2() {
+; CHECK: f2:
+; CHECK: blah %r2 0
+; CHECK: br %r14
+  %val = call i32 asm "blah $0 $1", "=&r,rJ" (i32 0)
+  ret i32 %val
+}
+
+; Test the last valid value.
+define i32 @f3() {
+; CHECK: f3:
+; CHECK: blah %r2 4095
+; CHECK: br %r14
+  %val = call i32 asm "blah $0 $1", "=&r,rJ" (i32 4095)
+  ret i32 %val
+}
+
+; Test 1 above the last valid value.
+define i32 @f4() {
+; CHECK: f4:
+; CHECK: lhi [[REG:%r[0-5]]], 4096
+; CHECK: blah %r2 [[REG]]
+; CHECK: br %r14
+  %val = call i32 asm "blah $0 $1", "=&r,rJ" (i32 4096)
+  ret i32 %val
+}
diff --git a/test/CodeGen/SystemZ/asm-13.ll b/test/CodeGen/SystemZ/asm-13.ll
new file mode 100644
index 0000000..af3fdb3
--- /dev/null
+++ b/test/CodeGen/SystemZ/asm-13.ll
@@ -0,0 +1,41 @@
+; Test the "K" constraint (16-bit signed constants).
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test 1 below the first valid value.
+define i32 @f1() {
+; CHECK: f1:
+; CHECK: iilf [[REG:%r[0-5]]], 4294934527
+; CHECK: blah %r2 [[REG]]
+; CHECK: br %r14
+  %val = call i32 asm "blah $0 $1", "=&r,rK" (i32 -32769)
+  ret i32 %val
+}
+
+; Test the first valid value.
+define i32 @f2() {
+; CHECK: f2:
+; CHECK: blah %r2 -32768
+; CHECK: br %r14
+  %val = call i32 asm "blah $0 $1", "=&r,rK" (i32 -32768)
+  ret i32 %val
+}
+
+; Test the last valid value.
+define i32 @f3() {
+; CHECK: f3:
+; CHECK: blah %r2 32767
+; CHECK: br %r14
+  %val = call i32 asm "blah $0 $1", "=&r,rK" (i32 32767)
+  ret i32 %val
+}
+
+; Test 1 above the last valid value.
+define i32 @f4() {
+; CHECK: f4:
+; CHECK: llill [[REG:%r[0-5]]], 32768
+; CHECK: blah %r2 [[REG]]
+; CHECK: br %r14
+  %val = call i32 asm "blah $0 $1", "=&r,rK" (i32 32768)
+  ret i32 %val
+}
diff --git a/test/CodeGen/SystemZ/asm-14.ll b/test/CodeGen/SystemZ/asm-14.ll
new file mode 100644
index 0000000..b6b28d6
--- /dev/null
+++ b/test/CodeGen/SystemZ/asm-14.ll
@@ -0,0 +1,41 @@
+; Test the "L" constraint (20-bit signed constants).
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test 1 below the first valid value.
+define i32 @f1() {
+; CHECK: f1:
+; CHECK: iilf [[REG:%r[0-5]]], 4294443007
+; CHECK: blah %r2 [[REG]]
+; CHECK: br %r14
+  %val = call i32 asm "blah $0 $1", "=&r,rL" (i32 -524289)
+  ret i32 %val
+}
+
+; Test the first valid value.
+define i32 @f2() {
+; CHECK: f2:
+; CHECK: blah %r2 -524288
+; CHECK: br %r14
+  %val = call i32 asm "blah $0 $1", "=&r,rL" (i32 -524288)
+  ret i32 %val
+}
+
+; Test the last valid value.
+define i32 @f3() {
+; CHECK: f3:
+; CHECK: blah %r2 524287
+; CHECK: br %r14
+  %val = call i32 asm "blah $0 $1", "=&r,rL" (i32 524287)
+  ret i32 %val
+}
+
+; Test 1 above the last valid value.
+define i32 @f4() {
+; CHECK: f4:
+; CHECK: llilh [[REG:%r[0-5]]], 8
+; CHECK: blah %r2 [[REG]]
+; CHECK: br %r14
+  %val = call i32 asm "blah $0 $1", "=&r,rL" (i32 524288)
+  ret i32 %val
+}
diff --git a/test/CodeGen/SystemZ/asm-15.ll b/test/CodeGen/SystemZ/asm-15.ll
new file mode 100644
index 0000000..4d0e2b4
--- /dev/null
+++ b/test/CodeGen/SystemZ/asm-15.ll
@@ -0,0 +1,32 @@
+; Test the "M" constraint (0x7fffffff)
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test 1 below the valid value.
+define i32 @f1() {
+; CHECK: f1:
+; CHECK: iilf [[REG:%r[0-5]]], 2147483646
+; CHECK: blah %r2 [[REG]]
+; CHECK: br %r14
+  %val = call i32 asm "blah $0 $1", "=&r,rM" (i32 2147483646)
+  ret i32 %val
+}
+
+; Test the only valid value.
+define i32 @f2() {
+; CHECK: f2:
+; CHECK: blah %r2 2147483647
+; CHECK: br %r14
+  %val = call i32 asm "blah $0 $1", "=&r,rM" (i32 2147483647)
+  ret i32 %val
+}
+
+; Test 1 above the valid value.
+define i32 @f3() {
+; CHECK: f3:
+; CHECK: llilh [[REG:%r[0-5]]], 32768
+; CHECK: blah %r2 [[REG]]
+; CHECK: br %r14
+  %val = call i32 asm "blah $0 $1", "=&r,rM" (i32 2147483648)
+  ret i32 %val
+}
diff --git a/test/CodeGen/SystemZ/asm-16.ll b/test/CodeGen/SystemZ/asm-16.ll
new file mode 100644
index 0000000..4d0e2b4
--- /dev/null
+++ b/test/CodeGen/SystemZ/asm-16.ll
@@ -0,0 +1,32 @@
+; Test the "M" constraint (0x7fffffff)
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test 1 below the valid value.
+define i32 @f1() {
+; CHECK: f1:
+; CHECK: iilf [[REG:%r[0-5]]], 2147483646
+; CHECK: blah %r2 [[REG]]
+; CHECK: br %r14
+  %val = call i32 asm "blah $0 $1", "=&r,rM" (i32 2147483646)
+  ret i32 %val
+}
+
+; Test the only valid value.
+define i32 @f2() {
+; CHECK: f2:
+; CHECK: blah %r2 2147483647
+; CHECK: br %r14
+  %val = call i32 asm "blah $0 $1", "=&r,rM" (i32 2147483647)
+  ret i32 %val
+}
+
+; Test 1 above the valid value.
+define i32 @f3() {
+; CHECK: f3:
+; CHECK: llilh [[REG:%r[0-5]]], 32768
+; CHECK: blah %r2 [[REG]]
+; CHECK: br %r14
+  %val = call i32 asm "blah $0 $1", "=&r,rM" (i32 2147483648)
+  ret i32 %val
+}
diff --git a/test/CodeGen/SystemZ/atomic-load-01.ll b/test/CodeGen/SystemZ/atomic-load-01.ll
new file mode 100644
index 0000000..3e86bcf
--- /dev/null
+++ b/test/CodeGen/SystemZ/atomic-load-01.ll
@@ -0,0 +1,13 @@
+; Test 8-bit atomic loads.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; This is just a placeholder to make sure that loads are handled.
+; The CS-based sequence is probably far too conservative.
+define i8 @f1(i8 *%src) {
+; CHECK: f1:
+; CHECK: cs
+; CHECK: br %r14
+  %val = load atomic i8 *%src seq_cst, align 1
+  ret i8 %val
+}
diff --git a/test/CodeGen/SystemZ/atomic-load-02.ll b/test/CodeGen/SystemZ/atomic-load-02.ll
new file mode 100644
index 0000000..d6168ce
--- /dev/null
+++ b/test/CodeGen/SystemZ/atomic-load-02.ll
@@ -0,0 +1,13 @@
+; Test 16-bit atomic loads.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; This is just a placeholder to make sure that loads are handled.
+; The CS-based sequence is probably far too conservative.
+define i16 @f1(i16 *%src) {
+; CHECK: f1:
+; CHECK: cs
+; CHECK: br %r14
+  %val = load atomic i16 *%src seq_cst, align 2
+  ret i16 %val
+}
diff --git a/test/CodeGen/SystemZ/atomic-load-03.ll b/test/CodeGen/SystemZ/atomic-load-03.ll
new file mode 100644
index 0000000..fcf0cf3
--- /dev/null
+++ b/test/CodeGen/SystemZ/atomic-load-03.ll
@@ -0,0 +1,14 @@
+; Test 32-bit atomic loads.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; This is just a placeholder to make sure that loads are handled.
+; Using CS is probably too conservative.
+define i32 @f1(i32 %dummy, i32 *%src) {
+; CHECK: f1:
+; CHECK: lhi %r2, 0
+; CHECK: cs %r2, %r2, 0(%r3)
+; CHECK: br %r14
+  %val = load atomic i32 *%src seq_cst, align 4
+  ret i32 %val
+}
diff --git a/test/CodeGen/SystemZ/atomic-load-04.ll b/test/CodeGen/SystemZ/atomic-load-04.ll
new file mode 100644
index 0000000..9593d35
--- /dev/null
+++ b/test/CodeGen/SystemZ/atomic-load-04.ll
@@ -0,0 +1,14 @@
+; Test 64-bit atomic loads.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; This is just a placeholder to make sure that loads are handled.
+; Using CSG is probably too conservative.
+define i64 @f1(i64 %dummy, i64 *%src) {
+; CHECK: f1:
+; CHECK: lghi %r2, 0
+; CHECK: csg %r2, %r2, 0(%r3)
+; CHECK: br %r14
+  %val = load atomic i64 *%src seq_cst, align 8
+  ret i64 %val
+}
diff --git a/test/CodeGen/SystemZ/atomic-store-01.ll b/test/CodeGen/SystemZ/atomic-store-01.ll
new file mode 100644
index 0000000..b316e5c
--- /dev/null
+++ b/test/CodeGen/SystemZ/atomic-store-01.ll
@@ -0,0 +1,13 @@
+; Test 8-bit atomic stores.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; This is just a placeholder to make sure that stores are handled.
+; The CS-based sequence is probably far too conservative.
+define void @f1(i8 %val, i8 *%src) {
+; CHECK: f1:
+; CHECK: cs
+; CHECK: br %r14
+  store atomic i8 %val, i8 *%src seq_cst, align 1
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/atomic-store-02.ll b/test/CodeGen/SystemZ/atomic-store-02.ll
new file mode 100644
index 0000000..c761714
--- /dev/null
+++ b/test/CodeGen/SystemZ/atomic-store-02.ll
@@ -0,0 +1,13 @@
+; Test 16-bit atomic stores.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; This is just a placeholder to make sure that stores are handled.
+; The CS-based sequence is probably far too conservative.
+define void @f1(i16 %val, i16 *%src) {
+; CHECK: f1:
+; CHECK: cs
+; CHECK: br %r14
+  store atomic i16 %val, i16 *%src seq_cst, align 2
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/atomic-store-03.ll b/test/CodeGen/SystemZ/atomic-store-03.ll
new file mode 100644
index 0000000..6e29963
--- /dev/null
+++ b/test/CodeGen/SystemZ/atomic-store-03.ll
@@ -0,0 +1,16 @@
+; Test 32-bit atomic stores.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; This is just a placeholder to make sure that stores are handled.
+; Using CS is probably too conservative.
+define void @f1(i32 %val, i32 *%src) {
+; CHECK: f1:
+; CHECK: l %r0, 0(%r3)
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK: cs %r0, %r2, 0(%r3)
+; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: br %r14
+  store atomic i32 %val, i32 *%src seq_cst, align 4
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/atomic-store-04.ll b/test/CodeGen/SystemZ/atomic-store-04.ll
new file mode 100644
index 0000000..7a611c8
--- /dev/null
+++ b/test/CodeGen/SystemZ/atomic-store-04.ll
@@ -0,0 +1,16 @@
+; Test 64-bit atomic stores.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; This is just a placeholder to make sure that stores are handled.
+; Using CSG is probably too conservative.
+define void @f1(i64 %val, i64 *%src) {
+; CHECK: f1:
+; CHECK: lg %r0, 0(%r3)
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK: csg %r0, %r2, 0(%r3)
+; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: br %r14
+  store atomic i64 %val, i64 *%src seq_cst, align 8
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/atomicrmw-add-01.ll b/test/CodeGen/SystemZ/atomicrmw-add-01.ll
new file mode 100644
index 0000000..2a84857
--- /dev/null
+++ b/test/CodeGen/SystemZ/atomicrmw-add-01.ll
@@ -0,0 +1,132 @@
+; Test 8-bit atomic additions.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT1
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT2
+
+; Check addition of a variable.
+; - CHECK is for the main loop.
+; - CHECK-SHIFT1 makes sure that the negated shift count used by the second
+;   RLL is set up correctly.  The negation is independent of the NILL and L
+;   tested in CHECK.
+; - CHECK-SHIFT2 makes sure that %b is shifted into the high part of the word
+;   before being used.  This shift is independent of the other loop prologue
+;   instructions.
+define i8 @f1(i8 *%src, i8 %b) {
+; CHECK: f1:
+; CHECK: sllg [[SHIFT:%r[1-9][0-9]?]], %r2, 3
+; CHECK: nill %r2, 65532
+; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]])
+; CHECK: ar [[ROT]], %r3
+; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9][0-9]?}})
+; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
+; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: rll %r2, [[OLD]], 8([[SHIFT]])
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f1:
+; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9][0-9]?]], %r2, 3
+; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9][0-9]?]], [[SHIFT]]
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]])
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: br %r14
+;
+; CHECK-SHIFT2: f1:
+; CHECK-SHIFT2: sll %r3, 24
+; CHECK-SHIFT2: rll
+; CHECK-SHIFT2: ar {{%r[0-9]+}}, %r3
+; CHECK-SHIFT2: rll
+; CHECK-SHIFT2: rll
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw add i8 *%src, i8 %b seq_cst
+  ret i8 %res
+}
+
+; Check the minimum signed value.  We add 0x80000000 to the rotated word.
+define i8 @f2(i8 *%src) {
+; CHECK: f2:
+; CHECK: sllg [[SHIFT:%r[1-9][0-9]?]], %r2, 3
+; CHECK: nill %r2, 65532
+; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]])
+; CHECK: afi [[ROT]], -2147483648
+; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0([[NEGSHIFT:%r[1-9][0-9]?]])
+; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
+; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: rll %r2, [[OLD]], 8([[SHIFT]])
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f2:
+; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9][0-9]?]], %r2, 3
+; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9][0-9]?]], [[SHIFT]]
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]])
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: br %r14
+;
+; CHECK-SHIFT2: f2:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw add i8 *%src, i8 -128 seq_cst
+  ret i8 %res
+}
+
+; Check addition of -1.  We add 0xff000000 to the rotated word.
+define i8 @f3(i8 *%src) {
+; CHECK: f3:
+; CHECK: afi {{%r[0-9]+}}, -16777216
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f3:
+; CHECK-SHIFT1: br %r14
+; CHECK-SHIFT2: f3:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw add i8 *%src, i8 -1 seq_cst
+  ret i8 %res
+}
+
+; Check addition of 1.  We add 0x01000000 to the rotated word.
+define i8 @f4(i8 *%src) {
+; CHECK: f4:
+; CHECK: afi {{%r[0-9]+}}, 16777216
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f4:
+; CHECK-SHIFT1: br %r14
+; CHECK-SHIFT2: f4:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw add i8 *%src, i8 1 seq_cst
+  ret i8 %res
+}
+
+; Check the maximum signed value.  We add 0x7f000000 to the rotated word.
+define i8 @f5(i8 *%src) {
+; CHECK: f5:
+; CHECK: afi {{%r[0-9]+}}, 2130706432
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f5:
+; CHECK-SHIFT1: br %r14
+; CHECK-SHIFT2: f5:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw add i8 *%src, i8 127 seq_cst
+  ret i8 %res
+}
+
+; Check addition of a large unsigned value.  We add 0xfe000000 to the
+; rotated word, expressed as a negative AFI operand.
+define i8 @f6(i8 *%src) {
+; CHECK: f6:
+; CHECK: afi {{%r[0-9]+}}, -33554432
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f6:
+; CHECK-SHIFT1: br %r14
+; CHECK-SHIFT2: f6:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw add i8 *%src, i8 254 seq_cst
+  ret i8 %res
+}
diff --git a/test/CodeGen/SystemZ/atomicrmw-add-02.ll b/test/CodeGen/SystemZ/atomicrmw-add-02.ll
new file mode 100644
index 0000000..3dd482d
--- /dev/null
+++ b/test/CodeGen/SystemZ/atomicrmw-add-02.ll
@@ -0,0 +1,132 @@
+; Test 16-bit atomic additions.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT1
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT2
+
+; Check addition of a variable.
+; - CHECK is for the main loop.
+; - CHECK-SHIFT1 makes sure that the negated shift count used by the second
+;   RLL is set up correctly.  The negation is independent of the NILL and L
+;   tested in CHECK.
+; - CHECK-SHIFT2 makes sure that %b is shifted into the high part of the word
+;   before being used.  This shift is independent of the other loop prologue
+;   instructions.
+define i16 @f1(i16 *%src, i16 %b) {
+; CHECK: f1:
+; CHECK: sllg [[SHIFT:%r[1-9][0-9]?]], %r2, 3
+; CHECK: nill %r2, 65532
+; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]])
+; CHECK: ar [[ROT]], %r3
+; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9][0-9]?}})
+; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
+; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: rll %r2, [[OLD]], 16([[SHIFT]])
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f1:
+; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9][0-9]?]], %r2, 3
+; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9][0-9]?]], [[SHIFT]]
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]])
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: br %r14
+;
+; CHECK-SHIFT2: f1:
+; CHECK-SHIFT2: sll %r3, 16
+; CHECK-SHIFT2: rll
+; CHECK-SHIFT2: ar {{%r[0-9]+}}, %r3
+; CHECK-SHIFT2: rll
+; CHECK-SHIFT2: rll
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw add i16 *%src, i16 %b seq_cst
+  ret i16 %res
+}
+
+; Check the minimum signed value.  We add 0x80000000 to the rotated word.
+define i16 @f2(i16 *%src) {
+; CHECK: f2:
+; CHECK: sllg [[SHIFT:%r[1-9][0-9]?]], %r2, 3
+; CHECK: nill %r2, 65532
+; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]])
+; CHECK: afi [[ROT]], -2147483648
+; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0([[NEGSHIFT:%r[1-9][0-9]?]])
+; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
+; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: rll %r2, [[OLD]], 16([[SHIFT]])
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f2:
+; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9][0-9]?]], %r2, 3
+; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9][0-9]?]], [[SHIFT]]
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]])
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: br %r14
+;
+; CHECK-SHIFT2: f2:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw add i16 *%src, i16 -32768 seq_cst
+  ret i16 %res
+}
+
+; Check addition of -1.  We add 0xffff0000 to the rotated word.
+define i16 @f3(i16 *%src) {
+; CHECK: f3:
+; CHECK: afi {{%r[0-9]+}}, -65536
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f3:
+; CHECK-SHIFT1: br %r14
+; CHECK-SHIFT2: f3:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw add i16 *%src, i16 -1 seq_cst
+  ret i16 %res
+}
+
+; Check addition of 1.  We add 0x00010000 to the rotated word.
+define i16 @f4(i16 *%src) {
+; CHECK: f4:
+; CHECK: afi {{%r[0-9]+}}, 65536
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f4:
+; CHECK-SHIFT1: br %r14
+; CHECK-SHIFT2: f4:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw add i16 *%src, i16 1 seq_cst
+  ret i16 %res
+}
+
+; Check the maximum signed value.  We add 0x7fff0000 to the rotated word.
+define i16 @f5(i16 *%src) {
+; CHECK: f5:
+; CHECK: afi {{%r[0-9]+}}, 2147418112
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f5:
+; CHECK-SHIFT1: br %r14
+; CHECK-SHIFT2: f5:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw add i16 *%src, i16 32767 seq_cst
+  ret i16 %res
+}
+
+; Check addition of a large unsigned value.  We add 0xfffe0000 to the
+; rotated word, expressed as a negative AFI operand.
+define i16 @f6(i16 *%src) {
+; CHECK: f6:
+; CHECK: afi {{%r[0-9]+}}, -131072
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f6:
+; CHECK-SHIFT1: br %r14
+; CHECK-SHIFT2: f6:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw add i16 *%src, i16 65534 seq_cst
+  ret i16 %res
+}
diff --git a/test/CodeGen/SystemZ/atomicrmw-add-03.ll b/test/CodeGen/SystemZ/atomicrmw-add-03.ll
new file mode 100644
index 0000000..01eb8e0
--- /dev/null
+++ b/test/CodeGen/SystemZ/atomicrmw-add-03.ll
@@ -0,0 +1,94 @@
+; Test 32-bit atomic additions.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check addition of a variable.
+define i32 @f1(i32 %dummy, i32 *%src, i32 %b) {
+; CHECK: f1:
+; CHECK: l %r2, 0(%r3)
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK: lr %r0, %r2
+; CHECK: ar %r0, %r4
+; CHECK: cs %r2, %r0, 0(%r3)
+; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: br %r14
+  %res = atomicrmw add i32 *%src, i32 %b seq_cst
+  ret i32 %res
+}
+
+; Check addition of 1, which can use AHI.
+define i32 @f2(i32 %dummy, i32 *%src) {
+; CHECK: f2:
+; CHECK: l %r2, 0(%r3)
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK: lr %r0, %r2
+; CHECK: ahi %r0, 1
+; CHECK: cs %r2, %r0, 0(%r3)
+; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: br %r14
+  %res = atomicrmw add i32 *%src, i32 1 seq_cst
+  ret i32 %res
+}
+
+; Check the high end of the AHI range.
+define i32 @f3(i32 %dummy, i32 *%src) {
+; CHECK: f3:
+; CHECK: ahi %r0, 32767
+; CHECK: br %r14
+  %res = atomicrmw add i32 *%src, i32 32767 seq_cst
+  ret i32 %res
+}
+
+; Check the next value up, which must use AFI.
+define i32 @f4(i32 %dummy, i32 *%src) {
+; CHECK: f4:
+; CHECK: afi %r0, 32768
+; CHECK: br %r14
+  %res = atomicrmw add i32 *%src, i32 32768 seq_cst
+  ret i32 %res
+}
+
+; Check the high end of the AFI range.
+define i32 @f5(i32 %dummy, i32 *%src) {
+; CHECK: f5:
+; CHECK: afi %r0, 2147483647
+; CHECK: br %r14
+  %res = atomicrmw add i32 *%src, i32 2147483647 seq_cst
+  ret i32 %res
+}
+
+; Check the next value up, which gets treated as a negative operand.
+define i32 @f6(i32 %dummy, i32 *%src) {
+; CHECK: f6:
+; CHECK: afi %r0, -2147483648
+; CHECK: br %r14
+  %res = atomicrmw add i32 *%src, i32 2147483648 seq_cst
+  ret i32 %res
+}
+
+; Check addition of -1, which can use AHI.
+define i32 @f7(i32 %dummy, i32 *%src) {
+; CHECK: f7:
+; CHECK: ahi %r0, -1
+; CHECK: br %r14
+  %res = atomicrmw add i32 *%src, i32 -1 seq_cst
+  ret i32 %res
+}
+
+; Check the low end of the AHI range.
+define i32 @f8(i32 %dummy, i32 *%src) {
+; CHECK: f8:
+; CHECK: ahi %r0, -32768
+; CHECK: br %r14
+  %res = atomicrmw add i32 *%src, i32 -32768 seq_cst
+  ret i32 %res
+}
+
+; Check the next value down, which must use AFI instead.
+define i32 @f9(i32 %dummy, i32 *%src) {
+; CHECK: f9:
+; CHECK: afi %r0, -32769
+; CHECK: br %r14
+  %res = atomicrmw add i32 *%src, i32 -32769 seq_cst
+  ret i32 %res
+}
diff --git a/test/CodeGen/SystemZ/atomicrmw-add-04.ll b/test/CodeGen/SystemZ/atomicrmw-add-04.ll
new file mode 100644
index 0000000..6b1d20b
--- /dev/null
+++ b/test/CodeGen/SystemZ/atomicrmw-add-04.ll
@@ -0,0 +1,112 @@
+; Test 64-bit atomic additions.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check addition of a variable.
+define i64 @f1(i64 %dummy, i64 *%src, i64 %b) {
+; CHECK: f1:
+; CHECK: lg %r2, 0(%r3)
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK: lgr %r0, %r2
+; CHECK: agr %r0, %r4
+; CHECK: csg %r2, %r0, 0(%r3)
+; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: br %r14
+  %res = atomicrmw add i64 *%src, i64 %b seq_cst
+  ret i64 %res
+}
+
+; Check addition of 1, which can use AGHI.
+define i64 @f2(i64 %dummy, i64 *%src) {
+; CHECK: f2:
+; CHECK: lg %r2, 0(%r3)
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK: lgr %r0, %r2
+; CHECK: aghi %r0, 1
+; CHECK: csg %r2, %r0, 0(%r3)
+; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: br %r14
+  %res = atomicrmw add i64 *%src, i64 1 seq_cst
+  ret i64 %res
+}
+
+; Check the high end of the AGHI range.
+define i64 @f3(i64 %dummy, i64 *%src) {
+; CHECK: f3:
+; CHECK: aghi %r0, 32767
+; CHECK: br %r14
+  %res = atomicrmw add i64 *%src, i64 32767 seq_cst
+  ret i64 %res
+}
+
+; Check the next value up, which must use AGFI.
+define i64 @f4(i64 %dummy, i64 *%src) {
+; CHECK: f4:
+; CHECK: agfi %r0, 32768
+; CHECK: br %r14
+  %res = atomicrmw add i64 *%src, i64 32768 seq_cst
+  ret i64 %res
+}
+
+; Check the high end of the AGFI range.
+define i64 @f5(i64 %dummy, i64 *%src) {
+; CHECK: f5:
+; CHECK: agfi %r0, 2147483647
+; CHECK: br %r14
+  %res = atomicrmw add i64 *%src, i64 2147483647 seq_cst
+  ret i64 %res
+}
+
+; Check the next value up, which must use a register addition.
+define i64 @f6(i64 %dummy, i64 *%src) {
+; CHECK: f6:
+; CHECK: agr
+; CHECK: br %r14
+  %res = atomicrmw add i64 *%src, i64 2147483648 seq_cst
+  ret i64 %res
+}
+
+; Check addition of -1, which can use AGHI.
+define i64 @f7(i64 %dummy, i64 *%src) {
+; CHECK: f7:
+; CHECK: aghi %r0, -1
+; CHECK: br %r14
+  %res = atomicrmw add i64 *%src, i64 -1 seq_cst
+  ret i64 %res
+}
+
+; Check the low end of the AGHI range.
+define i64 @f8(i64 %dummy, i64 *%src) {
+; CHECK: f8:
+; CHECK: aghi %r0, -32768
+; CHECK: br %r14
+  %res = atomicrmw add i64 *%src, i64 -32768 seq_cst
+  ret i64 %res
+}
+
+; Check the next value down, which must use AGFI instead.
+define i64 @f9(i64 %dummy, i64 *%src) {
+; CHECK: f9:
+; CHECK: agfi %r0, -32769
+; CHECK: br %r14
+  %res = atomicrmw add i64 *%src, i64 -32769 seq_cst
+  ret i64 %res
+}
+
+; Check the low end of the AGFI range.
+define i64 @f10(i64 %dummy, i64 *%src) {
+; CHECK: f10:
+; CHECK: agfi %r0, -2147483648
+; CHECK: br %r14
+  %res = atomicrmw add i64 *%src, i64 -2147483648 seq_cst
+  ret i64 %res
+}
+
+; Check the next value down, which must use a register addition.
+define i64 @f11(i64 %dummy, i64 *%src) {
+; CHECK: f11:
+; CHECK: agr
+; CHECK: br %r14
+  %res = atomicrmw add i64 *%src, i64 -2147483649 seq_cst
+  ret i64 %res
+}
diff --git a/test/CodeGen/SystemZ/atomicrmw-and-01.ll b/test/CodeGen/SystemZ/atomicrmw-and-01.ll
new file mode 100644
index 0000000..ebbce8e
--- /dev/null
+++ b/test/CodeGen/SystemZ/atomicrmw-and-01.ll
@@ -0,0 +1,133 @@
+; Test 8-bit atomic ANDs.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT1
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT2
+
+; Check AND of a variable.
+; - CHECK is for the main loop.
+; - CHECK-SHIFT1 makes sure that the negated shift count used by the second
+;   RLL is set up correctly.  The negation is independent of the NILL and L
+;   tested in CHECK.
+; - CHECK-SHIFT2 makes sure that %b is shifted into the high part of the word
+;   before being used, and that the low bits are set to 1.  This sequence is
+;   independent of the other loop prologue instructions.
+define i8 @f1(i8 *%src, i8 %b) {
+; CHECK: f1:
+; CHECK: sllg [[SHIFT:%r[1-9][0-9]?]], %r2, 3
+; CHECK: nill %r2, 65532
+; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]])
+; CHECK: nr [[ROT]], %r3
+; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9][0-9]?}})
+; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
+; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: rll %r2, [[OLD]], 8([[SHIFT]])
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f1:
+; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9][0-9]?]], %r2, 3
+; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9][0-9]?]], [[SHIFT]]
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]])
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: br %r14
+;
+; CHECK-SHIFT2: f1:
+; CHECK-SHIFT2: sll %r3, 24
+; CHECK-SHIFT2: oilf %r3, 16777215
+; CHECK-SHIFT2: rll
+; CHECK-SHIFT2: nr {{%r[0-9]+}}, %r3
+; CHECK-SHIFT2: rll
+; CHECK-SHIFT2: rll
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw and i8 *%src, i8 %b seq_cst
+  ret i8 %res
+}
+
+; Check the minimum signed value.  We AND the rotated word with 0x80ffffff.
+define i8 @f2(i8 *%src) {
+; CHECK: f2:
+; CHECK: sllg [[SHIFT:%r[1-9][0-9]?]], %r2, 3
+; CHECK: nill %r2, 65532
+; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]])
+; CHECK: nilh [[ROT]], 33023
+; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0([[NEGSHIFT:%r[1-9][0-9]?]])
+; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
+; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: rll %r2, [[OLD]], 8([[SHIFT]])
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f2:
+; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9][0-9]?]], %r2, 3
+; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9][0-9]?]], [[SHIFT]]
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]])
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: br %r14
+;
+; CHECK-SHIFT2: f2:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw and i8 *%src, i8 -128 seq_cst
+  ret i8 %res
+}
+
+; Check ANDs of -2 (-1 isn't useful).  We AND the rotated word with 0xfeffffff.
+define i8 @f3(i8 *%src) {
+; CHECK: f3:
+; CHECK: nilh {{%r[0-9]+}}, 65279
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f3:
+; CHECK-SHIFT1: br %r14
+; CHECK-SHIFT2: f3:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw and i8 *%src, i8 -2 seq_cst
+  ret i8 %res
+}
+
+; Check ANDs of 1.  We AND the rotated word with 0x01ffffff.
+define i8 @f4(i8 *%src) {
+; CHECK: f4:
+; CHECK: nilh {{%r[0-9]+}}, 511
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f4:
+; CHECK-SHIFT1: br %r14
+; CHECK-SHIFT2: f4:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw and i8 *%src, i8 1 seq_cst
+  ret i8 %res
+}
+
+; Check the maximum signed value.  We AND the rotated word with 0x7fffffff.
+define i8 @f5(i8 *%src) {
+; CHECK: f5:
+; CHECK: nilh {{%r[0-9]+}}, 32767
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f5:
+; CHECK-SHIFT1: br %r14
+; CHECK-SHIFT2: f5:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw and i8 *%src, i8 127 seq_cst
+  ret i8 %res
+}
+
+; Check ANDs of a large unsigned value.  We AND the rotated word with
+; 0xfdffffff.
+define i8 @f6(i8 *%src) {
+; CHECK: f6:
+; CHECK: nilh {{%r[0-9]+}}, 65023
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f6:
+; CHECK-SHIFT1: br %r14
+; CHECK-SHIFT2: f6:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw and i8 *%src, i8 253 seq_cst
+  ret i8 %res
+}
diff --git a/test/CodeGen/SystemZ/atomicrmw-and-02.ll b/test/CodeGen/SystemZ/atomicrmw-and-02.ll
new file mode 100644
index 0000000..b63ca4a
--- /dev/null
+++ b/test/CodeGen/SystemZ/atomicrmw-and-02.ll
@@ -0,0 +1,133 @@
+; Test 16-bit atomic ANDs.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT1
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT2
+
+; Check AND of a variable.
+; - CHECK is for the main loop.
+; - CHECK-SHIFT1 makes sure that the negated shift count used by the second
+;   RLL is set up correctly.  The negation is independent of the NILL and L
+;   tested in CHECK.
+; - CHECK-SHIFT2 makes sure that %b is shifted into the high part of the word
+;   before being used, and that the low bits are set to 1.  This sequence is
+;   independent of the other loop prologue instructions.
+define i16 @f1(i16 *%src, i16 %b) {
+; CHECK: f1:
+; CHECK: sllg [[SHIFT:%r[1-9][0-9]?]], %r2, 3
+; CHECK: nill %r2, 65532
+; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]])
+; CHECK: nr [[ROT]], %r3
+; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9][0-9]?}})
+; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
+; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: rll %r2, [[OLD]], 16([[SHIFT]])
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f1:
+; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9][0-9]?]], %r2, 3
+; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9][0-9]?]], [[SHIFT]]
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]])
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: br %r14
+;
+; CHECK-SHIFT2: f1:
+; CHECK-SHIFT2: sll %r3, 16
+; CHECK-SHIFT2: oill %r3, 65535
+; CHECK-SHIFT2: rll
+; CHECK-SHIFT2: nr {{%r[0-9]+}}, %r3
+; CHECK-SHIFT2: rll
+; CHECK-SHIFT2: rll
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw and i16 *%src, i16 %b seq_cst
+  ret i16 %res
+}
+
+; Check the minimum signed value.  We AND the rotated word with 0x8000ffff.
+define i16 @f2(i16 *%src) {
+; CHECK: f2:
+; CHECK: sllg [[SHIFT:%r[1-9][0-9]?]], %r2, 3
+; CHECK: nill %r2, 65532
+; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]])
+; CHECK: nilh [[ROT]], 32768
+; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0([[NEGSHIFT:%r[1-9][0-9]?]])
+; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
+; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: rll %r2, [[OLD]], 16([[SHIFT]])
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f2:
+; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9][0-9]?]], %r2, 3
+; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9][0-9]?]], [[SHIFT]]
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]])
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: br %r14
+;
+; CHECK-SHIFT2: f2:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw and i16 *%src, i16 -32768 seq_cst
+  ret i16 %res
+}
+
+; Check ANDs of -2 (-1 isn't useful).  We AND the rotated word with 0xfffeffff.
+define i16 @f3(i16 *%src) {
+; CHECK: f3:
+; CHECK: nilh {{%r[0-9]+}}, 65534
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f3:
+; CHECK-SHIFT1: br %r14
+; CHECK-SHIFT2: f3:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw and i16 *%src, i16 -2 seq_cst
+  ret i16 %res
+}
+
+; Check ANDs of 1.  We AND the rotated word with 0x0001ffff.
+define i16 @f4(i16 *%src) {
+; CHECK: f4:
+; CHECK: nilh {{%r[0-9]+}}, 1
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f4:
+; CHECK-SHIFT1: br %r14
+; CHECK-SHIFT2: f4:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw and i16 *%src, i16 1 seq_cst
+  ret i16 %res
+}
+
+; Check the maximum signed value.  We AND the rotated word with 0x7fffffff.
+define i16 @f5(i16 *%src) {
+; CHECK: f5:
+; CHECK: nilh {{%r[0-9]+}}, 32767
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f5:
+; CHECK-SHIFT1: br %r14
+; CHECK-SHIFT2: f5:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw and i16 *%src, i16 32767 seq_cst
+  ret i16 %res
+}
+
+; Check ANDs of a large unsigned value.  We AND the rotated word with
+; 0xfffdffff.
+define i16 @f6(i16 *%src) {
+; CHECK: f6:
+; CHECK: nilh {{%r[0-9]+}}, 65533
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f6:
+; CHECK-SHIFT1: br %r14
+; CHECK-SHIFT2: f6:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw and i16 *%src, i16 65533 seq_cst
+  ret i16 %res
+}
diff --git a/test/CodeGen/SystemZ/atomicrmw-and-03.ll b/test/CodeGen/SystemZ/atomicrmw-and-03.ll
new file mode 100644
index 0000000..ec69edc
--- /dev/null
+++ b/test/CodeGen/SystemZ/atomicrmw-and-03.ll
@@ -0,0 +1,85 @@
+; Test 32-bit atomic ANDs.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check ANDs of a variable.
+define i32 @f1(i32 %dummy, i32 *%src, i32 %b) {
+; CHECK: f1:
+; CHECK: l %r2, 0(%r3)
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK: lr %r0, %r2
+; CHECK: nr %r0, %r4
+; CHECK: cs %r2, %r0, 0(%r3)
+; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: br %r14
+  %res = atomicrmw and i32 *%src, i32 %b seq_cst
+  ret i32 %res
+}
+
+; Check ANDs of 1.
+define i32 @f2(i32 %dummy, i32 *%src) {
+; CHECK: f2:
+; CHECK: l %r2, 0(%r3)
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK: lr %r0, %r2
+; CHECK: nilf %r0, 1
+; CHECK: cs %r2, %r0, 0(%r3)
+; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: br %r14
+  %res = atomicrmw and i32 *%src, i32 1 seq_cst
+  ret i32 %res
+}
+
+; Check ANDs of the low end of the NILH range.
+define i32 @f3(i32 %dummy, i32 *%src) {
+; CHECK: f3:
+; CHECK: nilh %r0, 0
+; CHECK: br %r14
+  %res = atomicrmw and i32 *%src, i32 65535 seq_cst
+  ret i32 %res
+}
+
+; Check the next value up, which must use NILF.
+define i32 @f4(i32 %dummy, i32 *%src) {
+; CHECK: f4:
+; CHECK: nilf %r0, 65536
+; CHECK: br %r14
+  %res = atomicrmw and i32 *%src, i32 65536 seq_cst
+  ret i32 %res
+}
+
+; Check the largest useful NILL value.
+define i32 @f5(i32 %dummy, i32 *%src) {
+; CHECK: f5:
+; CHECK: nill %r0, 65534
+; CHECK: br %r14
+  %res = atomicrmw and i32 *%src, i32 -2 seq_cst
+  ret i32 %res
+}
+
+; Check the low end of the NILL range.
+define i32 @f6(i32 %dummy, i32 *%src) {
+; CHECK: f6:
+; CHECK: nill %r0, 0
+; CHECK: br %r14
+  %res = atomicrmw and i32 *%src, i32 -65536 seq_cst
+  ret i32 %res
+}
+
+; Check the largest useful NILH value, which is one less than the above.
+define i32 @f7(i32 %dummy, i32 *%src) {
+; CHECK: f7:
+; CHECK: nilh %r0, 65534
+; CHECK: br %r14
+  %res = atomicrmw and i32 *%src, i32 -65537 seq_cst
+  ret i32 %res
+}
+
+; Check the highest useful NILF value, which is one less than the above.
+define i32 @f8(i32 %dummy, i32 *%src) {
+; CHECK: f8:
+; CHECK: nilf %r0, 4294901758
+; CHECK: br %r14
+  %res = atomicrmw and i32 *%src, i32 -65538 seq_cst
+  ret i32 %res
+}
diff --git a/test/CodeGen/SystemZ/atomicrmw-and-04.ll b/test/CodeGen/SystemZ/atomicrmw-and-04.ll
new file mode 100644
index 0000000..71f29ba
--- /dev/null
+++ b/test/CodeGen/SystemZ/atomicrmw-and-04.ll
@@ -0,0 +1,157 @@
+; Test 64-bit atomic ANDs.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check ANDs of a variable.
+define i64 @f1(i64 %dummy, i64 *%src, i64 %b) {
+; CHECK: f1:
+; CHECK: lg %r2, 0(%r3)
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK: lgr %r0, %r2
+; CHECK: ngr %r0, %r4
+; CHECK: csg %r2, %r0, 0(%r3)
+; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: br %r14
+  %res = atomicrmw and i64 *%src, i64 %b seq_cst
+  ret i64 %res
+}
+
+; Check ANDs of 1, which must be done using a register.
+define i64 @f2(i64 %dummy, i64 *%src) {
+; CHECK: f2:
+; CHECK: ngr
+; CHECK: br %r14
+  %res = atomicrmw and i64 *%src, i64 1 seq_cst
+  ret i64 %res
+}
+
+; Check the low end of the NIHF range.
+define i64 @f3(i64 %dummy, i64 *%src) {
+; CHECK: f3:
+; CHECK: lg %r2, 0(%r3)
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK: lgr %r0, %r2
+; CHECK: nihf %r0, 0
+; CHECK: csg %r2, %r0, 0(%r3)
+; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: br %r14
+  %res = atomicrmw and i64 *%src, i64 4294967295 seq_cst
+  ret i64 %res
+}
+
+; Check the next value up, which must use a register.
+define i64 @f4(i64 %dummy, i64 *%src) {
+; CHECK: f4:
+; CHECK: ngr
+; CHECK: br %r14
+  %res = atomicrmw and i64 *%src, i64 4294967296 seq_cst
+  ret i64 %res
+}
+
+; Check the low end of the NIHH range.
+define i64 @f5(i64 %dummy, i64 *%src) {
+; CHECK: f5:
+; CHECK: nihh %r0, 0
+; CHECK: br %r14
+  %res = atomicrmw and i64 *%src, i64 281474976710655 seq_cst
+  ret i64 %res
+}
+
+; Check the next value up, which must use a register.
+define i64 @f6(i64 %dummy, i64 *%src) {
+; CHECK: f6:
+; CHECK: ngr
+; CHECK: br %r14
+  %res = atomicrmw and i64 *%src, i64 281474976710656 seq_cst
+  ret i64 %res
+}
+
+; Check the highest useful NILL value.
+define i64 @f7(i64 %dummy, i64 *%src) {
+; CHECK: f7:
+; CHECK: nill %r0, 65534
+; CHECK: br %r14
+  %res = atomicrmw and i64 *%src, i64 -2 seq_cst
+  ret i64 %res
+}
+
+; Check the low end of the NILL range.
+define i64 @f8(i64 %dummy, i64 *%src) {
+; CHECK: f8:
+; CHECK: nill %r0, 0
+; CHECK: br %r14
+  %res = atomicrmw and i64 *%src, i64 -65536 seq_cst
+  ret i64 %res
+}
+
+; Check the highest useful NILH value, which is one less than the above.
+define i64 @f9(i64 %dummy, i64 *%src) {
+; CHECK: f9:
+; CHECK: nilh %r0, 65534
+; CHECK: br %r14
+  %res = atomicrmw and i64 *%src, i64 -65537 seq_cst
+  ret i64 %res
+}
+
+; Check the highest useful NILF value, which is one less than the above.
+define i64 @f10(i64 %dummy, i64 *%src) {
+; CHECK: f10:
+; CHECK: nilf %r0, 4294901758
+; CHECK: br %r14
+  %res = atomicrmw and i64 *%src, i64 -65538 seq_cst
+  ret i64 %res
+}
+
+; Check the low end of the NILH range.
+define i64 @f11(i64 %dummy, i64 *%src) {
+; CHECK: f11:
+; CHECK: nilh %r0, 0
+; CHECK: br %r14
+  %res = atomicrmw and i64 *%src, i64 -4294901761 seq_cst
+  ret i64 %res
+}
+
+; Check the low end of the NILF range.
+define i64 @f12(i64 %dummy, i64 *%src) {
+; CHECK: f12:
+; CHECK: nilf %r0, 0
+; CHECK: br %r14
+  %res = atomicrmw and i64 *%src, i64 -4294967296 seq_cst
+  ret i64 %res
+}
+
+; Check the highest useful NIHL value, which is one less than the above.
+define i64 @f13(i64 %dummy, i64 *%src) {
+; CHECK: f13:
+; CHECK: nihl %r0, 65534
+; CHECK: br %r14
+  %res = atomicrmw and i64 *%src, i64 -4294967297 seq_cst
+  ret i64 %res
+}
+
+; Check the low end of the NIHL range.
+define i64 @f14(i64 %dummy, i64 *%src) {
+; CHECK: f14:
+; CHECK: nihl %r0, 0
+; CHECK: br %r14
+  %res = atomicrmw and i64 *%src, i64 -281470681743361 seq_cst
+  ret i64 %res
+}
+
+; Check the highest useful NIHH value, which is 1<<32 less than the above.
+define i64 @f15(i64 %dummy, i64 *%src) {
+; CHECK: f15:
+; CHECK: nihh %r0, 65534
+; CHECK: br %r14
+  %res = atomicrmw and i64 *%src, i64 -281474976710657 seq_cst
+  ret i64 %res
+}
+
+; Check the highest useful NIHF value, which is 1<<32 less than the above.
+define i64 @f16(i64 %dummy, i64 *%src) {
+; CHECK: f16:
+; CHECK: nihf %r0, 4294901758
+; CHECK: br %r14
+  %res = atomicrmw and i64 *%src, i64 -281479271677953 seq_cst
+  ret i64 %res
+}
diff --git a/test/CodeGen/SystemZ/atomicrmw-minmax-01.ll b/test/CodeGen/SystemZ/atomicrmw-minmax-01.ll
new file mode 100644
index 0000000..c6ec77e
--- /dev/null
+++ b/test/CodeGen/SystemZ/atomicrmw-minmax-01.ll
@@ -0,0 +1,228 @@
+; Test 8-bit atomic min/max operations.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT1
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT2
+
+; Check signed minimum.
+; - CHECK is for the main loop.
+; - CHECK-SHIFT1 makes sure that the negated shift count used by the second
+;   RLL is set up correctly.  The negation is independent of the NILL and L
+;   tested in CHECK.
+; - CHECK-SHIFT2 makes sure that %b is shifted into the high part of the word
+;   before being used in the comparison.  This sequence is independent of
+;   the other loop prologue instructions.
+define i8 @f1(i8 *%src, i8 %b) {
+; CHECK: f1:
+; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3
+; CHECK: nill %r2, 65532
+; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
+; CHECK: [[LOOP:\.[^:]*]]:
+; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]])
+; CHECK: cr [[ROT]], %r3
+; CHECK: j{{g?}}le [[KEEP:\..*]]
+; CHECK: risbg [[ROT]], %r3, 32, 39, 0
+; CHECK: [[KEEP]]:
+; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}})
+; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
+; CHECK: j{{g?}}lh [[LOOP]]
+; CHECK: rll %r2, [[OLD]], 8([[SHIFT]])
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f1:
+; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3
+; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]])
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: br %r14
+;
+; CHECK-SHIFT2: f1:
+; CHECK-SHIFT2: sll %r3, 24
+; CHECK-SHIFT2: rll
+; CHECK-SHIFT2: cr {{%r[0-9]+}}, %r3
+; CHECK-SHIFT2: rll
+; CHECK-SHIFT2: rll
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw min i8 *%src, i8 %b seq_cst
+  ret i8 %res
+}
+
+; Check signed maximum.
+define i8 @f2(i8 *%src, i8 %b) {
+; CHECK: f2:
+; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3
+; CHECK: nill %r2, 65532
+; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
+; CHECK: [[LOOP:\.[^:]*]]:
+; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]])
+; CHECK: cr [[ROT]], %r3
+; CHECK: j{{g?}}he [[KEEP:\..*]]
+; CHECK: risbg [[ROT]], %r3, 32, 39, 0
+; CHECK: [[KEEP]]:
+; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}})
+; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
+; CHECK: j{{g?}}lh [[LOOP]]
+; CHECK: rll %r2, [[OLD]], 8([[SHIFT]])
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f2:
+; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3
+; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]])
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: br %r14
+;
+; CHECK-SHIFT2: f2:
+; CHECK-SHIFT2: sll %r3, 24
+; CHECK-SHIFT2: rll
+; CHECK-SHIFT2: cr {{%r[0-9]+}}, %r3
+; CHECK-SHIFT2: rll
+; CHECK-SHIFT2: rll
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw max i8 *%src, i8 %b seq_cst
+  ret i8 %res
+}
+
+; Check unsigned minimum.
+define i8 @f3(i8 *%src, i8 %b) {
+; CHECK: f3:
+; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3
+; CHECK: nill %r2, 65532
+; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
+; CHECK: [[LOOP:\.[^:]*]]:
+; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]])
+; CHECK: clr [[ROT]], %r3
+; CHECK: j{{g?}}le [[KEEP:\..*]]
+; CHECK: risbg [[ROT]], %r3, 32, 39, 0
+; CHECK: [[KEEP]]:
+; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}})
+; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
+; CHECK: j{{g?}}lh [[LOOP]]
+; CHECK: rll %r2, [[OLD]], 8([[SHIFT]])
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f3:
+; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3
+; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]])
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: br %r14
+;
+; CHECK-SHIFT2: f3:
+; CHECK-SHIFT2: sll %r3, 24
+; CHECK-SHIFT2: rll
+; CHECK-SHIFT2: clr {{%r[0-9]+}}, %r3
+; CHECK-SHIFT2: rll
+; CHECK-SHIFT2: rll
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw umin i8 *%src, i8 %b seq_cst
+  ret i8 %res
+}
+
+; Check unsigned maximum.
+define i8 @f4(i8 *%src, i8 %b) {
+; CHECK: f4:
+; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3
+; CHECK: nill %r2, 65532
+; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
+; CHECK: [[LOOP:\.[^:]*]]:
+; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]])
+; CHECK: clr [[ROT]], %r3
+; CHECK: j{{g?}}he [[KEEP:\..*]]
+; CHECK: risbg [[ROT]], %r3, 32, 39, 0
+; CHECK: [[KEEP]]:
+; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}})
+; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
+; CHECK: j{{g?}}lh [[LOOP]]
+; CHECK: rll %r2, [[OLD]], 8([[SHIFT]])
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f4:
+; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3
+; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]])
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: br %r14
+;
+; CHECK-SHIFT2: f4:
+; CHECK-SHIFT2: sll %r3, 24
+; CHECK-SHIFT2: rll
+; CHECK-SHIFT2: clr {{%r[0-9]+}}, %r3
+; CHECK-SHIFT2: rll
+; CHECK-SHIFT2: rll
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw umax i8 *%src, i8 %b seq_cst
+  ret i8 %res
+}
+
+; Check the lowest useful signed minimum value.  We need to load 0x81000000
+; into the source register.
+define i8 @f5(i8 *%src) {
+; CHECK: f5:
+; CHECK: llilh [[SRC2:%r[0-9]+]], 33024
+; CHECK: cr [[ROT:%r[0-9]+]], [[SRC2]]
+; CHECK: risbg [[ROT]], [[SRC2]], 32, 39, 0
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f5:
+; CHECK-SHIFT1: br %r14
+; CHECK-SHIFT2: f5:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw min i8 *%src, i8 -127 seq_cst
+  ret i8 %res
+}
+
+; Check the highest useful signed maximum value.  We need to load 0x7e000000
+; into the source register.
+define i8 @f6(i8 *%src) {
+; CHECK: f6:
+; CHECK: llilh [[SRC2:%r[0-9]+]], 32256
+; CHECK: cr [[ROT:%r[0-9]+]], [[SRC2]]
+; CHECK: risbg [[ROT]], [[SRC2]], 32, 39, 0
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f6:
+; CHECK-SHIFT1: br %r14
+; CHECK-SHIFT2: f6:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw max i8 *%src, i8 126 seq_cst
+  ret i8 %res
+}
+
+; Check the lowest useful unsigned minimum value.  We need to load 0x01000000
+; into the source register.
+define i8 @f7(i8 *%src) {
+; CHECK: f7:
+; CHECK: llilh [[SRC2:%r[0-9]+]], 256
+; CHECK: clr [[ROT:%r[0-9]+]], [[SRC2]]
+; CHECK: risbg [[ROT]], [[SRC2]], 32, 39, 0
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f7:
+; CHECK-SHIFT1: br %r14
+; CHECK-SHIFT2: f7:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw umin i8 *%src, i8 1 seq_cst
+  ret i8 %res
+}
+
+; Check the highest useful unsigned maximum value.  We need to load 0xfe000000
+; into the source register.
+define i8 @f8(i8 *%src) {
+; CHECK: f8:
+; CHECK: llilh [[SRC2:%r[0-9]+]], 65024
+; CHECK: clr [[ROT:%r[0-9]+]], [[SRC2]]
+; CHECK: risbg [[ROT]], [[SRC2]], 32, 39, 0
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f8:
+; CHECK-SHIFT1: br %r14
+; CHECK-SHIFT2: f8:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw umax i8 *%src, i8 254 seq_cst
+  ret i8 %res
+}
diff --git a/test/CodeGen/SystemZ/atomicrmw-minmax-02.ll b/test/CodeGen/SystemZ/atomicrmw-minmax-02.ll
new file mode 100644
index 0000000..9612e99
--- /dev/null
+++ b/test/CodeGen/SystemZ/atomicrmw-minmax-02.ll
@@ -0,0 +1,228 @@
+; Test 16-bit atomic min/max operations.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT1
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT2
+
+; Check signed minimum.
+; - CHECK is for the main loop.
+; - CHECK-SHIFT1 makes sure that the negated shift count used by the second
+;   RLL is set up correctly.  The negation is independent of the NILL and L
+;   tested in CHECK.
+; - CHECK-SHIFT2 makes sure that %b is shifted into the high part of the word
+;   before being used in the comparison.  This sequence is independent of
+;   the other loop prologue instructions.
+define i16 @f1(i16 *%src, i16 %b) {
+; CHECK: f1:
+; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3
+; CHECK: nill %r2, 65532
+; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
+; CHECK: [[LOOP:\.[^:]*]]:
+; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]])
+; CHECK: cr [[ROT]], %r3
+; CHECK: j{{g?}}le [[KEEP:\..*]]
+; CHECK: risbg [[ROT]], %r3, 32, 47, 0
+; CHECK: [[KEEP]]:
+; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}})
+; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
+; CHECK: j{{g?}}lh [[LOOP]]
+; CHECK: rll %r2, [[OLD]], 16([[SHIFT]])
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f1:
+; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3
+; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]])
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: br %r14
+;
+; CHECK-SHIFT2: f1:
+; CHECK-SHIFT2: sll %r3, 16
+; CHECK-SHIFT2: rll
+; CHECK-SHIFT2: cr {{%r[0-9]+}}, %r3
+; CHECK-SHIFT2: rll
+; CHECK-SHIFT2: rll
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw min i16 *%src, i16 %b seq_cst
+  ret i16 %res
+}
+
+; Check signed maximum.
+define i16 @f2(i16 *%src, i16 %b) {
+; CHECK: f2:
+; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3
+; CHECK: nill %r2, 65532
+; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
+; CHECK: [[LOOP:\.[^:]*]]:
+; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]])
+; CHECK: cr [[ROT]], %r3
+; CHECK: j{{g?}}he [[KEEP:\..*]]
+; CHECK: risbg [[ROT]], %r3, 32, 47, 0
+; CHECK: [[KEEP]]:
+; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}})
+; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
+; CHECK: j{{g?}}lh [[LOOP]]
+; CHECK: rll %r2, [[OLD]], 16([[SHIFT]])
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f2:
+; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3
+; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]])
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: br %r14
+;
+; CHECK-SHIFT2: f2:
+; CHECK-SHIFT2: sll %r3, 16
+; CHECK-SHIFT2: rll
+; CHECK-SHIFT2: cr {{%r[0-9]+}}, %r3
+; CHECK-SHIFT2: rll
+; CHECK-SHIFT2: rll
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw max i16 *%src, i16 %b seq_cst
+  ret i16 %res
+}
+
+; Check unsigned minimum.
+define i16 @f3(i16 *%src, i16 %b) {
+; CHECK: f3:
+; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3
+; CHECK: nill %r2, 65532
+; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
+; CHECK: [[LOOP:\.[^:]*]]:
+; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]])
+; CHECK: clr [[ROT]], %r3
+; CHECK: j{{g?}}le [[KEEP:\..*]]
+; CHECK: risbg [[ROT]], %r3, 32, 47, 0
+; CHECK: [[KEEP]]:
+; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}})
+; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
+; CHECK: j{{g?}}lh [[LOOP]]
+; CHECK: rll %r2, [[OLD]], 16([[SHIFT]])
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f3:
+; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3
+; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]])
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: br %r14
+;
+; CHECK-SHIFT2: f3:
+; CHECK-SHIFT2: sll %r3, 16
+; CHECK-SHIFT2: rll
+; CHECK-SHIFT2: clr {{%r[0-9]+}}, %r3
+; CHECK-SHIFT2: rll
+; CHECK-SHIFT2: rll
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw umin i16 *%src, i16 %b seq_cst
+  ret i16 %res
+}
+
+; Check unsigned maximum.
+define i16 @f4(i16 *%src, i16 %b) {
+; CHECK: f4:
+; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3
+; CHECK: nill %r2, 65532
+; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
+; CHECK: [[LOOP:\.[^:]*]]:
+; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]])
+; CHECK: clr [[ROT]], %r3
+; CHECK: j{{g?}}he [[KEEP:\..*]]
+; CHECK: risbg [[ROT]], %r3, 32, 47, 0
+; CHECK: [[KEEP]]:
+; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}})
+; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
+; CHECK: j{{g?}}lh [[LOOP]]
+; CHECK: rll %r2, [[OLD]], 16([[SHIFT]])
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f4:
+; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3
+; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]])
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: br %r14
+;
+; CHECK-SHIFT2: f4:
+; CHECK-SHIFT2: sll %r3, 16
+; CHECK-SHIFT2: rll
+; CHECK-SHIFT2: clr {{%r[0-9]+}}, %r3
+; CHECK-SHIFT2: rll
+; CHECK-SHIFT2: rll
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw umax i16 *%src, i16 %b seq_cst
+  ret i16 %res
+}
+
+; Check the lowest useful signed minimum value.  We need to load 0x80010000
+; into the source register.
+define i16 @f5(i16 *%src) {
+; CHECK: f5:
+; CHECK: llilh [[SRC2:%r[0-9]+]], 32769
+; CHECK: cr [[ROT:%r[0-9]+]], [[SRC2]]
+; CHECK: risbg [[ROT]], [[SRC2]], 32, 47, 0
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f5:
+; CHECK-SHIFT1: br %r14
+; CHECK-SHIFT2: f5:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw min i16 *%src, i16 -32767 seq_cst
+  ret i16 %res
+}
+
+; Check the highest useful signed maximum value.  We need to load 0x7ffe0000
+; into the source register.
+define i16 @f6(i16 *%src) {
+; CHECK: f6:
+; CHECK: llilh [[SRC2:%r[0-9]+]], 32766
+; CHECK: cr [[ROT:%r[0-9]+]], [[SRC2]]
+; CHECK: risbg [[ROT]], [[SRC2]], 32, 47, 0
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f6:
+; CHECK-SHIFT1: br %r14
+; CHECK-SHIFT2: f6:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw max i16 *%src, i16 32766 seq_cst
+  ret i16 %res
+}
+
+; Check the lowest useful unsigned minimum value.  We need to load 0x00010000
+; into the source register.
+define i16 @f7(i16 *%src) {
+; CHECK: f7:
+; CHECK: llilh [[SRC2:%r[0-9]+]], 1
+; CHECK: clr [[ROT:%r[0-9]+]], [[SRC2]]
+; CHECK: risbg [[ROT]], [[SRC2]], 32, 47, 0
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f7:
+; CHECK-SHIFT1: br %r14
+; CHECK-SHIFT2: f7:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw umin i16 *%src, i16 1 seq_cst
+  ret i16 %res
+}
+
+; Check the highest useful unsigned maximum value.  We need to load 0xfffe0000
+; into the source register.
+define i16 @f8(i16 *%src) {
+; CHECK: f8:
+; CHECK: llilh [[SRC2:%r[0-9]+]], 65534
+; CHECK: clr [[ROT:%r[0-9]+]], [[SRC2]]
+; CHECK: risbg [[ROT]], [[SRC2]], 32, 47, 0
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f8:
+; CHECK-SHIFT1: br %r14
+; CHECK-SHIFT2: f8:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw umax i16 *%src, i16 65534 seq_cst
+  ret i16 %res
+}
diff --git a/test/CodeGen/SystemZ/atomicrmw-minmax-03.ll b/test/CodeGen/SystemZ/atomicrmw-minmax-03.ll
new file mode 100644
index 0000000..b5809bd
--- /dev/null
+++ b/test/CodeGen/SystemZ/atomicrmw-minmax-03.ll
@@ -0,0 +1,176 @@
+; Test 32-bit atomic minimum and maximum.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check signed minimum.
+define i32 @f1(i32 %dummy, i32 *%src, i32 %b) {
+; CHECK: f1:
+; CHECK: l %r2, 0(%r3)
+; CHECK: [[LOOP:\.[^:]*]]:
+; CHECK: cr %r2, %r4
+; CHECK: lr [[NEW:%r[0-9]+]], %r2
+; CHECK: j{{g?}}le [[KEEP:\..*]]
+; CHECK: lr [[NEW]], %r4
+; CHECK: cs %r2, [[NEW]], 0(%r3)
+; CHECK: j{{g?}}lh [[LOOP]]
+; CHECK: br %r14
+  %res = atomicrmw min i32 *%src, i32 %b seq_cst
+  ret i32 %res
+}
+
+; Check signed maximum.
+define i32 @f2(i32 %dummy, i32 *%src, i32 %b) {
+; CHECK: f2:
+; CHECK: l %r2, 0(%r3)
+; CHECK: [[LOOP:\.[^:]*]]:
+; CHECK: cr %r2, %r4
+; CHECK: lr [[NEW:%r[0-9]+]], %r2
+; CHECK: j{{g?}}he [[KEEP:\..*]]
+; CHECK: lr [[NEW]], %r4
+; CHECK: cs %r2, [[NEW]], 0(%r3)
+; CHECK: j{{g?}}lh [[LOOP]]
+; CHECK: br %r14
+  %res = atomicrmw max i32 *%src, i32 %b seq_cst
+  ret i32 %res
+}
+
+; Check unsigned minimum.
+define i32 @f3(i32 %dummy, i32 *%src, i32 %b) {
+; CHECK: f3:
+; CHECK: l %r2, 0(%r3)
+; CHECK: [[LOOP:\.[^:]*]]:
+; CHECK: clr %r2, %r4
+; CHECK: lr [[NEW:%r[0-9]+]], %r2
+; CHECK: j{{g?}}le [[KEEP:\..*]]
+; CHECK: lr [[NEW]], %r4
+; CHECK: cs %r2, [[NEW]], 0(%r3)
+; CHECK: j{{g?}}lh [[LOOP]]
+; CHECK: br %r14
+  %res = atomicrmw umin i32 *%src, i32 %b seq_cst
+  ret i32 %res
+}
+
+; Check unsigned maximum.
+define i32 @f4(i32 %dummy, i32 *%src, i32 %b) {
+; CHECK: f4:
+; CHECK: l %r2, 0(%r3)
+; CHECK: [[LOOP:\.[^:]*]]:
+; CHECK: clr %r2, %r4
+; CHECK: lr [[NEW:%r[0-9]+]], %r2
+; CHECK: j{{g?}}he [[KEEP:\..*]]
+; CHECK: lr [[NEW]], %r4
+; CHECK: cs %r2, [[NEW]], 0(%r3)
+; CHECK: j{{g?}}lh [[LOOP]]
+; CHECK: br %r14
+  %res = atomicrmw umax i32 *%src, i32 %b seq_cst
+  ret i32 %res
+}
+
+; Check the high end of the aligned CS range.
+define i32 @f5(i32 %dummy, i32 *%src, i32 %b) {
+; CHECK: f5:
+; CHECK: l %r2, 4092(%r3)
+; CHECK: cs %r2, {{%r[0-9]+}}, 4092(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 1023
+  %res = atomicrmw min i32 *%ptr, i32 %b seq_cst
+  ret i32 %res
+}
+
+; Check the next word up, which requires CSY.
+define i32 @f6(i32 %dummy, i32 *%src, i32 %b) {
+; CHECK: f6:
+; CHECK: ly %r2, 4096(%r3)
+; CHECK: csy %r2, {{%r[0-9]+}}, 4096(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 1024
+  %res = atomicrmw min i32 *%ptr, i32 %b seq_cst
+  ret i32 %res
+}
+
+; Check the high end of the aligned CSY range.
+define i32 @f7(i32 %dummy, i32 *%src, i32 %b) {
+; CHECK: f7:
+; CHECK: ly %r2, 524284(%r3)
+; CHECK: csy %r2, {{%r[0-9]+}}, 524284(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 131071
+  %res = atomicrmw min i32 *%ptr, i32 %b seq_cst
+  ret i32 %res
+}
+
+; Check the next word up, which needs separate address logic.
+define i32 @f8(i32 %dummy, i32 *%src, i32 %b) {
+; CHECK: f8:
+; CHECK: agfi %r3, 524288
+; CHECK: l %r2, 0(%r3)
+; CHECK: cs %r2, {{%r[0-9]+}}, 0(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 131072
+  %res = atomicrmw min i32 *%ptr, i32 %b seq_cst
+  ret i32 %res
+}
+
+; Check the high end of the negative aligned CSY range.
+define i32 @f9(i32 %dummy, i32 *%src, i32 %b) {
+; CHECK: f9:
+; CHECK: ly %r2, -4(%r3)
+; CHECK: csy %r2, {{%r[0-9]+}}, -4(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 -1
+  %res = atomicrmw min i32 *%ptr, i32 %b seq_cst
+  ret i32 %res
+}
+
+; Check the low end of the CSY range.
+define i32 @f10(i32 %dummy, i32 *%src, i32 %b) {
+; CHECK: f10:
+; CHECK: ly %r2, -524288(%r3)
+; CHECK: csy %r2, {{%r[0-9]+}}, -524288(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 -131072
+  %res = atomicrmw min i32 *%ptr, i32 %b seq_cst
+  ret i32 %res
+}
+
+; Check the next word down, which needs separate address logic.
+define i32 @f11(i32 %dummy, i32 *%src, i32 %b) {
+; CHECK: f11:
+; CHECK: agfi %r3, -524292
+; CHECK: l %r2, 0(%r3)
+; CHECK: cs %r2, {{%r[0-9]+}}, 0(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 -131073
+  %res = atomicrmw min i32 *%ptr, i32 %b seq_cst
+  ret i32 %res
+}
+
+; Check that indexed addresses are not allowed.
+define i32 @f12(i32 %dummy, i64 %base, i64 %index, i32 %b) {
+; CHECK: f12:
+; CHECK: agr %r3, %r4
+; CHECK: l %r2, 0(%r3)
+; CHECK: cs %r2, {{%r[0-9]+}}, 0(%r3)
+; CHECK: br %r14
+  %add = add i64 %base, %index
+  %ptr = inttoptr i64 %add to i32 *
+  %res = atomicrmw min i32 *%ptr, i32 %b seq_cst
+  ret i32 %res
+}
+
+; Check that constants are forced into a register.
+define i32 @f13(i32 %dummy, i32 *%ptr) {
+; CHECK: f13:
+; CHECK: lhi [[LIMIT:%r[0-9]+]], 42
+; CHECK: l %r2, 0(%r3)
+; CHECK: [[LOOP:\.[^:]*]]:
+; CHECK: cr %r2, [[LIMIT]]
+; CHECK: lr [[NEW:%r[0-9]+]], %r2
+; CHECK: j{{g?}}le [[KEEP:\..*]]
+; CHECK: lr [[NEW]], [[LIMIT]]
+; CHECK: cs %r2, [[NEW]], 0(%r3)
+; CHECK: j{{g?}}lh [[LOOP]]
+; CHECK: br %r14
+  %res = atomicrmw min i32 *%ptr, i32 42 seq_cst
+  ret i32 %res
+}
diff --git a/test/CodeGen/SystemZ/atomicrmw-minmax-04.ll b/test/CodeGen/SystemZ/atomicrmw-minmax-04.ll
new file mode 100644
index 0000000..6897854
--- /dev/null
+++ b/test/CodeGen/SystemZ/atomicrmw-minmax-04.ll
@@ -0,0 +1,143 @@
+; Test 64-bit atomic minimum and maximum.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check signed minimum.
+define i64 @f1(i64 %dummy, i64 *%src, i64 %b) {
+; CHECK: f1:
+; CHECK: lg %r2, 0(%r3)
+; CHECK: [[LOOP:\.[^:]*]]:
+; CHECK: cgr %r2, %r4
+; CHECK: lgr [[NEW:%r[0-9]+]], %r2
+; CHECK: j{{g?}}le [[KEEP:\..*]]
+; CHECK: lgr [[NEW]], %r4
+; CHECK: csg %r2, [[NEW]], 0(%r3)
+; CHECK: j{{g?}}lh [[LOOP]]
+; CHECK: br %r14
+  %res = atomicrmw min i64 *%src, i64 %b seq_cst
+  ret i64 %res
+}
+
+; Check signed maximum.
+define i64 @f2(i64 %dummy, i64 *%src, i64 %b) {
+; CHECK: f2:
+; CHECK: lg %r2, 0(%r3)
+; CHECK: [[LOOP:\.[^:]*]]:
+; CHECK: cgr %r2, %r4
+; CHECK: lgr [[NEW:%r[0-9]+]], %r2
+; CHECK: j{{g?}}he [[KEEP:\..*]]
+; CHECK: lgr [[NEW]], %r4
+; CHECK: csg %r2, [[NEW]], 0(%r3)
+; CHECK: j{{g?}}lh [[LOOP]]
+; CHECK: br %r14
+  %res = atomicrmw max i64 *%src, i64 %b seq_cst
+  ret i64 %res
+}
+
+; Check unsigned minimum.
+define i64 @f3(i64 %dummy, i64 *%src, i64 %b) {
+; CHECK: f3:
+; CHECK: lg %r2, 0(%r3)
+; CHECK: [[LOOP:\.[^:]*]]:
+; CHECK: clgr %r2, %r4
+; CHECK: lgr [[NEW:%r[0-9]+]], %r2
+; CHECK: j{{g?}}le [[KEEP:\..*]]
+; CHECK: lgr [[NEW]], %r4
+; CHECK: csg %r2, [[NEW]], 0(%r3)
+; CHECK: j{{g?}}lh [[LOOP]]
+; CHECK: br %r14
+  %res = atomicrmw umin i64 *%src, i64 %b seq_cst
+  ret i64 %res
+}
+
+; Check unsigned maximum.
+define i64 @f4(i64 %dummy, i64 *%src, i64 %b) {
+; CHECK: f4:
+; CHECK: lg %r2, 0(%r3)
+; CHECK: [[LOOP:\.[^:]*]]:
+; CHECK: clgr %r2, %r4
+; CHECK: lgr [[NEW:%r[0-9]+]], %r2
+; CHECK: j{{g?}}he [[KEEP:\..*]]
+; CHECK: lgr [[NEW]], %r4
+; CHECK: csg %r2, [[NEW]], 0(%r3)
+; CHECK: j{{g?}}lh [[LOOP]]
+; CHECK: br %r14
+  %res = atomicrmw umax i64 *%src, i64 %b seq_cst
+  ret i64 %res
+}
+
+; Check the high end of the aligned CSG range.
+define i64 @f5(i64 %dummy, i64 *%src, i64 %b) {
+; CHECK: f5:
+; CHECK: lg %r2, 524280(%r3)
+; CHECK: csg %r2, {{%r[0-9]+}}, 524280(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 65535
+  %res = atomicrmw min i64 *%ptr, i64 %b seq_cst
+  ret i64 %res
+}
+
+; Check the next doubleword up, which requires separate address logic.
+define i64 @f6(i64 %dummy, i64 *%src, i64 %b) {
+; CHECK: f6:
+; CHECK: agfi %r3, 524288
+; CHECK: lg %r2, 0(%r3)
+; CHECK: csg %r2, {{%r[0-9]+}}, 0(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 65536
+  %res = atomicrmw min i64 *%ptr, i64 %b seq_cst
+  ret i64 %res
+}
+
+; Check the low end of the CSG range.
+define i64 @f7(i64 %dummy, i64 *%src, i64 %b) {
+; CHECK: f7:
+; CHECK: lg %r2, -524288(%r3)
+; CHECK: csg %r2, {{%r[0-9]+}}, -524288(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 -65536
+  %res = atomicrmw min i64 *%ptr, i64 %b seq_cst
+  ret i64 %res
+}
+
+; Check the next doubleword down, which requires separate address logic.
+define i64 @f8(i64 %dummy, i64 *%src, i64 %b) {
+; CHECK: f8:
+; CHECK: agfi %r3, -524296
+; CHECK: lg %r2, 0(%r3)
+; CHECK: csg %r2, {{%r[0-9]+}}, 0(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 -65537
+  %res = atomicrmw min i64 *%ptr, i64 %b seq_cst
+  ret i64 %res
+}
+
+; Check that indexed addresses are not allowed.
+define i64 @f9(i64 %dummy, i64 %base, i64 %index, i64 %b) {
+; CHECK: f9:
+; CHECK: agr %r3, %r4
+; CHECK: lg %r2, 0(%r3)
+; CHECK: csg %r2, {{%r[0-9]+}}, 0(%r3)
+; CHECK: br %r14
+  %add = add i64 %base, %index
+  %ptr = inttoptr i64 %add to i64 *
+  %res = atomicrmw min i64 *%ptr, i64 %b seq_cst
+  ret i64 %res
+}
+
+; Check that constants are forced into a register.
+define i64 @f10(i64 %dummy, i64 *%ptr) {
+; CHECK: f10:
+; CHECK: lghi [[LIMIT:%r[0-9]+]], 42
+; CHECK: lg %r2, 0(%r3)
+; CHECK: [[LOOP:\.[^:]*]]:
+; CHECK: cgr %r2, [[LIMIT]]
+; CHECK: lgr [[NEW:%r[0-9]+]], %r2
+; CHECK: j{{g?}}le [[KEEP:\..*]]
+; CHECK: lgr [[NEW]], [[LIMIT]]
+; CHECK: csg %r2, [[NEW]], 0(%r3)
+; CHECK: j{{g?}}lh [[LOOP]]
+; CHECK: br %r14
+  %res = atomicrmw min i64 *%ptr, i64 42 seq_cst
+  ret i64 %res
+}
diff --git a/test/CodeGen/SystemZ/atomicrmw-nand-01.ll b/test/CodeGen/SystemZ/atomicrmw-nand-01.ll
new file mode 100644
index 0000000..1ede3b4
--- /dev/null
+++ b/test/CodeGen/SystemZ/atomicrmw-nand-01.ll
@@ -0,0 +1,139 @@
+; Test 8-bit atomic NANDs.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT1
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT2
+
+; Check NAND of a variable.
+; - CHECK is for the main loop.
+; - CHECK-SHIFT1 makes sure that the negated shift count used by the second
+;   RLL is set up correctly.  The negation is independent of the NILL and L
+;   tested in CHECK.
+; - CHECK-SHIFT2 makes sure that %b is shifted into the high part of the word
+;   before being used, and that the low bits are set to 1.  This sequence is
+;   independent of the other loop prologue instructions.
+define i8 @f1(i8 *%src, i8 %b) {
+; CHECK: f1:
+; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3
+; CHECK: nill %r2, 65532
+; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]])
+; CHECK: nr [[ROT]], %r3
+; CHECK: xilf [[ROT]], 4278190080
+; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}})
+; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
+; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: rll %r2, [[OLD]], 8([[SHIFT]])
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f1:
+; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3
+; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]])
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: br %r14
+;
+; CHECK-SHIFT2: f1:
+; CHECK-SHIFT2: sll %r3, 24
+; CHECK-SHIFT2: oilf %r3, 16777215
+; CHECK-SHIFT2: rll
+; CHECK-SHIFT2: nr {{%r[0-9]+}}, %r3
+; CHECK-SHIFT2: rll
+; CHECK-SHIFT2: rll
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw nand i8 *%src, i8 %b seq_cst
+  ret i8 %res
+}
+
+; Check the minimum signed value.  We AND the rotated word with 0x80ffffff.
+define i8 @f2(i8 *%src) {
+; CHECK: f2:
+; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3
+; CHECK: nill %r2, 65532
+; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]])
+; CHECK: nilh [[ROT]], 33023
+; CHECK: xilf [[ROT]], 4278190080
+; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0([[NEGSHIFT:%r[1-9]+]])
+; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
+; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: rll %r2, [[OLD]], 8([[SHIFT]])
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f2:
+; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3
+; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]])
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: br %r14
+;
+; CHECK-SHIFT2: f2:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw nand i8 *%src, i8 -128 seq_cst
+  ret i8 %res
+}
+
+; Check NANDs of -2 (-1 isn't useful).  We AND the rotated word with 0xfeffffff.
+define i8 @f3(i8 *%src) {
+; CHECK: f3:
+; CHECK: nilh [[ROT:%r[0-9]+]], 65279
+; CHECK: xilf [[ROT]], 4278190080
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f3:
+; CHECK-SHIFT1: br %r14
+; CHECK-SHIFT2: f3:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw nand i8 *%src, i8 -2 seq_cst
+  ret i8 %res
+}
+
+; Check NANDs of 1.  We AND the rotated word with 0x01ffffff.
+define i8 @f4(i8 *%src) {
+; CHECK: f4:
+; CHECK: nilh [[ROT:%r[0-9]+]], 511
+; CHECK: xilf [[ROT]], 4278190080
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f4:
+; CHECK-SHIFT1: br %r14
+; CHECK-SHIFT2: f4:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw nand i8 *%src, i8 1 seq_cst
+  ret i8 %res
+}
+
+; Check the maximum signed value.  We AND the rotated word with 0x7fffffff.
+define i8 @f5(i8 *%src) {
+; CHECK: f5:
+; CHECK: nilh [[ROT:%r[0-9]+]], 32767
+; CHECK: xilf [[ROT]], 4278190080
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f5:
+; CHECK-SHIFT1: br %r14
+; CHECK-SHIFT2: f5:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw nand i8 *%src, i8 127 seq_cst
+  ret i8 %res
+}
+
+; Check NANDs of a large unsigned value.  We AND the rotated word with
+; 0xfdffffff.
+define i8 @f6(i8 *%src) {
+; CHECK: f6:
+; CHECK: nilh [[ROT:%r[0-9]+]], 65023
+; CHECK: xilf [[ROT]], 4278190080
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f6:
+; CHECK-SHIFT1: br %r14
+; CHECK-SHIFT2: f6:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw nand i8 *%src, i8 253 seq_cst
+  ret i8 %res
+}
diff --git a/test/CodeGen/SystemZ/atomicrmw-nand-02.ll b/test/CodeGen/SystemZ/atomicrmw-nand-02.ll
new file mode 100644
index 0000000..d5cf864
--- /dev/null
+++ b/test/CodeGen/SystemZ/atomicrmw-nand-02.ll
@@ -0,0 +1,139 @@
+; Test 16-bit atomic NANDs.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT1
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT2
+
+; Check NAND of a variable.
+; - CHECK is for the main loop.
+; - CHECK-SHIFT1 makes sure that the negated shift count used by the second
+;   RLL is set up correctly.  The negation is independent of the NILL and L
+;   tested in CHECK.
+; - CHECK-SHIFT2 makes sure that %b is shifted into the high part of the word
+;   before being used, and that the low bits are set to 1.  This sequence is
+;   independent of the other loop prologue instructions.
+define i16 @f1(i16 *%src, i16 %b) {
+; CHECK: f1:
+; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3
+; CHECK: nill %r2, 65532
+; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]])
+; CHECK: nr [[ROT]], %r3
+; CHECK: xilf [[ROT]], 4294901760
+; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}})
+; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
+; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: rll %r2, [[OLD]], 16([[SHIFT]])
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f1:
+; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3
+; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]])
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: br %r14
+;
+; CHECK-SHIFT2: f1:
+; CHECK-SHIFT2: sll %r3, 16
+; CHECK-SHIFT2: oill %r3, 65535
+; CHECK-SHIFT2: rll
+; CHECK-SHIFT2: nr {{%r[0-9]+}}, %r3
+; CHECK-SHIFT2: rll
+; CHECK-SHIFT2: rll
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw nand i16 *%src, i16 %b seq_cst
+  ret i16 %res
+}
+
+; Check the minimum signed value.  We AND the rotated word with 0x8000ffff.
+define i16 @f2(i16 *%src) {
+; CHECK: f2:
+; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3
+; CHECK: nill %r2, 65532
+; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]])
+; CHECK: nilh [[ROT]], 32768
+; CHECK: xilf [[ROT]], 4294901760
+; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0([[NEGSHIFT:%r[1-9]+]])
+; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
+; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: rll %r2, [[OLD]], 16([[SHIFT]])
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f2:
+; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3
+; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]])
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: br %r14
+;
+; CHECK-SHIFT2: f2:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw nand i16 *%src, i16 -32768 seq_cst
+  ret i16 %res
+}
+
+; Check NANDs of -2 (-1 isn't useful).  We AND the rotated word with 0xfffeffff.
+define i16 @f3(i16 *%src) {
+; CHECK: f3:
+; CHECK: nilh [[ROT]], 65534
+; CHECK: xilf [[ROT]], 4294901760
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f3:
+; CHECK-SHIFT1: br %r14
+; CHECK-SHIFT2: f3:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw nand i16 *%src, i16 -2 seq_cst
+  ret i16 %res
+}
+
+; Check NANDs of 1.  We AND the rotated word with 0x0001ffff.
+define i16 @f4(i16 *%src) {
+; CHECK: f4:
+; CHECK: nilh [[ROT]], 1
+; CHECK: xilf [[ROT]], 4294901760
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f4:
+; CHECK-SHIFT1: br %r14
+; CHECK-SHIFT2: f4:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw nand i16 *%src, i16 1 seq_cst
+  ret i16 %res
+}
+
+; Check the maximum signed value.  We AND the rotated word with 0x7fffffff.
+define i16 @f5(i16 *%src) {
+; CHECK: f5:
+; CHECK: nilh [[ROT]], 32767
+; CHECK: xilf [[ROT]], 4294901760
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f5:
+; CHECK-SHIFT1: br %r14
+; CHECK-SHIFT2: f5:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw nand i16 *%src, i16 32767 seq_cst
+  ret i16 %res
+}
+
+; Check NANDs of a large unsigned value.  We AND the rotated word with
+; 0xfffdffff.
+define i16 @f6(i16 *%src) {
+; CHECK: f6:
+; CHECK: nilh [[ROT]], 65533
+; CHECK: xilf [[ROT]], 4294901760
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f6:
+; CHECK-SHIFT1: br %r14
+; CHECK-SHIFT2: f6:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw nand i16 *%src, i16 65533 seq_cst
+  ret i16 %res
+}
diff --git a/test/CodeGen/SystemZ/atomicrmw-nand-03.ll b/test/CodeGen/SystemZ/atomicrmw-nand-03.ll
new file mode 100644
index 0000000..cc2a086
--- /dev/null
+++ b/test/CodeGen/SystemZ/atomicrmw-nand-03.ll
@@ -0,0 +1,93 @@
+; Test 32-bit atomic NANDs.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check NANDs of a variable.
+define i32 @f1(i32 %dummy, i32 *%src, i32 %b) {
+; CHECK: f1:
+; CHECK: l %r2, 0(%r3)
+; CHECK: [[LABEL:\.[^ ]*]]:
+; CHECK: lr %r0, %r2
+; CHECK: nr %r0, %r4
+; CHECK: xilf %r0, 4294967295
+; CHECK: cs %r2, %r0, 0(%r3)
+; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: br %r14
+  %res = atomicrmw nand i32 *%src, i32 %b seq_cst
+  ret i32 %res
+}
+
+; Check NANDs of 1.
+define i32 @f2(i32 %dummy, i32 *%src) {
+; CHECK: f2:
+; CHECK: l %r2, 0(%r3)
+; CHECK: [[LABEL:\.[^ ]*]]:
+; CHECK: lr %r0, %r2
+; CHECK: nilf %r0, 1
+; CHECK: xilf %r0, 4294967295
+; CHECK: cs %r2, %r0, 0(%r3)
+; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: br %r14
+  %res = atomicrmw nand i32 *%src, i32 1 seq_cst
+  ret i32 %res
+}
+
+; Check NANDs of the low end of the NILH range.
+define i32 @f3(i32 %dummy, i32 *%src) {
+; CHECK: f3:
+; CHECK: nilh %r0, 0
+; CHECK: xilf %r0, 4294967295
+; CHECK: br %r14
+  %res = atomicrmw nand i32 *%src, i32 65535 seq_cst
+  ret i32 %res
+}
+
+; Check the next value up, which must use NILF.
+define i32 @f4(i32 %dummy, i32 *%src) {
+; CHECK: f4:
+; CHECK: nilf %r0, 65536
+; CHECK: xilf %r0, 4294967295
+; CHECK: br %r14
+  %res = atomicrmw nand i32 *%src, i32 65536 seq_cst
+  ret i32 %res
+}
+
+; Check the largest useful NILL value.
+define i32 @f5(i32 %dummy, i32 *%src) {
+; CHECK: f5:
+; CHECK: nill %r0, 65534
+; CHECK: xilf %r0, 4294967295
+; CHECK: br %r14
+  %res = atomicrmw nand i32 *%src, i32 -2 seq_cst
+  ret i32 %res
+}
+
+; Check the low end of the NILL range.
+define i32 @f6(i32 %dummy, i32 *%src) {
+; CHECK: f6:
+; CHECK: nill %r0, 0
+; CHECK: xilf %r0, 4294967295
+; CHECK: br %r14
+  %res = atomicrmw nand i32 *%src, i32 -65536 seq_cst
+  ret i32 %res
+}
+
+; Check the largest useful NILH value, which is one less than the above.
+define i32 @f7(i32 %dummy, i32 *%src) {
+; CHECK: f7:
+; CHECK: nilh %r0, 65534
+; CHECK: xilf %r0, 4294967295
+; CHECK: br %r14
+  %res = atomicrmw nand i32 *%src, i32 -65537 seq_cst
+  ret i32 %res
+}
+
+; Check the highest useful NILF value, which is one less than the above.
+define i32 @f8(i32 %dummy, i32 *%src) {
+; CHECK: f8:
+; CHECK: nilf %r0, 4294901758
+; CHECK: xilf %r0, 4294967295
+; CHECK: br %r14
+  %res = atomicrmw nand i32 *%src, i32 -65538 seq_cst
+  ret i32 %res
+}
diff --git a/test/CodeGen/SystemZ/atomicrmw-nand-04.ll b/test/CodeGen/SystemZ/atomicrmw-nand-04.ll
new file mode 100644
index 0000000..0c857d9
--- /dev/null
+++ b/test/CodeGen/SystemZ/atomicrmw-nand-04.ll
@@ -0,0 +1,183 @@
+; Test 64-bit atomic NANDs.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check NANDs of a variable.
+define i64 @f1(i64 %dummy, i64 *%src, i64 %b) {
+; CHECK: f1:
+; CHECK: lg %r2, 0(%r3)
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK: lgr %r0, %r2
+; CHECK: ngr %r0, %r4
+; CHECK: lcgr %r0, %r0
+; CHECK: aghi %r0, -1
+; CHECK: csg %r2, %r0, 0(%r3)
+; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: br %r14
+  %res = atomicrmw nand i64 *%src, i64 %b seq_cst
+  ret i64 %res
+}
+
+; Check NANDs of 1, which must be done using a register.
+define i64 @f2(i64 %dummy, i64 *%src) {
+; CHECK: f2:
+; CHECK: ngr
+; CHECK: br %r14
+  %res = atomicrmw nand i64 *%src, i64 1 seq_cst
+  ret i64 %res
+}
+
+; Check the low end of the NIHF range.
+define i64 @f3(i64 %dummy, i64 *%src) {
+; CHECK: f3:
+; CHECK: lg %r2, 0(%r3)
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK: lgr %r0, %r2
+; CHECK: nihf %r0, 0
+; CHECK: lcgr %r0, %r0
+; CHECK: aghi %r0, -1
+; CHECK: csg %r2, %r0, 0(%r3)
+; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: br %r14
+  %res = atomicrmw nand i64 *%src, i64 4294967295 seq_cst
+  ret i64 %res
+}
+
+; Check the next value up, which must use a register.
+define i64 @f4(i64 %dummy, i64 *%src) {
+; CHECK: f4:
+; CHECK: ngr
+; CHECK: br %r14
+  %res = atomicrmw nand i64 *%src, i64 4294967296 seq_cst
+  ret i64 %res
+}
+
+; Check the low end of the NIHH range.
+define i64 @f5(i64 %dummy, i64 *%src) {
+; CHECK: f5:
+; CHECK: nihh %r0, 0
+; CHECK: lcgr %r0, %r0
+; CHECK: aghi %r0, -1
+; CHECK: br %r14
+  %res = atomicrmw nand i64 *%src, i64 281474976710655 seq_cst
+  ret i64 %res
+}
+
+; Check the next value up, which must use a register.
+define i64 @f6(i64 %dummy, i64 *%src) {
+; CHECK: f6:
+; CHECK: ngr
+; CHECK: br %r14
+  %res = atomicrmw nand i64 *%src, i64 281474976710656 seq_cst
+  ret i64 %res
+}
+
+; Check the highest useful NILL value.
+define i64 @f7(i64 %dummy, i64 *%src) {
+; CHECK: f7:
+; CHECK: nill %r0, 65534
+; CHECK: lcgr %r0, %r0
+; CHECK: aghi %r0, -1
+; CHECK: br %r14
+  %res = atomicrmw nand i64 *%src, i64 -2 seq_cst
+  ret i64 %res
+}
+
+; Check the low end of the NILL range.
+define i64 @f8(i64 %dummy, i64 *%src) {
+; CHECK: f8:
+; CHECK: nill %r0, 0
+; CHECK: lcgr %r0, %r0
+; CHECK: aghi %r0, -1
+; CHECK: br %r14
+  %res = atomicrmw nand i64 *%src, i64 -65536 seq_cst
+  ret i64 %res
+}
+
+; Check the highest useful NILH value, which is one less than the above.
+define i64 @f9(i64 %dummy, i64 *%src) {
+; CHECK: f9:
+; CHECK: nilh %r0, 65534
+; CHECK: lcgr %r0, %r0
+; CHECK: aghi %r0, -1
+; CHECK: br %r14
+  %res = atomicrmw nand i64 *%src, i64 -65537 seq_cst
+  ret i64 %res
+}
+
+; Check the highest useful NILF value, which is one less than the above.
+define i64 @f10(i64 %dummy, i64 *%src) {
+; CHECK: f10:
+; CHECK: nilf %r0, 4294901758
+; CHECK: lcgr %r0, %r0
+; CHECK: aghi %r0, -1
+; CHECK: br %r14
+  %res = atomicrmw nand i64 *%src, i64 -65538 seq_cst
+  ret i64 %res
+}
+
+; Check the low end of the NILH range.
+define i64 @f11(i64 %dummy, i64 *%src) {
+; CHECK: f11:
+; CHECK: nilh %r0, 0
+; CHECK: lcgr %r0, %r0
+; CHECK: aghi %r0, -1
+; CHECK: br %r14
+  %res = atomicrmw nand i64 *%src, i64 -4294901761 seq_cst
+  ret i64 %res
+}
+
+; Check the low end of the NILF range.
+define i64 @f12(i64 %dummy, i64 *%src) {
+; CHECK: f12:
+; CHECK: nilf %r0, 0
+; CHECK: lcgr %r0, %r0
+; CHECK: aghi %r0, -1
+; CHECK: br %r14
+  %res = atomicrmw nand i64 *%src, i64 -4294967296 seq_cst
+  ret i64 %res
+}
+
+; Check the highest useful NIHL value, which is one less than the above.
+define i64 @f13(i64 %dummy, i64 *%src) {
+; CHECK: f13:
+; CHECK: nihl %r0, 65534
+; CHECK: lcgr %r0, %r0
+; CHECK: aghi %r0, -1
+; CHECK: br %r14
+  %res = atomicrmw nand i64 *%src, i64 -4294967297 seq_cst
+  ret i64 %res
+}
+
+; Check the low end of the NIHL range.
+define i64 @f14(i64 %dummy, i64 *%src) {
+; CHECK: f14:
+; CHECK: nihl %r0, 0
+; CHECK: lcgr %r0, %r0
+; CHECK: aghi %r0, -1
+; CHECK: br %r14
+  %res = atomicrmw nand i64 *%src, i64 -281470681743361 seq_cst
+  ret i64 %res
+}
+
+; Check the highest useful NIHH value, which is 1<<32 less than the above.
+define i64 @f15(i64 %dummy, i64 *%src) {
+; CHECK: f15:
+; CHECK: nihh %r0, 65534
+; CHECK: lcgr %r0, %r0
+; CHECK: aghi %r0, -1
+; CHECK: br %r14
+  %res = atomicrmw nand i64 *%src, i64 -281474976710657 seq_cst
+  ret i64 %res
+}
+
+; Check the highest useful NIHF value, which is 1<<32 less than the above.
+define i64 @f16(i64 %dummy, i64 *%src) {
+; CHECK: f16:
+; CHECK: nihf %r0, 4294901758
+; CHECK: lcgr %r0, %r0
+; CHECK: aghi %r0, -1
+; CHECK: br %r14
+  %res = atomicrmw nand i64 *%src, i64 -281479271677953 seq_cst
+  ret i64 %res
+}
diff --git a/test/CodeGen/SystemZ/atomicrmw-or-01.ll b/test/CodeGen/SystemZ/atomicrmw-or-01.ll
new file mode 100644
index 0000000..31303b7
--- /dev/null
+++ b/test/CodeGen/SystemZ/atomicrmw-or-01.ll
@@ -0,0 +1,132 @@
+; Test 8-bit atomic ORs.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT1
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT2
+
+; Check OR of a variable.
+; - CHECK is for the main loop.
+; - CHECK-SHIFT1 makes sure that the negated shift count used by the second
+;   RLL is set up correctly.  The negation is independent of the NILL and L
+;   tested in CHECK.
+; - CHECK-SHIFT2 makes sure that %b is shifted into the high part of the word
+;   before being used.  This shift is independent of the other loop prologue
+;   instructions.
+define i8 @f1(i8 *%src, i8 %b) {
+; CHECK: f1:
+; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3
+; CHECK: nill %r2, 65532
+; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]])
+; CHECK: or [[ROT]], %r3
+; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}})
+; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
+; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: rll %r2, [[OLD]], 8([[SHIFT]])
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f1:
+; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3
+; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]])
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: br %r14
+;
+; CHECK-SHIFT2: f1:
+; CHECK-SHIFT2: sll %r3, 24
+; CHECK-SHIFT2: rll
+; CHECK-SHIFT2: or {{%r[0-9]+}}, %r3
+; CHECK-SHIFT2: rll
+; CHECK-SHIFT2: rll
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw or i8 *%src, i8 %b seq_cst
+  ret i8 %res
+}
+
+; Check the minimum signed value.  We OR the rotated word with 0x80000000.
+define i8 @f2(i8 *%src) {
+; CHECK: f2:
+; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3
+; CHECK: nill %r2, 65532
+; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]])
+; CHECK: oilh [[ROT]], 32768
+; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0([[NEGSHIFT:%r[1-9]+]])
+; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
+; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: rll %r2, [[OLD]], 8([[SHIFT]])
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f2:
+; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3
+; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]])
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: br %r14
+;
+; CHECK-SHIFT2: f2:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw or i8 *%src, i8 -128 seq_cst
+  ret i8 %res
+}
+
+; Check ORs of -2 (-1 isn't useful).  We OR the rotated word with 0xfe000000.
+define i8 @f3(i8 *%src) {
+; CHECK: f3:
+; CHECK: oilh [[ROT]], 65024
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f3:
+; CHECK-SHIFT1: br %r14
+; CHECK-SHIFT2: f3:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw or i8 *%src, i8 -2 seq_cst
+  ret i8 %res
+}
+
+; Check ORs of 1.  We OR the rotated word with 0x01000000.
+define i8 @f4(i8 *%src) {
+; CHECK: f4:
+; CHECK: oilh [[ROT]], 256
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f4:
+; CHECK-SHIFT1: br %r14
+; CHECK-SHIFT2: f4:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw or i8 *%src, i8 1 seq_cst
+  ret i8 %res
+}
+
+; Check the maximum signed value.  We OR the rotated word with 0x7f000000.
+define i8 @f5(i8 *%src) {
+; CHECK: f5:
+; CHECK: oilh [[ROT]], 32512
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f5:
+; CHECK-SHIFT1: br %r14
+; CHECK-SHIFT2: f5:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw or i8 *%src, i8 127 seq_cst
+  ret i8 %res
+}
+
+; Check ORs of a large unsigned value.  We OR the rotated word with
+; 0xfd000000.
+define i8 @f6(i8 *%src) {
+; CHECK: f6:
+; CHECK: oilh [[ROT]], 64768
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f6:
+; CHECK-SHIFT1: br %r14
+; CHECK-SHIFT2: f6:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw or i8 *%src, i8 253 seq_cst
+  ret i8 %res
+}
diff --git a/test/CodeGen/SystemZ/atomicrmw-or-02.ll b/test/CodeGen/SystemZ/atomicrmw-or-02.ll
new file mode 100644
index 0000000..9880d0b
--- /dev/null
+++ b/test/CodeGen/SystemZ/atomicrmw-or-02.ll
@@ -0,0 +1,132 @@
+; Test 16-bit atomic ORs.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT1
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT2
+
+; Check OR of a variable.
+; - CHECK is for the main loop.
+; - CHECK-SHIFT1 makes sure that the negated shift count used by the second
+;   RLL is set up correctly.  The negation is independent of the NILL and L
+;   tested in CHECK.
+; - CHECK-SHIFT2 makes sure that %b is shifted into the high part of the word
+;   before being used.  This shift is independent of the other loop prologue
+;   instructions.
+define i16 @f1(i16 *%src, i16 %b) {
+; CHECK: f1:
+; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3
+; CHECK: nill %r2, 65532
+; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]])
+; CHECK: or [[ROT]], %r3
+; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}})
+; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
+; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: rll %r2, [[OLD]], 16([[SHIFT]])
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f1:
+; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3
+; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]])
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: br %r14
+;
+; CHECK-SHIFT2: f1:
+; CHECK-SHIFT2: sll %r3, 16
+; CHECK-SHIFT2: rll
+; CHECK-SHIFT2: or {{%r[0-9]+}}, %r3
+; CHECK-SHIFT2: rll
+; CHECK-SHIFT2: rll
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw or i16 *%src, i16 %b seq_cst
+  ret i16 %res
+}
+
+; Check the minimum signed value.  We OR the rotated word with 0x80000000.
+define i16 @f2(i16 *%src) {
+; CHECK: f2:
+; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3
+; CHECK: nill %r2, 65532
+; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]])
+; CHECK: oilh [[ROT]], 32768
+; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0([[NEGSHIFT:%r[1-9]+]])
+; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
+; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: rll %r2, [[OLD]], 16([[SHIFT]])
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f2:
+; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3
+; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]])
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: br %r14
+;
+; CHECK-SHIFT2: f2:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw or i16 *%src, i16 -32768 seq_cst
+  ret i16 %res
+}
+
+; Check ORs of -2 (-1 isn't useful).  We OR the rotated word with 0xfffe0000.
+define i16 @f3(i16 *%src) {
+; CHECK: f3:
+; CHECK: oilh [[ROT]], 65534
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f3:
+; CHECK-SHIFT1: br %r14
+; CHECK-SHIFT2: f3:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw or i16 *%src, i16 -2 seq_cst
+  ret i16 %res
+}
+
+; Check ORs of 1.  We OR the rotated word with 0x00010000.
+define i16 @f4(i16 *%src) {
+; CHECK: f4:
+; CHECK: oilh [[ROT]], 1
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f4:
+; CHECK-SHIFT1: br %r14
+; CHECK-SHIFT2: f4:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw or i16 *%src, i16 1 seq_cst
+  ret i16 %res
+}
+
+; Check the maximum signed value.  We OR the rotated word with 0x7fff0000.
+define i16 @f5(i16 *%src) {
+; CHECK: f5:
+; CHECK: oilh [[ROT]], 32767
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f5:
+; CHECK-SHIFT1: br %r14
+; CHECK-SHIFT2: f5:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw or i16 *%src, i16 32767 seq_cst
+  ret i16 %res
+}
+
+; Check ORs of a large unsigned value.  We OR the rotated word with
+; 0xfffd0000.
+define i16 @f6(i16 *%src) {
+; CHECK: f6:
+; CHECK: oilh [[ROT]], 65533
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f6:
+; CHECK-SHIFT1: br %r14
+; CHECK-SHIFT2: f6:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw or i16 *%src, i16 65533 seq_cst
+  ret i16 %res
+}
diff --git a/test/CodeGen/SystemZ/atomicrmw-or-03.ll b/test/CodeGen/SystemZ/atomicrmw-or-03.ll
new file mode 100644
index 0000000..33fd21b
--- /dev/null
+++ b/test/CodeGen/SystemZ/atomicrmw-or-03.ll
@@ -0,0 +1,85 @@
+; Test 32-bit atomic ORs.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check ORs of a variable.
+define i32 @f1(i32 %dummy, i32 *%src, i32 %b) {
+; CHECK: f1:
+; CHECK: l %r2, 0(%r3)
+; CHECK: [[LABEL:\.[^ ]*]]:
+; CHECK: lr %r0, %r2
+; CHECK: or %r0, %r4
+; CHECK: cs %r2, %r0, 0(%r3)
+; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: br %r14
+  %res = atomicrmw or i32 *%src, i32 %b seq_cst
+  ret i32 %res
+}
+
+; Check the lowest useful OILL value.
+define i32 @f2(i32 %dummy, i32 *%src) {
+; CHECK: f2:
+; CHECK: l %r2, 0(%r3)
+; CHECK: [[LABEL:\.[^ ]*]]:
+; CHECK: lr %r0, %r2
+; CHECK: oill %r0, 1
+; CHECK: cs %r2, %r0, 0(%r3)
+; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: br %r14
+  %res = atomicrmw or i32 *%src, i32 1 seq_cst
+  ret i32 %res
+}
+
+; Check the high end of the OILL range.
+define i32 @f3(i32 %dummy, i32 *%src) {
+; CHECK: f3:
+; CHECK: oill %r0, 65535
+; CHECK: br %r14
+  %res = atomicrmw or i32 *%src, i32 65535 seq_cst
+  ret i32 %res
+}
+
+; Check the lowest useful OILH value, which is the next value up.
+define i32 @f4(i32 %dummy, i32 *%src) {
+; CHECK: f4:
+; CHECK: oilh %r0, 1
+; CHECK: br %r14
+  %res = atomicrmw or i32 *%src, i32 65536 seq_cst
+  ret i32 %res
+}
+
+; Check the lowest useful OILF value, which is the next value up.
+define i32 @f5(i32 %dummy, i32 *%src) {
+; CHECK: f5:
+; CHECK: oilf %r0, 65537
+; CHECK: br %r14
+  %res = atomicrmw or i32 *%src, i32 65537 seq_cst
+  ret i32 %res
+}
+
+; Check the high end of the OILH range.
+define i32 @f6(i32 %dummy, i32 *%src) {
+; CHECK: f6:
+; CHECK: oilh %r0, 65535
+; CHECK: br %r14
+  %res = atomicrmw or i32 *%src, i32 -65536 seq_cst
+  ret i32 %res
+}
+
+; Check the next value up, which must use OILF.
+define i32 @f7(i32 %dummy, i32 *%src) {
+; CHECK: f7:
+; CHECK: oilf %r0, 4294901761
+; CHECK: br %r14
+  %res = atomicrmw or i32 *%src, i32 -65535 seq_cst
+  ret i32 %res
+}
+
+; Check the largest useful OILF value.
+define i32 @f8(i32 %dummy, i32 *%src) {
+; CHECK: f8:
+; CHECK: oilf %r0, 4294967294
+; CHECK: br %r14
+  %res = atomicrmw or i32 *%src, i32 -2 seq_cst
+  ret i32 %res
+}
diff --git a/test/CodeGen/SystemZ/atomicrmw-or-04.ll b/test/CodeGen/SystemZ/atomicrmw-or-04.ll
new file mode 100644
index 0000000..a74f6f9
--- /dev/null
+++ b/test/CodeGen/SystemZ/atomicrmw-or-04.ll
@@ -0,0 +1,158 @@
+; Test 64-bit atomic ORs.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check ORs of a variable.
+define i64 @f1(i64 %dummy, i64 *%src, i64 %b) {
+; CHECK: f1:
+; CHECK: lg %r2, 0(%r3)
+; CHECK: [[LABEL:\.[^ ]*]]:
+; CHECK: lgr %r0, %r2
+; CHECK: ogr %r0, %r4
+; CHECK: csg %r2, %r0, 0(%r3)
+; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: br %r14
+  %res = atomicrmw or i64 *%src, i64 %b seq_cst
+  ret i64 %res
+}
+
+; Check the lowest useful OILL value.
+define i64 @f2(i64 %dummy, i64 *%src) {
+; CHECK: f2:
+; CHECK: lg %r2, 0(%r3)
+; CHECK: [[LABEL:\.[^ ]*]]:
+; CHECK: lgr %r0, %r2
+; CHECK: oill %r0, 1
+; CHECK: csg %r2, %r0, 0(%r3)
+; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: br %r14
+  %res = atomicrmw or i64 *%src, i64 1 seq_cst
+  ret i64 %res
+}
+
+; Check the high end of the OILL range.
+define i64 @f3(i64 %dummy, i64 *%src) {
+; CHECK: f3:
+; CHECK: oill %r0, 65535
+; CHECK: br %r14
+  %res = atomicrmw or i64 *%src, i64 65535 seq_cst
+  ret i64 %res
+}
+
+; Check the lowest useful OILH value, which is the next value up.
+define i64 @f4(i64 %dummy, i64 *%src) {
+; CHECK: f4:
+; CHECK: oilh %r0, 1
+; CHECK: br %r14
+  %res = atomicrmw or i64 *%src, i64 65536 seq_cst
+  ret i64 %res
+}
+
+; Check the lowest useful OILF value, which is the next value up again.
+define i64 @f5(i64 %dummy, i64 *%src) {
+; CHECK: f5:
+; CHECK: oilf %r0, 65537
+; CHECK: br %r14
+  %res = atomicrmw or i64 *%src, i64 65537 seq_cst
+  ret i64 %res
+}
+
+; Check the high end of the OILH range.
+define i64 @f6(i64 %dummy, i64 *%src) {
+; CHECK: f6:
+; CHECK: oilh %r0, 65535
+; CHECK: br %r14
+  %res = atomicrmw or i64 *%src, i64 4294901760 seq_cst
+  ret i64 %res
+}
+
+; Check the next value up, which must use OILF.
+define i64 @f7(i64 %dummy, i64 *%src) {
+; CHECK: f7:
+; CHECK: oilf %r0, 4294901761
+; CHECK: br %r14
+  %res = atomicrmw or i64 *%src, i64 4294901761 seq_cst
+  ret i64 %res
+}
+
+; Check the high end of the OILF range.
+define i64 @f8(i64 %dummy, i64 *%src) {
+; CHECK: f8:
+; CHECK: oilf %r0, 4294967295
+; CHECK: br %r14
+  %res = atomicrmw or i64 *%src, i64 4294967295 seq_cst
+  ret i64 %res
+}
+
+; Check the lowest useful OIHL value, which is one greater than above.
+define i64 @f9(i64 %dummy, i64 *%src) {
+; CHECK: f9:
+; CHECK: oihl %r0, 1
+; CHECK: br %r14
+  %res = atomicrmw or i64 *%src, i64 4294967296 seq_cst
+  ret i64 %res
+}
+
+; Check the next value up, which must use a register.  (We could use
+; combinations of OIH* and OIL* instead, but that isn't implemented.)
+define i64 @f10(i64 %dummy, i64 *%src) {
+; CHECK: f10:
+; CHECK: ogr
+; CHECK: br %r14
+  %res = atomicrmw or i64 *%src, i64 4294967297 seq_cst
+  ret i64 %res
+}
+
+; Check the high end of the OIHL range.
+define i64 @f11(i64 %dummy, i64 *%src) {
+; CHECK: f11:
+; CHECK: oihl %r0, 65535
+; CHECK: br %r14
+  %res = atomicrmw or i64 *%src, i64 281470681743360 seq_cst
+  ret i64 %res
+}
+
+; Check the lowest useful OIHH value, which is 1<<32 greater than above.
+define i64 @f12(i64 %dummy, i64 *%src) {
+; CHECK: f12:
+; CHECK: oihh %r0, 1
+; CHECK: br %r14
+  %res = atomicrmw or i64 *%src, i64 281474976710656 seq_cst
+  ret i64 %res
+}
+
+; Check the lowest useful OIHF value, which is 1<<32 greater again.
+define i64 @f13(i64 %dummy, i64 *%src) {
+; CHECK: f13:
+; CHECK: oihf %r0, 65537
+; CHECK: br %r14
+  %res = atomicrmw or i64 *%src, i64 281479271677952 seq_cst
+  ret i64 %res
+}
+
+; Check the high end of the OIHH range.
+define i64 @f14(i64 %dummy, i64 *%src) {
+; CHECK: f14:
+; CHECK: oihh %r0, 65535
+; CHECK: br %r14
+  %res = atomicrmw or i64 *%src, i64 18446462598732840960 seq_cst
+  ret i64 %res
+}
+
+; Check the next value up, which must use a register.
+define i64 @f15(i64 %dummy, i64 *%src) {
+; CHECK: f15:
+; CHECK: ogr
+; CHECK: br %r14
+  %res = atomicrmw or i64 *%src, i64 18446462598732840961 seq_cst
+  ret i64 %res
+}
+
+; Check the high end of the OIHF range.
+define i64 @f16(i64 %dummy, i64 *%src) {
+; CHECK: f16:
+; CHECK: oihf %r0, 4294967295
+; CHECK: br %r14
+  %res = atomicrmw or i64 *%src, i64 -4294967296 seq_cst
+  ret i64 %res
+}
diff --git a/test/CodeGen/SystemZ/atomicrmw-sub-01.ll b/test/CodeGen/SystemZ/atomicrmw-sub-01.ll
new file mode 100644
index 0000000..d073dc5
--- /dev/null
+++ b/test/CodeGen/SystemZ/atomicrmw-sub-01.ll
@@ -0,0 +1,132 @@
+; Test 8-bit atomic subtractions.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT1
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT2
+
+; Check subtraction of a variable.
+; - CHECK is for the main loop.
+; - CHECK-SHIFT1 makes sure that the negated shift count used by the second
+;   RLL is set up correctly.  The negation is independent of the NILL and L
+;   tested in CHECK.
+; - CHECK-SHIFT2 makes sure that %b is shifted into the high part of the word
+;   before being used.  This shift is independent of the other loop prologue
+;   instructions.
+define i8 @f1(i8 *%src, i8 %b) {
+; CHECK: f1:
+; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3
+; CHECK: nill %r2, 65532
+; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]])
+; CHECK: sr [[ROT]], %r3
+; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}})
+; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
+; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: rll %r2, [[OLD]], 8([[SHIFT]])
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f1:
+; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3
+; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]])
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: br %r14
+;
+; CHECK-SHIFT2: f1:
+; CHECK-SHIFT2: sll %r3, 24
+; CHECK-SHIFT2: rll
+; CHECK-SHIFT2: sr {{%r[0-9]+}}, %r3
+; CHECK-SHIFT2: rll
+; CHECK-SHIFT2: rll
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw sub i8 *%src, i8 %b seq_cst
+  ret i8 %res
+}
+
+; Check the minimum signed value.  We add 0x80000000 to the rotated word.
+define i8 @f2(i8 *%src) {
+; CHECK: f2:
+; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3
+; CHECK: nill %r2, 65532
+; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]])
+; CHECK: afi [[ROT]], -2147483648
+; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0([[NEGSHIFT:%r[1-9]+]])
+; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
+; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: rll %r2, [[OLD]], 8([[SHIFT]])
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f2:
+; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3
+; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]])
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: br %r14
+;
+; CHECK-SHIFT2: f2:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw sub i8 *%src, i8 -128 seq_cst
+  ret i8 %res
+}
+
+; Check subtraction of -1.  We add 0x01000000 to the rotated word.
+define i8 @f3(i8 *%src) {
+; CHECK: f3:
+; CHECK: afi [[ROT]], 16777216
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f3:
+; CHECK-SHIFT1: br %r14
+; CHECK-SHIFT2: f3:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw sub i8 *%src, i8 -1 seq_cst
+  ret i8 %res
+}
+
+; Check subtraction of 1.  We add 0xff000000 to the rotated word.
+define i8 @f4(i8 *%src) {
+; CHECK: f4:
+; CHECK: afi [[ROT]], -16777216
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f4:
+; CHECK-SHIFT1: br %r14
+; CHECK-SHIFT2: f4:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw sub i8 *%src, i8 1 seq_cst
+  ret i8 %res
+}
+
+; Check the maximum signed value.  We add 0x81000000 to the rotated word.
+define i8 @f5(i8 *%src) {
+; CHECK: f5:
+; CHECK: afi [[ROT]], -2130706432
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f5:
+; CHECK-SHIFT1: br %r14
+; CHECK-SHIFT2: f5:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw sub i8 *%src, i8 127 seq_cst
+  ret i8 %res
+}
+
+; Check subtraction of a large unsigned value.  We add 0x02000000 to the
+; rotated word.
+define i8 @f6(i8 *%src) {
+; CHECK: f6:
+; CHECK: afi [[ROT]], 33554432
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f6:
+; CHECK-SHIFT1: br %r14
+; CHECK-SHIFT2: f6:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw sub i8 *%src, i8 254 seq_cst
+  ret i8 %res
+}
diff --git a/test/CodeGen/SystemZ/atomicrmw-sub-02.ll b/test/CodeGen/SystemZ/atomicrmw-sub-02.ll
new file mode 100644
index 0000000..449d92f
--- /dev/null
+++ b/test/CodeGen/SystemZ/atomicrmw-sub-02.ll
@@ -0,0 +1,132 @@
+; Test 16-bit atomic subtractions.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT1
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT2
+
+; Check subtraction of a variable.
+; - CHECK is for the main loop.
+; - CHECK-SHIFT1 makes sure that the negated shift count used by the second
+;   RLL is set up correctly.  The negation is independent of the NILL and L
+;   tested in CHECK.
+; - CHECK-SHIFT2 makes sure that %b is shifted into the high part of the word
+;   before being used.  This shift is independent of the other loop prologue
+;   instructions.
+define i16 @f1(i16 *%src, i16 %b) {
+; CHECK: f1:
+; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3
+; CHECK: nill %r2, 65532
+; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]])
+; CHECK: sr [[ROT]], %r3
+; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}})
+; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
+; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: rll %r2, [[OLD]], 16([[SHIFT]])
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f1:
+; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3
+; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]])
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: br %r14
+;
+; CHECK-SHIFT2: f1:
+; CHECK-SHIFT2: sll %r3, 16
+; CHECK-SHIFT2: rll
+; CHECK-SHIFT2: sr {{%r[0-9]+}}, %r3
+; CHECK-SHIFT2: rll
+; CHECK-SHIFT2: rll
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw sub i16 *%src, i16 %b seq_cst
+  ret i16 %res
+}
+
+; Check the minimum signed value.  We add 0x80000000 to the rotated word.
+define i16 @f2(i16 *%src) {
+; CHECK: f2:
+; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3
+; CHECK: nill %r2, 65532
+; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]])
+; CHECK: afi [[ROT]], -2147483648
+; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0([[NEGSHIFT:%r[1-9]+]])
+; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
+; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: rll %r2, [[OLD]], 16([[SHIFT]])
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f2:
+; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3
+; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]])
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: br %r14
+;
+; CHECK-SHIFT2: f2:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw sub i16 *%src, i16 -32768 seq_cst
+  ret i16 %res
+}
+
+; Check subtraction of -1.  We add 0x00010000 to the rotated word.
+define i16 @f3(i16 *%src) {
+; CHECK: f3:
+; CHECK: afi [[ROT]], 65536
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f3:
+; CHECK-SHIFT1: br %r14
+; CHECK-SHIFT2: f3:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw sub i16 *%src, i16 -1 seq_cst
+  ret i16 %res
+}
+
+; Check subtraction of 1.  We add 0xffff0000 to the rotated word.
+define i16 @f4(i16 *%src) {
+; CHECK: f4:
+; CHECK: afi [[ROT]], -65536
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f4:
+; CHECK-SHIFT1: br %r14
+; CHECK-SHIFT2: f4:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw sub i16 *%src, i16 1 seq_cst
+  ret i16 %res
+}
+
+; Check the maximum signed value.  We add 0x80010000 to the rotated word.
+define i16 @f5(i16 *%src) {
+; CHECK: f5:
+; CHECK: afi [[ROT]], -2147418112
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f5:
+; CHECK-SHIFT1: br %r14
+; CHECK-SHIFT2: f5:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw sub i16 *%src, i16 32767 seq_cst
+  ret i16 %res
+}
+
+; Check subtraction of a large unsigned value.  We add 0x00020000 to the
+; rotated word.
+define i16 @f6(i16 *%src) {
+; CHECK: f6:
+; CHECK: afi [[ROT]], 131072
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f6:
+; CHECK-SHIFT1: br %r14
+; CHECK-SHIFT2: f6:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw sub i16 *%src, i16 65534 seq_cst
+  ret i16 %res
+}
diff --git a/test/CodeGen/SystemZ/atomicrmw-sub-03.ll b/test/CodeGen/SystemZ/atomicrmw-sub-03.ll
new file mode 100644
index 0000000..da07fb5
--- /dev/null
+++ b/test/CodeGen/SystemZ/atomicrmw-sub-03.ll
@@ -0,0 +1,94 @@
+; Test 32-bit atomic subtractions.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check subtraction of a variable.
+define i32 @f1(i32 %dummy, i32 *%src, i32 %b) {
+; CHECK: f1:
+; CHECK: l %r2, 0(%r3)
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK: lr %r0, %r2
+; CHECK: sr %r0, %r4
+; CHECK: cs %r2, %r0, 0(%r3)
+; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: br %r14
+  %res = atomicrmw sub i32 *%src, i32 %b seq_cst
+  ret i32 %res
+}
+
+; Check subtraction of 1, which can use AHI.
+define i32 @f2(i32 %dummy, i32 *%src) {
+; CHECK: f2:
+; CHECK: l %r2, 0(%r3)
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK: lr %r0, %r2
+; CHECK: ahi %r0, -1
+; CHECK: cs %r2, %r0, 0(%r3)
+; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: br %r14
+  %res = atomicrmw sub i32 *%src, i32 1 seq_cst
+  ret i32 %res
+}
+
+; Check the low end of the AHI range.
+define i32 @f3(i32 %dummy, i32 *%src) {
+; CHECK: f3:
+; CHECK: ahi %r0, -32768
+; CHECK: br %r14
+  %res = atomicrmw sub i32 *%src, i32 32768 seq_cst
+  ret i32 %res
+}
+
+; Check the next value down, which must use AFI.
+define i32 @f4(i32 %dummy, i32 *%src) {
+; CHECK: f4:
+; CHECK: afi %r0, -32769
+; CHECK: br %r14
+  %res = atomicrmw sub i32 *%src, i32 32769 seq_cst
+  ret i32 %res
+}
+
+; Check the low end of the AFI range.
+define i32 @f5(i32 %dummy, i32 *%src) {
+; CHECK: f5:
+; CHECK: afi %r0, -2147483648
+; CHECK: br %r14
+  %res = atomicrmw sub i32 *%src, i32 2147483648 seq_cst
+  ret i32 %res
+}
+
+; Check the next value up, which gets treated as a positive operand.
+define i32 @f6(i32 %dummy, i32 *%src) {
+; CHECK: f6:
+; CHECK: afi %r0, 2147483647
+; CHECK: br %r14
+  %res = atomicrmw sub i32 *%src, i32 2147483649 seq_cst
+  ret i32 %res
+}
+
+; Check subtraction of -1, which can use AHI.
+define i32 @f7(i32 %dummy, i32 *%src) {
+; CHECK: f7:
+; CHECK: ahi %r0, 1
+; CHECK: br %r14
+  %res = atomicrmw sub i32 *%src, i32 -1 seq_cst
+  ret i32 %res
+}
+
+; Check the high end of the AHI range.
+define i32 @f8(i32 %dummy, i32 *%src) {
+; CHECK: f8:
+; CHECK: ahi %r0, 32767
+; CHECK: br %r14
+  %res = atomicrmw sub i32 *%src, i32 -32767 seq_cst
+  ret i32 %res
+}
+
+; Check the next value down, which must use AFI instead.
+define i32 @f9(i32 %dummy, i32 *%src) {
+; CHECK: f9:
+; CHECK: afi %r0, 32768
+; CHECK: br %r14
+  %res = atomicrmw sub i32 *%src, i32 -32768 seq_cst
+  ret i32 %res
+}
diff --git a/test/CodeGen/SystemZ/atomicrmw-sub-04.ll b/test/CodeGen/SystemZ/atomicrmw-sub-04.ll
new file mode 100644
index 0000000..26f75af
--- /dev/null
+++ b/test/CodeGen/SystemZ/atomicrmw-sub-04.ll
@@ -0,0 +1,112 @@
+; Test 64-bit atomic subtractions.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check subtraction of a variable.
+define i64 @f1(i64 %dummy, i64 *%src, i64 %b) {
+; CHECK: f1:
+; CHECK: lg %r2, 0(%r3)
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK: lgr %r0, %r2
+; CHECK: sgr %r0, %r4
+; CHECK: csg %r2, %r0, 0(%r3)
+; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: br %r14
+  %res = atomicrmw sub i64 *%src, i64 %b seq_cst
+  ret i64 %res
+}
+
+; Check subtraction of 1, which can use AGHI.
+define i64 @f2(i64 %dummy, i64 *%src) {
+; CHECK: f2:
+; CHECK: lg %r2, 0(%r3)
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK: lgr %r0, %r2
+; CHECK: aghi %r0, -1
+; CHECK: csg %r2, %r0, 0(%r3)
+; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: br %r14
+  %res = atomicrmw sub i64 *%src, i64 1 seq_cst
+  ret i64 %res
+}
+
+; Check the low end of the AGHI range.
+define i64 @f3(i64 %dummy, i64 *%src) {
+; CHECK: f3:
+; CHECK: aghi %r0, -32768
+; CHECK: br %r14
+  %res = atomicrmw sub i64 *%src, i64 32768 seq_cst
+  ret i64 %res
+}
+
+; Check the next value up, which must use AGFI.
+define i64 @f4(i64 %dummy, i64 *%src) {
+; CHECK: f4:
+; CHECK: agfi %r0, -32769
+; CHECK: br %r14
+  %res = atomicrmw sub i64 *%src, i64 32769 seq_cst
+  ret i64 %res
+}
+
+; Check the low end of the AGFI range.
+define i64 @f5(i64 %dummy, i64 *%src) {
+; CHECK: f5:
+; CHECK: agfi %r0, -2147483648
+; CHECK: br %r14
+  %res = atomicrmw sub i64 *%src, i64 2147483648 seq_cst
+  ret i64 %res
+}
+
+; Check the next value up, which must use a register operation.
+define i64 @f6(i64 %dummy, i64 *%src) {
+; CHECK: f6:
+; CHECK: sgr
+; CHECK: br %r14
+  %res = atomicrmw sub i64 *%src, i64 2147483649 seq_cst
+  ret i64 %res
+}
+
+; Check subtraction of -1, which can use AGHI.
+define i64 @f7(i64 %dummy, i64 *%src) {
+; CHECK: f7:
+; CHECK: aghi %r0, 1
+; CHECK: br %r14
+  %res = atomicrmw sub i64 *%src, i64 -1 seq_cst
+  ret i64 %res
+}
+
+; Check the high end of the AGHI range.
+define i64 @f8(i64 %dummy, i64 *%src) {
+; CHECK: f8:
+; CHECK: aghi %r0, 32767
+; CHECK: br %r14
+  %res = atomicrmw sub i64 *%src, i64 -32767 seq_cst
+  ret i64 %res
+}
+
+; Check the next value down, which must use AGFI instead.
+define i64 @f9(i64 %dummy, i64 *%src) {
+; CHECK: f9:
+; CHECK: agfi %r0, 32768
+; CHECK: br %r14
+  %res = atomicrmw sub i64 *%src, i64 -32768 seq_cst
+  ret i64 %res
+}
+
+; Check the high end of the AGFI range.
+define i64 @f10(i64 %dummy, i64 *%src) {
+; CHECK: f10:
+; CHECK: agfi %r0, 2147483647
+; CHECK: br %r14
+  %res = atomicrmw sub i64 *%src, i64 -2147483647 seq_cst
+  ret i64 %res
+}
+
+; Check the next value down, which must use a register operation.
+define i64 @f11(i64 %dummy, i64 *%src) {
+; CHECK: f11:
+; CHECK: sgr
+; CHECK: br %r14
+  %res = atomicrmw sub i64 *%src, i64 -2147483648 seq_cst
+  ret i64 %res
+}
diff --git a/test/CodeGen/SystemZ/atomicrmw-xchg-01.ll b/test/CodeGen/SystemZ/atomicrmw-xchg-01.ll
new file mode 100644
index 0000000..e33597b
--- /dev/null
+++ b/test/CodeGen/SystemZ/atomicrmw-xchg-01.ll
@@ -0,0 +1,55 @@
+; Test 8-bit atomic exchange.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT
+
+; Check exchange with a variable.
+; - CHECK is for the main loop.
+; - CHECK-SHIFT makes sure that the negated shift count used by the second
+;   RLL is set up correctly.  The negation is independent of the NILL and L
+;   tested in CHECK.  CHECK-SHIFT also checks that %r3 is not modified before
+;   being used in the RISBG (in contrast to things like atomic addition,
+;   which shift %r3 left so that %b is at the high end of the word).
+define i8 @f1(i8 *%src, i8 %b) {
+; CHECK: f1:
+; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3
+; CHECK: nill %r2, 65532
+; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]])
+; CHECK: risbg [[ROT]], %r3, 32, 39, 24
+; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}})
+; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
+; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: rll %r2, [[OLD]], 8([[SHIFT]])
+; CHECK: br %r14
+;
+; CHECK-SHIFT: f1:
+; CHECK-SHIFT-NOT: %r3
+; CHECK-SHIFT: sllg [[SHIFT:%r[1-9]+]], %r2, 3
+; CHECK-SHIFT-NOT: %r3
+; CHECK-SHIFT: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
+; CHECK-SHIFT-NOT: %r3
+; CHECK-SHIFT: rll
+; CHECK-SHIFT-NOT: %r3
+; CHECK-SHIFT: risbg {{%r[0-9]+}}, %r3, 32, 39, 24
+; CHECK-SHIFT: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]])
+; CHECK-SHIFT: rll
+; CHECK-SHIFT: br %r14
+  %res = atomicrmw xchg i8 *%src, i8 %b seq_cst
+  ret i8 %res
+}
+
+; Check exchange with a constant.  We should force the constant into
+; a register and use the sequence above.
+define i8 @f2(i8 *%src) {
+; CHECK: f2:
+; CHECK: lhi [[VALUE:%r[0-9]+]], 88
+; CHECK: risbg {{%r[0-9]+}}, [[VALUE]], 32, 39, 24
+; CHECK: br %r14
+;
+; CHECK-SHIFT: f2:
+; CHECK-SHIFT: br %r14
+  %res = atomicrmw xchg i8 *%src, i8 88 seq_cst
+  ret i8 %res
+}
diff --git a/test/CodeGen/SystemZ/atomicrmw-xchg-02.ll b/test/CodeGen/SystemZ/atomicrmw-xchg-02.ll
new file mode 100644
index 0000000..31f8026
--- /dev/null
+++ b/test/CodeGen/SystemZ/atomicrmw-xchg-02.ll
@@ -0,0 +1,55 @@
+; Test 16-bit atomic exchange.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT
+
+; Check exchange with a variable.
+; - CHECK is for the main loop.
+; - CHECK-SHIFT makes sure that the negated shift count used by the second
+;   RLL is set up correctly.  The negation is independent of the NILL and L
+;   tested in CHECK.  CHECK-SHIFT also checks that %r3 is not modified before
+;   being used in the RISBG (in contrast to things like atomic addition,
+;   which shift %r3 left so that %b is at the high end of the word).
+define i16 @f1(i16 *%src, i16 %b) {
+; CHECK: f1:
+; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3
+; CHECK: nill %r2, 65532
+; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]])
+; CHECK: risbg [[ROT]], %r3, 32, 47, 16
+; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}})
+; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
+; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: rll %r2, [[OLD]], 16([[SHIFT]])
+; CHECK: br %r14
+;
+; CHECK-SHIFT: f1:
+; CHECK-SHIFT-NOT: %r3
+; CHECK-SHIFT: sllg [[SHIFT:%r[1-9]+]], %r2, 3
+; CHECK-SHIFT-NOT: %r3
+; CHECK-SHIFT: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
+; CHECK-SHIFT-NOT: %r3
+; CHECK-SHIFT: rll
+; CHECK-SHIFT-NOT: %r3
+; CHECK-SHIFT: risbg {{%r[0-9]+}}, %r3, 32, 47, 16
+; CHECK-SHIFT: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]])
+; CHECK-SHIFT: rll
+; CHECK-SHIFT: br %r14
+  %res = atomicrmw xchg i16 *%src, i16 %b seq_cst
+  ret i16 %res
+}
+
+; Check exchange with a constant.  We should force the constant into
+; a register and use the sequence above.
+define i16 @f2(i16 *%src) {
+; CHECK: f2:
+; CHECK: lhi [[VALUE:%r[0-9]+]], -25536
+; CHECK: risbg {{%r[0-9]+}}, [[VALUE]], 32, 47, 16
+; CHECK: br %r14
+;
+; CHECK-SHIFT: f2:
+; CHECK-SHIFT: br %r14
+  %res = atomicrmw xchg i16 *%src, i16 40000 seq_cst
+  ret i16 %res
+}
diff --git a/test/CodeGen/SystemZ/atomicrmw-xchg-03.ll b/test/CodeGen/SystemZ/atomicrmw-xchg-03.ll
new file mode 100644
index 0000000..37581ab
--- /dev/null
+++ b/test/CodeGen/SystemZ/atomicrmw-xchg-03.ll
@@ -0,0 +1,122 @@
+; Test 32-bit atomic exchange.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check register exchange.
+define i32 @f1(i32 %dummy, i32 *%src, i32 %b) {
+; CHECK: f1:
+; CHECK: l %r2, 0(%r3)
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK: cs %r2, %r4, 0(%r3)
+; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: br %r14
+  %res = atomicrmw xchg i32 *%src, i32 %b seq_cst
+  ret i32 %res
+}
+
+; Check the high end of the aligned CS range.
+define i32 @f2(i32 %dummy, i32 *%src, i32 %b) {
+; CHECK: f2:
+; CHECK: l %r2, 4092(%r3)
+; CHECK: cs %r2, {{%r[0-9]+}}, 4092(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 1023
+  %res = atomicrmw xchg i32 *%ptr, i32 %b seq_cst
+  ret i32 %res
+}
+
+; Check the next word up, which requires CSY.
+define i32 @f3(i32 %dummy, i32 *%src, i32 %b) {
+; CHECK: f3:
+; CHECK: ly %r2, 4096(%r3)
+; CHECK: csy %r2, {{%r[0-9]+}}, 4096(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 1024
+  %res = atomicrmw xchg i32 *%ptr, i32 %b seq_cst
+  ret i32 %res
+}
+
+; Check the high end of the aligned CSY range.
+define i32 @f4(i32 %dummy, i32 *%src, i32 %b) {
+; CHECK: f4:
+; CHECK: ly %r2, 524284(%r3)
+; CHECK: csy %r2, {{%r[0-9]+}}, 524284(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 131071
+  %res = atomicrmw xchg i32 *%ptr, i32 %b seq_cst
+  ret i32 %res
+}
+
+; Check the next word up, which needs separate address logic.
+define i32 @f5(i32 %dummy, i32 *%src, i32 %b) {
+; CHECK: f5:
+; CHECK: agfi %r3, 524288
+; CHECK: l %r2, 0(%r3)
+; CHECK: cs %r2, {{%r[0-9]+}}, 0(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 131072
+  %res = atomicrmw xchg i32 *%ptr, i32 %b seq_cst
+  ret i32 %res
+}
+
+; Check the high end of the negative aligned CSY range.
+define i32 @f6(i32 %dummy, i32 *%src, i32 %b) {
+; CHECK: f6:
+; CHECK: ly %r2, -4(%r3)
+; CHECK: csy %r2, {{%r[0-9]+}}, -4(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 -1
+  %res = atomicrmw xchg i32 *%ptr, i32 %b seq_cst
+  ret i32 %res
+}
+
+; Check the low end of the CSY range.
+define i32 @f7(i32 %dummy, i32 *%src, i32 %b) {
+; CHECK: f7:
+; CHECK: ly %r2, -524288(%r3)
+; CHECK: csy %r2, {{%r[0-9]+}}, -524288(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 -131072
+  %res = atomicrmw xchg i32 *%ptr, i32 %b seq_cst
+  ret i32 %res
+}
+
+; Check the next word down, which needs separate address logic.
+define i32 @f8(i32 %dummy, i32 *%src, i32 %b) {
+; CHECK: f8:
+; CHECK: agfi %r3, -524292
+; CHECK: l %r2, 0(%r3)
+; CHECK: cs %r2, {{%r[0-9]+}}, 0(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 -131073
+  %res = atomicrmw xchg i32 *%ptr, i32 %b seq_cst
+  ret i32 %res
+}
+
+; Check that indexed addresses are not allowed.
+define i32 @f9(i32 %dummy, i64 %base, i64 %index, i32 %b) {
+; CHECK: f9:
+; CHECK: agr %r3, %r4
+; CHECK: l %r2, 0(%r3)
+; CHECK: cs %r2, {{%r[0-9]+}}, 0(%r3)
+; CHECK: br %r14
+  %add = add i64 %base, %index
+  %ptr = inttoptr i64 %add to i32 *
+  %res = atomicrmw xchg i32 *%ptr, i32 %b seq_cst
+  ret i32 %res
+}
+
+; Check exchange of a constant.  We should force it into a register
+; (captured as VALUE) and use the same CS loop as f1.
+define i32 @f10(i32 %dummy, i32 *%src) {
+; CHECK: f10:
+; CHECK: llill [[VALUE:%r[0-9]+]], 40000
+; CHECK: l %r2, 0(%r3)
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK: cs %r2, [[VALUE]], 0(%r3)
+; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: br %r14
+  %res = atomicrmw xchg i32 *%src, i32 40000 seq_cst
+  ret i32 %res
+}
+
diff --git a/test/CodeGen/SystemZ/atomicrmw-xchg-04.ll b/test/CodeGen/SystemZ/atomicrmw-xchg-04.ll
new file mode 100644
index 0000000..a68295e
--- /dev/null
+++ b/test/CodeGen/SystemZ/atomicrmw-xchg-04.ll
@@ -0,0 +1,88 @@
+; Test 64-bit atomic exchange.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check register exchange.
+define i64 @f1(i64 %dummy, i64 *%src, i64 %b) {
+; CHECK: f1:
+; CHECK: lg %r2, 0(%r3)
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK: csg %r2, %r4, 0(%r3)
+; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: br %r14
+  %res = atomicrmw xchg i64 *%src, i64 %b seq_cst
+  ret i64 %res
+}
+
+; Check the high end of the aligned CSG range.
+define i64 @f2(i64 %dummy, i64 *%src, i64 %b) {
+; CHECK: f2:
+; CHECK: lg %r2, 524280(%r3)
+; CHECK: csg %r2, {{%r[0-9]+}}, 524280(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 65535
+  %res = atomicrmw xchg i64 *%ptr, i64 %b seq_cst
+  ret i64 %res
+}
+
+; Check the next doubleword up, which requires separate address logic.
+define i64 @f3(i64 %dummy, i64 *%src, i64 %b) {
+; CHECK: f3:
+; CHECK: agfi %r3, 524288
+; CHECK: lg %r2, 0(%r3)
+; CHECK: csg %r2, {{%r[0-9]+}}, 0(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 65536
+  %res = atomicrmw xchg i64 *%ptr, i64 %b seq_cst
+  ret i64 %res
+}
+
+; Check the low end of the CSG range.
+define i64 @f4(i64 %dummy, i64 *%src, i64 %b) {
+; CHECK: f4:
+; CHECK: lg %r2, -524288(%r3)
+; CHECK: csg %r2, {{%r[0-9]+}}, -524288(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 -65536
+  %res = atomicrmw xchg i64 *%ptr, i64 %b seq_cst
+  ret i64 %res
+}
+
+; Check the next doubleword down, which requires separate address logic.
+define i64 @f5(i64 %dummy, i64 *%src, i64 %b) {
+; CHECK: f5:
+; CHECK: agfi %r3, -524296
+; CHECK: lg %r2, 0(%r3)
+; CHECK: csg %r2, {{%r[0-9]+}}, 0(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 -65537
+  %res = atomicrmw xchg i64 *%ptr, i64 %b seq_cst
+  ret i64 %res
+}
+
+; Check that indexed addresses are not allowed.
+define i64 @f6(i64 %dummy, i64 %base, i64 %index, i64 %b) {
+; CHECK: f6:
+; CHECK: agr %r3, %r4
+; CHECK: lg %r2, 0(%r3)
+; CHECK: csg %r2, {{%r[0-9]+}}, 0(%r3)
+; CHECK: br %r14
+  %add = add i64 %base, %index
+  %ptr = inttoptr i64 %add to i64 *
+  %res = atomicrmw xchg i64 *%ptr, i64 %b seq_cst
+  ret i64 %res
+}
+
+; Check exchange of a constant.  We should force it into a register
+; (captured as VALUE) and use the same CSG loop as f1.
+define i64 @f7(i64 %dummy, i64 *%ptr) {
+; CHECK: f7:
+; CHECK: llilf [[VALUE:%r[0-9]+]], 3000000000
+; CHECK: lg %r2, 0(%r3)
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK: csg %r2, [[VALUE]], 0(%r3)
+; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: br %r14
+  %res = atomicrmw xchg i64 *%ptr, i64 3000000000 seq_cst
+  ret i64 %res
+}
diff --git a/test/CodeGen/SystemZ/atomicrmw-xor-01.ll b/test/CodeGen/SystemZ/atomicrmw-xor-01.ll
new file mode 100644
index 0000000..13cdf02
--- /dev/null
+++ b/test/CodeGen/SystemZ/atomicrmw-xor-01.ll
@@ -0,0 +1,132 @@
+; Test 8-bit atomic XORs.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT1
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT2
+
+; Check XOR of a variable.
+; - CHECK is for the main loop.
+; - CHECK-SHIFT1 makes sure that the negated shift count used by the second
+;   RLL is set up correctly.  The negation is independent of the NILL and L
+;   tested in CHECK.
+; - CHECK-SHIFT2 makes sure that %b is shifted into the high part of the word
+;   before being used.  This shift is independent of the other loop prologue
+;   instructions.
+define i8 @f1(i8 *%src, i8 %b) {
+; CHECK: f1:
+; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3
+; CHECK: nill %r2, 65532
+; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]])
+; CHECK: xr [[ROT]], %r3
+; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}})
+; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
+; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: rll %r2, [[OLD]], 8([[SHIFT]])
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f1:
+; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3
+; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]])
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: br %r14
+;
+; CHECK-SHIFT2: f1:
+; CHECK-SHIFT2: sll %r3, 24
+; CHECK-SHIFT2: rll
+; CHECK-SHIFT2: xr {{%r[0-9]+}}, %r3
+; CHECK-SHIFT2: rll
+; CHECK-SHIFT2: rll
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw xor i8 *%src, i8 %b seq_cst
+  ret i8 %res
+}
+
+; Check the minimum signed value.  We XOR the rotated word with 0x80000000.
+define i8 @f2(i8 *%src) {
+; CHECK: f2:
+; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3
+; CHECK: nill %r2, 65532
+; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]])
+; CHECK: xilf [[ROT]], 2147483648
+; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0([[NEGSHIFT:%r[1-9]+]])
+; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
+; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: rll %r2, [[OLD]], 8([[SHIFT]])
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f2:
+; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3
+; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]])
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: br %r14
+;
+; CHECK-SHIFT2: f2:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw xor i8 *%src, i8 -128 seq_cst
+  ret i8 %res
+}
+
+; Check XORs of -1.  We XOR the rotated word with 0xff000000.
+define i8 @f3(i8 *%src) {
+; CHECK: f3:
+; CHECK: xilf [[ROT]], 4278190080
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f3:
+; CHECK-SHIFT1: br %r14
+; CHECK-SHIFT2: f3:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw xor i8 *%src, i8 -1 seq_cst
+  ret i8 %res
+}
+
+; Check XORs of 1.  We XOR the rotated word with 0x01000000.
+define i8 @f4(i8 *%src) {
+; CHECK: f4:
+; CHECK: xilf [[ROT]], 16777216
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f4:
+; CHECK-SHIFT1: br %r14
+; CHECK-SHIFT2: f4:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw xor i8 *%src, i8 1 seq_cst
+  ret i8 %res
+}
+
+; Check the maximum signed value.  We XOR the rotated word with 0x7f000000.
+define i8 @f5(i8 *%src) {
+; CHECK: f5:
+; CHECK: xilf [[ROT]], 2130706432
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f5:
+; CHECK-SHIFT1: br %r14
+; CHECK-SHIFT2: f5:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw xor i8 *%src, i8 127 seq_cst
+  ret i8 %res
+}
+
+; Check XORs of a large unsigned value.  We XOR the rotated word with
+; 0xfd000000.
+define i8 @f6(i8 *%src) {
+; CHECK: f6:
+; CHECK: xilf [[ROT]], 4244635648
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f6:
+; CHECK-SHIFT1: br %r14
+; CHECK-SHIFT2: f6:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw xor i8 *%src, i8 253 seq_cst
+  ret i8 %res
+}
diff --git a/test/CodeGen/SystemZ/atomicrmw-xor-02.ll b/test/CodeGen/SystemZ/atomicrmw-xor-02.ll
new file mode 100644
index 0000000..4faa64f
--- /dev/null
+++ b/test/CodeGen/SystemZ/atomicrmw-xor-02.ll
@@ -0,0 +1,132 @@
+; Test 16-bit atomic XORs.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT1
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT2
+
+; Check XOR of a variable.
+; - CHECK is for the main loop.
+; - CHECK-SHIFT1 makes sure that the negated shift count used by the second
+;   RLL is set up correctly.  The negation is independent of the NILL and L
+;   tested in CHECK.
+; - CHECK-SHIFT2 makes sure that %b is shifted into the high part of the word
+;   before being used.  This shift is independent of the other loop prologue
+;   instructions.
+define i16 @f1(i16 *%src, i16 %b) {
+; CHECK: f1:
+; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3
+; CHECK: nill %r2, 65532
+; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]])
+; CHECK: xr [[ROT]], %r3
+; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0({{%r[1-9]+}})
+; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
+; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: rll %r2, [[OLD]], 16([[SHIFT]])
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f1:
+; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3
+; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]])
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: br %r14
+;
+; CHECK-SHIFT2: f1:
+; CHECK-SHIFT2: sll %r3, 16
+; CHECK-SHIFT2: rll
+; CHECK-SHIFT2: xr {{%r[0-9]+}}, %r3
+; CHECK-SHIFT2: rll
+; CHECK-SHIFT2: rll
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw xor i16 *%src, i16 %b seq_cst
+  ret i16 %res
+}
+
+; Check the minimum signed value.  We XOR the rotated word with 0x80000000.
+define i16 @f2(i16 *%src) {
+; CHECK: f2:
+; CHECK: sllg [[SHIFT:%r[1-9]+]], %r2, 3
+; CHECK: nill %r2, 65532
+; CHECK: l [[OLD:%r[0-9]+]], 0(%r2)
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK: rll [[ROT:%r[0-9]+]], [[OLD]], 0([[SHIFT]])
+; CHECK: xilf [[ROT]], 2147483648
+; CHECK: rll [[NEW:%r[0-9]+]], [[ROT]], 0([[NEGSHIFT:%r[1-9]+]])
+; CHECK: cs [[OLD]], [[NEW]], 0(%r2)
+; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: rll %r2, [[OLD]], 16([[SHIFT]])
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f2:
+; CHECK-SHIFT1: sllg [[SHIFT:%r[1-9]+]], %r2, 3
+; CHECK-SHIFT1: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: rll {{%r[0-9]+}}, {{%r[0-9]+}}, 0([[NEGSHIFT]])
+; CHECK-SHIFT1: rll
+; CHECK-SHIFT1: br %r14
+;
+; CHECK-SHIFT2: f2:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw xor i16 *%src, i16 -32768 seq_cst
+  ret i16 %res
+}
+
+; Check XORs of -1.  We XOR the rotated word with 0xffff0000.
+define i16 @f3(i16 *%src) {
+; CHECK: f3:
+; CHECK: xilf [[ROT]], 4294901760
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f3:
+; CHECK-SHIFT1: br %r14
+; CHECK-SHIFT2: f3:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw xor i16 *%src, i16 -1 seq_cst
+  ret i16 %res
+}
+
+; Check XORs of 1.  We XOR the rotated word with 0x00010000.
+define i16 @f4(i16 *%src) {
+; CHECK: f4:
+; CHECK: xilf [[ROT]], 65536
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f4:
+; CHECK-SHIFT1: br %r14
+; CHECK-SHIFT2: f4:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw xor i16 *%src, i16 1 seq_cst
+  ret i16 %res
+}
+
+; Check the maximum signed value.  We XOR the rotated word with 0x7fff0000.
+define i16 @f5(i16 *%src) {
+; CHECK: f5:
+; CHECK: xilf [[ROT]], 2147418112
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f5:
+; CHECK-SHIFT1: br %r14
+; CHECK-SHIFT2: f5:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw xor i16 *%src, i16 32767 seq_cst
+  ret i16 %res
+}
+
+; Check XORs of a large unsigned value.  We XOR the rotated word with
+; 0xfffd0000.
+define i16 @f6(i16 *%src) {
+; CHECK: f6:
+; CHECK: xilf [[ROT]], 4294770688
+; CHECK: br %r14
+;
+; CHECK-SHIFT1: f6:
+; CHECK-SHIFT1: br %r14
+; CHECK-SHIFT2: f6:
+; CHECK-SHIFT2: br %r14
+  %res = atomicrmw xor i16 *%src, i16 65533 seq_cst
+  ret i16 %res
+}
diff --git a/test/CodeGen/SystemZ/atomicrmw-xor-03.ll b/test/CodeGen/SystemZ/atomicrmw-xor-03.ll
new file mode 100644
index 0000000..23884f8
--- /dev/null
+++ b/test/CodeGen/SystemZ/atomicrmw-xor-03.ll
@@ -0,0 +1,49 @@
+; Test 32-bit atomic XORs.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check XORs of a variable.
+define i32 @f1(i32 %dummy, i32 *%src, i32 %b) {
+; CHECK: f1:
+; CHECK: l %r2, 0(%r3)
+; CHECK: [[LABEL:\.[^ ]*]]:
+; CHECK: lr %r0, %r2
+; CHECK: xr %r0, %r4
+; CHECK: cs %r2, %r0, 0(%r3)
+; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: br %r14
+  %res = atomicrmw xor i32 *%src, i32 %b seq_cst
+  ret i32 %res
+}
+
+; Check the lowest useful constant.
+define i32 @f2(i32 %dummy, i32 *%src) {
+; CHECK: f2:
+; CHECK: l %r2, 0(%r3)
+; CHECK: [[LABEL:\.[^ ]*]]:
+; CHECK: lr %r0, %r2
+; CHECK: xilf %r0, 1
+; CHECK: cs %r2, %r0, 0(%r3)
+; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: br %r14
+  %res = atomicrmw xor i32 *%src, i32 1 seq_cst
+  ret i32 %res
+}
+
+; Check an arbitrary constant.
+define i32 @f3(i32 %dummy, i32 *%src) {
+; CHECK: f3:
+; CHECK: xilf %r0, 3000000000
+; CHECK: br %r14
+  %res = atomicrmw xor i32 *%src, i32 3000000000 seq_cst
+  ret i32 %res
+}
+
+; Check bitwise negation.
+define i32 @f4(i32 %dummy, i32 *%src) {
+; CHECK: f4:
+; CHECK: xilf %r0, 4294967295
+; CHECK: br %r14
+  %res = atomicrmw xor i32 *%src, i32 -1 seq_cst
+  ret i32 %res
+}
diff --git a/test/CodeGen/SystemZ/atomicrmw-xor-04.ll b/test/CodeGen/SystemZ/atomicrmw-xor-04.ll
new file mode 100644
index 0000000..21130fb
--- /dev/null
+++ b/test/CodeGen/SystemZ/atomicrmw-xor-04.ll
@@ -0,0 +1,77 @@
+; Test 64-bit atomic XORs.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check XORs of a variable.
+define i64 @f1(i64 %dummy, i64 *%src, i64 %b) {
+; CHECK: f1:
+; CHECK: lg %r2, 0(%r3)
+; CHECK: [[LABEL:\.[^ ]*]]:
+; CHECK: lgr %r0, %r2
+; CHECK: xgr %r0, %r4
+; CHECK: csg %r2, %r0, 0(%r3)
+; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: br %r14
+  %res = atomicrmw xor i64 *%src, i64 %b seq_cst
+  ret i64 %res
+}
+
+; Check the lowest useful XILF value.
+define i64 @f2(i64 %dummy, i64 *%src) {
+; CHECK: f2:
+; CHECK: lg %r2, 0(%r3)
+; CHECK: [[LABEL:\.[^ ]*]]:
+; CHECK: lgr %r0, %r2
+; CHECK: xilf %r0, 1
+; CHECK: csg %r2, %r0, 0(%r3)
+; CHECK: j{{g?}}lh [[LABEL]]
+; CHECK: br %r14
+  %res = atomicrmw xor i64 *%src, i64 1 seq_cst
+  ret i64 %res
+}
+
+; Check the high end of the XILF range.
+define i64 @f3(i64 %dummy, i64 *%src) {
+; CHECK: f3:
+; CHECK: xilf %r0, 4294967295
+; CHECK: br %r14
+  %res = atomicrmw xor i64 *%src, i64 4294967295 seq_cst
+  ret i64 %res
+}
+
+; Check the lowest useful XIHF value, which is one greater than above.
+define i64 @f4(i64 %dummy, i64 *%src) {
+; CHECK: f4:
+; CHECK: xihf %r0, 1
+; CHECK: br %r14
+  %res = atomicrmw xor i64 *%src, i64 4294967296 seq_cst
+  ret i64 %res
+}
+
+; Check the next value up, which must use a register.  (We could use
+; combinations of XIH* and XIL* instead, but that isn't implemented.)
+define i64 @f5(i64 %dummy, i64 *%src) {
+; CHECK: f5:
+; CHECK: xgr
+; CHECK: br %r14
+  %res = atomicrmw xor i64 *%src, i64 4294967297 seq_cst
+  ret i64 %res
+}
+
+; Check the high end of the XIHF range.
+define i64 @f6(i64 %dummy, i64 *%src) {
+; CHECK: f6:
+; CHECK: xihf %r0, 4294967295
+; CHECK: br %r14
+  %res = atomicrmw xor i64 *%src, i64 -4294967296 seq_cst
+  ret i64 %res
+}
+
+; Check the next value up, which must use a register.
+define i64 @f7(i64 %dummy, i64 *%src) {
+; CHECK: f7:
+; CHECK: xgr
+; CHECK: br %r14
+  %res = atomicrmw xor i64 *%src, i64 -4294967295 seq_cst
+  ret i64 %res
+}
diff --git a/test/CodeGen/SystemZ/branch-01.ll b/test/CodeGen/SystemZ/branch-01.ll
new file mode 100644
index 0000000..8ff91ac
--- /dev/null
+++ b/test/CodeGen/SystemZ/branch-01.ll
@@ -0,0 +1,14 @@
+; Test a simple unconditional jump.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+define void @f1(i8 *%dest) {
+; CHECK: f1:
+; CHECK: .L[[LABEL:.*]]:
+; CHECK: mvi 0(%r2), 1
+; CHECK: j{{g?}} .L[[LABEL]]
+  br label %loop
+loop:
+  store volatile i8 1, i8 *%dest
+  br label %loop
+}
diff --git a/test/CodeGen/SystemZ/branch-02.ll b/test/CodeGen/SystemZ/branch-02.ll
new file mode 100644
index 0000000..cde9b56
--- /dev/null
+++ b/test/CodeGen/SystemZ/branch-02.ll
@@ -0,0 +1,94 @@
+; Test all condition-code masks that are relevant for signed integer
+; comparisons.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+define void @f1(i32 *%src, i32 %target) {
+; CHECK: f1:
+; CHECK: .cfi_startproc
+; CHECK: .L[[LABEL:.*]]:
+; CHECK: c %r3, 0(%r2)
+; CHECK-NEXT: j{{g?}}e .L[[LABEL]]
+  br label %loop
+loop:
+  %val = load volatile i32 *%src
+  %cond = icmp eq i32 %target, %val
+  br i1 %cond, label %loop, label %exit
+exit:
+  ret void
+}
+
+define void @f2(i32 *%src, i32 %target) {
+; CHECK: f2:
+; CHECK: .cfi_startproc
+; CHECK: .L[[LABEL:.*]]:
+; CHECK: c %r3, 0(%r2)
+; CHECK-NEXT: j{{g?}}lh .L[[LABEL]]
+  br label %loop
+loop:
+  %val = load volatile i32 *%src
+  %cond = icmp ne i32 %target, %val
+  br i1 %cond, label %loop, label %exit
+exit:
+  ret void
+}
+
+define void @f3(i32 *%src, i32 %target) {
+; CHECK: f3:
+; CHECK: .cfi_startproc
+; CHECK: .L[[LABEL:.*]]:
+; CHECK: c %r3, 0(%r2)
+; CHECK-NEXT: j{{g?}}le .L[[LABEL]]
+  br label %loop
+loop:
+  %val = load volatile i32 *%src
+  %cond = icmp sle i32 %target, %val
+  br i1 %cond, label %loop, label %exit
+exit:
+  ret void
+}
+
+define void @f4(i32 *%src, i32 %target) {
+; CHECK: f4:
+; CHECK: .cfi_startproc
+; CHECK: .L[[LABEL:.*]]:
+; CHECK: c %r3, 0(%r2)
+; CHECK-NEXT: j{{g?}}l .L[[LABEL]]
+  br label %loop
+loop:
+  %val = load volatile i32 *%src
+  %cond = icmp slt i32 %target, %val
+  br i1 %cond, label %loop, label %exit
+exit:
+  ret void
+}
+
+define void @f5(i32 *%src, i32 %target) {
+; CHECK: f5:
+; CHECK: .cfi_startproc
+; CHECK: .L[[LABEL:.*]]:
+; CHECK: c %r3, 0(%r2)
+; CHECK-NEXT: j{{g?}}h .L[[LABEL]]
+  br label %loop
+loop:
+  %val = load volatile i32 *%src
+  %cond = icmp sgt i32 %target, %val
+  br i1 %cond, label %loop, label %exit
+exit:
+  ret void
+}
+
+define void @f6(i32 *%src, i32 %target) {
+; CHECK: f6:
+; CHECK: .cfi_startproc
+; CHECK: .L[[LABEL:.*]]:
+; CHECK: c %r3, 0(%r2)
+; CHECK-NEXT: j{{g?}}he .L[[LABEL]]
+  br label %loop
+loop:
+  %val = load volatile i32 *%src
+  %cond = icmp sge i32 %target, %val
+  br i1 %cond, label %loop, label %exit
+exit:
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/branch-03.ll b/test/CodeGen/SystemZ/branch-03.ll
new file mode 100644
index 0000000..1e447d0
--- /dev/null
+++ b/test/CodeGen/SystemZ/branch-03.ll
@@ -0,0 +1,63 @@
+; Test all condition-code masks that are relevant for unsigned integer
+; comparisons.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+define void @f1(i32 *%src, i32 %target) {
+; CHECK: f1:
+; CHECK: .cfi_startproc
+; CHECK: .L[[LABEL:.*]]:
+; CHECK: cl %r3, 0(%r2)
+; CHECK-NEXT: j{{g?}}le .L[[LABEL]]
+  br label %loop
+loop:
+  %val = load volatile i32 *%src
+  %cond = icmp ule i32 %target, %val
+  br i1 %cond, label %loop, label %exit
+exit:
+  ret void
+}
+
+define void @f2(i32 *%src, i32 %target) {
+; CHECK: f2:
+; CHECK: .cfi_startproc
+; CHECK: .L[[LABEL:.*]]:
+; CHECK: cl %r3, 0(%r2)
+; CHECK-NEXT: j{{g?}}l .L[[LABEL]]
+  br label %loop
+loop:
+  %val = load volatile i32 *%src
+  %cond = icmp ult i32 %target, %val
+  br i1 %cond, label %loop, label %exit
+exit:
+  ret void
+}
+
+define void @f3(i32 *%src, i32 %target) {
+; CHECK: f3:
+; CHECK: .cfi_startproc
+; CHECK: .L[[LABEL:.*]]:
+; CHECK: cl %r3, 0(%r2)
+; CHECK-NEXT: j{{g?}}h .L[[LABEL]]
+  br label %loop
+loop:
+  %val = load volatile i32 *%src
+  %cond = icmp ugt i32 %target, %val
+  br i1 %cond, label %loop, label %exit
+exit:
+  ret void
+}
+
+define void @f4(i32 *%src, i32 %target) {
+; CHECK: f4:
+; CHECK: .cfi_startproc
+; CHECK: .L[[LABEL:.*]]:
+; CHECK: cl %r3, 0(%r2)
+; CHECK-NEXT: j{{g?}}he .L[[LABEL]]
+  br label %loop
+loop:
+  %val = load volatile i32 *%src
+  %cond = icmp uge i32 %target, %val
+  br i1 %cond, label %loop, label %exit
+exit:
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/branch-04.ll b/test/CodeGen/SystemZ/branch-04.ll
new file mode 100644
index 0000000..3d41750
--- /dev/null
+++ b/test/CodeGen/SystemZ/branch-04.ll
@@ -0,0 +1,218 @@
+; Test all condition-code masks that are relevant for floating-point
+; comparisons.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+define void @f1(float *%src, float %target) {
+; CHECK: f1:
+; CHECK: .cfi_startproc
+; CHECK: .L[[LABEL:.*]]:
+; CHECK: ceb %f0, 0(%r2)
+; CHECK-NEXT: j{{g?}}e .L[[LABEL]]
+  br label %loop
+loop:
+  %val = load volatile float *%src
+  %cond = fcmp oeq float %target, %val
+  br i1 %cond, label %loop, label %exit
+exit:
+  ret void
+}
+
+define void @f2(float *%src, float %target) {
+; CHECK: f2:
+; CHECK: .cfi_startproc
+; CHECK: .L[[LABEL:.*]]:
+; CHECK: ceb %f0, 0(%r2)
+; CHECK-NEXT: j{{g?}}lh .L[[LABEL]]
+  br label %loop
+loop:
+  %val = load volatile float *%src
+  %cond = fcmp one float %target, %val
+  br i1 %cond, label %loop, label %exit
+exit:
+  ret void
+}
+
+define void @f3(float *%src, float %target) {
+; CHECK: f3:
+; CHECK: .cfi_startproc
+; CHECK: .L[[LABEL:.*]]:
+; CHECK: ceb %f0, 0(%r2)
+; CHECK-NEXT: j{{g?}}le .L[[LABEL]]
+  br label %loop
+loop:
+  %val = load volatile float *%src
+  %cond = fcmp ole float %target, %val
+  br i1 %cond, label %loop, label %exit
+exit:
+  ret void
+}
+
+define void @f4(float *%src, float %target) {
+; CHECK: f4:
+; CHECK: .cfi_startproc
+; CHECK: .L[[LABEL:.*]]:
+; CHECK: ceb %f0, 0(%r2)
+; CHECK-NEXT: j{{g?}}l .L[[LABEL]]
+  br label %loop
+loop:
+  %val = load volatile float *%src
+  %cond = fcmp olt float %target, %val
+  br i1 %cond, label %loop, label %exit
+exit:
+  ret void
+}
+
+define void @f5(float *%src, float %target) {
+; CHECK: f5:
+; CHECK: .cfi_startproc
+; CHECK: .L[[LABEL:.*]]:
+; CHECK: ceb %f0, 0(%r2)
+; CHECK-NEXT: j{{g?}}h .L[[LABEL]]
+  br label %loop
+loop:
+  %val = load volatile float *%src
+  %cond = fcmp ogt float %target, %val
+  br i1 %cond, label %loop, label %exit
+exit:
+  ret void
+}
+
+define void @f6(float *%src, float %target) {
+; CHECK: f6:
+; CHECK: .cfi_startproc
+; CHECK: .L[[LABEL:.*]]:
+; CHECK: ceb %f0, 0(%r2)
+; CHECK-NEXT: j{{g?}}he .L[[LABEL]]
+  br label %loop
+loop:
+  %val = load volatile float *%src
+  %cond = fcmp oge float %target, %val
+  br i1 %cond, label %loop, label %exit
+exit:
+  ret void
+}
+
+define void @f7(float *%src, float %target) {
+; CHECK: f7:
+; CHECK: .cfi_startproc
+; CHECK: .L[[LABEL:.*]]:
+; CHECK: ceb %f0, 0(%r2)
+; CHECK-NEXT: j{{g?}}nlh .L[[LABEL]]
+  br label %loop
+loop:
+  %val = load volatile float *%src
+  %cond = fcmp ueq float %target, %val
+  br i1 %cond, label %loop, label %exit
+exit:
+  ret void
+}
+
+define void @f8(float *%src, float %target) {
+; CHECK: f8:
+; CHECK: .cfi_startproc
+; CHECK: .L[[LABEL:.*]]:
+; CHECK: ceb %f0, 0(%r2)
+; CHECK-NEXT: j{{g?}}ne .L[[LABEL]]
+  br label %loop
+loop:
+  %val = load volatile float *%src
+  %cond = fcmp une float %target, %val
+  br i1 %cond, label %loop, label %exit
+exit:
+  ret void
+}
+
+define void @f9(float *%src, float %target) {
+; CHECK: f9:
+; CHECK: .cfi_startproc
+; CHECK: .L[[LABEL:.*]]:
+; CHECK: ceb %f0, 0(%r2)
+; CHECK-NEXT: j{{g?}}nh .L[[LABEL]]
+  br label %loop
+loop:
+  %val = load volatile float *%src
+  %cond = fcmp ule float %target, %val
+  br i1 %cond, label %loop, label %exit
+exit:
+  ret void
+}
+
+define void @f10(float *%src, float %target) {
+; CHECK: f10:
+; CHECK: .cfi_startproc
+; CHECK: .L[[LABEL:.*]]:
+; CHECK: ceb %f0, 0(%r2)
+; CHECK-NEXT: j{{g?}}nhe .L[[LABEL]]
+  br label %loop
+loop:
+  %val = load volatile float *%src
+  %cond = fcmp ult float %target, %val
+  br i1 %cond, label %loop, label %exit
+exit:
+  ret void
+}
+
+define void @f11(float *%src, float %target) {
+; CHECK: f11:
+; CHECK: .cfi_startproc
+; CHECK: .L[[LABEL:.*]]:
+; CHECK: ceb %f0, 0(%r2)
+; CHECK-NEXT: j{{g?}}nle .L[[LABEL]]
+  br label %loop
+loop:
+  %val = load volatile float *%src
+  %cond = fcmp ugt float %target, %val
+  br i1 %cond, label %loop, label %exit
+exit:
+  ret void
+}
+
+define void @f12(float *%src, float %target) {
+; CHECK: f12:
+; CHECK: .cfi_startproc
+; CHECK: .L[[LABEL:.*]]:
+; CHECK: ceb %f0, 0(%r2)
+; CHECK-NEXT: j{{g?}}nl .L[[LABEL]]
+  br label %loop
+loop:
+  %val = load volatile float *%src
+  %cond = fcmp uge float %target, %val
+  br i1 %cond, label %loop, label %exit
+exit:
+  ret void
+}
+
+; "jno" == "jump if no overflow", which corresponds to "jump if ordered"
+; rather than "jump if not ordered" after a floating-point comparison.
+define void @f13(float *%src, float %target) {
+; CHECK: f13:
+; CHECK: .cfi_startproc
+; CHECK: .L[[LABEL:.*]]:
+; CHECK: ceb %f0, 0(%r2)
+; CHECK-NEXT: j{{g?}}no .L[[LABEL]]
+  br label %loop
+loop:
+  %val = load volatile float *%src
+  %cond = fcmp ord float %target, %val
+  br i1 %cond, label %loop, label %exit
+exit:
+  ret void
+}
+
+; "jo" == "jump if overflow", which corresponds to "jump if not ordered"
+; rather than "jump if ordered" after a floating-point comparison.
+define void @f14(float *%src, float %target) {
+; CHECK: f14:
+; CHECK: .cfi_startproc
+; CHECK: .L[[LABEL:.*]]:
+; CHECK: ceb %f0, 0(%r2)
+; CHECK-NEXT: j{{g?}}o .L[[LABEL]]
+  br label %loop
+loop:
+  %val = load volatile float *%src
+  %cond = fcmp uno float %target, %val
+  br i1 %cond, label %loop, label %exit
+exit:
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/branch-05.ll b/test/CodeGen/SystemZ/branch-05.ll
new file mode 100644
index 0000000..d149e0b
--- /dev/null
+++ b/test/CodeGen/SystemZ/branch-05.ll
@@ -0,0 +1,58 @@
+; Test indirect jumps.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+define i32 @f1(i32 %x, i32 %y, i32 %op) {
+; CHECK: f1:
+; CHECK: ahi %r4, -1
+; CHECK: clfi %r4, 5
+; CHECK-NEXT: j{{g?}}h
+; CHECK: llgfr [[OP64:%r[0-5]]], %r4
+; CHECK: sllg [[INDEX:%r[1-5]]], [[OP64]], 3
+; CHECK: larl [[BASE:%r[1-5]]]
+; CHECK: lg [[TARGET:%r[1-5]]], 0([[BASE]],[[INDEX]])
+; CHECK: br [[TARGET]]
+entry:
+  switch i32 %op, label %exit [
+    i32 1, label %b.add
+    i32 2, label %b.sub
+    i32 3, label %b.and
+    i32 4, label %b.or
+    i32 5, label %b.xor
+    i32 6, label %b.mul
+  ]
+
+b.add:
+  %add = add i32 %x, %y
+  br label %exit
+
+b.sub:
+  %sub = sub i32 %x, %y
+  br label %exit
+
+b.and:
+  %and = and i32 %x, %y
+  br label %exit
+
+b.or:
+  %or = or i32 %x, %y
+  br label %exit
+
+b.xor:
+  %xor = xor i32 %x, %y
+  br label %exit
+
+b.mul:
+  %mul = mul i32 %x, %y
+  br label %exit
+
+exit:
+  %res = phi i32 [ %x,   %entry ],
+                 [ %add, %b.add ],
+                 [ %sub, %b.sub ],
+                 [ %and, %b.and ],
+                 [ %or,  %b.or ],
+                 [ %xor, %b.xor ],
+                 [ %mul, %b.mul ]
+  ret i32 %res
+}
diff --git a/test/CodeGen/SystemZ/bswap-01.ll b/test/CodeGen/SystemZ/bswap-01.ll
new file mode 100644
index 0000000..952903d
--- /dev/null
+++ b/test/CodeGen/SystemZ/bswap-01.ll
@@ -0,0 +1,24 @@
+; Test byteswaps between registers.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare i32 @llvm.bswap.i32(i32 %a)
+declare i64 @llvm.bswap.i64(i64 %a)
+
+; Check 32-bit register-to-register byteswaps.
+define i32 @f1(i32 %a) {
+; CHECK: f1:
+; CHECK: lrvr [[REGISTER:%r[0-5]]], %r2
+; CHECK: br %r14
+  %swapped = call i32 @llvm.bswap.i32(i32 %a)
+  ret i32 %swapped
+}
+
+; Check 64-bit register-to-register byteswaps.
+define i64 @f2(i64 %a) {
+; CHECK: f2:
+; CHECK: lrvgr %r2, %r2
+; CHECK: br %r14
+  %swapped = call i64 @llvm.bswap.i64(i64 %a)
+  ret i64 %swapped
+}
diff --git a/test/CodeGen/SystemZ/bswap-02.ll b/test/CodeGen/SystemZ/bswap-02.ll
new file mode 100644
index 0000000..e9b7eb5
--- /dev/null
+++ b/test/CodeGen/SystemZ/bswap-02.ll
@@ -0,0 +1,87 @@
+; Test 32-bit byteswaps from memory to registers.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare i32 @llvm.bswap.i32(i32 %a)
+
+; Check LRV with no displacement.
+define i32 @f1(i32 *%src) {
+; CHECK: f1:
+; CHECK: lrv %r2, 0(%r2)
+; CHECK: br %r14
+  %a = load i32 *%src
+  %swapped = call i32 @llvm.bswap.i32(i32 %a)
+  ret i32 %swapped
+}
+
+; Check the high end of the aligned LRV range.
+define i32 @f2(i32 *%src) {
+; CHECK: f2:
+; CHECK: lrv %r2, 524284(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 131071
+  %a = load i32 *%ptr
+  %swapped = call i32 @llvm.bswap.i32(i32 %a)
+  ret i32 %swapped
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i32 @f3(i32 *%src) {
+; CHECK: f3:
+; CHECK: agfi %r2, 524288
+; CHECK: lrv %r2, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 131072
+  %a = load i32 *%ptr
+  %swapped = call i32 @llvm.bswap.i32(i32 %a)
+  ret i32 %swapped
+}
+
+; Check the high end of the negative aligned LRV range.
+define i32 @f4(i32 *%src) {
+; CHECK: f4:
+; CHECK: lrv %r2, -4(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 -1
+  %a = load i32 *%ptr
+  %swapped = call i32 @llvm.bswap.i32(i32 %a)
+  ret i32 %swapped
+}
+
+; Check the low end of the LRV range.
+define i32 @f5(i32 *%src) {
+; CHECK: f5:
+; CHECK: lrv %r2, -524288(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 -131072
+  %a = load i32 *%ptr
+  %swapped = call i32 @llvm.bswap.i32(i32 %a)
+  ret i32 %swapped
+}
+
+; Check the next word down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i32 @f6(i32 *%src) {
+; CHECK: f6:
+; CHECK: agfi %r2, -524292
+; CHECK: lrv %r2, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 -131073
+  %a = load i32 *%ptr
+  %swapped = call i32 @llvm.bswap.i32(i32 %a)
+  ret i32 %swapped
+}
+
+; Check that LRV allows an index.
+define i32 @f7(i64 %src, i64 %index) {
+; CHECK: f7:
+; CHECK: lrv %r2, 524287({{%r3,%r2|%r2,%r3}})
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 524287
+  %ptr = inttoptr i64 %add2 to i32 *
+  %a = load i32 *%ptr
+  %swapped = call i32 @llvm.bswap.i32(i32 %a)
+  ret i32 %swapped
+}
diff --git a/test/CodeGen/SystemZ/bswap-03.ll b/test/CodeGen/SystemZ/bswap-03.ll
new file mode 100644
index 0000000..2e6bcdc
--- /dev/null
+++ b/test/CodeGen/SystemZ/bswap-03.ll
@@ -0,0 +1,87 @@
+; Test 64-bit byteswaps from memory to registers.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare i64 @llvm.bswap.i64(i64 %a)
+
+; Check LRVG with no displacement.
+define i64 @f1(i64 *%src) {
+; CHECK: f1:
+; CHECK: lrvg %r2, 0(%r2)
+; CHECK: br %r14
+  %a = load i64 *%src
+  %swapped = call i64 @llvm.bswap.i64(i64 %a)
+  ret i64 %swapped
+}
+
+; Check the high end of the aligned LRVG range.
+define i64 @f2(i64 *%src) {
+; CHECK: f2:
+; CHECK: lrvg %r2, 524280(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 65535
+  %a = load i64 *%ptr
+  %swapped = call i64 @llvm.bswap.i64(i64 %a)
+  ret i64 %swapped
+}
+
+; Check the next doubleword up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f3(i64 *%src) {
+; CHECK: f3:
+; CHECK: agfi %r2, 524288
+; CHECK: lrvg %r2, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 65536
+  %a = load i64 *%ptr
+  %swapped = call i64 @llvm.bswap.i64(i64 %a)
+  ret i64 %swapped
+}
+
+; Check the high end of the negative aligned LRVG range.
+define i64 @f4(i64 *%src) {
+; CHECK: f4:
+; CHECK: lrvg %r2, -8(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 -1
+  %a = load i64 *%ptr
+  %swapped = call i64 @llvm.bswap.i64(i64 %a)
+  ret i64 %swapped
+}
+
+; Check the low end of the LRVG range.
+define i64 @f5(i64 *%src) {
+; CHECK: f5:
+; CHECK: lrvg %r2, -524288(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 -65536
+  %a = load i64 *%ptr
+  %swapped = call i64 @llvm.bswap.i64(i64 %a)
+  ret i64 %swapped
+}
+
+; Check the next doubleword down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f6(i64 *%src) {
+; CHECK: f6:
+; CHECK: agfi %r2, -524296
+; CHECK: lrvg %r2, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 -65537
+  %a = load i64 *%ptr
+  %swapped = call i64 @llvm.bswap.i64(i64 %a)
+  ret i64 %swapped
+}
+
+; Check that LRVG allows an index.
+define i64 @f7(i64 %src, i64 %index) {
+; CHECK: f7:
+; CHECK: lrvg %r2, 524287({{%r3,%r2|%r2,%r3}})
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 524287
+  %ptr = inttoptr i64 %add2 to i64 *
+  %a = load i64 *%ptr
+  %swapped = call i64 @llvm.bswap.i64(i64 %a)
+  ret i64 %swapped
+}
diff --git a/test/CodeGen/SystemZ/bswap-04.ll b/test/CodeGen/SystemZ/bswap-04.ll
new file mode 100644
index 0000000..192327b
--- /dev/null
+++ b/test/CodeGen/SystemZ/bswap-04.ll
@@ -0,0 +1,87 @@
+; Test 32-bit byteswaps from registers to memory.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare i32 @llvm.bswap.i32(i32 %a)
+
+; Check STRV with no displacement.
+define void @f1(i32 *%src, i32 %a) {
+; CHECK: f1:
+; CHECK: strv %r3, 0(%r2)
+; CHECK: br %r14
+  %swapped = call i32 @llvm.bswap.i32(i32 %a)
+  store i32 %swapped, i32 *%src
+  ret void
+}
+
+; Check the high end of the aligned STRV range.
+define void @f2(i32 *%src, i32 %a) {
+; CHECK: f2:
+; CHECK: strv %r3, 524284(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 131071
+  %swapped = call i32 @llvm.bswap.i32(i32 %a)
+  store i32 %swapped, i32 *%ptr
+  ret void
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define void @f3(i32 *%src, i32 %a) {
+; CHECK: f3:
+; CHECK: agfi %r2, 524288
+; CHECK: strv %r3, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 131072
+  %swapped = call i32 @llvm.bswap.i32(i32 %a)
+  store i32 %swapped, i32 *%ptr
+  ret void
+}
+
+; Check the high end of the negative aligned STRV range.
+define void @f4(i32 *%src, i32 %a) {
+; CHECK: f4:
+; CHECK: strv %r3, -4(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 -1
+  %swapped = call i32 @llvm.bswap.i32(i32 %a)
+  store i32 %swapped, i32 *%ptr
+  ret void
+}
+
+; Check the low end of the STRV range.
+define void @f5(i32 *%src, i32 %a) {
+; CHECK: f5:
+; CHECK: strv %r3, -524288(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 -131072
+  %swapped = call i32 @llvm.bswap.i32(i32 %a)
+  store i32 %swapped, i32 *%ptr
+  ret void
+}
+
+; Check the next word down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define void @f6(i32 *%src, i32 %a) {
+; CHECK: f6:
+; CHECK: agfi %r2, -524292
+; CHECK: strv %r3, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 -131073
+  %swapped = call i32 @llvm.bswap.i32(i32 %a)
+  store i32 %swapped, i32 *%ptr
+  ret void
+}
+
+; Check that STRV allows an index.
+define void @f7(i64 %src, i64 %index, i32 %a) {
+; CHECK: f7:
+; CHECK: strv %r4, 524287({{%r3,%r2|%r2,%r3}})
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 524287
+  %ptr = inttoptr i64 %add2 to i32 *
+  %swapped = call i32 @llvm.bswap.i32(i32 %a)
+  store i32 %swapped, i32 *%ptr
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/bswap-05.ll b/test/CodeGen/SystemZ/bswap-05.ll
new file mode 100644
index 0000000..e58cb80
--- /dev/null
+++ b/test/CodeGen/SystemZ/bswap-05.ll
@@ -0,0 +1,87 @@
+; Test 64-bit byteswaps from registers to memory.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare i64 @llvm.bswap.i64(i64 %a)
+
+; Check STRVG with no displacement.
+define void @f1(i64 *%src, i64 %a) {
+; CHECK: f1:
+; CHECK: strvg %r3, 0(%r2)
+; CHECK: br %r14
+  %swapped = call i64 @llvm.bswap.i64(i64 %a)
+  store i64 %swapped, i64 *%src
+  ret void
+}
+
+; Check the high end of the aligned STRVG range.
+define void @f2(i64 *%src, i64 %a) {
+; CHECK: f2:
+; CHECK: strvg %r3, 524280(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 65535
+  %swapped = call i64 @llvm.bswap.i64(i64 %a)
+  store i64 %swapped, i64 *%ptr
+  ret void
+}
+
+; Check the next doubleword up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define void @f3(i64 *%src, i64 %a) {
+; CHECK: f3:
+; CHECK: agfi %r2, 524288
+; CHECK: strvg %r3, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 65536
+  %swapped = call i64 @llvm.bswap.i64(i64 %a)
+  store i64 %swapped, i64 *%ptr
+  ret void
+}
+
+; Check the high end of the negative aligned STRVG range.
+define void @f4(i64 *%src, i64 %a) {
+; CHECK: f4:
+; CHECK: strvg %r3, -8(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 -1
+  %swapped = call i64 @llvm.bswap.i64(i64 %a)
+  store i64 %swapped, i64 *%ptr
+  ret void
+}
+
+; Check the low end of the STRVG range.
+define void @f5(i64 *%src, i64 %a) {
+; CHECK: f5:
+; CHECK: strvg %r3, -524288(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 -65536
+  %swapped = call i64 @llvm.bswap.i64(i64 %a)
+  store i64 %swapped, i64 *%ptr
+  ret void
+}
+
+; Check the next doubleword down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define void @f6(i64 *%src, i64 %a) {
+; CHECK: f6:
+; CHECK: agfi %r2, -524296
+; CHECK: strvg %r3, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 -65537
+  %swapped = call i64 @llvm.bswap.i64(i64 %a)
+  store i64 %swapped, i64 *%ptr
+  ret void
+}
+
+; Check that STRVG allows an index.
+define void @f7(i64 %src, i64 %index, i64 %a) {
+; CHECK: f7:
+; CHECK: strvg %r4, 524287({{%r3,%r2|%r2,%r3}})
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 524287
+  %ptr = inttoptr i64 %add2 to i64 *
+  %swapped = call i64 @llvm.bswap.i64(i64 %a)
+  store i64 %swapped, i64 *%ptr
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/call-01.ll b/test/CodeGen/SystemZ/call-01.ll
new file mode 100644
index 0000000..1b9172b
--- /dev/null
+++ b/test/CodeGen/SystemZ/call-01.ll
@@ -0,0 +1,18 @@
+; Test direct calls.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare i64 @bar()
+
+; We must allocate 160 bytes for the callee and save and restore %r14.
+define i64 @f1() {
+; CHECK: f1:
+; CHECK: stmg %r14, %r15, 112(%r15)
+; CHECK: aghi %r15, -160
+; CHECK: brasl %r14, bar@PLT
+; CHECK: lmg %r14, %r15, 272(%r15)
+; CHECK: br %r14
+  %ret = call i64 @bar()
+  %inc = add i64 %ret, 1
+  ret i64 %inc
+}
diff --git a/test/CodeGen/SystemZ/call-02.ll b/test/CodeGen/SystemZ/call-02.ll
new file mode 100644
index 0000000..07dd67b
--- /dev/null
+++ b/test/CodeGen/SystemZ/call-02.ll
@@ -0,0 +1,16 @@
+; Test indirect calls.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; We must allocate 160 bytes for the callee and save and restore %r14.
+define i64 @f1(i64() *%bar) {
+; CHECK: f1:
+; CHECK: stmg %r14, %r15, 112(%r15)
+; CHECK: aghi %r15, -160
+; CHECK: basr %r14, %r2
+; CHECK: lmg %r14, %r15, 272(%r15)
+; CHECK: br %r14
+  %ret = call i64 %bar()
+  %inc = add i64 %ret, 1
+  ret i64 %inc
+}
diff --git a/test/CodeGen/SystemZ/cmpxchg-01.ll b/test/CodeGen/SystemZ/cmpxchg-01.ll
new file mode 100644
index 0000000..477bcb0
--- /dev/null
+++ b/test/CodeGen/SystemZ/cmpxchg-01.ll
@@ -0,0 +1,56 @@
+; Test 8-bit compare and swap.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-MAIN
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT
+
+; Check compare and swap with a variable.
+; - CHECK-MAIN is for the main loop.
+; - CHECK-SHIFT makes sure that the negated shift count used by the second
+;   RLL is set up correctly.  The negation is independent of the NILL and L
+;   tested in CHECK-MAIN.  CHECK-SHIFT also checks that %r3 is not modified
+;   before being used in the RISBG (in contrast to things like atomic
+;   addition, which shift %r3 left so that %b is at the high end of the word).
+define i8 @f1(i8 %dummy, i8 *%src, i8 %cmp, i8 %swap) {
+; CHECK-MAIN: f1:
+; CHECK-MAIN: sllg [[SHIFT:%r[1-9]+]], %r3, 3
+; CHECK-MAIN: nill %r3, 65532
+; CHECK-MAIN: l [[OLD:%r[0-9]+]], 0(%r3)
+; CHECK-MAIN: [[LOOP:\.[^ ]*]]:
+; CHECK-MAIN: rll %r2, [[OLD]], 8([[SHIFT]])
+; CHECK-MAIN: risbg %r4, %r2, 32, 55, 0
+; CHECK-MAIN: cr %r2, %r4
+; CHECK-MAIN: j{{g?}}lh [[EXIT:\.[^ ]*]]
+; CHECK-MAIN: risbg %r5, %r2, 32, 55, 0
+; CHECK-MAIN: rll [[NEW:%r[0-9]+]], %r5, -8({{%r[1-9]+}})
+; CHECK-MAIN: cs [[OLD]], [[NEW]], 0(%r3)
+; CHECK-MAIN: j{{g?}}lh [[LOOP]]
+; CHECK-MAIN: [[EXIT]]:
+; CHECK-MAIN-NOT: %r2
+; CHECK-MAIN: br %r14
+;
+; CHECK-SHIFT: f1:
+; CHECK-SHIFT: sllg [[SHIFT:%r[1-9]+]], %r3, 3
+; CHECK-SHIFT: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
+; CHECK-SHIFT: rll
+; CHECK-SHIFT: rll {{%r[0-9]+}}, %r5, -8([[NEGSHIFT]])
+  %res = cmpxchg i8 *%src, i8 %cmp, i8 %swap seq_cst
+  ret i8 %res
+}
+
+; Check compare and swap with constants.  We should force the constants into
+; registers and use the sequence above.
+define i8 @f2(i8 *%src) {
+; CHECK-MAIN: f2:
+; CHECK-MAIN: lhi [[CMP:%r[0-9]+]], 42
+; CHECK-MAIN: risbg [[CMP]], {{%r[0-9]+}}, 32, 55, 0
+; CHECK-MAIN: risbg
+; CHECK-MAIN: br %r14
+;
+; CHECK-SHIFT: f2:
+; CHECK-SHIFT: lhi [[SWAP:%r[0-9]+]], 88
+; CHECK-SHIFT: risbg
+; CHECK-SHIFT: risbg [[SWAP]], {{%r[0-9]+}}, 32, 55, 0
+; CHECK-SHIFT: br %r14
+  %res = cmpxchg i8 *%src, i8 42, i8 88 seq_cst
+  ret i8 %res
+}
diff --git a/test/CodeGen/SystemZ/cmpxchg-02.ll b/test/CodeGen/SystemZ/cmpxchg-02.ll
new file mode 100644
index 0000000..cc34523
--- /dev/null
+++ b/test/CodeGen/SystemZ/cmpxchg-02.ll
@@ -0,0 +1,56 @@
+; Test 16-bit compare and swap.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-MAIN
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-SHIFT
+
+; Check compare and swap with a variable.
+; - CHECK-MAIN is for the main loop.
+; - CHECK-SHIFT makes sure that the negated shift count used by the second
+;   RLL is set up correctly.  The negation is independent of the NILL and L
+;   tested in CHECK-MAIN.  CHECK-SHIFT also checks that %r3 is not modified
+;   before being used in the RISBG (in contrast to things like atomic
+;   addition, which shift %r3 left so that %b is at the high end of the word).
+define i16 @f1(i16 %dummy, i16 *%src, i16 %cmp, i16 %swap) {
+; CHECK-MAIN: f1:
+; CHECK-MAIN: sllg [[SHIFT:%r[1-9]+]], %r3, 3
+; CHECK-MAIN: nill %r3, 65532
+; CHECK-MAIN: l [[OLD:%r[0-9]+]], 0(%r3)
+; CHECK-MAIN: [[LOOP:\.[^ ]*]]:
+; CHECK-MAIN: rll %r2, [[OLD]], 16([[SHIFT]])
+; CHECK-MAIN: risbg %r4, %r2, 32, 47, 0
+; CHECK-MAIN: cr %r2, %r4
+; CHECK-MAIN: j{{g?}}lh [[EXIT:\.[^ ]*]]
+; CHECK-MAIN: risbg %r5, %r2, 32, 47, 0
+; CHECK-MAIN: rll [[NEW:%r[0-9]+]], %r5, -16({{%r[1-9]+}})
+; CHECK-MAIN: cs [[OLD]], [[NEW]], 0(%r3)
+; CHECK-MAIN: j{{g?}}lh [[LOOP]]
+; CHECK-MAIN: [[EXIT]]:
+; CHECK-MAIN-NOT: %r2
+; CHECK-MAIN: br %r14
+;
+; CHECK-SHIFT: f1:
+; CHECK-SHIFT: sllg [[SHIFT:%r[1-9]+]], %r3, 3
+; CHECK-SHIFT: lcr [[NEGSHIFT:%r[1-9]+]], [[SHIFT]]
+; CHECK-SHIFT: rll
+; CHECK-SHIFT: rll {{%r[0-9]+}}, %r5, -16([[NEGSHIFT]])
+  %res = cmpxchg i16 *%src, i16 %cmp, i16 %swap seq_cst
+  ret i16 %res
+}
+
+; Check compare and swap with constants.  We should force the constants into
+; registers and use the sequence above.
+define i16 @f2(i16 *%src) {
+; CHECK-MAIN: f2:
+; CHECK-MAIN: lhi [[CMP:%r[0-9]+]], 42
+; CHECK-MAIN: risbg [[CMP]], {{%r[0-9]+}}, 32, 47, 0
+; CHECK-MAIN: risbg
+; CHECK-MAIN: br %r14
+;
+; CHECK-SHIFT: f2:
+; CHECK-SHIFT: lhi [[SWAP:%r[0-9]+]], 88
+; CHECK-SHIFT: risbg
+; CHECK-SHIFT: risbg [[SWAP]], {{%r[0-9]+}}, 32, 47, 0
+; CHECK-SHIFT: br %r14
+  %res = cmpxchg i16 *%src, i16 42, i16 88 seq_cst
+  ret i16 %res
+}
diff --git a/test/CodeGen/SystemZ/cmpxchg-03.ll b/test/CodeGen/SystemZ/cmpxchg-03.ll
new file mode 100644
index 0000000..45e224e
--- /dev/null
+++ b/test/CodeGen/SystemZ/cmpxchg-03.ll
@@ -0,0 +1,131 @@
+; Test 32-bit compare and swap.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check the low end of the CS range.
+define i32 @f1(i32 %cmp, i32 %swap, i32 *%src) {
+; CHECK: f1:
+; CHECK: cs %r2, %r3, 0(%r4)
+; CHECK: br %r14
+  %val = cmpxchg i32 *%src, i32 %cmp, i32 %swap seq_cst
+  ret i32 %val
+}
+
+; Check the high end of the aligned CS range.
+define i32 @f2(i32 %cmp, i32 %swap, i32 *%src) {
+; CHECK: f2:
+; CHECK: cs %r2, %r3, 4092(%r4)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 1023
+  %val = cmpxchg i32 *%ptr, i32 %cmp, i32 %swap seq_cst
+  ret i32 %val
+}
+
+; Check the next word up, which should use CSY instead of CS.
+define i32 @f3(i32 %cmp, i32 %swap, i32 *%src) {
+; CHECK: f3:
+; CHECK: csy %r2, %r3, 4096(%r4)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 1024
+  %val = cmpxchg i32 *%ptr, i32 %cmp, i32 %swap seq_cst
+  ret i32 %val
+}
+
+; Check the high end of the aligned CSY range.
+define i32 @f4(i32 %cmp, i32 %swap, i32 *%src) {
+; CHECK: f4:
+; CHECK: csy %r2, %r3, 524284(%r4)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 131071
+  %val = cmpxchg i32 *%ptr, i32 %cmp, i32 %swap seq_cst
+  ret i32 %val
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i32 @f5(i32 %cmp, i32 %swap, i32 *%src) {
+; CHECK: f5:
+; CHECK: agfi %r4, 524288
+; CHECK: cs %r2, %r3, 0(%r4)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 131072
+  %val = cmpxchg i32 *%ptr, i32 %cmp, i32 %swap seq_cst
+  ret i32 %val
+}
+
+; Check the high end of the negative aligned CSY range.
+define i32 @f6(i32 %cmp, i32 %swap, i32 *%src) {
+; CHECK: f6:
+; CHECK: csy %r2, %r3, -4(%r4)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 -1
+  %val = cmpxchg i32 *%ptr, i32 %cmp, i32 %swap seq_cst
+  ret i32 %val
+}
+
+; Check the low end of the CSY range.
+define i32 @f7(i32 %cmp, i32 %swap, i32 *%src) {
+; CHECK: f7:
+; CHECK: csy %r2, %r3, -524288(%r4)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 -131072
+  %val = cmpxchg i32 *%ptr, i32 %cmp, i32 %swap seq_cst
+  ret i32 %val
+}
+
+; Check the next word down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i32 @f8(i32 %cmp, i32 %swap, i32 *%src) {
+; CHECK: f8:
+; CHECK: agfi %r4, -524292
+; CHECK: cs %r2, %r3, 0(%r4)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 -131073
+  %val = cmpxchg i32 *%ptr, i32 %cmp, i32 %swap seq_cst
+  ret i32 %val
+}
+
+; Check that CS does not allow an index.
+define i32 @f9(i32 %cmp, i32 %swap, i64 %src, i64 %index) {
+; CHECK: f9:
+; CHECK: agr %r4, %r5
+; CHECK: cs %r2, %r3, 0(%r4)
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %ptr = inttoptr i64 %add1 to i32 *
+  %val = cmpxchg i32 *%ptr, i32 %cmp, i32 %swap seq_cst
+  ret i32 %val
+}
+
+; Check that CSY does not allow an index.
+define i32 @f10(i32 %cmp, i32 %swap, i64 %src, i64 %index) {
+; CHECK: f10:
+; CHECK: agr %r4, %r5
+; CHECK: csy %r2, %r3, 4096(%r4)
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 4096
+  %ptr = inttoptr i64 %add2 to i32 *
+  %val = cmpxchg i32 *%ptr, i32 %cmp, i32 %swap seq_cst
+  ret i32 %val
+}
+
+; Check that a constant %cmp value is loaded into a register first.
+define i32 @f11(i32 %dummy, i32 %swap, i32 *%ptr) {
+; CHECK: f11:
+; CHECK: lhi %r2, 1001
+; CHECK: cs %r2, %r3, 0(%r4)
+; CHECK: br %r14
+  %val = cmpxchg i32 *%ptr, i32 1001, i32 %swap seq_cst
+  ret i32 %val
+}
+
+; Check that a constant %swap value is loaded into a register first.
+define i32 @f12(i32 %cmp, i32 *%ptr) {
+; CHECK: f12:
+; CHECK: lhi [[SWAP:%r[0-9]+]], 1002
+; CHECK: cs %r2, [[SWAP]], 0(%r3)
+; CHECK: br %r14
+  %val = cmpxchg i32 *%ptr, i32 %cmp, i32 1002 seq_cst
+  ret i32 %val
+}
diff --git a/test/CodeGen/SystemZ/cmpxchg-04.ll b/test/CodeGen/SystemZ/cmpxchg-04.ll
new file mode 100644
index 0000000..f8969ee
--- /dev/null
+++ b/test/CodeGen/SystemZ/cmpxchg-04.ll
@@ -0,0 +1,98 @@
+; Test 64-bit compare and swap.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check CSG without a displacement.
+define i64 @f1(i64 %cmp, i64 %swap, i64 *%src) {
+; CHECK: f1:
+; CHECK: csg %r2, %r3, 0(%r4)
+; CHECK: br %r14
+  %val = cmpxchg i64 *%src, i64 %cmp, i64 %swap seq_cst
+  ret i64 %val
+}
+
+; Check the high end of the aligned CSG range.
+define i64 @f2(i64 %cmp, i64 %swap, i64 *%src) {
+; CHECK: f2:
+; CHECK: csg %r2, %r3, 524280(%r4)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 65535
+  %val = cmpxchg i64 *%ptr, i64 %cmp, i64 %swap seq_cst
+  ret i64 %val
+}
+
+; Check the next doubleword up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f3(i64 %cmp, i64 %swap, i64 *%src) {
+; CHECK: f3:
+; CHECK: agfi %r4, 524288
+; CHECK: csg %r2, %r3, 0(%r4)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 65536
+  %val = cmpxchg i64 *%ptr, i64 %cmp, i64 %swap seq_cst
+  ret i64 %val
+}
+
+; Check the high end of the negative aligned CSG range.
+define i64 @f4(i64 %cmp, i64 %swap, i64 *%src) {
+; CHECK: f4:
+; CHECK: csg %r2, %r3, -8(%r4)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 -1
+  %val = cmpxchg i64 *%ptr, i64 %cmp, i64 %swap seq_cst
+  ret i64 %val
+}
+
+; Check the low end of the CSG range.
+define i64 @f5(i64 %cmp, i64 %swap, i64 *%src) {
+; CHECK: f5:
+; CHECK: csg %r2, %r3, -524288(%r4)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 -65536
+  %val = cmpxchg i64 *%ptr, i64 %cmp, i64 %swap seq_cst
+  ret i64 %val
+}
+
+; Check the next doubleword down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f6(i64 %cmp, i64 %swap, i64 *%src) {
+; CHECK: f6:
+; CHECK: agfi %r4, -524296
+; CHECK: csg %r2, %r3, 0(%r4)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 -65537
+  %val = cmpxchg i64 *%ptr, i64 %cmp, i64 %swap seq_cst
+  ret i64 %val
+}
+
+; Check that CSG does not allow an index.
+define i64 @f7(i64 %cmp, i64 %swap, i64 %src, i64 %index) {
+; CHECK: f7:
+; CHECK: agr %r4, %r5
+; CHECK: csg %r2, %r3, 0(%r4)
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %ptr = inttoptr i64 %add1 to i64 *
+  %val = cmpxchg i64 *%ptr, i64 %cmp, i64 %swap seq_cst
+  ret i64 %val
+}
+
+; Check that a constant %cmp value is loaded into a register first.
+define i64 @f8(i64 %dummy, i64 %swap, i64 *%ptr) {
+; CHECK: f8:
+; CHECK: lghi %r2, 1001
+; CHECK: csg %r2, %r3, 0(%r4)
+; CHECK: br %r14
+  %val = cmpxchg i64 *%ptr, i64 1001, i64 %swap seq_cst
+  ret i64 %val
+}
+
+; Check that a constant %swap value is loaded into a register first.
+define i64 @f9(i64 %cmp, i64 *%ptr) {
+; CHECK: f9:
+; CHECK: lghi [[SWAP:%r[0-9]+]], 1002
+; CHECK: csg %r2, [[SWAP]], 0(%r3)
+; CHECK: br %r14
+  %val = cmpxchg i64 *%ptr, i64 %cmp, i64 1002 seq_cst
+  ret i64 %val
+}
diff --git a/test/CodeGen/SystemZ/fp-abs-01.ll b/test/CodeGen/SystemZ/fp-abs-01.ll
new file mode 100644
index 0000000..81b3fb2
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-abs-01.ll
@@ -0,0 +1,40 @@
+; Test floating-point absolute.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test f32, where the llvm.fabs.f32 call should become a single LPEBR.
+declare float @llvm.fabs.f32(float %f)
+define float @f1(float %f) {
+; CHECK: f1:
+; CHECK: lpebr %f0, %f0
+; CHECK: br %r14
+  %res = call float @llvm.fabs.f32(float %f)
+  ret float %res
+}
+
+; Test f64, where the llvm.fabs.f64 call should become a single LPDBR.
+declare double @llvm.fabs.f64(double %f)
+define double @f2(double %f) {
+; CHECK: f2:
+; CHECK: lpdbr %f0, %f0
+; CHECK: br %r14
+  %res = call double @llvm.fabs.f64(double %f)
+  ret double %res
+}
+
+; Test f128.  With the loads and stores, a pure absolute would probably
+; be better implemented using an NI on the upper byte.  Do some extra
+; processing so that using FPRs is unequivocally better.
+declare fp128 @llvm.fabs.f128(fp128 %f)
+define void @f3(fp128 *%ptr, fp128 *%ptr2) {
+; CHECK: f3:
+; CHECK: lpxbr
+; CHECK: dxbr
+; CHECK: br %r14
+  %orig = load fp128 *%ptr
+  %abs = call fp128 @llvm.fabs.f128(fp128 %orig)
+  %op2 = load fp128 *%ptr2
+  %res = fdiv fp128 %abs, %op2
+  store fp128 %res, fp128 *%ptr
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/fp-abs-02.ll b/test/CodeGen/SystemZ/fp-abs-02.ll
new file mode 100644
index 0000000..513d49c
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-abs-02.ll
@@ -0,0 +1,43 @@
+; Test negated floating-point absolute.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test f32, where (-0.0 - fabs(%f)) should become a single LNEBR.
+declare float @llvm.fabs.f32(float %f)
+define float @f1(float %f) {
+; CHECK: f1:
+; CHECK: lnebr %f0, %f0
+; CHECK: br %r14
+  %abs = call float @llvm.fabs.f32(float %f)
+  %res = fsub float -0.0, %abs
+  ret float %res
+}
+
+; Test f64, where (-0.0 - fabs(%f)) should become a single LNDBR.
+declare double @llvm.fabs.f64(double %f)
+define double @f2(double %f) {
+; CHECK: f2:
+; CHECK: lndbr %f0, %f0
+; CHECK: br %r14
+  %abs = call double @llvm.fabs.f64(double %f)
+  %res = fsub double -0.0, %abs
+  ret double %res
+}
+
+; Test f128.  With the loads and stores, a pure negative-absolute would
+; probably be better implemented using an OI on the upper byte.  Do some
+; extra processing so that using FPRs is unequivocally better.
+declare fp128 @llvm.fabs.f128(fp128 %f)
+define void @f3(fp128 *%ptr, fp128 *%ptr2) {
+; CHECK: f3:
+; CHECK: lnxbr
+; CHECK: dxbr
+; CHECK: br %r14
+  %orig = load fp128 *%ptr
+  %abs = call fp128 @llvm.fabs.f128(fp128 %orig)
+  %negabs = fsub fp128 0xL00000000000000008000000000000000, %abs
+  %op2 = load fp128 *%ptr2
+  %res = fdiv fp128 %negabs, %op2
+  store fp128 %res, fp128 *%ptr
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/fp-add-01.ll b/test/CodeGen/SystemZ/fp-add-01.ll
new file mode 100644
index 0000000..7ce0777
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-add-01.ll
@@ -0,0 +1,71 @@
+; Test 32-bit floating-point addition.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check register addition.
+define float @f1(float %f1, float %f2) {
+; CHECK: f1:
+; CHECK: aebr %f0, %f2
+; CHECK: br %r14
+  %res = fadd float %f1, %f2
+  ret float %res
+}
+
+; Check the low end of the AEB range.
+define float @f2(float %f1, float *%ptr) {
+; CHECK: f2:
+; CHECK: aeb %f0, 0(%r2)
+; CHECK: br %r14
+  %f2 = load float *%ptr
+  %res = fadd float %f1, %f2
+  ret float %res
+}
+
+; Check the high end of the aligned AEB range.
+define float @f3(float %f1, float *%base) {
+; CHECK: f3:
+; CHECK: aeb %f0, 4092(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr float *%base, i64 1023
+  %f2 = load float *%ptr
+  %res = fadd float %f1, %f2
+  ret float %res
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define float @f4(float %f1, float *%base) {
+; CHECK: f4:
+; CHECK: aghi %r2, 4096
+; CHECK: aeb %f0, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr float *%base, i64 1024
+  %f2 = load float *%ptr
+  %res = fadd float %f1, %f2
+  ret float %res
+}
+
+; Check negative displacements, which also need separate address logic.
+define float @f5(float %f1, float *%base) {
+; CHECK: f5:
+; CHECK: aghi %r2, -4
+; CHECK: aeb %f0, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr float *%base, i64 -1
+  %f2 = load float *%ptr
+  %res = fadd float %f1, %f2
+  ret float %res
+}
+
+; Check that AEB allows indices.
+define float @f6(float %f1, float *%base, i64 %index) {
+; CHECK: f6:
+; CHECK: sllg %r1, %r3, 2
+; CHECK: aeb %f0, 400(%r1,%r2)
+; CHECK: br %r14
+  %ptr1 = getelementptr float *%base, i64 %index
+  %ptr2 = getelementptr float *%ptr1, i64 100
+  %f2 = load float *%ptr2
+  %res = fadd float %f1, %f2
+  ret float %res
+}
diff --git a/test/CodeGen/SystemZ/fp-add-02.ll b/test/CodeGen/SystemZ/fp-add-02.ll
new file mode 100644
index 0000000..08eb90e
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-add-02.ll
@@ -0,0 +1,71 @@
+; Test 64-bit floating-point addition.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check register addition.
+define double @f1(double %f1, double %f2) {
+; CHECK: f1:
+; CHECK: adbr %f0, %f2
+; CHECK: br %r14
+  %res = fadd double %f1, %f2
+  ret double %res
+}
+
+; Check the low end of the ADB range.
+define double @f2(double %f1, double *%ptr) {
+; CHECK: f2:
+; CHECK: adb %f0, 0(%r2)
+; CHECK: br %r14
+  %f2 = load double *%ptr
+  %res = fadd double %f1, %f2
+  ret double %res
+}
+
+; Check the high end of the aligned ADB range.
+define double @f3(double %f1, double *%base) {
+; CHECK: f3:
+; CHECK: adb %f0, 4088(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr double *%base, i64 511
+  %f2 = load double *%ptr
+  %res = fadd double %f1, %f2
+  ret double %res
+}
+
+; Check the next doubleword up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define double @f4(double %f1, double *%base) {
+; CHECK: f4:
+; CHECK: aghi %r2, 4096
+; CHECK: adb %f0, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr double *%base, i64 512
+  %f2 = load double *%ptr
+  %res = fadd double %f1, %f2
+  ret double %res
+}
+
+; Check negative displacements, which also need separate address logic.
+define double @f5(double %f1, double *%base) {
+; CHECK: f5:
+; CHECK: aghi %r2, -8
+; CHECK: adb %f0, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr double *%base, i64 -1
+  %f2 = load double *%ptr
+  %res = fadd double %f1, %f2
+  ret double %res
+}
+
+; Check that ADB allows indices.
+define double @f6(double %f1, double *%base, i64 %index) {
+; CHECK: f6:
+; CHECK: sllg %r1, %r3, 3
+; CHECK: adb %f0, 800(%r1,%r2)
+; CHECK: br %r14
+  %ptr1 = getelementptr double *%base, i64 %index
+  %ptr2 = getelementptr double *%ptr1, i64 100
+  %f2 = load double *%ptr2
+  %res = fadd double %f1, %f2
+  ret double %res
+}
diff --git a/test/CodeGen/SystemZ/fp-add-03.ll b/test/CodeGen/SystemZ/fp-add-03.ll
new file mode 100644
index 0000000..13ffb02
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-add-03.ll
@@ -0,0 +1,20 @@
+; Test 128-bit floating-point addition.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; There is no memory form of 128-bit addition.
+define void @f1(fp128 *%ptr, float %f2) {
+; CHECK: f1:
+; CHECK: lxebr %f0, %f0
+; CHECK: ld %f1, 0(%r2)
+; CHECK: ld %f3, 8(%r2)
+; CHECK: axbr %f1, %f0
+; CHECK: std %f1, 0(%r2)
+; CHECK: std %f3, 8(%r2)
+; CHECK: br %r14
+  %f1 = load fp128 *%ptr
+  %f2x = fpext float %f2 to fp128
+  %sum = fadd fp128 %f1, %f2x
+  store fp128 %sum, fp128 *%ptr
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/fp-cmp-01.ll b/test/CodeGen/SystemZ/fp-cmp-01.ll
new file mode 100644
index 0000000..b80a715
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-cmp-01.ll
@@ -0,0 +1,89 @@
+; Test 32-bit floating-point comparison.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check comparison with registers.
+define i64 @f1(i64 %a, i64 %b, float %f1, float %f2) {
+; CHECK: f1:
+; CHECK: cebr %f0, %f2
+; CHECK-NEXT: j{{g?}}e
+; CHECK: lgr %r2, %r3
+; CHECK: br %r14
+  %cond = fcmp oeq float %f1, %f2
+  %res = select i1 %cond, i64 %a, i64 %b
+  ret i64 %res
+}
+
+; Check the low end of the CEB range.
+define i64 @f2(i64 %a, i64 %b, float %f1, float *%ptr) {
+; CHECK: f2:
+; CHECK: ceb %f0, 0(%r4)
+; CHECK-NEXT: j{{g?}}e
+; CHECK: lgr %r2, %r3
+; CHECK: br %r14
+  %f2 = load float *%ptr
+  %cond = fcmp oeq float %f1, %f2
+  %res = select i1 %cond, i64 %a, i64 %b
+  ret i64 %res
+}
+
+; Check the high end of the aligned CEB range.
+define i64 @f3(i64 %a, i64 %b, float %f1, float *%base) {
+; CHECK: f3:
+; CHECK: ceb %f0, 4092(%r4)
+; CHECK-NEXT: j{{g?}}e
+; CHECK: lgr %r2, %r3
+; CHECK: br %r14
+  %ptr = getelementptr float *%base, i64 1023
+  %f2 = load float *%ptr
+  %cond = fcmp oeq float %f1, %f2
+  %res = select i1 %cond, i64 %a, i64 %b
+  ret i64 %res
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f4(i64 %a, i64 %b, float %f1, float *%base) {
+; CHECK: f4:
+; CHECK: aghi %r4, 4096
+; CHECK: ceb %f0, 0(%r4)
+; CHECK-NEXT: j{{g?}}e
+; CHECK: lgr %r2, %r3
+; CHECK: br %r14
+  %ptr = getelementptr float *%base, i64 1024
+  %f2 = load float *%ptr
+  %cond = fcmp oeq float %f1, %f2
+  %res = select i1 %cond, i64 %a, i64 %b
+  ret i64 %res
+}
+
+; Check negative displacements, which also need separate address logic.
+define i64 @f5(i64 %a, i64 %b, float %f1, float *%base) {
+; CHECK: f5:
+; CHECK: aghi %r4, -4
+; CHECK: ceb %f0, 0(%r4)
+; CHECK-NEXT: j{{g?}}e
+; CHECK: lgr %r2, %r3
+; CHECK: br %r14
+  %ptr = getelementptr float *%base, i64 -1
+  %f2 = load float *%ptr
+  %cond = fcmp oeq float %f1, %f2
+  %res = select i1 %cond, i64 %a, i64 %b
+  ret i64 %res
+}
+
+; Check that CEB allows indices.
+define i64 @f6(i64 %a, i64 %b, float %f1, float *%base, i64 %index) {
+; CHECK: f6:
+; CHECK: sllg %r1, %r5, 2
+; CHECK: ceb %f0, 400(%r1,%r4)
+; CHECK-NEXT: j{{g?}}e
+; CHECK: lgr %r2, %r3
+; CHECK: br %r14
+  %ptr1 = getelementptr float *%base, i64 %index
+  %ptr2 = getelementptr float *%ptr1, i64 100
+  %f2 = load float *%ptr2
+  %cond = fcmp oeq float %f1, %f2
+  %res = select i1 %cond, i64 %a, i64 %b
+  ret i64 %res
+}
diff --git a/test/CodeGen/SystemZ/fp-cmp-02.ll b/test/CodeGen/SystemZ/fp-cmp-02.ll
new file mode 100644
index 0000000..8227308
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-cmp-02.ll
@@ -0,0 +1,89 @@
+; Test 64-bit floating-point comparison.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check comparison with registers.
+define i64 @f1(i64 %a, i64 %b, double %f1, double %f2) {
+; CHECK: f1:
+; CHECK: cdbr %f0, %f2
+; CHECK-NEXT: j{{g?}}e
+; CHECK: lgr %r2, %r3
+; CHECK: br %r14
+  %cond = fcmp oeq double %f1, %f2
+  %res = select i1 %cond, i64 %a, i64 %b
+  ret i64 %res
+}
+
+; Check the low end of the CDB range.
+define i64 @f2(i64 %a, i64 %b, double %f1, double *%ptr) {
+; CHECK: f2:
+; CHECK: cdb %f0, 0(%r4)
+; CHECK-NEXT: j{{g?}}e
+; CHECK: lgr %r2, %r3
+; CHECK: br %r14
+  %f2 = load double *%ptr
+  %cond = fcmp oeq double %f1, %f2
+  %res = select i1 %cond, i64 %a, i64 %b
+  ret i64 %res
+}
+
+; Check the high end of the aligned CDB range.
+define i64 @f3(i64 %a, i64 %b, double %f1, double *%base) {
+; CHECK: f3:
+; CHECK: cdb %f0, 4088(%r4)
+; CHECK-NEXT: j{{g?}}e
+; CHECK: lgr %r2, %r3
+; CHECK: br %r14
+  %ptr = getelementptr double *%base, i64 511
+  %f2 = load double *%ptr
+  %cond = fcmp oeq double %f1, %f2
+  %res = select i1 %cond, i64 %a, i64 %b
+  ret i64 %res
+}
+
+; Check the next doubleword up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f4(i64 %a, i64 %b, double %f1, double *%base) {
+; CHECK: f4:
+; CHECK: aghi %r4, 4096
+; CHECK: cdb %f0, 0(%r4)
+; CHECK-NEXT: j{{g?}}e
+; CHECK: lgr %r2, %r3
+; CHECK: br %r14
+  %ptr = getelementptr double *%base, i64 512
+  %f2 = load double *%ptr
+  %cond = fcmp oeq double %f1, %f2
+  %res = select i1 %cond, i64 %a, i64 %b
+  ret i64 %res
+}
+
+; Check negative displacements, which also need separate address logic.
+define i64 @f5(i64 %a, i64 %b, double %f1, double *%base) {
+; CHECK: f5:
+; CHECK: aghi %r4, -8
+; CHECK: cdb %f0, 0(%r4)
+; CHECK-NEXT: j{{g?}}e
+; CHECK: lgr %r2, %r3
+; CHECK: br %r14
+  %ptr = getelementptr double *%base, i64 -1
+  %f2 = load double *%ptr
+  %cond = fcmp oeq double %f1, %f2
+  %res = select i1 %cond, i64 %a, i64 %b
+  ret i64 %res
+}
+
+; Check that CDB allows indices.
+define i64 @f6(i64 %a, i64 %b, double %f1, double *%base, i64 %index) {
+; CHECK: f6:
+; CHECK: sllg %r1, %r5, 3
+; CHECK: cdb %f0, 800(%r1,%r4)
+; CHECK-NEXT: j{{g?}}e
+; CHECK: lgr %r2, %r3
+; CHECK: br %r14
+  %ptr1 = getelementptr double *%base, i64 %index
+  %ptr2 = getelementptr double *%ptr1, i64 100
+  %f2 = load double *%ptr2
+  %cond = fcmp oeq double %f1, %f2
+  %res = select i1 %cond, i64 %a, i64 %b
+  ret i64 %res
+}
diff --git a/test/CodeGen/SystemZ/fp-cmp-03.ll b/test/CodeGen/SystemZ/fp-cmp-03.ll
new file mode 100644
index 0000000..fd12c93
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-cmp-03.ll
@@ -0,0 +1,20 @@
+; Test 128-bit floating-point comparison.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; There is no memory form of 128-bit comparison.
+define i64 @f1(i64 %a, i64 %b, fp128 *%ptr, float %f2) {
+; CHECK: f1:
+; CHECK: lxebr %f0, %f0
+; CHECK: ld %f1, 0(%r4)
+; CHECK: ld %f3, 8(%r4)
+; CHECK: cxbr %f1, %f0
+; CHECK-NEXT: j{{g?}}e
+; CHECK: lgr %r2, %r3
+; CHECK: br %r14
+  %f2x = fpext float %f2 to fp128
+  %f1 = load fp128 *%ptr
+  %cond = fcmp oeq fp128 %f1, %f2x
+  %res = select i1 %cond, i64 %a, i64 %b
+  ret i64 %res
+}
diff --git a/test/CodeGen/SystemZ/fp-const-01.ll b/test/CodeGen/SystemZ/fp-const-01.ll
new file mode 100644
index 0000000..65209d6
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-const-01.ll
@@ -0,0 +1,30 @@
+; Test loads of floating-point zero.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test f32, which should load zero straight into %f0 with LZER.
+define float @f1() {
+; CHECK: f1:
+; CHECK: lzer %f0
+; CHECK: br %r14
+  ret float 0.0
+}
+
+; Test f64, which should load zero straight into %f0 with LZDR.
+define double @f2() {
+; CHECK: f2:
+; CHECK: lzdr %f0
+; CHECK: br %r14
+  ret double 0.0
+}
+
+; Test f128, which should use LZXR and then store %f0 and %f2 with two STDs.
+define void @f3(fp128 *%x) {
+; CHECK: f3:
+; CHECK: lzxr %f0
+; CHECK: std %f0, 0(%r2)
+; CHECK: std %f2, 8(%r2)
+; CHECK: br %r14
+  store fp128 0xL00000000000000000000000000000000, fp128 *%x
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/fp-const-02.ll b/test/CodeGen/SystemZ/fp-const-02.ll
new file mode 100644
index 0000000..2dedf54
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-const-02.ll
@@ -0,0 +1,31 @@
+; Test loads of negative floating-point zero.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test f32, which should use LZER into some FPR and then LCEBR into %f0.
+define float @f1() {
+; CHECK: f1:
+; CHECK: lzer [[REGISTER:%f[0-5]+]]
+; CHECK: lcebr %f0, [[REGISTER]]
+; CHECK: br %r14
+  ret float -0.0
+}
+
+; Test f64, which should use LZDR into some FPR and then LCDBR into %f0.
+define double @f2() {
+; CHECK: f2:
+; CHECK: lzdr [[REGISTER:%f[0-5]+]]
+; CHECK: lcdbr %f0, [[REGISTER]]
+; CHECK: br %r14
+  ret double -0.0
+}
+
+; Test f128, which should use LZXR into some FPR and then LCXBR into %f0.
+define void @f3(fp128 *%x) {
+; CHECK: f3:
+; CHECK: lzxr [[REGISTER:%f[0-5]+]]
+; CHECK: lcxbr %f0, [[REGISTER]]
+; CHECK: br %r14
+  store fp128 0xL00000000000000008000000000000000, fp128 *%x
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/fp-const-03.ll b/test/CodeGen/SystemZ/fp-const-03.ll
new file mode 100644
index 0000000..4c287e4
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-const-03.ll
@@ -0,0 +1,14 @@
+; Test loads of 32-bit floating-point constants.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CONST
+
+define float @f1() {
+; CHECK: f1:
+; CHECK: larl [[REGISTER:%r[1-5]]], {{.*}}
+; CHECK: le %f0, 0([[REGISTER]])
+; CHECK: br %r14
+;
+; CONST: .long 1065353217
+  ret float 0x3ff0000020000000
+}
diff --git a/test/CodeGen/SystemZ/fp-const-04.ll b/test/CodeGen/SystemZ/fp-const-04.ll
new file mode 100644
index 0000000..847c380
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-const-04.ll
@@ -0,0 +1,15 @@
+; Test loads of 64-bit floating-point constants that can be represented
+; as 32-bit constants.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CONST
+
+define double @f1() {
+; CHECK: f1:
+; CHECK: larl [[REGISTER:%r[1-5]]], {{.*}}
+; CHECK: ldeb %f0, 0([[REGISTER]])
+; CHECK: br %r14
+;
+; CONST: .long 1065353217
+  ret double 0x3ff0000020000000
+}
diff --git a/test/CodeGen/SystemZ/fp-const-05.ll b/test/CodeGen/SystemZ/fp-const-05.ll
new file mode 100644
index 0000000..48f84ce
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-const-05.ll
@@ -0,0 +1,18 @@
+; Test loads of 128-bit floating-point constants that can be represented
+; as 32-bit constants.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CONST
+
+define void @f1(fp128 *%x) {
+; CHECK: f1:
+; CHECK: larl [[REGISTER:%r[1-5]+]], {{.*}}
+; CHECK: lxeb %f0, 0([[REGISTER]])
+; CHECK: std %f0, 0(%r2)
+; CHECK: std %f2, 8(%r2)
+; CHECK: br %r14
+;
+; CONST: .long 1065353217
+  store fp128 0xL00000000000000003fff000002000000, fp128 *%x
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/fp-const-06.ll b/test/CodeGen/SystemZ/fp-const-06.ll
new file mode 100644
index 0000000..1da3d5e
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-const-06.ll
@@ -0,0 +1,14 @@
+; Test loads of 64-bit floating-point constants.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CONST
+
+define double @f1() {
+; CHECK: f1:
+; CHECK: larl [[REGISTER:%r[1-5]+]], {{.*}}
+; CHECK: ld %f0, 0([[REGISTER]])
+; CHECK: br %r14
+;
+; CONST: .quad 4607182419068452864
+  ret double 0x3ff0000010000000
+}
diff --git a/test/CodeGen/SystemZ/fp-const-07.ll b/test/CodeGen/SystemZ/fp-const-07.ll
new file mode 100644
index 0000000..5a10845
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-const-07.ll
@@ -0,0 +1,18 @@
+; Test loads of 128-bit floating-point constants that can be represented
+; as 64-bit constants.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CONST
+
+define void @f1(fp128 *%x) {
+; CHECK: f1:
+; CHECK: larl [[REGISTER:%r[1-5]+]], {{.*}}
+; CHECK: lxdb %f0, 0([[REGISTER]])
+; CHECK: std %f0, 0(%r2)
+; CHECK: std %f2, 8(%r2)
+; CHECK: br %r14
+;
+; CONST: .quad 4607182419068452864
+  store fp128 0xL00000000000000003fff000001000000, fp128 *%x
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/fp-const-08.ll b/test/CodeGen/SystemZ/fp-const-08.ll
new file mode 100644
index 0000000..6a8a1ab
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-const-08.ll
@@ -0,0 +1,21 @@
+; Test loads of 128-bit floating-point constants.  This value would actually
+; fit within the x86 80-bit format, so the test makes sure we don't try to
+; extend from an f80.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CONST
+
+define void @f1(fp128 *%x) {
+; CHECK: f1:
+; CHECK: larl [[REGISTER:%r[1-5]+]], {{.*}}
+; CHECK: ld %f0, 0([[REGISTER]])
+; CHECK: ld %f2, 8([[REGISTER]])
+; CHECK: std %f0, 0(%r2)
+; CHECK: std %f2, 8(%r2)
+; CHECK: br %r14
+;
+; CONST: .quad 4611404543450677248
+; CONST: .quad 576460752303423488
+  store fp128 0xL08000000000000003fff000000000000, fp128 *%x
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/fp-const-09.ll b/test/CodeGen/SystemZ/fp-const-09.ll
new file mode 100644
index 0000000..435dcba
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-const-09.ll
@@ -0,0 +1,20 @@
+; Test loads of 128-bit floating-point constants in which the low bit of
+; the significand is set.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CONST
+
+define void @f1(fp128 *%x) {
+; CHECK: f1:
+; CHECK: larl [[REGISTER:%r[1-5]+]], {{.*}}
+; CHECK: ld %f0, 0([[REGISTER]])
+; CHECK: ld %f2, 8([[REGISTER]])
+; CHECK: std %f0, 0(%r2)
+; CHECK: std %f2, 8(%r2)
+; CHECK: br %r14
+;
+; CONST: .quad 4611404543450677248
+; CONST: .quad 1
+  store fp128 0xL00000000000000013fff000000000000, fp128 *%x
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/fp-conv-01.ll b/test/CodeGen/SystemZ/fp-conv-01.ll
new file mode 100644
index 0000000..6c8ef48
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-conv-01.ll
@@ -0,0 +1,61 @@
+; Test floating-point truncations.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test f64->f32.  The second argument is truncated, so the source is %f2.
+define float @f1(double %d1, double %d2) {
+; CHECK: f1:
+; CHECK: ledbr %f0, %f2
+; CHECK: br %r14
+  %res = fptrunc double %d2 to float
+  ret float %res
+}
+
+; Test f128->f32 (LEXBR), with the f128 value loaded from memory.
+define float @f2(fp128 *%ptr) {
+; CHECK: f2:
+; CHECK: lexbr %f0, %f0
+; CHECK: br %r14
+  %val = load fp128 *%ptr
+  %res = fptrunc fp128 %val to float
+  ret float %res
+}
+
+; Make sure that we don't use %f0 as the destination of LEXBR when %f2
+; is still live.
+define void @f3(float *%dst, fp128 *%ptr, float %d1, float %d2) {
+; CHECK: f3:
+; CHECK: lexbr %f1, %f1
+; CHECK: aebr %f1, %f2
+; CHECK: ste %f1, 0(%r2)
+; CHECK: br %r14
+  %val = load fp128 *%ptr
+  %conv = fptrunc fp128 %val to float
+  %res = fadd float %conv, %d2
+  store float %res, float *%dst
+  ret void
+}
+
+; Test f128->f64 (LDXBR), with the f128 value loaded from memory.
+define double @f4(fp128 *%ptr) {
+; CHECK: f4:
+; CHECK: ldxbr %f0, %f0
+; CHECK: br %r14
+  %val = load fp128 *%ptr
+  %res = fptrunc fp128 %val to double
+  ret double %res
+}
+
+; Like f3, but for f128->f64.
+define void @f5(double *%dst, fp128 *%ptr, double %d1, double %d2) {
+; CHECK: f5:
+; CHECK: ldxbr %f1, %f1
+; CHECK: adbr %f1, %f2
+; CHECK: std %f1, 0(%r2)
+; CHECK: br %r14
+  %val = load fp128 *%ptr
+  %conv = fptrunc fp128 %val to double
+  %res = fadd double %conv, %d2
+  store double %res, double *%dst
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/fp-conv-02.ll b/test/CodeGen/SystemZ/fp-conv-02.ll
new file mode 100644
index 0000000..f284e1d
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-conv-02.ll
@@ -0,0 +1,71 @@
+; Test extensions of f32 to f64.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check register extension.
+define double @f1(float %val) {
+; CHECK: f1:
+; CHECK: ldebr %f0, %f0
+; CHECK: br %r14
+  %res = fpext float %val to double
+  ret double %res
+}
+
+; Check the low end of the LDEB range.
+define double @f2(float *%ptr) {
+; CHECK: f2:
+; CHECK: ldeb %f0, 0(%r2)
+; CHECK: br %r14
+  %val = load float *%ptr
+  %res = fpext float %val to double
+  ret double %res
+}
+
+; Check the high end of the aligned LDEB range.
+define double @f3(float *%base) {
+; CHECK: f3:
+; CHECK: ldeb %f0, 4092(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr float *%base, i64 1023
+  %val = load float *%ptr
+  %res = fpext float %val to double
+  ret double %res
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define double @f4(float *%base) {
+; CHECK: f4:
+; CHECK: aghi %r2, 4096
+; CHECK: ldeb %f0, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr float *%base, i64 1024
+  %val = load float *%ptr
+  %res = fpext float %val to double
+  ret double %res
+}
+
+; Check negative displacements, which also need separate address logic.
+define double @f5(float *%base) {
+; CHECK: f5:
+; CHECK: aghi %r2, -4
+; CHECK: ldeb %f0, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr float *%base, i64 -1
+  %val = load float *%ptr
+  %res = fpext float %val to double
+  ret double %res
+}
+
+; Check that LDEB allows indices.
+define double @f6(float *%base, i64 %index) {
+; CHECK: f6:
+; CHECK: sllg %r1, %r3, 2
+; CHECK: ldeb %f0, 400(%r1,%r2)
+; CHECK: br %r14
+  %ptr1 = getelementptr float *%base, i64 %index
+  %ptr2 = getelementptr float *%ptr1, i64 100
+  %val = load float *%ptr2
+  %res = fpext float %val to double
+  ret double %res
+}
diff --git a/test/CodeGen/SystemZ/fp-conv-03.ll b/test/CodeGen/SystemZ/fp-conv-03.ll
new file mode 100644
index 0000000..703a141
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-conv-03.ll
@@ -0,0 +1,89 @@
+; Test extensions of f32 to f128.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check register extension.
+define void @f1(fp128 *%dst, float %val) {
+; CHECK: f1:
+; CHECK: lxebr %f0, %f0
+; CHECK: std %f0, 0(%r2)
+; CHECK: std %f2, 8(%r2)
+; CHECK: br %r14
+  %res = fpext float %val to fp128
+  store fp128 %res, fp128 *%dst
+  ret void
+}
+
+; Check the low end of the LXEB range.
+define void @f2(fp128 *%dst, float *%ptr) {
+; CHECK: f2:
+; CHECK: lxeb %f0, 0(%r3)
+; CHECK: std %f0, 0(%r2)
+; CHECK: std %f2, 8(%r2)
+; CHECK: br %r14
+  %val = load float *%ptr
+  %res = fpext float %val to fp128
+  store fp128 %res, fp128 *%dst
+  ret void
+}
+
+; Check the high end of the aligned LXEB range.
+define void @f3(fp128 *%dst, float *%base) {
+; CHECK: f3:
+; CHECK: lxeb %f0, 4092(%r3)
+; CHECK: std %f0, 0(%r2)
+; CHECK: std %f2, 8(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr float *%base, i64 1023
+  %val = load float *%ptr
+  %res = fpext float %val to fp128
+  store fp128 %res, fp128 *%dst
+  ret void
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define void @f4(fp128 *%dst, float *%base) {
+; CHECK: f4:
+; CHECK: aghi %r3, 4096
+; CHECK: lxeb %f0, 0(%r3)
+; CHECK: std %f0, 0(%r2)
+; CHECK: std %f2, 8(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr float *%base, i64 1024
+  %val = load float *%ptr
+  %res = fpext float %val to fp128
+  store fp128 %res, fp128 *%dst
+  ret void
+}
+
+; Check negative displacements, which also need separate address logic.
+define void @f5(fp128 *%dst, float *%base) {
+; CHECK: f5:
+; CHECK: aghi %r3, -4
+; CHECK: lxeb %f0, 0(%r3)
+; CHECK: std %f0, 0(%r2)
+; CHECK: std %f2, 8(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr float *%base, i64 -1
+  %val = load float *%ptr
+  %res = fpext float %val to fp128
+  store fp128 %res, fp128 *%dst
+  ret void
+}
+
+; Check that LXEB allows indices.
+define void @f6(fp128 *%dst, float *%base, i64 %index) {
+; CHECK: f6:
+; CHECK: sllg %r1, %r4, 2
+; CHECK: lxeb %f0, 400(%r1,%r3)
+; CHECK: std %f0, 0(%r2)
+; CHECK: std %f2, 8(%r2)
+; CHECK: br %r14
+  %ptr1 = getelementptr float *%base, i64 %index
+  %ptr2 = getelementptr float *%ptr1, i64 100
+  %val = load float *%ptr2
+  %res = fpext float %val to fp128
+  store fp128 %res, fp128 *%dst
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/fp-conv-04.ll b/test/CodeGen/SystemZ/fp-conv-04.ll
new file mode 100644
index 0000000..b7b5166
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-conv-04.ll
@@ -0,0 +1,89 @@
+; Test extensions of f64 to f128.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check register extension.
+define void @f1(fp128 *%dst, double %val) {
+; CHECK: f1:
+; CHECK: lxdbr %f0, %f0
+; CHECK: std %f0, 0(%r2)
+; CHECK: std %f2, 8(%r2)
+; CHECK: br %r14
+  %res = fpext double %val to fp128
+  store fp128 %res, fp128 *%dst
+  ret void
+}
+
+; Check the low end of the LXDB range.
+define void @f2(fp128 *%dst, double *%ptr) {
+; CHECK: f2:
+; CHECK: lxdb %f0, 0(%r3)
+; CHECK: std %f0, 0(%r2)
+; CHECK: std %f2, 8(%r2)
+; CHECK: br %r14
+  %val = load double *%ptr
+  %res = fpext double %val to fp128
+  store fp128 %res, fp128 *%dst
+  ret void
+}
+
+; Check the high end of the aligned LXDB range.
+define void @f3(fp128 *%dst, double *%base) {
+; CHECK: f3:
+; CHECK: lxdb %f0, 4088(%r3)
+; CHECK: std %f0, 0(%r2)
+; CHECK: std %f2, 8(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr double *%base, i64 511
+  %val = load double *%ptr
+  %res = fpext double %val to fp128
+  store fp128 %res, fp128 *%dst
+  ret void
+}
+
+; Check the next doubleword up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define void @f4(fp128 *%dst, double *%base) {
+; CHECK: f4:
+; CHECK: aghi %r3, 4096
+; CHECK: lxdb %f0, 0(%r3)
+; CHECK: std %f0, 0(%r2)
+; CHECK: std %f2, 8(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr double *%base, i64 512
+  %val = load double *%ptr
+  %res = fpext double %val to fp128
+  store fp128 %res, fp128 *%dst
+  ret void
+}
+
+; Check negative displacements, which also need separate address logic.
+define void @f5(fp128 *%dst, double *%base) {
+; CHECK: f5:
+; CHECK: aghi %r3, -8
+; CHECK: lxdb %f0, 0(%r3)
+; CHECK: std %f0, 0(%r2)
+; CHECK: std %f2, 8(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr double *%base, i64 -1
+  %val = load double *%ptr
+  %res = fpext double %val to fp128
+  store fp128 %res, fp128 *%dst
+  ret void
+}
+
+; Check that LXDB allows indices.
+define void @f6(fp128 *%dst, double *%base, i64 %index) {
+; CHECK: f6:
+; CHECK: sllg %r1, %r4, 3
+; CHECK: lxdb %f0, 800(%r1,%r3)
+; CHECK: std %f0, 0(%r2)
+; CHECK: std %f2, 8(%r2)
+; CHECK: br %r14
+  %ptr1 = getelementptr double *%base, i64 %index
+  %ptr2 = getelementptr double *%ptr1, i64 100
+  %val = load double *%ptr2
+  %res = fpext double %val to fp128
+  store fp128 %res, fp128 *%dst
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/fp-conv-05.ll b/test/CodeGen/SystemZ/fp-conv-05.ll
new file mode 100644
index 0000000..2d88732
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-conv-05.ll
@@ -0,0 +1,33 @@
+; Test conversions of signed i32s to floating-point values.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check i32->f32.
+define float @f1(i32 %i) {
+; CHECK: f1:
+; CHECK: cefbr %f0, %r2
+; CHECK: br %r14
+  %conv = sitofp i32 %i to float
+  ret float %conv
+}
+
+; Check i32->f64.
+define double @f2(i32 %i) {
+; CHECK: f2:
+; CHECK: cdfbr %f0, %r2
+; CHECK: br %r14
+  %conv = sitofp i32 %i to double
+  ret double %conv
+}
+
+; Check i32->f128.
+define void @f3(i32 %i, fp128 *%dst) {
+; CHECK: f3:
+; CHECK: cxfbr %f0, %r2
+; CHECK: std %f0, 0(%r3)
+; CHECK: std %f2, 8(%r3)
+; CHECK: br %r14
+  %conv = sitofp i32 %i to fp128
+  store fp128 %conv, fp128 *%dst
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/fp-conv-06.ll b/test/CodeGen/SystemZ/fp-conv-06.ll
new file mode 100644
index 0000000..1b39b67
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-conv-06.ll
@@ -0,0 +1,37 @@
+; Test conversions of unsigned i32s to floating-point values.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check i32->f32.  There is no native instruction, so we must promote
+; to i64 first.
+define float @f1(i32 %i) {
+; CHECK: f1:
+; CHECK: llgfr [[REGISTER:%r[0-5]]], %r2
+; CHECK: cegbr %f0, [[REGISTER]]
+; CHECK: br %r14
+  %conv = uitofp i32 %i to float
+  ret float %conv
+}
+
+; Check i32->f64.
+define double @f2(i32 %i) {
+; CHECK: f2:
+; CHECK: llgfr [[REGISTER:%r[0-5]]], %r2
+; CHECK: cdgbr %f0, [[REGISTER]]
+; CHECK: br %r14
+  %conv = uitofp i32 %i to double
+  ret double %conv
+}
+
+; Check i32->f128.
+define void @f3(i32 %i, fp128 *%dst) {
+; CHECK: f3:
+; CHECK: llgfr [[REGISTER:%r[0-5]]], %r2
+; CHECK: cxgbr %f0, [[REGISTER]]
+; CHECK: std %f0, 0(%r3)
+; CHECK: std %f2, 8(%r3)
+; CHECK: br %r14
+  %conv = uitofp i32 %i to fp128
+  store fp128 %conv, fp128 *%dst
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/fp-conv-07.ll b/test/CodeGen/SystemZ/fp-conv-07.ll
new file mode 100644
index 0000000..0ebbd37
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-conv-07.ll
@@ -0,0 +1,33 @@
+; Test conversions of signed i64s to floating-point values.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test i64->f32.
+define float @f1(i64 %i) {
+; CHECK: f1:
+; CHECK: cegbr %f0, %r2
+; CHECK: br %r14
+  %conv = sitofp i64 %i to float
+  ret float %conv
+}
+
+; Test i64->f64.
+define double @f2(i64 %i) {
+; CHECK: f2:
+; CHECK: cdgbr %f0, %r2
+; CHECK: br %r14
+  %conv = sitofp i64 %i to double
+  ret double %conv
+}
+
+; Test i64->f128.
+define void @f3(i64 %i, fp128 *%dst) {
+; CHECK: f3:
+; CHECK: cxgbr %f0, %r2
+; CHECK: std %f0, 0(%r3)
+; CHECK: std %f2, 8(%r3)
+; CHECK: br %r14
+  %conv = sitofp i64 %i to fp128
+  store fp128 %conv, fp128 *%dst
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/fp-conv-08.ll b/test/CodeGen/SystemZ/fp-conv-08.ll
new file mode 100644
index 0000000..20c4e30
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-conv-08.ll
@@ -0,0 +1,35 @@
+; Test conversions of unsigned i64s to floating-point values.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test i64->f32.  There's no native support for unsigned i64-to-fp conversions,
+; but we should be able to implement them using signed i64-to-fp conversions.
+define float @f1(i64 %i) {
+; CHECK: f1:
+; CHECK: cegbr
+; CHECK: aebr
+; CHECK: br %r14
+  %conv = uitofp i64 %i to float
+  ret float %conv
+}
+
+; Test i64->f64.  Check for both the move into an FPR and the final add.
+define double @f2(i64 %i) {
+; CHECK: f2:
+; CHECK: ldgr
+; CHECK: adbr
+; CHECK: br %r14
+  %conv = uitofp i64 %i to double
+  ret double %conv
+}
+
+; Test i64->f128.
+define void @f3(i64 %i, fp128 *%dst) {
+; CHECK: f3:
+; CHECK: cxgbr
+; CHECK: axbr
+; CHECK: br %r14
+  %conv = uitofp i64 %i to fp128
+  store fp128 %conv, fp128 *%dst
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/fp-conv-09.ll b/test/CodeGen/SystemZ/fp-conv-09.ll
new file mode 100644
index 0000000..e3c0352
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-conv-09.ll
@@ -0,0 +1,33 @@
+; Test conversion of floating-point values to signed i32s.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test f32->i32.
+define i32 @f1(float %f) {
+; CHECK: f1:
+; CHECK: cfebr %r2, 5, %f0
+; CHECK: br %r14
+  %conv = fptosi float %f to i32
+  ret i32 %conv
+}
+
+; Test f64->i32.
+define i32 @f2(double %f) {
+; CHECK: f2:
+; CHECK: cfdbr %r2, 5, %f0
+; CHECK: br %r14
+  %conv = fptosi double %f to i32
+  ret i32 %conv
+}
+
+; Test f128->i32.
+define i32 @f3(fp128 *%src) {
+; CHECK: f3:
+; CHECK: ld %f0, 0(%r2)
+; CHECK: ld %f2, 8(%r2)
+; CHECK: cfxbr %r2, 5, %f0
+; CHECK: br %r14
+  %f = load fp128 *%src
+  %conv = fptosi fp128 %f to i32
+  ret i32 %conv
+}
diff --git a/test/CodeGen/SystemZ/fp-conv-10.ll b/test/CodeGen/SystemZ/fp-conv-10.ll
new file mode 100644
index 0000000..bb8878b
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-conv-10.ll
@@ -0,0 +1,45 @@
+; Test conversion of floating-point values to unsigned i32s.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; z10 doesn't have native support for unsigned fp-to-i32 conversions;
+; they were added in z196 as the Convert to Logical family of instructions.
+; Promoting to i64 doesn't generate an inexact condition for values that are
+; outside the i32 range but in the i64 range, so use the default expansion.
+
+; Test f32->i32.
+define i32 @f1(float %f) {
+; CHECK: f1:
+; CHECK: cebr
+; CHECK: sebr
+; CHECK: cfebr
+; CHECK: xilf
+; CHECK: br %r14
+  %conv = fptoui float %f to i32
+  ret i32 %conv
+}
+
+; Test f64->i32.
+define i32 @f2(double %f) {
+; CHECK: f2:
+; CHECK: cdbr
+; CHECK: sdbr
+; CHECK: cfdbr
+; CHECK: xilf
+; CHECK: br %r14
+  %conv = fptoui double %f to i32
+  ret i32 %conv
+}
+
+; Test f128->i32.
+define i32 @f3(fp128 *%src) {
+; CHECK: f3:
+; CHECK: cxbr
+; CHECK: sxbr
+; CHECK: cfxbr
+; CHECK: xilf
+; CHECK: br %r14
+  %f = load fp128 *%src
+  %conv = fptoui fp128 %f to i32
+  ret i32 %conv
+}
diff --git a/test/CodeGen/SystemZ/fp-conv-11.ll b/test/CodeGen/SystemZ/fp-conv-11.ll
new file mode 100644
index 0000000..2a36cb9
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-conv-11.ll
@@ -0,0 +1,33 @@
+; Test conversion of floating-point values to signed i64s.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test f32->i64.
+define i64 @f1(float %f) {
+; CHECK: f1:
+; CHECK: cgebr %r2, 5, %f0
+; CHECK: br %r14
+  %conv = fptosi float %f to i64
+  ret i64 %conv
+}
+
+; Test f64->i64.
+define i64 @f2(double %f) {
+; CHECK: f2:
+; CHECK: cgdbr %r2, 5, %f0
+; CHECK: br %r14
+  %conv = fptosi double %f to i64
+  ret i64 %conv
+}
+
+; Test f128->i64.
+define i64 @f3(fp128 *%src) {
+; CHECK: f3:
+; CHECK: ld %f0, 0(%r2)
+; CHECK: ld %f2, 8(%r2)
+; CHECK: cgxbr %r2, 5, %f0
+; CHECK: br %r14
+  %f = load fp128 *%src
+  %conv = fptosi fp128 %f to i64
+  ret i64 %conv
+}
diff --git a/test/CodeGen/SystemZ/fp-conv-12.ll b/test/CodeGen/SystemZ/fp-conv-12.ll
new file mode 100644
index 0000000..4445b14
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-conv-12.ll
@@ -0,0 +1,44 @@
+; Test conversion of floating-point values to unsigned i64s.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; z10 doesn't have native support for unsigned fp-to-i64 conversions;
+; they were added in z196 as the Convert to Logical family of instructions.
+; Convert via signed i64s instead.
+
+; Test f32->i64.
+define i64 @f1(float %f) {
+; CHECK: f1:
+; CHECK: cebr
+; CHECK: sebr
+; CHECK: cgebr
+; CHECK: xihf
+; CHECK: br %r14
+  %conv = fptoui float %f to i64
+  ret i64 %conv
+}
+
+; Test f64->i64.
+define i64 @f2(double %f) {
+; CHECK: f2:
+; CHECK: cdbr
+; CHECK: sdbr
+; CHECK: cgdbr
+; CHECK: xihf
+; CHECK: br %r14
+  %conv = fptoui double %f to i64
+  ret i64 %conv
+}
+
+; Test f128->i64.
+define i64 @f3(fp128 *%src) {
+; CHECK: f3:
+; CHECK: cxbr
+; CHECK: sxbr
+; CHECK: cgxbr
+; CHECK: xihf
+; CHECK: br %r14
+  %f = load fp128 *%src
+  %conv = fptoui fp128 %f to i64
+  ret i64 %conv
+}
diff --git a/test/CodeGen/SystemZ/fp-copysign-01.ll b/test/CodeGen/SystemZ/fp-copysign-01.ll
new file mode 100644
index 0000000..458d475
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-copysign-01.ll
@@ -0,0 +1,128 @@
+; Test copysign operations.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare float @copysignf(float, float) readnone
+declare double @copysign(double, double) readnone
+; FIXME: not really the correct prototype for SystemZ.
+declare fp128 @copysignl(fp128, fp128) readnone
+
+; Test f32 copies in which the sign comes from an f32.
+define float @f1(float %a, float %b) {
+; CHECK: f1:
+; CHECK-NOT: %f2
+; CHECK: cpsdr %f0, %f0, %f2
+; CHECK: br %r14
+  %res = call float @copysignf(float %a, float %b) readnone
+  ret float %res
+}
+
+; Test f32 copies in which the sign comes from an f64.
+define float @f2(float %a, double %bd) {
+; CHECK: f2:
+; CHECK-NOT: %f2
+; CHECK: cpsdr %f0, %f0, %f2
+; CHECK: br %r14
+  %b = fptrunc double %bd to float
+  %res = call float @copysignf(float %a, float %b) readnone
+  ret float %res
+}
+
+; Test f32 copies in which the sign comes from an f128.
+define float @f3(float %a, fp128 *%bptr) {
+; CHECK: f3:
+; CHECK: ld [[BHIGH:%f[0-7]]], 0(%r2)
+; CHECK: ld [[BLOW:%f[0-7]]], 8(%r2)
+; CHECK: cpsdr %f0, %f0, [[BHIGH]]
+; CHECK: br %r14
+  %bl = load volatile fp128 *%bptr
+  %b = fptrunc fp128 %bl to float
+  %res = call float @copysignf(float %a, float %b) readnone
+  ret float %res
+}
+
+; Test f64 copies in which the sign comes from an f32.
+define double @f4(double %a, float %bf) {
+; CHECK: f4:
+; CHECK-NOT: %f2
+; CHECK: cpsdr %f0, %f0, %f2
+; CHECK: br %r14
+  %b = fpext float %bf to double
+  %res = call double @copysign(double %a, double %b) readnone
+  ret double %res
+}
+
+; Test f64 copies in which the sign comes from an f64.
+define double @f5(double %a, double %b) {
+; CHECK: f5:
+; CHECK-NOT: %f2
+; CHECK: cpsdr %f0, %f0, %f2
+; CHECK: br %r14
+  %res = call double @copysign(double %a, double %b) readnone
+  ret double %res
+}
+
+; Test f64 copies in which the sign comes from an f128.
+define double @f6(double %a, fp128 *%bptr) {
+; CHECK: f6:
+; CHECK: ld [[BHIGH:%f[0-7]]], 0(%r2)
+; CHECK: ld [[BLOW:%f[0-7]]], 8(%r2)
+; CHECK: cpsdr %f0, %f0, [[BHIGH]]
+; CHECK: br %r14
+  %bl = load volatile fp128 *%bptr
+  %b = fptrunc fp128 %bl to double
+  %res = call double @copysign(double %a, double %b) readnone
+  ret double %res
+}
+
+; Test f128 copies in which the sign comes from an f32.  We shouldn't
+; need any register shuffling here; %a should be tied to %c, with CPSDR
+; just changing the high register.
+define void @f7(fp128 *%cptr, fp128 *%aptr, float %bf) {
+; CHECK: f7:
+; CHECK: ld [[AHIGH:%f[0-7]]], 0(%r3)
+; CHECK: ld [[ALOW:%f[0-7]]], 8(%r3)
+; CHECK: cpsdr [[AHIGH]], [[AHIGH]], %f0
+; CHECK: std [[AHIGH]], 0(%r2)
+; CHECK: std [[ALOW]], 8(%r2)
+; CHECK: br %r14
+  %a = load volatile fp128 *%aptr
+  %b = fpext float %bf to fp128
+  %c = call fp128 @copysignl(fp128 %a, fp128 %b) readnone
+  store fp128 %c, fp128 *%cptr
+  ret void
+}
+
+; As above, but the sign comes from an f64.
+define void @f8(fp128 *%cptr, fp128 *%aptr, double %bd) {
+; CHECK: f8:
+; CHECK: ld [[AHIGH:%f[0-7]]], 0(%r3)
+; CHECK: ld [[ALOW:%f[0-7]]], 8(%r3)
+; CHECK: cpsdr [[AHIGH]], [[AHIGH]], %f0
+; CHECK: std [[AHIGH]], 0(%r2)
+; CHECK: std [[ALOW]], 8(%r2)
+; CHECK: br %r14
+  %a = load volatile fp128 *%aptr
+  %b = fpext double %bd to fp128
+  %c = call fp128 @copysignl(fp128 %a, fp128 %b) readnone
+  store fp128 %c, fp128 *%cptr
+  ret void
+}
+
+; As above, but the sign comes from an f128.  Don't require the low part
+; of %b to be loaded, since it isn't used.
+define void @f9(fp128 *%cptr, fp128 *%aptr, fp128 *%bptr) {
+; CHECK: f9:
+; CHECK: ld [[AHIGH:%f[0-7]]], 0(%r3)
+; CHECK: ld [[ALOW:%f[0-7]]], 8(%r3)
+; CHECK: ld [[BHIGH:%f[0-7]]], 0(%r4)
+; CHECK: cpsdr [[AHIGH]], [[AHIGH]], [[BHIGH]]
+; CHECK: std [[AHIGH]], 0(%r2)
+; CHECK: std [[ALOW]], 8(%r2)
+; CHECK: br %r14
+  %a = load volatile fp128 *%aptr
+  %b = load volatile fp128 *%bptr
+  %c = call fp128 @copysignl(fp128 %a, fp128 %b) readnone
+  store fp128 %c, fp128 *%cptr
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/fp-div-01.ll b/test/CodeGen/SystemZ/fp-div-01.ll
new file mode 100644
index 0000000..080d45e
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-div-01.ll
@@ -0,0 +1,71 @@
+; Test 32-bit floating-point division.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check register division.
+define float @f1(float %f1, float %f2) {
+; CHECK: f1:
+; CHECK: debr %f0, %f2
+; CHECK: br %r14
+  %res = fdiv float %f1, %f2
+  ret float %res
+}
+
+; Check the low end of the DEB range.
+define float @f2(float %f1, float *%ptr) {
+; CHECK: f2:
+; CHECK: deb %f0, 0(%r2)
+; CHECK: br %r14
+  %f2 = load float *%ptr
+  %res = fdiv float %f1, %f2
+  ret float %res
+}
+
+; Check the high end of the aligned DEB range.
+define float @f3(float %f1, float *%base) {
+; CHECK: f3:
+; CHECK: deb %f0, 4092(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr float *%base, i64 1023
+  %f2 = load float *%ptr
+  %res = fdiv float %f1, %f2
+  ret float %res
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define float @f4(float %f1, float *%base) {
+; CHECK: f4:
+; CHECK: aghi %r2, 4096
+; CHECK: deb %f0, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr float *%base, i64 1024
+  %f2 = load float *%ptr
+  %res = fdiv float %f1, %f2
+  ret float %res
+}
+
+; Check negative displacements, which also need separate address logic.
+define float @f5(float %f1, float *%base) {
+; CHECK: f5:
+; CHECK: aghi %r2, -4
+; CHECK: deb %f0, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr float *%base, i64 -1
+  %f2 = load float *%ptr
+  %res = fdiv float %f1, %f2
+  ret float %res
+}
+
+; Check that DEB allows indices.
+define float @f6(float %f1, float *%base, i64 %index) {
+; CHECK: f6:
+; CHECK: sllg %r1, %r3, 2
+; CHECK: deb %f0, 400(%r1,%r2)
+; CHECK: br %r14
+  %ptr1 = getelementptr float *%base, i64 %index
+  %ptr2 = getelementptr float *%ptr1, i64 100
+  %f2 = load float *%ptr2
+  %res = fdiv float %f1, %f2
+  ret float %res
+}
diff --git a/test/CodeGen/SystemZ/fp-div-02.ll b/test/CodeGen/SystemZ/fp-div-02.ll
new file mode 100644
index 0000000..c5cae15
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-div-02.ll
@@ -0,0 +1,71 @@
+; Test 64-bit floating-point division.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check register division.
+define double @f1(double %f1, double %f2) {
+; CHECK: f1:
+; CHECK: ddbr %f0, %f2
+; CHECK: br %r14
+  %res = fdiv double %f1, %f2
+  ret double %res
+}
+
+; Check the low end of the DDB range.
+define double @f2(double %f1, double *%ptr) {
+; CHECK: f2:
+; CHECK: ddb %f0, 0(%r2)
+; CHECK: br %r14
+  %f2 = load double *%ptr
+  %res = fdiv double %f1, %f2
+  ret double %res
+}
+
+; Check the high end of the aligned DDB range.
+define double @f3(double %f1, double *%base) {
+; CHECK: f3:
+; CHECK: ddb %f0, 4088(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr double *%base, i64 511
+  %f2 = load double *%ptr
+  %res = fdiv double %f1, %f2
+  ret double %res
+}
+
+; Check the next doubleword up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define double @f4(double %f1, double *%base) {
+; CHECK: f4:
+; CHECK: aghi %r2, 4096
+; CHECK: ddb %f0, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr double *%base, i64 512
+  %f2 = load double *%ptr
+  %res = fdiv double %f1, %f2
+  ret double %res
+}
+
+; Check negative displacements, which also need separate address logic.
+define double @f5(double %f1, double *%base) {
+; CHECK: f5:
+; CHECK: aghi %r2, -8
+; CHECK: ddb %f0, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr double *%base, i64 -1
+  %f2 = load double *%ptr
+  %res = fdiv double %f1, %f2
+  ret double %res
+}
+
+; Check that DDB allows indices.
+define double @f6(double %f1, double *%base, i64 %index) {
+; CHECK: f6:
+; CHECK: sllg %r1, %r3, 3
+; CHECK: ddb %f0, 800(%r1,%r2)
+; CHECK: br %r14
+  %ptr1 = getelementptr double *%base, i64 %index
+  %ptr2 = getelementptr double *%ptr1, i64 100
+  %f2 = load double *%ptr2
+  %res = fdiv double %f1, %f2
+  ret double %res
+}
diff --git a/test/CodeGen/SystemZ/fp-div-03.ll b/test/CodeGen/SystemZ/fp-div-03.ll
new file mode 100644
index 0000000..18f2d74
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-div-03.ll
@@ -0,0 +1,20 @@
+; Test 128-bit floating-point division.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; There is no memory form of 128-bit division.
+define void @f1(fp128 *%ptr, float %f2) {
+; CHECK: f1:
+; CHECK: lxebr %f0, %f0
+; CHECK: ld %f1, 0(%r2)
+; CHECK: ld %f3, 8(%r2)
+; CHECK: dxbr %f1, %f0
+; CHECK: std %f1, 0(%r2)
+; CHECK: std %f3, 8(%r2)
+; CHECK: br %r14
+  %f1 = load fp128 *%ptr
+  %f2x = fpext float %f2 to fp128
+  %sum = fdiv fp128 %f1, %f2x
+  store fp128 %sum, fp128 *%ptr
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/fp-move-01.ll b/test/CodeGen/SystemZ/fp-move-01.ll
new file mode 100644
index 0000000..73cd978
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-move-01.ll
@@ -0,0 +1,30 @@
+; Test moves between FPRs.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test f32 moves.
+define float @f1(float %a, float %b) {
+; CHECK: f1:
+; CHECK: ler %f0, %f2
+  ret float %b
+}
+
+; Test f64 moves.
+define double @f2(double %a, double %b) {
+; CHECK: f2:
+; CHECK: ldr %f0, %f2
+  ret double %b
+}
+
+; Test f128 moves.  Since f128s are passed by reference, we need to force
+; a copy by other means.
+define void @f3(fp128 *%x) {
+; CHECK: f3:
+; CHECK: lxr
+; CHECK: axbr
+  %val = load volatile fp128 *%x
+  %sum = fadd fp128 %val, %val
+  store volatile fp128 %sum, fp128 *%x
+  store volatile fp128 %val, fp128 *%x
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/fp-move-02.ll b/test/CodeGen/SystemZ/fp-move-02.ll
new file mode 100644
index 0000000..9d87797
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-move-02.ll
@@ -0,0 +1,103 @@
+; Test moves between FPRs and GPRs.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test 32-bit moves from GPRs to FPRs.  The GPR must be moved into the high
+; 32 bits of the FPR.
+define float @f1(i32 %a) {
+; CHECK: f1:
+; CHECK: sllg [[REGISTER:%r[0-5]]], %r2, 32
+; CHECK: ldgr %f0, [[REGISTER]]
+  %res = bitcast i32 %a to float
+  ret float %res
+}
+
+; Like f1, but create a situation where the shift can be folded with
+; surrounding code.
+define float @f2(i64 %big) {
+; CHECK: f2:
+; CHECK: sllg [[REGISTER:%r[0-5]]], %r2, 31
+; CHECK: ldgr %f0, [[REGISTER]]
+  %shift = lshr i64 %big, 1
+  %a = trunc i64 %shift to i32
+  %res = bitcast i32 %a to float
+  ret float %res
+}
+
+; Another example of the same thing.
+define float @f3(i64 %big) {
+; CHECK: f3:
+; CHECK: sllg [[REGISTER:%r[0-5]]], %r2, 2
+; CHECK: ldgr %f0, [[REGISTER]]
+  %shift = ashr i64 %big, 30
+  %a = trunc i64 %shift to i32
+  %res = bitcast i32 %a to float
+  ret float %res
+}
+
+; Like f1, but the value to transfer is already in the high 32 bits.
+define float @f4(i64 %big) {
+; CHECK: f4:
+; CHECK-NOT: %r2
+; CHECK: nilf %r2, 0
+; CHECK-NOT: %r2
+; CHECK: ldgr %f0, %r2
+  %shift = ashr i64 %big, 32
+  %a = trunc i64 %shift to i32
+  %res = bitcast i32 %a to float
+  ret float %res
+}
+
+; Test 64-bit moves from GPRs to FPRs.
+define double @f5(i64 %a) {
+; CHECK: f5:
+; CHECK: ldgr %f0, %r2
+  %res = bitcast i64 %a to double
+  ret double %res
+}
+
+; Test 128-bit moves from GPRs to FPRs.  i128 isn't a legitimate type,
+; so this goes through memory.
+define void @f6(fp128 *%a, i128 *%b) {
+; CHECK: f6:
+; CHECK: lg
+; CHECK: lg
+; CHECK: stg
+; CHECK: stg
+  %val = load i128 *%b
+  %res = bitcast i128 %val to fp128
+  store fp128 %res, fp128 *%a
+  ret void
+}
+
+; Test 32-bit moves from FPRs to GPRs.  The high 32 bits of the FPR should
+; be moved into the low 32 bits of the GPR.
+define i32 @f7(float %a) {
+; CHECK: f7:
+; CHECK: lgdr [[REGISTER:%r[0-5]]], %f0
+; CHECK: srlg %r2, [[REGISTER]], 32
+  %res = bitcast float %a to i32
+  ret i32 %res
+}
+
+; Test 64-bit moves from FPRs to GPRs.
+define i64 @f8(double %a) {
+; CHECK: f8:
+; CHECK: lgdr %r2, %f0
+  %res = bitcast double %a to i64
+  ret i64 %res
+}
+
+; Test 128-bit moves from FPRs to GPRs, with the same restriction as f6.
+define void @f9(fp128 *%a, i128 *%b) {
+; CHECK: f9:
+; CHECK: ld
+; CHECK: ld
+; CHECK: std
+; CHECK: std
+  %val = load fp128 *%a
+  %res = bitcast fp128 %val to i128
+  store i128 %res, i128 *%b
+  ret void
+}
+
diff --git a/test/CodeGen/SystemZ/fp-move-03.ll b/test/CodeGen/SystemZ/fp-move-03.ll
new file mode 100644
index 0000000..37dbdfa
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-move-03.ll
@@ -0,0 +1,110 @@
+; Test 32-bit floating-point loads.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test the low end of the LE range.
+define float @f1(float *%src) {
+; CHECK: f1:
+; CHECK: le %f0, 0(%r2)
+; CHECK: br %r14
+  %val = load float *%src
+  ret float %val
+}
+
+; Test the high end of the LE range.
+define float @f2(float *%src) {
+; CHECK: f2:
+; CHECK: le %f0, 4092(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr float *%src, i64 1023
+  %val = load float *%ptr
+  ret float %val
+}
+
+; Check the next word up, which should use LEY instead of LE.
+define float @f3(float *%src) {
+; CHECK: f3:
+; CHECK: ley %f0, 4096(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr float *%src, i64 1024
+  %val = load float *%ptr
+  ret float %val
+}
+
+; Check the high end of the aligned LEY range.
+define float @f4(float *%src) {
+; CHECK: f4:
+; CHECK: ley %f0, 524284(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr float *%src, i64 131071
+  %val = load float *%ptr
+  ret float %val
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define float @f5(float *%src) {
+; CHECK: f5:
+; CHECK: agfi %r2, 524288
+; CHECK: le %f0, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr float *%src, i64 131072
+  %val = load float *%ptr
+  ret float %val
+}
+
+; Check the high end of the negative aligned LEY range.
+define float @f6(float *%src) {
+; CHECK: f6:
+; CHECK: ley %f0, -4(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr float *%src, i64 -1
+  %val = load float *%ptr
+  ret float %val
+}
+
+; Check the low end of the LEY range.
+define float @f7(float *%src) {
+; CHECK: f7:
+; CHECK: ley %f0, -524288(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr float *%src, i64 -131072
+  %val = load float *%ptr
+  ret float %val
+}
+
+; Check the next word down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define float @f8(float *%src) {
+; CHECK: f8:
+; CHECK: agfi %r2, -524292
+; CHECK: le %f0, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr float *%src, i64 -131073
+  %val = load float *%ptr
+  ret float %val
+}
+
+; Check that LE allows an index.
+define float @f9(i64 %src, i64 %index) {
+; CHECK: f9:
+; CHECK: le %f0, 4092({{%r3,%r2|%r2,%r3}})
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 4092
+  %ptr = inttoptr i64 %add2 to float *
+  %val = load float *%ptr
+  ret float %val
+}
+
+; Check that LEY allows an index.
+define float @f10(i64 %src, i64 %index) {
+; CHECK: f10:
+; CHECK: ley %f0, 4096({{%r3,%r2|%r2,%r3}})
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 4096
+  %ptr = inttoptr i64 %add2 to float *
+  %val = load float *%ptr
+  ret float %val
+}
diff --git a/test/CodeGen/SystemZ/fp-move-04.ll b/test/CodeGen/SystemZ/fp-move-04.ll
new file mode 100644
index 0000000..72e90d1
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-move-04.ll
@@ -0,0 +1,110 @@
+; Test 64-bit floating-point loads.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test the low end of the LD range.
+define double @f1(double *%src) {
+; CHECK: f1:
+; CHECK: ld %f0, 0(%r2)
+; CHECK: br %r14
+  %val = load double *%src
+  ret double %val
+}
+
+; Test the high end of the LD range.
+define double @f2(double *%src) {
+; CHECK: f2:
+; CHECK: ld %f0, 4088(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr double *%src, i64 511
+  %val = load double *%ptr
+  ret double %val
+}
+
+; Check the next doubleword up, which should use LDY instead of LD.
+define double @f3(double *%src) {
+; CHECK: f3:
+; CHECK: ldy %f0, 4096(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr double *%src, i64 512
+  %val = load double *%ptr
+  ret double %val
+}
+
+; Check the high end of the aligned LDY range.
+define double @f4(double *%src) {
+; CHECK: f4:
+; CHECK: ldy %f0, 524280(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr double *%src, i64 65535
+  %val = load double *%ptr
+  ret double %val
+}
+
+; Check the next doubleword up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define double @f5(double *%src) {
+; CHECK: f5:
+; CHECK: agfi %r2, 524288
+; CHECK: ld %f0, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr double *%src, i64 65536
+  %val = load double *%ptr
+  ret double %val
+}
+
+; Check the high end of the negative aligned LDY range.
+define double @f6(double *%src) {
+; CHECK: f6:
+; CHECK: ldy %f0, -8(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr double *%src, i64 -1
+  %val = load double *%ptr
+  ret double %val
+}
+
+; Check the low end of the LDY range.
+define double @f7(double *%src) {
+; CHECK: f7:
+; CHECK: ldy %f0, -524288(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr double *%src, i64 -65536
+  %val = load double *%ptr
+  ret double %val
+}
+
+; Check the next doubleword down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define double @f8(double *%src) {
+; CHECK: f8:
+; CHECK: agfi %r2, -524296
+; CHECK: ld %f0, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr double *%src, i64 -65537
+  %val = load double *%ptr
+  ret double %val
+}
+
+; Check that LD allows an index.
+define double @f9(i64 %src, i64 %index) {
+; CHECK: f9:
+; CHECK: ld %f0, 4095({{%r3,%r2|%r2,%r3}})
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 4095
+  %ptr = inttoptr i64 %add2 to double *
+  %val = load double *%ptr
+  ret double %val
+}
+
+; Check that LDY allows an index.
+define double @f10(i64 %src, i64 %index) {
+; CHECK: f10:
+; CHECK: ldy %f0, 4096({{%r3,%r2|%r2,%r3}})
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 4096
+  %ptr = inttoptr i64 %add2 to double *
+  %val = load double *%ptr
+  ret double %val
+}
diff --git a/test/CodeGen/SystemZ/fp-move-05.ll b/test/CodeGen/SystemZ/fp-move-05.ll
new file mode 100644
index 0000000..66ad048
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-move-05.ll
@@ -0,0 +1,151 @@
+; Test 128-bit floating-point loads.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check loads with no offset.
+define double @f1(i64 %src) {
+; CHECK: f1:
+; CHECK: ld %f0, 0(%r2)
+; CHECK: ld %f2, 8(%r2)
+; CHECK: br %r14
+  %ptr = inttoptr i64 %src to fp128 *
+  %val = load fp128 *%ptr
+  %trunc = fptrunc fp128 %val to double
+  ret double %trunc
+}
+
+; Check the highest aligned offset that allows LD for both halves.
+define double @f2(i64 %src) {
+; CHECK: f2:
+; CHECK: ld %f0, 4080(%r2)
+; CHECK: ld %f2, 4088(%r2)
+; CHECK: br %r14
+  %add = add i64 %src, 4080
+  %ptr = inttoptr i64 %add to fp128 *
+  %val = load fp128 *%ptr
+  %trunc = fptrunc fp128 %val to double
+  ret double %trunc
+}
+
+; Check the next doubleword up, which requires a mixture of LD and LDY.
+define double @f3(i64 %src) {
+; CHECK: f3:
+; CHECK: ld %f0, 4088(%r2)
+; CHECK: ldy %f2, 4096(%r2)
+; CHECK: br %r14
+  %add = add i64 %src, 4088
+  %ptr = inttoptr i64 %add to fp128 *
+  %val = load fp128 *%ptr
+  %trunc = fptrunc fp128 %val to double
+  ret double %trunc
+}
+
+; Check the next doubleword after that, which requires LDY for both halves.
+define double @f4(i64 %src) {
+; CHECK: f4:
+; CHECK: ldy %f0, 4096(%r2)
+; CHECK: ldy %f2, 4104(%r2)
+; CHECK: br %r14
+  %add = add i64 %src, 4096
+  %ptr = inttoptr i64 %add to fp128 *
+  %val = load fp128 *%ptr
+  %trunc = fptrunc fp128 %val to double
+  ret double %trunc
+}
+
+; Check the highest aligned offset that allows LDY for both halves.
+define double @f5(i64 %src) {
+; CHECK: f5:
+; CHECK: ldy %f0, 524272(%r2)
+; CHECK: ldy %f2, 524280(%r2)
+; CHECK: br %r14
+  %add = add i64 %src, 524272
+  %ptr = inttoptr i64 %add to fp128 *
+  %val = load fp128 *%ptr
+  %trunc = fptrunc fp128 %val to double
+  ret double %trunc
+}
+
+; Check the next doubleword up, which requires separate address logic.
+; Other sequences besides this one would be OK.
+define double @f6(i64 %src) {
+; CHECK: f6:
+; CHECK: lay %r1, 524280(%r2)
+; CHECK: ld %f0, 0(%r1)
+; CHECK: ld %f2, 8(%r1)
+; CHECK: br %r14
+  %add = add i64 %src, 524280
+  %ptr = inttoptr i64 %add to fp128 *
+  %val = load fp128 *%ptr
+  %trunc = fptrunc fp128 %val to double
+  ret double %trunc
+}
+
+; Check the highest aligned negative offset, which needs a combination of
+; LDY and LD.
+define double @f7(i64 %src) {
+; CHECK: f7:
+; CHECK: ldy %f0, -8(%r2)
+; CHECK: ld %f2, 0(%r2)
+; CHECK: br %r14
+  %add = add i64 %src, -8
+  %ptr = inttoptr i64 %add to fp128 *
+  %val = load fp128 *%ptr
+  %trunc = fptrunc fp128 %val to double
+  ret double %trunc
+}
+
+; Check the next doubleword down, which requires LDY for both halves.
+define double @f8(i64 %src) {
+; CHECK: f8:
+; CHECK: ldy %f0, -16(%r2)
+; CHECK: ldy %f2, -8(%r2)
+; CHECK: br %r14
+  %add = add i64 %src, -16
+  %ptr = inttoptr i64 %add to fp128 *
+  %val = load fp128 *%ptr
+  %trunc = fptrunc fp128 %val to double
+  ret double %trunc
+}
+
+; Check the lowest offset that allows LDY for both halves.
+define double @f9(i64 %src) {
+; CHECK: f9:
+; CHECK: ldy %f0, -524288(%r2)
+; CHECK: ldy %f2, -524280(%r2)
+; CHECK: br %r14
+  %add = add i64 %src, -524288
+  %ptr = inttoptr i64 %add to fp128 *
+  %val = load fp128 *%ptr
+  %trunc = fptrunc fp128 %val to double
+  ret double %trunc
+}
+
+; Check the next doubleword down, which requires separate address logic.
+; Other sequences besides this one would be OK.
+define double @f10(i64 %src) {
+; CHECK: f10:
+; CHECK: agfi %r2, -524296
+; CHECK: ld %f0, 0(%r2)
+; CHECK: ld %f2, 8(%r2)
+; CHECK: br %r14
+  %add = add i64 %src, -524296
+  %ptr = inttoptr i64 %add to fp128 *
+  %val = load fp128 *%ptr
+  %trunc = fptrunc fp128 %val to double
+  ret double %trunc
+}
+
+; Check that indices are allowed.
+define double @f11(i64 %src, i64 %index) {
+; CHECK: f11:
+; CHECK: ld %f0, 4088({{%r2,%r3|%r3,%r2}})
+; CHECK: ldy %f2, 4096({{%r2,%r3|%r3,%r2}})
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 4088
+  %ptr = inttoptr i64 %add2 to fp128 *
+  %val = load fp128 *%ptr
+  %trunc = fptrunc fp128 %val to double
+  ret double %trunc
+}
diff --git a/test/CodeGen/SystemZ/fp-move-06.ll b/test/CodeGen/SystemZ/fp-move-06.ll
new file mode 100644
index 0000000..b660c2a
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-move-06.ll
@@ -0,0 +1,110 @@
+; Test 32-bit floating-point stores.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test the low end of the STE range.
+define void @f1(float *%ptr, float %val) {
+; CHECK: f1:
+; CHECK: ste %f0, 0(%r2)
+; CHECK: br %r14
+  store float %val, float *%ptr
+  ret void
+}
+
+; Test the high end of the STE range.
+define void @f2(float *%src, float %val) {
+; CHECK: f2:
+; CHECK: ste %f0, 4092(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr float *%src, i64 1023
+  store float %val, float *%ptr
+  ret void
+}
+
+; Check the next word up, which should use STEY instead of STE.
+define void @f3(float *%src, float %val) {
+; CHECK: f3:
+; CHECK: stey %f0, 4096(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr float *%src, i64 1024
+  store float %val, float *%ptr
+  ret void
+}
+
+; Check the high end of the aligned STEY range.
+define void @f4(float *%src, float %val) {
+; CHECK: f4:
+; CHECK: stey %f0, 524284(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr float *%src, i64 131071
+  store float %val, float *%ptr
+  ret void
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define void @f5(float *%src, float %val) {
+; CHECK: f5:
+; CHECK: agfi %r2, 524288
+; CHECK: ste %f0, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr float *%src, i64 131072
+  store float %val, float *%ptr
+  ret void
+}
+
+; Check the high end of the negative aligned STEY range.
+define void @f6(float *%src, float %val) {
+; CHECK: f6:
+; CHECK: stey %f0, -4(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr float *%src, i64 -1
+  store float %val, float *%ptr
+  ret void
+}
+
+; Check the low end of the STEY range.
+define void @f7(float *%src, float %val) {
+; CHECK: f7:
+; CHECK: stey %f0, -524288(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr float *%src, i64 -131072
+  store float %val, float *%ptr
+  ret void
+}
+
+; Check the next word down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define void @f8(float *%src, float %val) {
+; CHECK: f8:
+; CHECK: agfi %r2, -524292
+; CHECK: ste %f0, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr float *%src, i64 -131073
+  store float %val, float *%ptr
+  ret void
+}
+
+; Check that STE allows an index.
+define void @f9(i64 %src, i64 %index, float %val) {
+; CHECK: f9:
+; CHECK: ste %f0, 4092({{%r3,%r2|%r2,%r3}})
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 4092
+  %ptr = inttoptr i64 %add2 to float *
+  store float %val, float *%ptr
+  ret void
+}
+
+; Check that STEY allows an index.
+define void @f10(i64 %src, i64 %index, float %val) {
+; CHECK: f10:
+; CHECK: stey %f0, 4096({{%r3,%r2|%r2,%r3}})
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 4096
+  %ptr = inttoptr i64 %add2 to float *
+  store float %val, float *%ptr
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/fp-move-07.ll b/test/CodeGen/SystemZ/fp-move-07.ll
new file mode 100644
index 0000000..0cb0474
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-move-07.ll
@@ -0,0 +1,110 @@
+; Test 64-bit floating-point stores.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test the low end of the STD range.
+define void @f1(double *%src, double %val) {
+; CHECK: f1:
+; CHECK: std %f0, 0(%r2)
+; CHECK: br %r14
+  store double %val, double *%src
+  ret void
+}
+
+; Test the high end of the STD range.
+define void @f2(double *%src, double %val) {
+; CHECK: f2:
+; CHECK: std %f0, 4088(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr double *%src, i64 511
+  store double %val, double *%ptr
+  ret void
+}
+
+; Check the next doubleword up, which should use STDY instead of STD.
+define void @f3(double *%src, double %val) {
+; CHECK: f3:
+; CHECK: stdy %f0, 4096(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr double *%src, i64 512
+  store double %val, double *%ptr
+  ret void
+}
+
+; Check the high end of the aligned STDY range.
+define void @f4(double *%src, double %val) {
+; CHECK: f4:
+; CHECK: stdy %f0, 524280(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr double *%src, i64 65535
+  store double %val, double *%ptr
+  ret void
+}
+
+; Check the next doubleword up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define void @f5(double *%src, double %val) {
+; CHECK: f5:
+; CHECK: agfi %r2, 524288
+; CHECK: std %f0, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr double *%src, i64 65536
+  store double %val, double *%ptr
+  ret void
+}
+
+; Check the high end of the negative aligned STDY range.
+define void @f6(double *%src, double %val) {
+; CHECK: f6:
+; CHECK: stdy %f0, -8(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr double *%src, i64 -1
+  store double %val, double *%ptr
+  ret void
+}
+
+; Check the low end of the STDY range.
+define void @f7(double *%src, double %val) {
+; CHECK: f7:
+; CHECK: stdy %f0, -524288(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr double *%src, i64 -65536
+  store double %val, double *%ptr
+  ret void
+}
+
+; Check the next doubleword down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define void @f8(double *%src, double %val) {
+; CHECK: f8:
+; CHECK: agfi %r2, -524296
+; CHECK: std %f0, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr double *%src, i64 -65537
+  store double %val, double *%ptr
+  ret void
+}
+
+; Check that STD allows an index at 4095, the last offset before STDY is needed.
+define void @f9(i64 %src, i64 %index, double %val) {
+; CHECK: f9:
+; CHECK: std %f0, 4095({{%r3,%r2|%r2,%r3}})
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 4095
+  %ptr = inttoptr i64 %add2 to double *
+  store double %val, double *%ptr
+  ret void
+}
+
+; Check that STDY allows an index.
+define void @f10(i64 %src, i64 %index, double %val) {
+; CHECK: f10:
+; CHECK: stdy %f0, 4096({{%r3,%r2|%r2,%r3}})
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 4096
+  %ptr = inttoptr i64 %add2 to double *
+  store double %val, double *%ptr
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/fp-move-08.ll b/test/CodeGen/SystemZ/fp-move-08.ll
new file mode 100644
index 0000000..448d2ac
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-move-08.ll
@@ -0,0 +1,151 @@
+; Test 128-bit floating-point stores.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check stores with no offset.
+define void @f1(i64 %src, double %val) {
+; CHECK: f1:
+; CHECK: std %f0, 0(%r2)
+; CHECK: std %f2, 8(%r2)
+; CHECK: br %r14
+  %ptr = inttoptr i64 %src to fp128 *
+  %ext = fpext double %val to fp128
+  store fp128 %ext, fp128 *%ptr
+  ret void
+}
+
+; Check the highest aligned offset that allows STD for both halves.
+define void @f2(i64 %src, double %val) {
+; CHECK: f2:
+; CHECK: std %f0, 4080(%r2)
+; CHECK: std %f2, 4088(%r2)
+; CHECK: br %r14
+  %add = add i64 %src, 4080
+  %ptr = inttoptr i64 %add to fp128 *
+  %ext = fpext double %val to fp128
+  store fp128 %ext, fp128 *%ptr
+  ret void
+}
+
+; Check the next doubleword up, which requires a mixture of STD and STDY.
+define void @f3(i64 %src, double %val) {
+; CHECK: f3:
+; CHECK: std %f0, 4088(%r2)
+; CHECK: stdy %f2, 4096(%r2)
+; CHECK: br %r14
+  %add = add i64 %src, 4088
+  %ptr = inttoptr i64 %add to fp128 *
+  %ext = fpext double %val to fp128
+  store fp128 %ext, fp128 *%ptr
+  ret void
+}
+
+; Check the next doubleword after that, which requires STDY for both halves.
+define void @f4(i64 %src, double %val) {
+; CHECK: f4:
+; CHECK: stdy %f0, 4096(%r2)
+; CHECK: stdy %f2, 4104(%r2)
+; CHECK: br %r14
+  %add = add i64 %src, 4096
+  %ptr = inttoptr i64 %add to fp128 *
+  %ext = fpext double %val to fp128
+  store fp128 %ext, fp128 *%ptr
+  ret void
+}
+
+; Check the highest aligned offset that allows STDY for both halves.
+define void @f5(i64 %src, double %val) {
+; CHECK: f5:
+; CHECK: stdy %f0, 524272(%r2)
+; CHECK: stdy %f2, 524280(%r2)
+; CHECK: br %r14
+  %add = add i64 %src, 524272
+  %ptr = inttoptr i64 %add to fp128 *
+  %ext = fpext double %val to fp128
+  store fp128 %ext, fp128 *%ptr
+  ret void
+}
+
+; Check the next doubleword up, which requires separate address logic.
+; Other sequences besides this one would be OK.
+define void @f6(i64 %src, double %val) {
+; CHECK: f6:
+; CHECK: lay %r1, 524280(%r2)
+; CHECK: std %f0, 0(%r1)
+; CHECK: std %f2, 8(%r1)
+; CHECK: br %r14
+  %add = add i64 %src, 524280
+  %ptr = inttoptr i64 %add to fp128 *
+  %ext = fpext double %val to fp128
+  store fp128 %ext, fp128 *%ptr
+  ret void
+}
+
+; Check the highest aligned negative offset, which needs a combination of
+; STDY and STD.
+define void @f7(i64 %src, double %val) {
+; CHECK: f7:
+; CHECK: stdy %f0, -8(%r2)
+; CHECK: std %f2, 0(%r2)
+; CHECK: br %r14
+  %add = add i64 %src, -8
+  %ptr = inttoptr i64 %add to fp128 *
+  %ext = fpext double %val to fp128
+  store fp128 %ext, fp128 *%ptr
+  ret void
+}
+
+; Check the next doubleword down, which requires STDY for both halves.
+define void @f8(i64 %src, double %val) {
+; CHECK: f8:
+; CHECK: stdy %f0, -16(%r2)
+; CHECK: stdy %f2, -8(%r2)
+; CHECK: br %r14
+  %add = add i64 %src, -16
+  %ptr = inttoptr i64 %add to fp128 *
+  %ext = fpext double %val to fp128
+  store fp128 %ext, fp128 *%ptr
+  ret void
+}
+
+; Check the lowest offset that allows STDY for both halves.
+define void @f9(i64 %src, double %val) {
+; CHECK: f9:
+; CHECK: stdy %f0, -524288(%r2)
+; CHECK: stdy %f2, -524280(%r2)
+; CHECK: br %r14
+  %add = add i64 %src, -524288
+  %ptr = inttoptr i64 %add to fp128 *
+  %ext = fpext double %val to fp128
+  store fp128 %ext, fp128 *%ptr
+  ret void
+}
+
+; Check the next doubleword down, which requires separate address logic.
+; Other sequences besides this one would be OK.
+define void @f10(i64 %src, double %val) {
+; CHECK: f10:
+; CHECK: agfi %r2, -524296
+; CHECK: std %f0, 0(%r2)
+; CHECK: std %f2, 8(%r2)
+; CHECK: br %r14
+  %add = add i64 %src, -524296
+  %ptr = inttoptr i64 %add to fp128 *
+  %ext = fpext double %val to fp128
+  store fp128 %ext, fp128 *%ptr
+  ret void
+}
+
+; Check that indices are allowed.
+define void @f11(i64 %src, i64 %index, double %val) {
+; CHECK: f11:
+; CHECK: std %f0, 4088({{%r2,%r3|%r3,%r2}})
+; CHECK: stdy %f2, 4096({{%r2,%r3|%r3,%r2}})
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 4088
+  %ptr = inttoptr i64 %add2 to fp128 *
+  %ext = fpext double %val to fp128
+  store fp128 %ext, fp128 *%ptr
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/fp-mul-01.ll b/test/CodeGen/SystemZ/fp-mul-01.ll
new file mode 100644
index 0000000..68c78ee
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-mul-01.ll
@@ -0,0 +1,71 @@
+; Test multiplication of two f32s, producing an f32 result.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check register multiplication.
+define float @f1(float %f1, float %f2) {
+; CHECK: f1:
+; CHECK: meebr %f0, %f2
+; CHECK: br %r14
+  %res = fmul float %f1, %f2
+  ret float %res
+}
+
+; Check the low end of the MEEB range.
+define float @f2(float %f1, float *%ptr) {
+; CHECK: f2:
+; CHECK: meeb %f0, 0(%r2)
+; CHECK: br %r14
+  %f2 = load float *%ptr
+  %res = fmul float %f1, %f2
+  ret float %res
+}
+
+; Check the high end of the aligned MEEB range.
+define float @f3(float %f1, float *%base) {
+; CHECK: f3:
+; CHECK: meeb %f0, 4092(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr float *%base, i64 1023
+  %f2 = load float *%ptr
+  %res = fmul float %f1, %f2
+  ret float %res
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define float @f4(float %f1, float *%base) {
+; CHECK: f4:
+; CHECK: aghi %r2, 4096
+; CHECK: meeb %f0, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr float *%base, i64 1024
+  %f2 = load float *%ptr
+  %res = fmul float %f1, %f2
+  ret float %res
+}
+
+; Check negative displacements, which also need separate address logic.
+define float @f5(float %f1, float *%base) {
+; CHECK: f5:
+; CHECK: aghi %r2, -4
+; CHECK: meeb %f0, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr float *%base, i64 -1
+  %f2 = load float *%ptr
+  %res = fmul float %f1, %f2
+  ret float %res
+}
+
+; Check that MEEB allows indices.
+define float @f6(float %f1, float *%base, i64 %index) {
+; CHECK: f6:
+; CHECK: sllg %r1, %r3, 2
+; CHECK: meeb %f0, 400(%r1,%r2)
+; CHECK: br %r14
+  %ptr1 = getelementptr float *%base, i64 %index
+  %ptr2 = getelementptr float *%ptr1, i64 100
+  %f2 = load float *%ptr2
+  %res = fmul float %f1, %f2
+  ret float %res
+}
diff --git a/test/CodeGen/SystemZ/fp-mul-02.ll b/test/CodeGen/SystemZ/fp-mul-02.ll
new file mode 100644
index 0000000..ec51a4c
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-mul-02.ll
@@ -0,0 +1,83 @@
+; Test multiplication of two f32s, producing an f64 result.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check register multiplication.
+define double @f1(float %f1, float %f2) {
+; CHECK: f1:
+; CHECK: mdebr %f0, %f2
+; CHECK: br %r14
+  %f1x = fpext float %f1 to double
+  %f2x = fpext float %f2 to double
+  %res = fmul double %f1x, %f2x
+  ret double %res
+}
+
+; Check the low end of the MDEB range.
+define double @f2(float %f1, float *%ptr) {
+; CHECK: f2:
+; CHECK: mdeb %f0, 0(%r2)
+; CHECK: br %r14
+  %f2 = load float *%ptr
+  %f1x = fpext float %f1 to double
+  %f2x = fpext float %f2 to double
+  %res = fmul double %f1x, %f2x
+  ret double %res
+}
+
+; Check the high end of the aligned MDEB range.
+define double @f3(float %f1, float *%base) {
+; CHECK: f3:
+; CHECK: mdeb %f0, 4092(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr float *%base, i64 1023
+  %f2 = load float *%ptr
+  %f1x = fpext float %f1 to double
+  %f2x = fpext float %f2 to double
+  %res = fmul double %f1x, %f2x
+  ret double %res
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define double @f4(float %f1, float *%base) {
+; CHECK: f4:
+; CHECK: aghi %r2, 4096
+; CHECK: mdeb %f0, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr float *%base, i64 1024
+  %f2 = load float *%ptr
+  %f1x = fpext float %f1 to double
+  %f2x = fpext float %f2 to double
+  %res = fmul double %f1x, %f2x
+  ret double %res
+}
+
+; Check negative displacements, which also need separate address logic.
+define double @f5(float %f1, float *%base) {
+; CHECK: f5:
+; CHECK: aghi %r2, -4
+; CHECK: mdeb %f0, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr float *%base, i64 -1
+  %f2 = load float *%ptr
+  %f1x = fpext float %f1 to double
+  %f2x = fpext float %f2 to double
+  %res = fmul double %f1x, %f2x
+  ret double %res
+}
+
+; Check that MDEB allows indices.
+define double @f6(float %f1, float *%base, i64 %index) {
+; CHECK: f6:
+; CHECK: sllg %r1, %r3, 2
+; CHECK: mdeb %f0, 400(%r1,%r2)
+; CHECK: br %r14
+  %ptr1 = getelementptr float *%base, i64 %index
+  %ptr2 = getelementptr float *%ptr1, i64 100
+  %f2 = load float *%ptr2
+  %f1x = fpext float %f1 to double
+  %f2x = fpext float %f2 to double
+  %res = fmul double %f1x, %f2x
+  ret double %res
+}
diff --git a/test/CodeGen/SystemZ/fp-mul-03.ll b/test/CodeGen/SystemZ/fp-mul-03.ll
new file mode 100644
index 0000000..9849247
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-mul-03.ll
@@ -0,0 +1,71 @@
+; Test multiplication of two f64s, producing an f64 result.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check register multiplication.
+define double @f1(double %f1, double %f2) {
+; CHECK: f1:
+; CHECK: mdbr %f0, %f2
+; CHECK: br %r14
+  %res = fmul double %f1, %f2
+  ret double %res
+}
+
+; Check the low end of the MDB range.
+define double @f2(double %f1, double *%ptr) {
+; CHECK: f2:
+; CHECK: mdb %f0, 0(%r2)
+; CHECK: br %r14
+  %f2 = load double *%ptr
+  %res = fmul double %f1, %f2
+  ret double %res
+}
+
+; Check the high end of the aligned MDB range.
+define double @f3(double %f1, double *%base) {
+; CHECK: f3:
+; CHECK: mdb %f0, 4088(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr double *%base, i64 511
+  %f2 = load double *%ptr
+  %res = fmul double %f1, %f2
+  ret double %res
+}
+
+; Check the next doubleword up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define double @f4(double %f1, double *%base) {
+; CHECK: f4:
+; CHECK: aghi %r2, 4096
+; CHECK: mdb %f0, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr double *%base, i64 512
+  %f2 = load double *%ptr
+  %res = fmul double %f1, %f2
+  ret double %res
+}
+
+; Check negative displacements, which also need separate address logic.
+define double @f5(double %f1, double *%base) {
+; CHECK: f5:
+; CHECK: aghi %r2, -8
+; CHECK: mdb %f0, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr double *%base, i64 -1
+  %f2 = load double *%ptr
+  %res = fmul double %f1, %f2
+  ret double %res
+}
+
+; Check that MDB allows indices.
+define double @f6(double %f1, double *%base, i64 %index) {
+; CHECK: f6:
+; CHECK: sllg %r1, %r3, 3
+; CHECK: mdb %f0, 800(%r1,%r2)
+; CHECK: br %r14
+  %ptr1 = getelementptr double *%base, i64 %index
+  %ptr2 = getelementptr double *%ptr1, i64 100
+  %f2 = load double *%ptr2
+  %res = fmul double %f1, %f2
+  ret double %res
+}
diff --git a/test/CodeGen/SystemZ/fp-mul-04.ll b/test/CodeGen/SystemZ/fp-mul-04.ll
new file mode 100644
index 0000000..712ead8
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-mul-04.ll
@@ -0,0 +1,103 @@
+; Test multiplication of two f64s, producing an f128 result.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check register multiplication.  "mxdbr %f0, %f2" is not valid from LLVM's
+; point of view, because %f2 is the low register of the FP128 %f0.  Pass the
+; multiplier in %f4 instead.
+define void @f1(double %f1, double %dummy, double %f2, fp128 *%dst) {
+; CHECK: f1:
+; CHECK: mxdbr %f0, %f4
+; CHECK: std %f0, 0(%r2)
+; CHECK: std %f2, 8(%r2)
+; CHECK: br %r14
+  %f1x = fpext double %f1 to fp128
+  %f2x = fpext double %f2 to fp128
+  %res = fmul fp128 %f1x, %f2x
+  store fp128 %res, fp128 *%dst
+  ret void
+}
+
+; Check the low end of the MXDB range.
+define void @f2(double %f1, double *%ptr, fp128 *%dst) {
+; CHECK: f2:
+; CHECK: mxdb %f0, 0(%r2)
+; CHECK: std %f0, 0(%r3)
+; CHECK: std %f2, 8(%r3)
+; CHECK: br %r14
+  %f2 = load double *%ptr
+  %f1x = fpext double %f1 to fp128
+  %f2x = fpext double %f2 to fp128
+  %res = fmul fp128 %f1x, %f2x
+  store fp128 %res, fp128 *%dst
+  ret void
+}
+
+; Check the high end of the aligned MXDB range.
+define void @f3(double %f1, double *%base, fp128 *%dst) {
+; CHECK: f3:
+; CHECK: mxdb %f0, 4088(%r2)
+; CHECK: std %f0, 0(%r3)
+; CHECK: std %f2, 8(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr double *%base, i64 511
+  %f2 = load double *%ptr
+  %f1x = fpext double %f1 to fp128
+  %f2x = fpext double %f2 to fp128
+  %res = fmul fp128 %f1x, %f2x
+  store fp128 %res, fp128 *%dst
+  ret void
+}
+
+; Check the next doubleword up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define void @f4(double %f1, double *%base, fp128 *%dst) {
+; CHECK: f4:
+; CHECK: aghi %r2, 4096
+; CHECK: mxdb %f0, 0(%r2)
+; CHECK: std %f0, 0(%r3)
+; CHECK: std %f2, 8(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr double *%base, i64 512
+  %f2 = load double *%ptr
+  %f1x = fpext double %f1 to fp128
+  %f2x = fpext double %f2 to fp128
+  %res = fmul fp128 %f1x, %f2x
+  store fp128 %res, fp128 *%dst
+  ret void
+}
+
+; Check negative displacements, which also need separate address logic.
+define void @f5(double %f1, double *%base, fp128 *%dst) {
+; CHECK: f5:
+; CHECK: aghi %r2, -8
+; CHECK: mxdb %f0, 0(%r2)
+; CHECK: std %f0, 0(%r3)
+; CHECK: std %f2, 8(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr double *%base, i64 -1
+  %f2 = load double *%ptr
+  %f1x = fpext double %f1 to fp128
+  %f2x = fpext double %f2 to fp128
+  %res = fmul fp128 %f1x, %f2x
+  store fp128 %res, fp128 *%dst
+  ret void
+}
+
+; Check that MXDB allows indices.
+define void @f6(double %f1, double *%base, i64 %index, fp128 *%dst) {
+; CHECK: f6:
+; CHECK: sllg %r1, %r3, 3
+; CHECK: mxdb %f0, 800(%r1,%r2)
+; CHECK: std %f0, 0(%r4)
+; CHECK: std %f2, 8(%r4)
+; CHECK: br %r14
+  %ptr1 = getelementptr double *%base, i64 %index
+  %ptr2 = getelementptr double *%ptr1, i64 100
+  %f2 = load double *%ptr2
+  %f1x = fpext double %f1 to fp128
+  %f2x = fpext double %f2 to fp128
+  %res = fmul fp128 %f1x, %f2x
+  store fp128 %res, fp128 *%dst
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/fp-mul-05.ll b/test/CodeGen/SystemZ/fp-mul-05.ll
new file mode 100644
index 0000000..df5bc4e
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-mul-05.ll
@@ -0,0 +1,20 @@
+; Test multiplication of two f128s.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; There is no memory form of 128-bit multiplication; the operand is loaded first.
+define void @f1(fp128 *%ptr, float %f2) {
+; CHECK: f1:
+; CHECK: lxebr %f0, %f0
+; CHECK: ld %f1, 0(%r2)
+; CHECK: ld %f3, 8(%r2)
+; CHECK: mxbr %f1, %f0
+; CHECK: std %f1, 0(%r2)
+; CHECK: std %f3, 8(%r2)
+; CHECK: br %r14
+  %f1 = load fp128 *%ptr
+  %f2x = fpext float %f2 to fp128
+  %diff = fmul fp128 %f1, %f2x
+  store fp128 %diff, fp128 *%ptr
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/fp-mul-06.ll b/test/CodeGen/SystemZ/fp-mul-06.ll
new file mode 100644
index 0000000..8124c68
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-mul-06.ll
@@ -0,0 +1,104 @@
+; Test f32 multiply-and-add, as expressed by llvm.fma.f32.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare float @llvm.fma.f32(float %f1, float %f2, float %f3)
+
+define float @f1(float %f1, float %f2, float %acc) {
+; CHECK: f1:
+; CHECK: maebr %f4, %f0, %f2
+; CHECK: ler %f0, %f4
+; CHECK: br %r14
+  %res = call float @llvm.fma.f32 (float %f1, float %f2, float %acc)
+  ret float %res
+}
+
+define float @f2(float %f1, float *%ptr, float %acc) {
+; CHECK: f2:
+; CHECK: maeb %f2, %f0, 0(%r2)
+; CHECK: ler %f0, %f2
+; CHECK: br %r14
+  %f2 = load float *%ptr
+  %res = call float @llvm.fma.f32 (float %f1, float %f2, float %acc)
+  ret float %res
+}
+
+define float @f3(float %f1, float *%base, float %acc) {
+; CHECK: f3:
+; CHECK: maeb %f2, %f0, 4092(%r2)
+; CHECK: ler %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr float *%base, i64 1023
+  %f2 = load float *%ptr
+  %res = call float @llvm.fma.f32 (float %f1, float %f2, float %acc)
+  ret float %res
+}
+
+define float @f4(float %f1, float *%base, float %acc) {
+; The important thing here is that we don't generate an out-of-range
+; displacement.  Other sequences besides this one would be OK.
+;
+; CHECK: f4:
+; CHECK: aghi %r2, 4096
+; CHECK: maeb %f2, %f0, 0(%r2)
+; CHECK: ler %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr float *%base, i64 1024
+  %f2 = load float *%ptr
+  %res = call float @llvm.fma.f32 (float %f1, float %f2, float %acc)
+  ret float %res
+}
+
+define float @f5(float %f1, float *%base, float %acc) {
+; Here too the important thing is that we don't generate an out-of-range
+; displacement.  Other sequences besides this one would be OK.
+;
+; CHECK: f5:
+; CHECK: aghi %r2, -4
+; CHECK: maeb %f2, %f0, 0(%r2)
+; CHECK: ler %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr float *%base, i64 -1
+  %f2 = load float *%ptr
+  %res = call float @llvm.fma.f32 (float %f1, float %f2, float %acc)
+  ret float %res
+}
+
+define float @f6(float %f1, float *%base, i64 %index, float %acc) {
+; CHECK: f6:
+; CHECK: sllg %r1, %r3, 2
+; CHECK: maeb %f2, %f0, 0(%r1,%r2)
+; CHECK: ler %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr float *%base, i64 %index
+  %f2 = load float *%ptr
+  %res = call float @llvm.fma.f32 (float %f1, float %f2, float %acc)
+  ret float %res
+}
+
+define float @f7(float %f1, float *%base, i64 %index, float %acc) {
+; CHECK: f7:
+; CHECK: sllg %r1, %r3, 2
+; CHECK: maeb %f2, %f0, 4092({{%r1,%r2|%r2,%r1}})
+; CHECK: ler %f0, %f2
+; CHECK: br %r14
+  %index2 = add i64 %index, 1023
+  %ptr = getelementptr float *%base, i64 %index2
+  %f2 = load float *%ptr
+  %res = call float @llvm.fma.f32 (float %f1, float %f2, float %acc)
+  ret float %res
+}
+
+define float @f8(float %f1, float *%base, i64 %index, float %acc) {
+; CHECK: f8:
+; CHECK: sllg %r1, %r3, 2
+; CHECK: lay %r1, 4096({{%r1,%r2|%r2,%r1}})
+; CHECK: maeb %f2, %f0, 0(%r1)
+; CHECK: ler %f0, %f2
+; CHECK: br %r14
+  %index2 = add i64 %index, 1024
+  %ptr = getelementptr float *%base, i64 %index2
+  %f2 = load float *%ptr
+  %res = call float @llvm.fma.f32 (float %f1, float %f2, float %acc)
+  ret float %res
+}
diff --git a/test/CodeGen/SystemZ/fp-mul-07.ll b/test/CodeGen/SystemZ/fp-mul-07.ll
new file mode 100644
index 0000000..b8e4483
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-mul-07.ll
@@ -0,0 +1,104 @@
+; Test f64 multiply-and-add, as expressed by llvm.fma.f64.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare double @llvm.fma.f64(double %f1, double %f2, double %f3)
+
+define double @f1(double %f1, double %f2, double %acc) {
+; CHECK: f1:
+; CHECK: madbr %f4, %f0, %f2
+; CHECK: ldr %f0, %f4
+; CHECK: br %r14
+  %res = call double @llvm.fma.f64 (double %f1, double %f2, double %acc)
+  ret double %res
+}
+
+define double @f2(double %f1, double *%ptr, double %acc) {
+; CHECK: f2:
+; CHECK: madb %f2, %f0, 0(%r2)
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %f2 = load double *%ptr
+  %res = call double @llvm.fma.f64 (double %f1, double %f2, double %acc)
+  ret double %res
+}
+
+define double @f3(double %f1, double *%base, double %acc) {
+; CHECK: f3:
+; CHECK: madb %f2, %f0, 4088(%r2)
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr double *%base, i64 511
+  %f2 = load double *%ptr
+  %res = call double @llvm.fma.f64 (double %f1, double %f2, double %acc)
+  ret double %res
+}
+
+define double @f4(double %f1, double *%base, double %acc) {
+; The important thing here is that we don't generate an out-of-range
+; displacement.  Other sequences besides this one would be OK.
+;
+; CHECK: f4:
+; CHECK: aghi %r2, 4096
+; CHECK: madb %f2, %f0, 0(%r2)
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr double *%base, i64 512
+  %f2 = load double *%ptr
+  %res = call double @llvm.fma.f64 (double %f1, double %f2, double %acc)
+  ret double %res
+}
+
+define double @f5(double %f1, double *%base, double %acc) {
+; Here too the important thing is that we don't generate an out-of-range
+; displacement.  Other sequences besides this one would be OK.
+;
+; CHECK: f5:
+; CHECK: aghi %r2, -8
+; CHECK: madb %f2, %f0, 0(%r2)
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr double *%base, i64 -1
+  %f2 = load double *%ptr
+  %res = call double @llvm.fma.f64 (double %f1, double %f2, double %acc)
+  ret double %res
+}
+
+define double @f6(double %f1, double *%base, i64 %index, double %acc) {
+; CHECK: f6:
+; CHECK: sllg %r1, %r3, 3
+; CHECK: madb %f2, %f0, 0(%r1,%r2)
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr double *%base, i64 %index
+  %f2 = load double *%ptr
+  %res = call double @llvm.fma.f64 (double %f1, double %f2, double %acc)
+  ret double %res
+}
+
+define double @f7(double %f1, double *%base, i64 %index, double %acc) {
+; CHECK: f7:
+; CHECK: sllg %r1, %r3, 3
+; CHECK: madb %f2, %f0, 4088({{%r1,%r2|%r2,%r1}})
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %index2 = add i64 %index, 511
+  %ptr = getelementptr double *%base, i64 %index2
+  %f2 = load double *%ptr
+  %res = call double @llvm.fma.f64 (double %f1, double %f2, double %acc)
+  ret double %res
+}
+
+define double @f8(double %f1, double *%base, i64 %index, double %acc) {
+; CHECK: f8:
+; CHECK: sllg %r1, %r3, 3
+; CHECK: lay %r1, 4096({{%r1,%r2|%r2,%r1}})
+; CHECK: madb %f2, %f0, 0(%r1)
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %index2 = add i64 %index, 512
+  %ptr = getelementptr double *%base, i64 %index2
+  %f2 = load double *%ptr
+  %res = call double @llvm.fma.f64 (double %f1, double %f2, double %acc)
+  ret double %res
+}
diff --git a/test/CodeGen/SystemZ/fp-mul-08.ll b/test/CodeGen/SystemZ/fp-mul-08.ll
new file mode 100644
index 0000000..5c147406
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-mul-08.ll
@@ -0,0 +1,112 @@
+; Test f32 multiply-and-subtract, i.e. llvm.fma.f32 with a negated addend.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare float @llvm.fma.f32(float %f1, float %f2, float %f3)
+
+define float @f1(float %f1, float %f2, float %acc) {
+; CHECK: f1:
+; CHECK: msebr %f4, %f0, %f2
+; CHECK: ler %f0, %f4
+; CHECK: br %r14
+  %negacc = fsub float -0.0, %acc
+  %res = call float @llvm.fma.f32 (float %f1, float %f2, float %negacc)
+  ret float %res
+}
+
+define float @f2(float %f1, float *%ptr, float %acc) {
+; CHECK: f2:
+; CHECK: mseb %f2, %f0, 0(%r2)
+; CHECK: ler %f0, %f2
+; CHECK: br %r14
+  %f2 = load float *%ptr
+  %negacc = fsub float -0.0, %acc
+  %res = call float @llvm.fma.f32 (float %f1, float %f2, float %negacc)
+  ret float %res
+}
+
+define float @f3(float %f1, float *%base, float %acc) {
+; CHECK: f3:
+; CHECK: mseb %f2, %f0, 4092(%r2)
+; CHECK: ler %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr float *%base, i64 1023
+  %f2 = load float *%ptr
+  %negacc = fsub float -0.0, %acc
+  %res = call float @llvm.fma.f32 (float %f1, float %f2, float %negacc)
+  ret float %res
+}
+
+define float @f4(float %f1, float *%base, float %acc) {
+; The important thing here is that we don't generate an out-of-range
+; displacement.  Other sequences besides this one would be OK.
+;
+; CHECK: f4:
+; CHECK: aghi %r2, 4096
+; CHECK: mseb %f2, %f0, 0(%r2)
+; CHECK: ler %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr float *%base, i64 1024
+  %f2 = load float *%ptr
+  %negacc = fsub float -0.0, %acc
+  %res = call float @llvm.fma.f32 (float %f1, float %f2, float %negacc)
+  ret float %res
+}
+
+define float @f5(float %f1, float *%base, float %acc) {
+; Here too the important thing is that we don't generate an out-of-range
+; displacement.  Other sequences besides this one would be OK.
+;
+; CHECK: f5:
+; CHECK: aghi %r2, -4
+; CHECK: mseb %f2, %f0, 0(%r2)
+; CHECK: ler %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr float *%base, i64 -1
+  %f2 = load float *%ptr
+  %negacc = fsub float -0.0, %acc
+  %res = call float @llvm.fma.f32 (float %f1, float %f2, float %negacc)
+  ret float %res
+}
+
+define float @f6(float %f1, float *%base, i64 %index, float %acc) {
+; CHECK: f6:
+; CHECK: sllg %r1, %r3, 2
+; CHECK: mseb %f2, %f0, 0(%r1,%r2)
+; CHECK: ler %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr float *%base, i64 %index
+  %f2 = load float *%ptr
+  %negacc = fsub float -0.0, %acc
+  %res = call float @llvm.fma.f32 (float %f1, float %f2, float %negacc)
+  ret float %res
+}
+
+define float @f7(float %f1, float *%base, i64 %index, float %acc) {
+; CHECK: f7:
+; CHECK: sllg %r1, %r3, 2
+; CHECK: mseb %f2, %f0, 4092({{%r1,%r2|%r2,%r1}})
+; CHECK: ler %f0, %f2
+; CHECK: br %r14
+  %index2 = add i64 %index, 1023
+  %ptr = getelementptr float *%base, i64 %index2
+  %f2 = load float *%ptr
+  %negacc = fsub float -0.0, %acc
+  %res = call float @llvm.fma.f32 (float %f1, float %f2, float %negacc)
+  ret float %res
+}
+
+define float @f8(float %f1, float *%base, i64 %index, float %acc) {
+; CHECK: f8:
+; CHECK: sllg %r1, %r3, 2
+; CHECK: lay %r1, 4096({{%r1,%r2|%r2,%r1}})
+; CHECK: mseb %f2, %f0, 0(%r1)
+; CHECK: ler %f0, %f2
+; CHECK: br %r14
+  %index2 = add i64 %index, 1024
+  %ptr = getelementptr float *%base, i64 %index2
+  %f2 = load float *%ptr
+  %negacc = fsub float -0.0, %acc
+  %res = call float @llvm.fma.f32 (float %f1, float %f2, float %negacc)
+  ret float %res
+}
diff --git a/test/CodeGen/SystemZ/fp-mul-09.ll b/test/CodeGen/SystemZ/fp-mul-09.ll
new file mode 100644
index 0000000..bcae1e3
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-mul-09.ll
@@ -0,0 +1,110 @@
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare double @llvm.fma.f64(double %f1, double %f2, double %f3)
+
+define double @f1(double %f1, double %f2, double %acc) {
+; CHECK: f1:
+; CHECK: msdbr %f4, %f0, %f2
+; CHECK: ldr %f0, %f4
+; CHECK: br %r14
+  %negated = fsub double -0.0, %acc
+  %fms = call double @llvm.fma.f64 (double %f1, double %f2, double %negated)
+  ret double %fms
+}
+
+define double @f2(double %f1, double *%ptr, double %acc) {
+; CHECK: f2:
+; CHECK: msdb %f2, %f0, 0(%r2)
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %mem = load double *%ptr
+  %negated = fsub double -0.0, %acc
+  %fms = call double @llvm.fma.f64 (double %f1, double %mem, double %negated)
+  ret double %fms
+}
+
+define double @f3(double %f1, double *%base, double %acc) {
+; CHECK: f3:
+; CHECK: msdb %f2, %f0, 4088(%r2)
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %addr = getelementptr double *%base, i64 511
+  %mem = load double *%addr
+  %negated = fsub double -0.0, %acc
+  %fms = call double @llvm.fma.f64 (double %f1, double %mem, double %negated)
+  ret double %fms
+}
+
+define double @f4(double %f1, double *%base, double %acc) {
+; What matters here is that the 4096 offset is not used as an MSDB
+; displacement, where it would be out of range.  Other sequences are fine.
+;
+; CHECK: f4:
+; CHECK: aghi %r2, 4096
+; CHECK: msdb %f2, %f0, 0(%r2)
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %addr = getelementptr double *%base, i64 512
+  %mem = load double *%addr
+  %negated = fsub double -0.0, %acc
+  %fms = call double @llvm.fma.f64 (double %f1, double %mem, double %negated)
+  ret double %fms
+}
+
+define double @f5(double %f1, double *%base, double %acc) {
+; As above, what matters is that the -8 offset is not used as an MSDB
+; displacement, where it would be out of range.  Other sequences are fine.
+;
+; CHECK: f5:
+; CHECK: aghi %r2, -8
+; CHECK: msdb %f2, %f0, 0(%r2)
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %addr = getelementptr double *%base, i64 -1
+  %mem = load double *%addr
+  %negated = fsub double -0.0, %acc
+  %fms = call double @llvm.fma.f64 (double %f1, double %mem, double %negated)
+  ret double %fms
+}
+
+define double @f6(double %f1, double *%base, i64 %index, double %acc) {
+; CHECK: f6:
+; CHECK: sllg %r1, %r3, 3
+; CHECK: msdb %f2, %f0, 0(%r1,%r2)
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %addr = getelementptr double *%base, i64 %index
+  %mem = load double *%addr
+  %negated = fsub double -0.0, %acc
+  %fms = call double @llvm.fma.f64 (double %f1, double %mem, double %negated)
+  ret double %fms
+}
+
+define double @f7(double %f1, double *%base, i64 %index, double %acc) {
+; CHECK: f7:
+; CHECK: sllg %r1, %r3, 3
+; CHECK: msdb %f2, %f0, 4088({{%r1,%r2|%r2,%r1}})
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %biased = add i64 %index, 511
+  %addr = getelementptr double *%base, i64 %biased
+  %mem = load double *%addr
+  %negated = fsub double -0.0, %acc
+  %fms = call double @llvm.fma.f64 (double %f1, double %mem, double %negated)
+  ret double %fms
+}
+
+define double @f8(double %f1, double *%base, i64 %index, double %acc) {
+; CHECK: f8:
+; CHECK: sllg %r1, %r3, 3
+; CHECK: lay %r1, 4096({{%r1,%r2|%r2,%r1}})
+; CHECK: msdb %f2, %f0, 0(%r1)
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %biased = add i64 %index, 512
+  %addr = getelementptr double *%base, i64 %biased
+  %mem = load double *%addr
+  %negated = fsub double -0.0, %acc
+  %fms = call double @llvm.fma.f64 (double %f1, double %mem, double %negated)
+  ret double %fms
+}
diff --git a/test/CodeGen/SystemZ/fp-neg-01.ll b/test/CodeGen/SystemZ/fp-neg-01.ll
new file mode 100644
index 0000000..09a4a53
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-neg-01.ll
@@ -0,0 +1,38 @@
+; Test floating-point negation.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test f32.
+define float @f1(float %f) {
+; CHECK: f1:
+; CHECK: lcebr %f0, %f0
+; CHECK: br %r14
+  %neg = fsub float -0.0, %f
+  ret float %neg
+}
+
+; Test f64.
+define double @f2(double %f) {
+; CHECK: f2:
+; CHECK: lcdbr %f0, %f0
+; CHECK: br %r14
+  %neg = fsub double -0.0, %f
+  ret double %neg
+}
+
+; Test f128.  With the loads and stores, a pure negation would probably
+; be better implemented using an XI on the upper byte.  Do some extra
+; processing so that using FPRs is unequivocally better.
+define void @f3(fp128 *%ptr, fp128 *%ptr2) {
+; CHECK: f3:
+; CHECK: lcxbr
+; CHECK: dxbr
+; CHECK: br %r14
+  %orig = load fp128 *%ptr
+  ; The fp128 literal below is -0.0 (only the sign bit set): -0.0 - x is -x.
+  %neg = fsub fp128 0xL00000000000000008000000000000000, %orig
+  %op2 = load fp128 *%ptr2
+  %res = fdiv fp128 %neg, %op2
+  store fp128 %res, fp128 *%ptr
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/fp-round-01.ll b/test/CodeGen/SystemZ/fp-round-01.ll
new file mode 100644
index 0000000..20325c3
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-round-01.ll
@@ -0,0 +1,36 @@
+; Test rint()-like rounding, with non-integer values triggering an
+; inexact condition.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test f32.
+declare float @llvm.rint.f32(float %f)
+define float @f1(float %f) {
+; CHECK: f1:
+; CHECK: fiebr %f0, 0, %f0
+; CHECK: br %r14
+  %rounded = call float @llvm.rint.f32(float %f)
+  ret float %rounded
+}
+
+; Test f64.
+declare double @llvm.rint.f64(double %f)
+define double @f2(double %f) {
+; CHECK: f2:
+; CHECK: fidbr %f0, 0, %f0
+; CHECK: br %r14
+  %rounded = call double @llvm.rint.f64(double %f)
+  ret double %rounded
+}
+
+; Test f128.
+declare fp128 @llvm.rint.f128(fp128 %f)
+define void @f3(fp128 *%ptr) {
+; CHECK: f3:
+; CHECK: fixbr %f0, 0, %f0
+; CHECK: br %r14
+  %val = load fp128 *%ptr
+  %rounded = call fp128 @llvm.rint.f128(fp128 %val)
+  store fp128 %rounded, fp128 *%ptr
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/fp-sqrt-01.ll b/test/CodeGen/SystemZ/fp-sqrt-01.ll
new file mode 100644
index 0000000..7ed27f5
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-sqrt-01.ll
@@ -0,0 +1,73 @@
+; Test 32-bit square root.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare float @llvm.sqrt.f32(float %f)
+
+; Check register square root.
+define float @f1(float %val) {
+; CHECK: f1:
+; CHECK: sqebr %f0, %f0
+; CHECK: br %r14
+  %root = call float @llvm.sqrt.f32(float %val)
+  ret float %root
+}
+
+; Check the low end of the SQEB range.
+define float @f2(float *%ptr) {
+; CHECK: f2:
+; CHECK: sqeb %f0, 0(%r2)
+; CHECK: br %r14
+  %mem = load float *%ptr
+  %root = call float @llvm.sqrt.f32(float %mem)
+  ret float %root
+}
+
+; Check the high end of the aligned SQEB range.
+define float @f3(float *%base) {
+; CHECK: f3:
+; CHECK: sqeb %f0, 4092(%r2)
+; CHECK: br %r14
+  %addr = getelementptr float *%base, i64 1023
+  %mem = load float *%addr
+  %root = call float @llvm.sqrt.f32(float %mem)
+  ret float %root
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define float @f4(float *%base) {
+; CHECK: f4:
+; CHECK: aghi %r2, 4096
+; CHECK: sqeb %f0, 0(%r2)
+; CHECK: br %r14
+  %addr = getelementptr float *%base, i64 1024
+  %mem = load float *%addr
+  %root = call float @llvm.sqrt.f32(float %mem)
+  ret float %root
+}
+
+; Check negative displacements, which also need separate address logic.
+define float @f5(float *%base) {
+; CHECK: f5:
+; CHECK: aghi %r2, -4
+; CHECK: sqeb %f0, 0(%r2)
+; CHECK: br %r14
+  %addr = getelementptr float *%base, i64 -1
+  %mem = load float *%addr
+  %root = call float @llvm.sqrt.f32(float %mem)
+  ret float %root
+}
+
+; Check that SQEB allows indices.
+define float @f6(float *%base, i64 %index) {
+; CHECK: f6:
+; CHECK: sllg %r1, %r3, 2
+; CHECK: sqeb %f0, 400(%r1,%r2)
+; CHECK: br %r14
+  %indexed = getelementptr float *%base, i64 %index
+  %addr = getelementptr float *%indexed, i64 100
+  %mem = load float *%addr
+  %root = call float @llvm.sqrt.f32(float %mem)
+  ret float %root
+}
diff --git a/test/CodeGen/SystemZ/fp-sqrt-02.ll b/test/CodeGen/SystemZ/fp-sqrt-02.ll
new file mode 100644
index 0000000..22a91ad
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-sqrt-02.ll
@@ -0,0 +1,73 @@
+; Test 64-bit square root.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare double @llvm.sqrt.f64(double %f)
+
+; Check register square root.
+define double @f1(double %val) {
+; CHECK: f1:
+; CHECK: sqdbr %f0, %f0
+; CHECK: br %r14
+  %root = call double @llvm.sqrt.f64(double %val)
+  ret double %root
+}
+
+; Check the low end of the SQDB range.
+define double @f2(double *%ptr) {
+; CHECK: f2:
+; CHECK: sqdb %f0, 0(%r2)
+; CHECK: br %r14
+  %mem = load double *%ptr
+  %root = call double @llvm.sqrt.f64(double %mem)
+  ret double %root
+}
+
+; Check the high end of the aligned SQDB range.
+define double @f3(double *%base) {
+; CHECK: f3:
+; CHECK: sqdb %f0, 4088(%r2)
+; CHECK: br %r14
+  %addr = getelementptr double *%base, i64 511
+  %mem = load double *%addr
+  %root = call double @llvm.sqrt.f64(double %mem)
+  ret double %root
+}
+
+; Check the next doubleword up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define double @f4(double *%base) {
+; CHECK: f4:
+; CHECK: aghi %r2, 4096
+; CHECK: sqdb %f0, 0(%r2)
+; CHECK: br %r14
+  %addr = getelementptr double *%base, i64 512
+  %mem = load double *%addr
+  %root = call double @llvm.sqrt.f64(double %mem)
+  ret double %root
+}
+
+; Check negative displacements, which also need separate address logic.
+define double @f5(double *%base) {
+; CHECK: f5:
+; CHECK: aghi %r2, -8
+; CHECK: sqdb %f0, 0(%r2)
+; CHECK: br %r14
+  %addr = getelementptr double *%base, i64 -1
+  %mem = load double *%addr
+  %root = call double @llvm.sqrt.f64(double %mem)
+  ret double %root
+}
+
+; Check that SQDB allows indices.
+define double @f6(double *%base, i64 %index) {
+; CHECK: f6:
+; CHECK: sllg %r1, %r3, 3
+; CHECK: sqdb %f0, 800(%r1,%r2)
+; CHECK: br %r14
+  %indexed = getelementptr double *%base, i64 %index
+  %addr = getelementptr double *%indexed, i64 100
+  %mem = load double *%addr
+  %root = call double @llvm.sqrt.f64(double %mem)
+  ret double %root
+}
diff --git a/test/CodeGen/SystemZ/fp-sqrt-03.ll b/test/CodeGen/SystemZ/fp-sqrt-03.ll
new file mode 100644
index 0000000..1b49af4
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-sqrt-03.ll
@@ -0,0 +1,20 @@
+; Test 128-bit square root.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare fp128 @llvm.sqrt.f128(fp128 %f)
+
+; There's no memory form of SQXBR.
+define void @f1(fp128 *%ptr) {
+; CHECK: f1:
+; CHECK: ld %f0, 0(%r2)
+; CHECK: ld %f2, 8(%r2)
+; CHECK: sqxbr %f0, %f0
+; CHECK: std %f0, 0(%r2)
+; CHECK: std %f2, 8(%r2)
+; CHECK: br %r14
+  %val = load fp128 *%ptr
+  %root = call fp128 @llvm.sqrt.f128(fp128 %val)
+  store fp128 %root, fp128 *%ptr
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/fp-sub-01.ll b/test/CodeGen/SystemZ/fp-sub-01.ll
new file mode 100644
index 0000000..b03f04b
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-sub-01.ll
@@ -0,0 +1,71 @@
+; Test 32-bit floating-point subtraction.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check register subtraction.
+define float @f1(float %f1, float %f2) {
+; CHECK: f1:
+; CHECK: sebr %f0, %f2
+; CHECK: br %r14
+  %diff = fsub float %f1, %f2
+  ret float %diff
+}
+
+; Check the low end of the SEB range.
+define float @f2(float %f1, float *%ptr) {
+; CHECK: f2:
+; CHECK: seb %f0, 0(%r2)
+; CHECK: br %r14
+  %mem = load float *%ptr
+  %diff = fsub float %f1, %mem
+  ret float %diff
+}
+
+; Check the high end of the aligned SEB range.
+define float @f3(float %f1, float *%base) {
+; CHECK: f3:
+; CHECK: seb %f0, 4092(%r2)
+; CHECK: br %r14
+  %addr = getelementptr float *%base, i64 1023
+  %mem = load float *%addr
+  %diff = fsub float %f1, %mem
+  ret float %diff
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define float @f4(float %f1, float *%base) {
+; CHECK: f4:
+; CHECK: aghi %r2, 4096
+; CHECK: seb %f0, 0(%r2)
+; CHECK: br %r14
+  %addr = getelementptr float *%base, i64 1024
+  %mem = load float *%addr
+  %diff = fsub float %f1, %mem
+  ret float %diff
+}
+
+; Check negative displacements, which also need separate address logic.
+define float @f5(float %f1, float *%base) {
+; CHECK: f5:
+; CHECK: aghi %r2, -4
+; CHECK: seb %f0, 0(%r2)
+; CHECK: br %r14
+  %addr = getelementptr float *%base, i64 -1
+  %mem = load float *%addr
+  %diff = fsub float %f1, %mem
+  ret float %diff
+}
+
+; Check that SEB allows indices.
+define float @f6(float %f1, float *%base, i64 %index) {
+; CHECK: f6:
+; CHECK: sllg %r1, %r3, 2
+; CHECK: seb %f0, 400(%r1,%r2)
+; CHECK: br %r14
+  %indexed = getelementptr float *%base, i64 %index
+  %addr = getelementptr float *%indexed, i64 100
+  %mem = load float *%addr
+  %diff = fsub float %f1, %mem
+  ret float %diff
+}
diff --git a/test/CodeGen/SystemZ/fp-sub-02.ll b/test/CodeGen/SystemZ/fp-sub-02.ll
new file mode 100644
index 0000000..bf9848c
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-sub-02.ll
@@ -0,0 +1,71 @@
+; Test 64-bit floating-point subtraction.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check register subtraction.
+define double @f1(double %f1, double %f2) {
+; CHECK: f1:
+; CHECK: sdbr %f0, %f2
+; CHECK: br %r14
+  %diff = fsub double %f1, %f2
+  ret double %diff
+}
+
+; Check the low end of the SDB range.
+define double @f2(double %f1, double *%ptr) {
+; CHECK: f2:
+; CHECK: sdb %f0, 0(%r2)
+; CHECK: br %r14
+  %mem = load double *%ptr
+  %diff = fsub double %f1, %mem
+  ret double %diff
+}
+
+; Check the high end of the aligned SDB range.
+define double @f3(double %f1, double *%base) {
+; CHECK: f3:
+; CHECK: sdb %f0, 4088(%r2)
+; CHECK: br %r14
+  %addr = getelementptr double *%base, i64 511
+  %mem = load double *%addr
+  %diff = fsub double %f1, %mem
+  ret double %diff
+}
+
+; Check the next doubleword up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define double @f4(double %f1, double *%base) {
+; CHECK: f4:
+; CHECK: aghi %r2, 4096
+; CHECK: sdb %f0, 0(%r2)
+; CHECK: br %r14
+  %addr = getelementptr double *%base, i64 512
+  %mem = load double *%addr
+  %diff = fsub double %f1, %mem
+  ret double %diff
+}
+
+; Check negative displacements, which also need separate address logic.
+define double @f5(double %f1, double *%base) {
+; CHECK: f5:
+; CHECK: aghi %r2, -8
+; CHECK: sdb %f0, 0(%r2)
+; CHECK: br %r14
+  %addr = getelementptr double *%base, i64 -1
+  %mem = load double *%addr
+  %diff = fsub double %f1, %mem
+  ret double %diff
+}
+
+; Check that SDB allows indices.
+define double @f6(double %f1, double *%base, i64 %index) {
+; CHECK: f6:
+; CHECK: sllg %r1, %r3, 3
+; CHECK: sdb %f0, 800(%r1,%r2)
+; CHECK: br %r14
+  %indexed = getelementptr double *%base, i64 %index
+  %addr = getelementptr double *%indexed, i64 100
+  %mem = load double *%addr
+  %diff = fsub double %f1, %mem
+  ret double %diff
+}
diff --git a/test/CodeGen/SystemZ/fp-sub-03.ll b/test/CodeGen/SystemZ/fp-sub-03.ll
new file mode 100644
index 0000000..82bb94d
--- /dev/null
+++ b/test/CodeGen/SystemZ/fp-sub-03.ll
@@ -0,0 +1,20 @@
+; Test 128-bit floating-point subtraction.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; There is no memory form of 128-bit subtraction.
+define void @f1(fp128 *%ptr, float %f2) {
+; CHECK: f1:
+; CHECK: lxebr %f0, %f0
+; CHECK: ld %f1, 0(%r2)
+; CHECK: ld %f3, 8(%r2)
+; CHECK: sxbr %f1, %f0
+; CHECK: std %f1, 0(%r2)
+; CHECK: std %f3, 8(%r2)
+; CHECK: br %r14
+  %lhs = load fp128 *%ptr
+  %rhs = fpext float %f2 to fp128
+  %diff = fsub fp128 %lhs, %rhs
+  store fp128 %diff, fp128 *%ptr
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/frame-01.ll b/test/CodeGen/SystemZ/frame-01.ll
new file mode 100644
index 0000000..0d34312
--- /dev/null
+++ b/test/CodeGen/SystemZ/frame-01.ll
@@ -0,0 +1,110 @@
+; Test the allocation of frames in cases where we do not need to save
+; registers in the prologue.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; The CFA offset is 160 (the caller-allocated part of the frame) + 168.
+define void @f1(i64 %x) {
+; CHECK: f1:
+; CHECK: aghi %r15, -168
+; CHECK: .cfi_def_cfa_offset 328
+; CHECK: stg %r2, 160(%r15)
+; CHECK: aghi %r15, 168
+; CHECK: br %r14
+  %slot = alloca i64, align 8
+  store volatile i64 %x, i64* %slot
+  ret void
+}
+
+; Check frames of size 32760, which is the largest size that can be both
+; allocated and freed using AGHI.  This size is big enough to require
+; an emergency spill slot at 160(%r15), for instructions with unsigned
+; 12-bit offsets that end up being out of range.  Fill the remaining
+; 32760 - 168 bytes by allocating (32760 - 168) / 8 = 4074 doublewords.
+define void @f2(i64 %x) {
+; CHECK: f2:
+; CHECK: aghi %r15, -32760
+; CHECK: .cfi_def_cfa_offset 32920
+; CHECK: stg %r2, 168(%r15)
+; CHECK: aghi %r15, 32760
+; CHECK: br %r14
+  %buf = alloca [4074 x i64], align 8
+  %first = getelementptr inbounds [4074 x i64]* %buf, i64 0, i64 0
+  store volatile i64 %x, i64* %first
+  ret void
+}
+
+; Allocate one more doubleword.  This is the one frame size that we can
+; allocate using AGHI but must free using AGFI.
+define void @f3(i64 %x) {
+; CHECK: f3:
+; CHECK: aghi %r15, -32768
+; CHECK: .cfi_def_cfa_offset 32928
+; CHECK: stg %r2, 168(%r15)
+; CHECK: agfi %r15, 32768
+; CHECK: br %r14
+  %buf = alloca [4075 x i64], align 8
+  %first = getelementptr inbounds [4075 x i64]* %buf, i64 0, i64 0
+  store volatile i64 %x, i64* %first
+  ret void
+}
+
+; Allocate another doubleword on top of that.  The allocation and free
+; must both use AGFI.
+define void @f4(i64 %x) {
+; CHECK: f4:
+; CHECK: agfi %r15, -32776
+; CHECK: .cfi_def_cfa_offset 32936
+; CHECK: stg %r2, 168(%r15)
+; CHECK: agfi %r15, 32776
+; CHECK: br %r14
+  %buf = alloca [4076 x i64], align 8
+  %first = getelementptr inbounds [4076 x i64]* %buf, i64 0, i64 0
+  store volatile i64 %x, i64* %first
+  ret void
+}
+
+; The largest size that can be both allocated and freed using AGFI.
+; At this point the frame is too big to represent properly in the CFI.
+define void @f5(i64 %x) {
+; CHECK: f5:
+; CHECK: agfi %r15, -2147483640
+; CHECK: stg %r2, 168(%r15)
+; CHECK: agfi %r15, 2147483640
+; CHECK: br %r14
+  %buf = alloca [268435434 x i64], align 8
+  %first = getelementptr inbounds [268435434 x i64]* %buf, i64 0, i64 0
+  store volatile i64 %x, i64* %first
+  ret void
+}
+
+; The only frame size that can be allocated using a single AGFI but which
+; must be freed using two instructions.
+define void @f6(i64 %x) {
+; CHECK: f6:
+; CHECK: agfi %r15, -2147483648
+; CHECK: stg %r2, 168(%r15)
+; CHECK: agfi %r15, 2147483640
+; CHECK: aghi %r15, 8
+; CHECK: br %r14
+  %buf = alloca [268435435 x i64], align 8
+  %first = getelementptr inbounds [268435435 x i64]* %buf, i64 0, i64 0
+  store volatile i64 %x, i64* %first
+  ret void
+}
+
+; The smallest frame size that needs two instructions to both allocate
+; and free the frame.
+define void @f7(i64 %x) {
+; CHECK: f7:
+; CHECK: agfi %r15, -2147483648
+; CHECK: aghi %r15, -8
+; CHECK: stg %r2, 168(%r15)
+; CHECK: agfi %r15, 2147483640
+; CHECK: aghi %r15, 16
+; CHECK: br %r14
+  %buf = alloca [268435436 x i64], align 8
+  %first = getelementptr inbounds [268435436 x i64]* %buf, i64 0, i64 0
+  store volatile i64 %x, i64* %first
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/frame-02.ll b/test/CodeGen/SystemZ/frame-02.ll
new file mode 100644
index 0000000..589703e
--- /dev/null
+++ b/test/CodeGen/SystemZ/frame-02.ll
@@ -0,0 +1,257 @@
+; Test saving and restoring of call-saved FPRs.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; This function should require all FPRs, but no other spill slots.
+; We need to save and restore 8 of the 16 FPRs, so the frame size
+; should be exactly 160 + 8 * 8 = 224.  The CFA offset is 160
+; (the caller-allocated part of the frame) + 224.
+define void @f1(float *%ptr) {
+; CHECK: f1:
+; CHECK: aghi %r15, -224
+; CHECK: .cfi_def_cfa_offset 384
+; CHECK: std %f8, 216(%r15)
+; CHECK: std %f9, 208(%r15)
+; CHECK: std %f10, 200(%r15)
+; CHECK: std %f11, 192(%r15)
+; CHECK: std %f12, 184(%r15)
+; CHECK: std %f13, 176(%r15)
+; CHECK: std %f14, 168(%r15)
+; CHECK: std %f15, 160(%r15)
+; CHECK: .cfi_offset %f8, -168
+; CHECK: .cfi_offset %f9, -176
+; CHECK: .cfi_offset %f10, -184
+; CHECK: .cfi_offset %f11, -192
+; CHECK: .cfi_offset %f12, -200
+; CHECK: .cfi_offset %f13, -208
+; CHECK: .cfi_offset %f14, -216
+; CHECK: .cfi_offset %f15, -224
+; ...main function body...
+; CHECK: ld %f8, 216(%r15)
+; CHECK: ld %f9, 208(%r15)
+; CHECK: ld %f10, 200(%r15)
+; CHECK: ld %f11, 192(%r15)
+; CHECK: ld %f12, 184(%r15)
+; CHECK: ld %f13, 176(%r15)
+; CHECK: ld %f14, 168(%r15)
+; CHECK: ld %f15, 160(%r15)
+; CHECK: aghi %r15, 224
+; CHECK: br %r14
+  %l0 = load volatile float *%ptr  ; 16 volatile loads from the same pointer
+  %l1 = load volatile float *%ptr
+  %l2 = load volatile float *%ptr
+  %l3 = load volatile float *%ptr
+  %l4 = load volatile float *%ptr
+  %l5 = load volatile float *%ptr
+  %l6 = load volatile float *%ptr
+  %l7 = load volatile float *%ptr
+  %l8 = load volatile float *%ptr
+  %l9 = load volatile float *%ptr
+  %l10 = load volatile float *%ptr
+  %l11 = load volatile float *%ptr
+  %l12 = load volatile float *%ptr
+  %l13 = load volatile float *%ptr
+  %l14 = load volatile float *%ptr
+  %l15 = load volatile float *%ptr
+  %add0 = fadd float %l0, %l0  ; running sum; each partial sum is stored below
+  %add1 = fadd float %l1, %add0
+  %add2 = fadd float %l2, %add1
+  %add3 = fadd float %l3, %add2
+  %add4 = fadd float %l4, %add3
+  %add5 = fadd float %l5, %add4
+  %add6 = fadd float %l6, %add5
+  %add7 = fadd float %l7, %add6
+  %add8 = fadd float %l8, %add7
+  %add9 = fadd float %l9, %add8
+  %add10 = fadd float %l10, %add9
+  %add11 = fadd float %l11, %add10
+  %add12 = fadd float %l12, %add11
+  %add13 = fadd float %l13, %add12
+  %add14 = fadd float %l14, %add13
+  %add15 = fadd float %l15, %add14
+  store volatile float %add0, float *%ptr  ; 16 volatile stores of the sums
+  store volatile float %add1, float *%ptr
+  store volatile float %add2, float *%ptr
+  store volatile float %add3, float *%ptr
+  store volatile float %add4, float *%ptr
+  store volatile float %add5, float *%ptr
+  store volatile float %add6, float *%ptr
+  store volatile float %add7, float *%ptr
+  store volatile float %add8, float *%ptr
+  store volatile float %add9, float *%ptr
+  store volatile float %add10, float *%ptr
+  store volatile float %add11, float *%ptr
+  store volatile float %add12, float *%ptr
+  store volatile float %add13, float *%ptr
+  store volatile float %add14, float *%ptr
+  store volatile float %add15, float *%ptr
+  ret void
+}
+
+; Like f1, but requires one fewer FPR.  We allocate in numerical order,
+; so %f15 is the one that gets dropped.
+define void @f2(float *%ptr) {
+; CHECK: f2:
+; CHECK: aghi %r15, -216
+; CHECK: .cfi_def_cfa_offset 376
+; CHECK: std %f8, 208(%r15)
+; CHECK: std %f9, 200(%r15)
+; CHECK: std %f10, 192(%r15)
+; CHECK: std %f11, 184(%r15)
+; CHECK: std %f12, 176(%r15)
+; CHECK: std %f13, 168(%r15)
+; CHECK: std %f14, 160(%r15)
+; CHECK: .cfi_offset %f8, -168
+; CHECK: .cfi_offset %f9, -176
+; CHECK: .cfi_offset %f10, -184
+; CHECK: .cfi_offset %f11, -192
+; CHECK: .cfi_offset %f12, -200
+; CHECK: .cfi_offset %f13, -208
+; CHECK: .cfi_offset %f14, -216
+; CHECK-NOT: %f15
+; ...main function body...
+; CHECK: ld %f8, 208(%r15)
+; CHECK: ld %f9, 200(%r15)
+; CHECK: ld %f10, 192(%r15)
+; CHECK: ld %f11, 184(%r15)
+; CHECK: ld %f12, 176(%r15)
+; CHECK: ld %f13, 168(%r15)
+; CHECK: ld %f14, 160(%r15)
+; CHECK: aghi %r15, 216
+; CHECK: br %r14
+  %l0 = load volatile float *%ptr  ; 15 volatile loads from the same pointer
+  %l1 = load volatile float *%ptr
+  %l2 = load volatile float *%ptr
+  %l3 = load volatile float *%ptr
+  %l4 = load volatile float *%ptr
+  %l5 = load volatile float *%ptr
+  %l6 = load volatile float *%ptr
+  %l7 = load volatile float *%ptr
+  %l8 = load volatile float *%ptr
+  %l9 = load volatile float *%ptr
+  %l10 = load volatile float *%ptr
+  %l11 = load volatile float *%ptr
+  %l12 = load volatile float *%ptr
+  %l13 = load volatile float *%ptr
+  %l14 = load volatile float *%ptr
+  %add0 = fadd float %l0, %l0  ; running sum; each partial sum is stored below
+  %add1 = fadd float %l1, %add0
+  %add2 = fadd float %l2, %add1
+  %add3 = fadd float %l3, %add2
+  %add4 = fadd float %l4, %add3
+  %add5 = fadd float %l5, %add4
+  %add6 = fadd float %l6, %add5
+  %add7 = fadd float %l7, %add6
+  %add8 = fadd float %l8, %add7
+  %add9 = fadd float %l9, %add8
+  %add10 = fadd float %l10, %add9
+  %add11 = fadd float %l11, %add10
+  %add12 = fadd float %l12, %add11
+  %add13 = fadd float %l13, %add12
+  %add14 = fadd float %l14, %add13
+  store volatile float %add0, float *%ptr  ; 15 volatile stores of the sums
+  store volatile float %add1, float *%ptr
+  store volatile float %add2, float *%ptr
+  store volatile float %add3, float *%ptr
+  store volatile float %add4, float *%ptr
+  store volatile float %add5, float *%ptr
+  store volatile float %add6, float *%ptr
+  store volatile float %add7, float *%ptr
+  store volatile float %add8, float *%ptr
+  store volatile float %add9, float *%ptr
+  store volatile float %add10, float *%ptr
+  store volatile float %add11, float *%ptr
+  store volatile float %add12, float *%ptr
+  store volatile float %add13, float *%ptr
+  store volatile float %add14, float *%ptr
+  ret void
+}
+
+; Like f1, but should require only one call-saved FPR.
+define void @f3(float *%ptr) {
+; CHECK: f3:
+; CHECK: aghi %r15, -168
+; CHECK: .cfi_def_cfa_offset 328
+; CHECK: std %f8, 160(%r15)
+; CHECK: .cfi_offset %f8, -168
+; CHECK-NOT: %f9
+; CHECK-NOT: %f10
+; CHECK-NOT: %f11
+; CHECK-NOT: %f12
+; CHECK-NOT: %f13
+; CHECK-NOT: %f14
+; CHECK-NOT: %f15
+; ...main function body...
+; CHECK: ld %f8, 160(%r15)
+; CHECK: aghi %r15, 168
+; CHECK: br %r14
+  %l0 = load volatile float *%ptr  ; 9 volatile loads from the same pointer
+  %l1 = load volatile float *%ptr
+  %l2 = load volatile float *%ptr
+  %l3 = load volatile float *%ptr
+  %l4 = load volatile float *%ptr
+  %l5 = load volatile float *%ptr
+  %l6 = load volatile float *%ptr
+  %l7 = load volatile float *%ptr
+  %l8 = load volatile float *%ptr
+  %add0 = fadd float %l0, %l0  ; running sum; each partial sum is stored below
+  %add1 = fadd float %l1, %add0
+  %add2 = fadd float %l2, %add1
+  %add3 = fadd float %l3, %add2
+  %add4 = fadd float %l4, %add3
+  %add5 = fadd float %l5, %add4
+  %add6 = fadd float %l6, %add5
+  %add7 = fadd float %l7, %add6
+  %add8 = fadd float %l8, %add7
+  store volatile float %add0, float *%ptr  ; 9 volatile stores of the sums
+  store volatile float %add1, float *%ptr
+  store volatile float %add2, float *%ptr
+  store volatile float %add3, float *%ptr
+  store volatile float %add4, float *%ptr
+  store volatile float %add5, float *%ptr
+  store volatile float %add6, float *%ptr
+  store volatile float %add7, float *%ptr
+  store volatile float %add8, float *%ptr
+  ret void
+}
+
+; This function should use all call-clobbered FPRs but no call-saved ones.
+; It shouldn't need to create a frame.
+define void @f4(float *%ptr) {
+; CHECK: f4:
+; CHECK-NOT: %r15
+; CHECK-NOT: %f8
+; CHECK-NOT: %f9
+; CHECK-NOT: %f10
+; CHECK-NOT: %f11
+; CHECK-NOT: %f12
+; CHECK-NOT: %f13
+; CHECK-NOT: %f14
+; CHECK-NOT: %f15
+; CHECK: br %r14
+  %l0 = load volatile float *%ptr  ; 8 volatile loads from the same pointer
+  %l1 = load volatile float *%ptr
+  %l2 = load volatile float *%ptr
+  %l3 = load volatile float *%ptr
+  %l4 = load volatile float *%ptr
+  %l5 = load volatile float *%ptr
+  %l6 = load volatile float *%ptr
+  %l7 = load volatile float *%ptr
+  %add0 = fadd float %l0, %l0  ; running sum; each partial sum is stored below
+  %add1 = fadd float %l1, %add0
+  %add2 = fadd float %l2, %add1
+  %add3 = fadd float %l3, %add2
+  %add4 = fadd float %l4, %add3
+  %add5 = fadd float %l5, %add4
+  %add6 = fadd float %l6, %add5
+  %add7 = fadd float %l7, %add6
+  store volatile float %add0, float *%ptr  ; 8 volatile stores of the sums
+  store volatile float %add1, float *%ptr
+  store volatile float %add2, float *%ptr
+  store volatile float %add3, float *%ptr
+  store volatile float %add4, float *%ptr
+  store volatile float %add5, float *%ptr
+  store volatile float %add6, float *%ptr
+  store volatile float %add7, float *%ptr
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/frame-03.ll b/test/CodeGen/SystemZ/frame-03.ll
new file mode 100644
index 0000000..3c4a499
--- /dev/null
+++ b/test/CodeGen/SystemZ/frame-03.ll
@@ -0,0 +1,259 @@
+; Like frame-02.ll, but with doubles rather than floats.  Internally this
+; uses a different register class, but the set of saved and restored
+; registers should be the same.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; This function should require all 16 FPRs, but no other spill slots.
+; We need to save and restore the 8 call-saved FPRs (%f8-%f15), so the
+; frame size should be exactly 160 + 8 * 8 = 224.  The CFA offset is 160
+; (the caller-allocated part of the frame) + 224 = 384.
+define void @f1(double *%ptr) {
+; CHECK: f1:
+; CHECK: aghi %r15, -224
+; CHECK: .cfi_def_cfa_offset 384
+; CHECK: std %f8, 216(%r15)
+; CHECK: std %f9, 208(%r15)
+; CHECK: std %f10, 200(%r15)
+; CHECK: std %f11, 192(%r15)
+; CHECK: std %f12, 184(%r15)
+; CHECK: std %f13, 176(%r15)
+; CHECK: std %f14, 168(%r15)
+; CHECK: std %f15, 160(%r15)
+; CHECK: .cfi_offset %f8, -168
+; CHECK: .cfi_offset %f9, -176
+; CHECK: .cfi_offset %f10, -184
+; CHECK: .cfi_offset %f11, -192
+; CHECK: .cfi_offset %f12, -200
+; CHECK: .cfi_offset %f13, -208
+; CHECK: .cfi_offset %f14, -216
+; CHECK: .cfi_offset %f15, -224
+; ...main function body...
+; CHECK: ld %f8, 216(%r15)
+; CHECK: ld %f9, 208(%r15)
+; CHECK: ld %f10, 200(%r15)
+; CHECK: ld %f11, 192(%r15)
+; CHECK: ld %f12, 184(%r15)
+; CHECK: ld %f13, 176(%r15)
+; CHECK: ld %f14, 168(%r15)
+; CHECK: ld %f15, 160(%r15)
+; CHECK: aghi %r15, 224
+; CHECK: br %r14
+  %l0 = load volatile double *%ptr
+  %l1 = load volatile double *%ptr
+  %l2 = load volatile double *%ptr
+  %l3 = load volatile double *%ptr
+  %l4 = load volatile double *%ptr
+  %l5 = load volatile double *%ptr
+  %l6 = load volatile double *%ptr
+  %l7 = load volatile double *%ptr
+  %l8 = load volatile double *%ptr
+  %l9 = load volatile double *%ptr
+  %l10 = load volatile double *%ptr
+  %l11 = load volatile double *%ptr
+  %l12 = load volatile double *%ptr
+  %l13 = load volatile double *%ptr
+  %l14 = load volatile double *%ptr
+  %l15 = load volatile double *%ptr
+  %add0 = fadd double %l0, %l0
+  %add1 = fadd double %l1, %add0
+  %add2 = fadd double %l2, %add1
+  %add3 = fadd double %l3, %add2
+  %add4 = fadd double %l4, %add3
+  %add5 = fadd double %l5, %add4
+  %add6 = fadd double %l6, %add5
+  %add7 = fadd double %l7, %add6
+  %add8 = fadd double %l8, %add7
+  %add9 = fadd double %l9, %add8
+  %add10 = fadd double %l10, %add9
+  %add11 = fadd double %l11, %add10
+  %add12 = fadd double %l12, %add11
+  %add13 = fadd double %l13, %add12
+  %add14 = fadd double %l14, %add13
+  %add15 = fadd double %l15, %add14
+  store volatile double %add0, double *%ptr
+  store volatile double %add1, double *%ptr
+  store volatile double %add2, double *%ptr
+  store volatile double %add3, double *%ptr
+  store volatile double %add4, double *%ptr
+  store volatile double %add5, double *%ptr
+  store volatile double %add6, double *%ptr
+  store volatile double %add7, double *%ptr
+  store volatile double %add8, double *%ptr
+  store volatile double %add9, double *%ptr
+  store volatile double %add10, double *%ptr
+  store volatile double %add11, double *%ptr
+  store volatile double %add12, double *%ptr
+  store volatile double %add13, double *%ptr
+  store volatile double %add14, double *%ptr
+  store volatile double %add15, double *%ptr
+  ret void
+}
+
+; Like f1, but requires one fewer FPR (frame size 160 + 7 * 8 = 216).
+; We allocate in numerical order, so %f15 is the one that gets dropped.
+define void @f2(double *%ptr) {
+; CHECK: f2:
+; CHECK: aghi %r15, -216
+; CHECK: .cfi_def_cfa_offset 376
+; CHECK: std %f8, 208(%r15)
+; CHECK: std %f9, 200(%r15)
+; CHECK: std %f10, 192(%r15)
+; CHECK: std %f11, 184(%r15)
+; CHECK: std %f12, 176(%r15)
+; CHECK: std %f13, 168(%r15)
+; CHECK: std %f14, 160(%r15)
+; CHECK: .cfi_offset %f8, -168
+; CHECK: .cfi_offset %f9, -176
+; CHECK: .cfi_offset %f10, -184
+; CHECK: .cfi_offset %f11, -192
+; CHECK: .cfi_offset %f12, -200
+; CHECK: .cfi_offset %f13, -208
+; CHECK: .cfi_offset %f14, -216
+; CHECK-NOT: %f15
+; ...main function body...
+; CHECK: ld %f8, 208(%r15)
+; CHECK: ld %f9, 200(%r15)
+; CHECK: ld %f10, 192(%r15)
+; CHECK: ld %f11, 184(%r15)
+; CHECK: ld %f12, 176(%r15)
+; CHECK: ld %f13, 168(%r15)
+; CHECK: ld %f14, 160(%r15)
+; CHECK: aghi %r15, 216
+; CHECK: br %r14
+  %l0 = load volatile double *%ptr
+  %l1 = load volatile double *%ptr
+  %l2 = load volatile double *%ptr
+  %l3 = load volatile double *%ptr
+  %l4 = load volatile double *%ptr
+  %l5 = load volatile double *%ptr
+  %l6 = load volatile double *%ptr
+  %l7 = load volatile double *%ptr
+  %l8 = load volatile double *%ptr
+  %l9 = load volatile double *%ptr
+  %l10 = load volatile double *%ptr
+  %l11 = load volatile double *%ptr
+  %l12 = load volatile double *%ptr
+  %l13 = load volatile double *%ptr
+  %l14 = load volatile double *%ptr
+  %add0 = fadd double %l0, %l0
+  %add1 = fadd double %l1, %add0
+  %add2 = fadd double %l2, %add1
+  %add3 = fadd double %l3, %add2
+  %add4 = fadd double %l4, %add3
+  %add5 = fadd double %l5, %add4
+  %add6 = fadd double %l6, %add5
+  %add7 = fadd double %l7, %add6
+  %add8 = fadd double %l8, %add7
+  %add9 = fadd double %l9, %add8
+  %add10 = fadd double %l10, %add9
+  %add11 = fadd double %l11, %add10
+  %add12 = fadd double %l12, %add11
+  %add13 = fadd double %l13, %add12
+  %add14 = fadd double %l14, %add13
+  store volatile double %add0, double *%ptr
+  store volatile double %add1, double *%ptr
+  store volatile double %add2, double *%ptr
+  store volatile double %add3, double *%ptr
+  store volatile double %add4, double *%ptr
+  store volatile double %add5, double *%ptr
+  store volatile double %add6, double *%ptr
+  store volatile double %add7, double *%ptr
+  store volatile double %add8, double *%ptr
+  store volatile double %add9, double *%ptr
+  store volatile double %add10, double *%ptr
+  store volatile double %add11, double *%ptr
+  store volatile double %add12, double *%ptr
+  store volatile double %add13, double *%ptr
+  store volatile double %add14, double *%ptr
+  ret void
+}
+
+; Like f1, but should require only one call-saved FPR, %f8 (160 + 8 = 168).
+define void @f3(double *%ptr) {
+; CHECK: f3:
+; CHECK: aghi %r15, -168
+; CHECK: .cfi_def_cfa_offset 328
+; CHECK: std %f8, 160(%r15)
+; CHECK: .cfi_offset %f8, -168
+; CHECK-NOT: %f9
+; CHECK-NOT: %f10
+; CHECK-NOT: %f11
+; CHECK-NOT: %f12
+; CHECK-NOT: %f13
+; CHECK-NOT: %f14
+; CHECK-NOT: %f15
+; ...main function body...
+; CHECK: ld %f8, 160(%r15)
+; CHECK: aghi %r15, 168
+; CHECK: br %r14
+  %l0 = load volatile double *%ptr
+  %l1 = load volatile double *%ptr
+  %l2 = load volatile double *%ptr
+  %l3 = load volatile double *%ptr
+  %l4 = load volatile double *%ptr
+  %l5 = load volatile double *%ptr
+  %l6 = load volatile double *%ptr
+  %l7 = load volatile double *%ptr
+  %l8 = load volatile double *%ptr
+  %add0 = fadd double %l0, %l0
+  %add1 = fadd double %l1, %add0
+  %add2 = fadd double %l2, %add1
+  %add3 = fadd double %l3, %add2
+  %add4 = fadd double %l4, %add3
+  %add5 = fadd double %l5, %add4
+  %add6 = fadd double %l6, %add5
+  %add7 = fadd double %l7, %add6
+  %add8 = fadd double %l8, %add7
+  store volatile double %add0, double *%ptr
+  store volatile double %add1, double *%ptr
+  store volatile double %add2, double *%ptr
+  store volatile double %add3, double *%ptr
+  store volatile double %add4, double *%ptr
+  store volatile double %add5, double *%ptr
+  store volatile double %add6, double *%ptr
+  store volatile double %add7, double *%ptr
+  store volatile double %add8, double *%ptr
+  ret void
+}
+
+; This function should use all call-clobbered FPRs (%f0-%f7) but no call-saved
+; ones (%f8-%f15).  It shouldn't need to create a frame.
+define void @f4(double *%ptr) {
+; CHECK: f4:
+; CHECK-NOT: %r15
+; CHECK-NOT: %f8
+; CHECK-NOT: %f9
+; CHECK-NOT: %f10
+; CHECK-NOT: %f11
+; CHECK-NOT: %f12
+; CHECK-NOT: %f13
+; CHECK-NOT: %f14
+; CHECK-NOT: %f15
+; CHECK: br %r14
+  %l0 = load volatile double *%ptr
+  %l1 = load volatile double *%ptr
+  %l2 = load volatile double *%ptr
+  %l3 = load volatile double *%ptr
+  %l4 = load volatile double *%ptr
+  %l5 = load volatile double *%ptr
+  %l6 = load volatile double *%ptr
+  %l7 = load volatile double *%ptr
+  %add0 = fadd double %l0, %l0
+  %add1 = fadd double %l1, %add0
+  %add2 = fadd double %l2, %add1
+  %add3 = fadd double %l3, %add2
+  %add4 = fadd double %l4, %add3
+  %add5 = fadd double %l5, %add4
+  %add6 = fadd double %l6, %add5
+  %add7 = fadd double %l7, %add6
+  store volatile double %add0, double *%ptr
+  store volatile double %add1, double *%ptr
+  store volatile double %add2, double *%ptr
+  store volatile double %add3, double *%ptr
+  store volatile double %add4, double *%ptr
+  store volatile double %add5, double *%ptr
+  store volatile double %add6, double *%ptr
+  store volatile double %add7, double *%ptr
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/frame-04.ll b/test/CodeGen/SystemZ/frame-04.ll
new file mode 100644
index 0000000..360f85c
--- /dev/null
+++ b/test/CodeGen/SystemZ/frame-04.ll
@@ -0,0 +1,187 @@
+; Like frame-02.ll, but with long doubles rather than floats.  Some of the
+; cases are slightly different because we need to allocate pairs of FPRs.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; This function should require all FPRs (each of the 8 fp128 values needs a
+; pair), but no other spill slots.  We need to save and restore 8 of the 16
+; FPRs, so the frame size should be exactly 160 + 8 * 8 = 224.  The CFA
+; offset is 160 (the caller-allocated part of the frame) + 224 = 384.
+define void @f1(fp128 *%ptr) {
+; CHECK: f1:
+; CHECK: aghi %r15, -224
+; CHECK: .cfi_def_cfa_offset 384
+; CHECK: std %f8, 216(%r15)
+; CHECK: std %f9, 208(%r15)
+; CHECK: std %f10, 200(%r15)
+; CHECK: std %f11, 192(%r15)
+; CHECK: std %f12, 184(%r15)
+; CHECK: std %f13, 176(%r15)
+; CHECK: std %f14, 168(%r15)
+; CHECK: std %f15, 160(%r15)
+; CHECK: .cfi_offset %f8, -168
+; CHECK: .cfi_offset %f9, -176
+; CHECK: .cfi_offset %f10, -184
+; CHECK: .cfi_offset %f11, -192
+; CHECK: .cfi_offset %f12, -200
+; CHECK: .cfi_offset %f13, -208
+; CHECK: .cfi_offset %f14, -216
+; CHECK: .cfi_offset %f15, -224
+; ...main function body...
+; CHECK: ld %f8, 216(%r15)
+; CHECK: ld %f9, 208(%r15)
+; CHECK: ld %f10, 200(%r15)
+; CHECK: ld %f11, 192(%r15)
+; CHECK: ld %f12, 184(%r15)
+; CHECK: ld %f13, 176(%r15)
+; CHECK: ld %f14, 168(%r15)
+; CHECK: ld %f15, 160(%r15)
+; CHECK: aghi %r15, 224
+; CHECK: br %r14
+  %l0 = load volatile fp128 *%ptr
+  %l1 = load volatile fp128 *%ptr
+  %l4 = load volatile fp128 *%ptr
+  %l5 = load volatile fp128 *%ptr
+  %l8 = load volatile fp128 *%ptr
+  %l9 = load volatile fp128 *%ptr
+  %l12 = load volatile fp128 *%ptr
+  %l13 = load volatile fp128 *%ptr
+  %add0 = fadd fp128 %l0, %l0
+  %add1 = fadd fp128 %l1, %add0
+  %add4 = fadd fp128 %l4, %add1
+  %add5 = fadd fp128 %l5, %add4
+  %add8 = fadd fp128 %l8, %add5
+  %add9 = fadd fp128 %l9, %add8
+  %add12 = fadd fp128 %l12, %add9
+  %add13 = fadd fp128 %l13, %add12
+  store volatile fp128 %add0, fp128 *%ptr
+  store volatile fp128 %add1, fp128 *%ptr
+  store volatile fp128 %add4, fp128 *%ptr
+  store volatile fp128 %add5, fp128 *%ptr
+  store volatile fp128 %add8, fp128 *%ptr
+  store volatile fp128 %add9, fp128 *%ptr
+  store volatile fp128 %add12, fp128 *%ptr
+  store volatile fp128 %add13, fp128 *%ptr
+  ret void
+}
+
+; Like f1, but requires one fewer FPR pair (frame size 160 + 6 * 8 = 208).
+; We allocate in numerical order, so %f13+%f15 is the pair that gets dropped.
+define void @f2(fp128 *%ptr) {
+; CHECK: f2:
+; CHECK: aghi %r15, -208
+; CHECK: .cfi_def_cfa_offset 368
+; CHECK: std %f8, 200(%r15)
+; CHECK: std %f9, 192(%r15)
+; CHECK: std %f10, 184(%r15)
+; CHECK: std %f11, 176(%r15)
+; CHECK: std %f12, 168(%r15)
+; CHECK: std %f14, 160(%r15)
+; CHECK: .cfi_offset %f8, -168
+; CHECK: .cfi_offset %f9, -176
+; CHECK: .cfi_offset %f10, -184
+; CHECK: .cfi_offset %f11, -192
+; CHECK: .cfi_offset %f12, -200
+; CHECK: .cfi_offset %f14, -208
+; CHECK-NOT: %f13
+; CHECK-NOT: %f15
+; ...main function body...
+; CHECK: ld %f8, 200(%r15)
+; CHECK: ld %f9, 192(%r15)
+; CHECK: ld %f10, 184(%r15)
+; CHECK: ld %f11, 176(%r15)
+; CHECK: ld %f12, 168(%r15)
+; CHECK: ld %f14, 160(%r15)
+; CHECK: aghi %r15, 208
+; CHECK: br %r14
+  %l0 = load volatile fp128 *%ptr
+  %l1 = load volatile fp128 *%ptr
+  %l4 = load volatile fp128 *%ptr
+  %l5 = load volatile fp128 *%ptr
+  %l8 = load volatile fp128 *%ptr
+  %l9 = load volatile fp128 *%ptr
+  %l12 = load volatile fp128 *%ptr
+  %add0 = fadd fp128 %l0, %l0
+  %add1 = fadd fp128 %l1, %add0
+  %add4 = fadd fp128 %l4, %add1
+  %add5 = fadd fp128 %l5, %add4
+  %add8 = fadd fp128 %l8, %add5
+  %add9 = fadd fp128 %l9, %add8
+  %add12 = fadd fp128 %l12, %add9
+  store volatile fp128 %add0, fp128 *%ptr
+  store volatile fp128 %add1, fp128 *%ptr
+  store volatile fp128 %add4, fp128 *%ptr
+  store volatile fp128 %add5, fp128 *%ptr
+  store volatile fp128 %add8, fp128 *%ptr
+  store volatile fp128 %add9, fp128 *%ptr
+  store volatile fp128 %add12, fp128 *%ptr
+  ret void
+}
+
+; Like f1, but requires only one call-saved FPR pair (frame size 160 + 2 * 8
+; = 176).  We allocate in numerical order, so the pair should be %f8+%f10.
+define void @f3(fp128 *%ptr) {
+; CHECK: f3:
+; CHECK: aghi %r15, -176
+; CHECK: .cfi_def_cfa_offset 336
+; CHECK: std %f8, 168(%r15)
+; CHECK: std %f10, 160(%r15)
+; CHECK: .cfi_offset %f8, -168
+; CHECK: .cfi_offset %f10, -176
+; CHECK-NOT: %f9
+; CHECK-NOT: %f11
+; CHECK-NOT: %f12
+; CHECK-NOT: %f13
+; CHECK-NOT: %f14
+; CHECK-NOT: %f15
+; ...main function body...
+; CHECK: ld %f8, 168(%r15)
+; CHECK: ld %f10, 160(%r15)
+; CHECK: aghi %r15, 176
+; CHECK: br %r14
+  %l0 = load volatile fp128 *%ptr
+  %l1 = load volatile fp128 *%ptr
+  %l4 = load volatile fp128 *%ptr
+  %l5 = load volatile fp128 *%ptr
+  %l8 = load volatile fp128 *%ptr
+  %add0 = fadd fp128 %l0, %l0
+  %add1 = fadd fp128 %l1, %add0
+  %add4 = fadd fp128 %l4, %add1
+  %add5 = fadd fp128 %l5, %add4
+  %add8 = fadd fp128 %l8, %add5
+  store volatile fp128 %add0, fp128 *%ptr
+  store volatile fp128 %add1, fp128 *%ptr
+  store volatile fp128 %add4, fp128 *%ptr
+  store volatile fp128 %add5, fp128 *%ptr
+  store volatile fp128 %add8, fp128 *%ptr
+  ret void
+}
+
+; This function should use all call-clobbered FPRs (four pairs) but no
+; call-saved ones.  It shouldn't need to create a frame.
+define void @f4(fp128 *%ptr) {
+; CHECK: f4:
+; CHECK-NOT: %r15
+; CHECK-NOT: %f8
+; CHECK-NOT: %f9
+; CHECK-NOT: %f10
+; CHECK-NOT: %f11
+; CHECK-NOT: %f12
+; CHECK-NOT: %f13
+; CHECK-NOT: %f14
+; CHECK-NOT: %f15
+; CHECK: br %r14
+  %l0 = load volatile fp128 *%ptr
+  %l1 = load volatile fp128 *%ptr
+  %l4 = load volatile fp128 *%ptr
+  %l5 = load volatile fp128 *%ptr
+  %add0 = fadd fp128 %l0, %l0
+  %add1 = fadd fp128 %l1, %add0
+  %add4 = fadd fp128 %l4, %add1
+  %add5 = fadd fp128 %l5, %add4
+  store volatile fp128 %add0, fp128 *%ptr
+  store volatile fp128 %add1, fp128 *%ptr
+  store volatile fp128 %add4, fp128 *%ptr
+  store volatile fp128 %add5, fp128 *%ptr
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/frame-05.ll b/test/CodeGen/SystemZ/frame-05.ll
new file mode 100644
index 0000000..3a159fc
--- /dev/null
+++ b/test/CodeGen/SystemZ/frame-05.ll
@@ -0,0 +1,219 @@
+; Test saving and restoring of call-saved GPRs.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; This function should require all GPRs, but no other spill slots.  The caller
+; allocates room for the GPR save slots, so we shouldn't need to allocate any
+; extra space: %r6-%r15 are saved with one STMG and %r15 is left unchanged.
+;
+; The function only modifies the low 32 bits of each register, which in
+; itself would allow STM and LM to be used instead of STMG and LMG.
+; However, the ABI defines the offset of each register, so we always
+; use the 64-bit form.
+;
+; Use a different address for the final store, so that we can check that
+; %r15 isn't referenced again until after that.
+define void @f1(i32 *%ptr) {
+; CHECK: f1:
+; CHECK: stmg %r6, %r15, 48(%r15)
+; CHECK-NOT: %r15
+; CHECK: .cfi_offset %r6, -112
+; CHECK: .cfi_offset %r7, -104
+; CHECK: .cfi_offset %r8, -96
+; CHECK: .cfi_offset %r9, -88
+; CHECK: .cfi_offset %r10, -80
+; CHECK: .cfi_offset %r11, -72
+; CHECK: .cfi_offset %r12, -64
+; CHECK: .cfi_offset %r13, -56
+; CHECK: .cfi_offset %r14, -48
+; CHECK: .cfi_offset %r15, -40
+; ...main function body...
+; CHECK-NOT: %r15
+; CHECK: st {{.*}}, 4(%r2)
+; CHECK: lmg %r6, %r15, 48(%r15)
+; CHECK: br %r14
+  %l0 = load volatile i32 *%ptr
+  %l1 = load volatile i32 *%ptr
+  %l3 = load volatile i32 *%ptr  ; no %l2 -- presumably because %r2 holds %ptr
+  %l4 = load volatile i32 *%ptr
+  %l5 = load volatile i32 *%ptr
+  %l6 = load volatile i32 *%ptr
+  %l7 = load volatile i32 *%ptr
+  %l8 = load volatile i32 *%ptr
+  %l9 = load volatile i32 *%ptr
+  %l10 = load volatile i32 *%ptr
+  %l11 = load volatile i32 *%ptr
+  %l12 = load volatile i32 *%ptr
+  %l13 = load volatile i32 *%ptr
+  %l14 = load volatile i32 *%ptr
+  %add0 = add i32 %l0, %l0
+  %add1 = add i32 %l1, %add0
+  %add3 = add i32 %l3, %add1
+  %add4 = add i32 %l4, %add3
+  %add5 = add i32 %l5, %add4
+  %add6 = add i32 %l6, %add5
+  %add7 = add i32 %l7, %add6
+  %add8 = add i32 %l8, %add7
+  %add9 = add i32 %l9, %add8
+  %add10 = add i32 %l10, %add9
+  %add11 = add i32 %l11, %add10
+  %add12 = add i32 %l12, %add11
+  %add13 = add i32 %l13, %add12
+  %add14 = add i32 %l14, %add13
+  store volatile i32 %add0, i32 *%ptr
+  store volatile i32 %add1, i32 *%ptr
+  store volatile i32 %add3, i32 *%ptr
+  store volatile i32 %add4, i32 *%ptr
+  store volatile i32 %add5, i32 *%ptr
+  store volatile i32 %add6, i32 *%ptr
+  store volatile i32 %add7, i32 *%ptr
+  store volatile i32 %add8, i32 *%ptr
+  store volatile i32 %add9, i32 *%ptr
+  store volatile i32 %add10, i32 *%ptr
+  store volatile i32 %add11, i32 *%ptr
+  store volatile i32 %add12, i32 *%ptr
+  store volatile i32 %add13, i32 *%ptr
+  %final = getelementptr i32 *%ptr, i32 1
+  store volatile i32 %add14, i32 *%final
+  ret void
+}
+
+; Like f1, but requires one fewer GPR.  We allocate the call-saved GPRs
+; from %r14 down, so that the STMG/LMG sequences aren't any longer than
+; they need to be; here %r6 is the register left unused and unsaved.
+define void @f2(i32 *%ptr) {
+; CHECK: f2:
+; CHECK: stmg %r7, %r15, 56(%r15)
+; CHECK-NOT: %r15
+; CHECK: .cfi_offset %r7, -104
+; CHECK: .cfi_offset %r8, -96
+; CHECK: .cfi_offset %r9, -88
+; CHECK: .cfi_offset %r10, -80
+; CHECK: .cfi_offset %r11, -72
+; CHECK: .cfi_offset %r12, -64
+; CHECK: .cfi_offset %r13, -56
+; CHECK: .cfi_offset %r14, -48
+; CHECK: .cfi_offset %r15, -40
+; ...main function body...
+; CHECK-NOT: %r15
+; CHECK-NOT: %r6
+; CHECK: st {{.*}}, 4(%r2)
+; CHECK: lmg %r7, %r15, 56(%r15)
+; CHECK: br %r14
+  %l0 = load volatile i32 *%ptr
+  %l1 = load volatile i32 *%ptr
+  %l3 = load volatile i32 *%ptr
+  %l4 = load volatile i32 *%ptr
+  %l5 = load volatile i32 *%ptr
+  %l7 = load volatile i32 *%ptr
+  %l8 = load volatile i32 *%ptr
+  %l9 = load volatile i32 *%ptr
+  %l10 = load volatile i32 *%ptr
+  %l11 = load volatile i32 *%ptr
+  %l12 = load volatile i32 *%ptr
+  %l13 = load volatile i32 *%ptr
+  %l14 = load volatile i32 *%ptr
+  %add0 = add i32 %l0, %l0
+  %add1 = add i32 %l1, %add0
+  %add3 = add i32 %l3, %add1
+  %add4 = add i32 %l4, %add3
+  %add5 = add i32 %l5, %add4
+  %add7 = add i32 %l7, %add5
+  %add8 = add i32 %l8, %add7
+  %add9 = add i32 %l9, %add8
+  %add10 = add i32 %l10, %add9
+  %add11 = add i32 %l11, %add10
+  %add12 = add i32 %l12, %add11
+  %add13 = add i32 %l13, %add12
+  %add14 = add i32 %l14, %add13
+  store volatile i32 %add0, i32 *%ptr
+  store volatile i32 %add1, i32 *%ptr
+  store volatile i32 %add3, i32 *%ptr
+  store volatile i32 %add4, i32 *%ptr
+  store volatile i32 %add5, i32 *%ptr
+  store volatile i32 %add7, i32 *%ptr
+  store volatile i32 %add8, i32 *%ptr
+  store volatile i32 %add9, i32 *%ptr
+  store volatile i32 %add10, i32 *%ptr
+  store volatile i32 %add11, i32 *%ptr
+  store volatile i32 %add12, i32 *%ptr
+  store volatile i32 %add13, i32 *%ptr
+  %final = getelementptr i32 *%ptr, i32 1
+  store volatile i32 %add14, i32 *%final
+  ret void
+}
+
+; Like f1, but needs only one call-saved GPR, %r14; STMG still covers %r15.
+define void @f3(i32 *%ptr) {
+; CHECK: f3:
+; CHECK: stmg %r14, %r15, 112(%r15)
+; CHECK-NOT: %r15
+; CHECK: .cfi_offset %r14, -48
+; CHECK: .cfi_offset %r15, -40
+; ...main function body...
+; CHECK-NOT: %r15
+; CHECK-NOT: %r6
+; CHECK-NOT: %r7
+; CHECK-NOT: %r8
+; CHECK-NOT: %r9
+; CHECK-NOT: %r10
+; CHECK-NOT: %r11
+; CHECK-NOT: %r12
+; CHECK-NOT: %r13
+; CHECK: st {{.*}}, 4(%r2)
+; CHECK: lmg %r14, %r15, 112(%r15)
+; CHECK: br %r14
+  %l0 = load volatile i32 *%ptr
+  %l1 = load volatile i32 *%ptr
+  %l3 = load volatile i32 *%ptr
+  %l4 = load volatile i32 *%ptr
+  %l5 = load volatile i32 *%ptr
+  %l14 = load volatile i32 *%ptr
+  %add0 = add i32 %l0, %l0
+  %add1 = add i32 %l1, %add0
+  %add3 = add i32 %l3, %add1
+  %add4 = add i32 %l4, %add3
+  %add5 = add i32 %l5, %add4
+  %add14 = add i32 %l14, %add5
+  store volatile i32 %add0, i32 *%ptr
+  store volatile i32 %add1, i32 *%ptr
+  store volatile i32 %add3, i32 *%ptr
+  store volatile i32 %add4, i32 *%ptr
+  store volatile i32 %add5, i32 *%ptr
+  %final = getelementptr i32 *%ptr, i32 1
+  store volatile i32 %add14, i32 *%final
+  ret void
+}
+
+; This function should use all call-clobbered GPRs (%r0-%r5, %r2 holding %ptr)
+; but no call-saved ones.  It shouldn't need to touch the stack at all.
+define void @f4(i32 *%ptr) {
+; CHECK: f4:
+; CHECK-NOT: %r15
+; CHECK-NOT: %r6
+; CHECK-NOT: %r7
+; CHECK-NOT: %r8
+; CHECK-NOT: %r9
+; CHECK-NOT: %r10
+; CHECK-NOT: %r11
+; CHECK-NOT: %r12
+; CHECK-NOT: %r13
+; CHECK: br %r14
+  %l0 = load volatile i32 *%ptr
+  %l1 = load volatile i32 *%ptr
+  %l3 = load volatile i32 *%ptr
+  %l4 = load volatile i32 *%ptr
+  %l5 = load volatile i32 *%ptr
+  %add0 = add i32 %l0, %l0
+  %add1 = add i32 %l1, %add0
+  %add3 = add i32 %l3, %add1
+  %add4 = add i32 %l4, %add3
+  %add5 = add i32 %l5, %add4
+  store volatile i32 %add0, i32 *%ptr
+  store volatile i32 %add1, i32 *%ptr
+  store volatile i32 %add3, i32 *%ptr
+  store volatile i32 %add4, i32 *%ptr
+  %final = getelementptr i32 *%ptr, i32 1
+  store volatile i32 %add5, i32 *%final
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/frame-06.ll b/test/CodeGen/SystemZ/frame-06.ll
new file mode 100644
index 0000000..4c361f1
--- /dev/null
+++ b/test/CodeGen/SystemZ/frame-06.ll
@@ -0,0 +1,216 @@
+; Like frame-05.ll, but with i64s rather than i32s.  Internally this
+; uses a different register class, but the set of saved and restored
+; registers should be the same.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; This function should require all GPRs, but no other spill slots.  The caller
+; allocates room for the GPR save slots, so we shouldn't need to allocate any
+; extra space: %r6-%r15 are saved with one STMG and %r15 is left unchanged.
+;
+; Use a different address for the final store, so that we can check that
+; %r15 isn't referenced again until after that.
+define void @f1(i64 *%ptr) {
+; CHECK: f1:
+; CHECK: stmg %r6, %r15, 48(%r15)
+; CHECK-NOT: %r15
+; CHECK: .cfi_offset %r6, -112
+; CHECK: .cfi_offset %r7, -104
+; CHECK: .cfi_offset %r8, -96
+; CHECK: .cfi_offset %r9, -88
+; CHECK: .cfi_offset %r10, -80
+; CHECK: .cfi_offset %r11, -72
+; CHECK: .cfi_offset %r12, -64
+; CHECK: .cfi_offset %r13, -56
+; CHECK: .cfi_offset %r14, -48
+; CHECK: .cfi_offset %r15, -40
+; ...main function body...
+; CHECK-NOT: %r15
+; CHECK: stg {{.*}}, 8(%r2)
+; CHECK: lmg %r6, %r15, 48(%r15)
+; CHECK: br %r14
+  %l0 = load volatile i64 *%ptr
+  %l1 = load volatile i64 *%ptr
+  %l3 = load volatile i64 *%ptr  ; no %l2 -- presumably because %r2 holds %ptr
+  %l4 = load volatile i64 *%ptr
+  %l5 = load volatile i64 *%ptr
+  %l6 = load volatile i64 *%ptr
+  %l7 = load volatile i64 *%ptr
+  %l8 = load volatile i64 *%ptr
+  %l9 = load volatile i64 *%ptr
+  %l10 = load volatile i64 *%ptr
+  %l11 = load volatile i64 *%ptr
+  %l12 = load volatile i64 *%ptr
+  %l13 = load volatile i64 *%ptr
+  %l14 = load volatile i64 *%ptr
+  %add0 = add i64 %l0, %l0
+  %add1 = add i64 %l1, %add0
+  %add3 = add i64 %l3, %add1
+  %add4 = add i64 %l4, %add3
+  %add5 = add i64 %l5, %add4
+  %add6 = add i64 %l6, %add5
+  %add7 = add i64 %l7, %add6
+  %add8 = add i64 %l8, %add7
+  %add9 = add i64 %l9, %add8
+  %add10 = add i64 %l10, %add9
+  %add11 = add i64 %l11, %add10
+  %add12 = add i64 %l12, %add11
+  %add13 = add i64 %l13, %add12
+  %add14 = add i64 %l14, %add13
+  store volatile i64 %add0, i64 *%ptr
+  store volatile i64 %add1, i64 *%ptr
+  store volatile i64 %add3, i64 *%ptr
+  store volatile i64 %add4, i64 *%ptr
+  store volatile i64 %add5, i64 *%ptr
+  store volatile i64 %add6, i64 *%ptr
+  store volatile i64 %add7, i64 *%ptr
+  store volatile i64 %add8, i64 *%ptr
+  store volatile i64 %add9, i64 *%ptr
+  store volatile i64 %add10, i64 *%ptr
+  store volatile i64 %add11, i64 *%ptr
+  store volatile i64 %add12, i64 *%ptr
+  store volatile i64 %add13, i64 *%ptr
+  %final = getelementptr i64 *%ptr, i64 1
+  store volatile i64 %add14, i64 *%final
+  ret void
+}
+
+; Like f1, but requires one fewer GPR.  We allocate the call-saved GPRs
+; from %r14 down, so that the STMG/LMG sequences aren't any longer than
+; they need to be; here %r6 is the register left unused and unsaved.
+define void @f2(i64 *%ptr) {
+; CHECK: f2:
+; CHECK: stmg %r7, %r15, 56(%r15)
+; CHECK-NOT: %r15
+; CHECK: .cfi_offset %r7, -104
+; CHECK: .cfi_offset %r8, -96
+; CHECK: .cfi_offset %r9, -88
+; CHECK: .cfi_offset %r10, -80
+; CHECK: .cfi_offset %r11, -72
+; CHECK: .cfi_offset %r12, -64
+; CHECK: .cfi_offset %r13, -56
+; CHECK: .cfi_offset %r14, -48
+; CHECK: .cfi_offset %r15, -40
+; ...main function body...
+; CHECK-NOT: %r15
+; CHECK-NOT: %r6
+; CHECK: stg {{.*}}, 8(%r2)
+; CHECK: lmg %r7, %r15, 56(%r15)
+; CHECK: br %r14
+  %l0 = load volatile i64 *%ptr
+  %l1 = load volatile i64 *%ptr
+  %l3 = load volatile i64 *%ptr
+  %l4 = load volatile i64 *%ptr
+  %l5 = load volatile i64 *%ptr
+  %l7 = load volatile i64 *%ptr
+  %l8 = load volatile i64 *%ptr
+  %l9 = load volatile i64 *%ptr
+  %l10 = load volatile i64 *%ptr
+  %l11 = load volatile i64 *%ptr
+  %l12 = load volatile i64 *%ptr
+  %l13 = load volatile i64 *%ptr
+  %l14 = load volatile i64 *%ptr
+  %add0 = add i64 %l0, %l0
+  %add1 = add i64 %l1, %add0
+  %add3 = add i64 %l3, %add1
+  %add4 = add i64 %l4, %add3
+  %add5 = add i64 %l5, %add4
+  %add7 = add i64 %l7, %add5
+  %add8 = add i64 %l8, %add7
+  %add9 = add i64 %l9, %add8
+  %add10 = add i64 %l10, %add9
+  %add11 = add i64 %l11, %add10
+  %add12 = add i64 %l12, %add11
+  %add13 = add i64 %l13, %add12
+  %add14 = add i64 %l14, %add13
+  store volatile i64 %add0, i64 *%ptr
+  store volatile i64 %add1, i64 *%ptr
+  store volatile i64 %add3, i64 *%ptr
+  store volatile i64 %add4, i64 *%ptr
+  store volatile i64 %add5, i64 *%ptr
+  store volatile i64 %add7, i64 *%ptr
+  store volatile i64 %add8, i64 *%ptr
+  store volatile i64 %add9, i64 *%ptr
+  store volatile i64 %add10, i64 *%ptr
+  store volatile i64 %add11, i64 *%ptr
+  store volatile i64 %add12, i64 *%ptr
+  store volatile i64 %add13, i64 *%ptr
+  %final = getelementptr i64 *%ptr, i64 1
+  store volatile i64 %add14, i64 *%final
+  ret void
+}
+
+; Like f1, but needs only one call-saved GPR, %r14; STMG still covers %r15.
+define void @f3(i64 *%ptr) {
+; CHECK: f3:
+; CHECK: stmg %r14, %r15, 112(%r15)
+; CHECK-NOT: %r15
+; CHECK: .cfi_offset %r14, -48
+; CHECK: .cfi_offset %r15, -40
+; ...main function body...
+; CHECK-NOT: %r15
+; CHECK-NOT: %r6
+; CHECK-NOT: %r7
+; CHECK-NOT: %r8
+; CHECK-NOT: %r9
+; CHECK-NOT: %r10
+; CHECK-NOT: %r11
+; CHECK-NOT: %r12
+; CHECK-NOT: %r13
+; CHECK: stg {{.*}}, 8(%r2)
+; CHECK: lmg %r14, %r15, 112(%r15)
+; CHECK: br %r14
+  %l0 = load volatile i64 *%ptr
+  %l1 = load volatile i64 *%ptr
+  %l3 = load volatile i64 *%ptr
+  %l4 = load volatile i64 *%ptr
+  %l5 = load volatile i64 *%ptr
+  %l14 = load volatile i64 *%ptr
+  %add0 = add i64 %l0, %l0
+  %add1 = add i64 %l1, %add0
+  %add3 = add i64 %l3, %add1
+  %add4 = add i64 %l4, %add3
+  %add5 = add i64 %l5, %add4
+  %add14 = add i64 %l14, %add5
+  store volatile i64 %add0, i64 *%ptr
+  store volatile i64 %add1, i64 *%ptr
+  store volatile i64 %add3, i64 *%ptr
+  store volatile i64 %add4, i64 *%ptr
+  store volatile i64 %add5, i64 *%ptr
+  %final = getelementptr i64 *%ptr, i64 1
+  store volatile i64 %add14, i64 *%final
+  ret void
+}
+
+; This function should use all call-clobbered GPRs (%r0-%r5, %r2 holding %ptr)
+; but no call-saved ones.  It shouldn't need to touch the stack at all.
+define void @f4(i64 *%ptr) {
+; CHECK: f4:
+; CHECK-NOT: %r15
+; CHECK-NOT: %r6
+; CHECK-NOT: %r7
+; CHECK-NOT: %r8
+; CHECK-NOT: %r9
+; CHECK-NOT: %r10
+; CHECK-NOT: %r11
+; CHECK-NOT: %r12
+; CHECK-NOT: %r13
+; CHECK: br %r14
+  %l0 = load volatile i64 *%ptr
+  %l1 = load volatile i64 *%ptr
+  %l3 = load volatile i64 *%ptr
+  %l4 = load volatile i64 *%ptr
+  %l5 = load volatile i64 *%ptr
+  %add0 = add i64 %l0, %l0
+  %add1 = add i64 %l1, %add0
+  %add3 = add i64 %l3, %add1
+  %add4 = add i64 %l4, %add3
+  %add5 = add i64 %l5, %add4
+  store volatile i64 %add0, i64 *%ptr
+  store volatile i64 %add1, i64 *%ptr
+  store volatile i64 %add3, i64 *%ptr
+  store volatile i64 %add4, i64 *%ptr
+  %final = getelementptr i64 *%ptr, i64 1
+  store volatile i64 %add5, i64 *%final
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/frame-07.ll b/test/CodeGen/SystemZ/frame-07.ll
new file mode 100644
index 0000000..cfe9f86
--- /dev/null
+++ b/test/CodeGen/SystemZ/frame-07.ll
@@ -0,0 +1,249 @@
+; Test the saving and restoring of FPRs in large frames.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck -check-prefix=CHECK-NOFP %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-fp-elim | FileCheck -check-prefix=CHECK-FP %s
+
+; Test a frame size that requires some FPRs to be saved and loaded using
+; the 20-bit STDY and LDY while others can use the 12-bit STD and LD.
+; The frame is big enough to require an emergency spill slot at 160(%r15),
+; as well as the 8 FPR save slots.  Get a frame of size 4128 by allocating
+; (4128 - 168 - 8 * 8) / 8 = 487 extra doublewords.
+define void @f1(double *%ptr, i64 %x) {
+; CHECK-NOFP: f1:
+; CHECK-NOFP: aghi %r15, -4128
+; CHECK-NOFP: .cfi_def_cfa_offset 4288
+; CHECK-NOFP: stdy %f8, 4120(%r15)
+; CHECK-NOFP: stdy %f9, 4112(%r15)
+; CHECK-NOFP: stdy %f10, 4104(%r15)
+; CHECK-NOFP: stdy %f11, 4096(%r15)
+; CHECK-NOFP: std %f12, 4088(%r15)
+; CHECK-NOFP: std %f13, 4080(%r15)
+; CHECK-NOFP: std %f14, 4072(%r15)
+; CHECK-NOFP: std %f15, 4064(%r15)
+; CHECK-NOFP: .cfi_offset %f8, -168
+; CHECK-NOFP: .cfi_offset %f9, -176
+; CHECK-NOFP: .cfi_offset %f10, -184
+; CHECK-NOFP: .cfi_offset %f11, -192
+; CHECK-NOFP: .cfi_offset %f12, -200
+; CHECK-NOFP: .cfi_offset %f13, -208
+; CHECK-NOFP: .cfi_offset %f14, -216
+; CHECK-NOFP: .cfi_offset %f15, -224
+; ...main function body...
+; CHECK-NOFP: ldy %f8, 4120(%r15)
+; CHECK-NOFP: ldy %f9, 4112(%r15)
+; CHECK-NOFP: ldy %f10, 4104(%r15)
+; CHECK-NOFP: ldy %f11, 4096(%r15)
+; CHECK-NOFP: ld %f12, 4088(%r15)
+; CHECK-NOFP: ld %f13, 4080(%r15)
+; CHECK-NOFP: ld %f14, 4072(%r15)
+; CHECK-NOFP: ld %f15, 4064(%r15)
+; CHECK-NOFP: aghi %r15, 4128
+; CHECK-NOFP: br %r14
+;
+; CHECK-FP: f1:
+; CHECK-FP: stmg %r11, %r15, 88(%r15)
+; CHECK-FP: aghi %r15, -4128
+; CHECK-FP: .cfi_def_cfa_offset 4288
+; CHECK-FP: lgr %r11, %r15
+; CHECK-FP: .cfi_def_cfa_register %r11
+; CHECK-FP: stdy %f8, 4120(%r11)
+; CHECK-FP: stdy %f9, 4112(%r11)
+; CHECK-FP: stdy %f10, 4104(%r11)
+; CHECK-FP: stdy %f11, 4096(%r11)
+; CHECK-FP: std %f12, 4088(%r11)
+; CHECK-FP: std %f13, 4080(%r11)
+; CHECK-FP: std %f14, 4072(%r11)
+; CHECK-FP: std %f15, 4064(%r11)
+; ...main function body...
+; CHECK-FP: ldy %f8, 4120(%r11)
+; CHECK-FP: ldy %f9, 4112(%r11)
+; CHECK-FP: ldy %f10, 4104(%r11)
+; CHECK-FP: ldy %f11, 4096(%r11)
+; CHECK-FP: ld %f12, 4088(%r11)
+; CHECK-FP: ld %f13, 4080(%r11)
+; CHECK-FP: ld %f14, 4072(%r11)
+; CHECK-FP: ld %f15, 4064(%r11)
+; CHECK-FP: lmg %r11, %r15, 4216(%r11)
+; CHECK-FP: br %r14
+  %y = alloca [487 x i64], align 8
+  %elem = getelementptr inbounds [487 x i64]* %y, i64 0, i64 0
+  store volatile i64 %x, i64* %elem
+  %l0 = load volatile double *%ptr
+  %l1 = load volatile double *%ptr
+  %l2 = load volatile double *%ptr
+  %l3 = load volatile double *%ptr
+  %l4 = load volatile double *%ptr
+  %l5 = load volatile double *%ptr
+  %l6 = load volatile double *%ptr
+  %l7 = load volatile double *%ptr
+  %l8 = load volatile double *%ptr
+  %l9 = load volatile double *%ptr
+  %l10 = load volatile double *%ptr
+  %l11 = load volatile double *%ptr
+  %l12 = load volatile double *%ptr
+  %l13 = load volatile double *%ptr
+  %l14 = load volatile double *%ptr
+  %l15 = load volatile double *%ptr
+  %add0 = fadd double %l0, %l0
+  %add1 = fadd double %l1, %add0
+  %add2 = fadd double %l2, %add1
+  %add3 = fadd double %l3, %add2
+  %add4 = fadd double %l4, %add3
+  %add5 = fadd double %l5, %add4
+  %add6 = fadd double %l6, %add5
+  %add7 = fadd double %l7, %add6
+  %add8 = fadd double %l8, %add7
+  %add9 = fadd double %l9, %add8
+  %add10 = fadd double %l10, %add9
+  %add11 = fadd double %l11, %add10
+  %add12 = fadd double %l12, %add11
+  %add13 = fadd double %l13, %add12
+  %add14 = fadd double %l14, %add13
+  %add15 = fadd double %l15, %add14
+  store volatile double %add0, double *%ptr
+  store volatile double %add1, double *%ptr
+  store volatile double %add2, double *%ptr
+  store volatile double %add3, double *%ptr
+  store volatile double %add4, double *%ptr
+  store volatile double %add5, double *%ptr
+  store volatile double %add6, double *%ptr
+  store volatile double %add7, double *%ptr
+  store volatile double %add8, double *%ptr
+  store volatile double %add9, double *%ptr
+  store volatile double %add10, double *%ptr
+  store volatile double %add11, double *%ptr
+  store volatile double %add12, double *%ptr
+  store volatile double %add13, double *%ptr
+  store volatile double %add14, double *%ptr
+  store volatile double %add15, double *%ptr
+  ret void
+}
+
+; Test a frame size that requires some FPRs to be saved and loaded using
+; an indexed STD and LD while others can use the 20-bit STDY and LDY.
+; The index can be any call-clobbered GPR except %r0.
+;
+; Don't require the accesses to share the same LLILH; that would be a
+; good optimisation but is really a different test.
+;
+; As above, get a frame of size 524320 by allocating
+; (524320 - 168 - 8 * 8) / 8 = 65511 extra doublewords.
+define void @f2(double *%ptr, i64 %x) {
+; CHECK-NOFP: f2:
+; CHECK-NOFP: agfi %r15, -524320
+; CHECK-NOFP: .cfi_def_cfa_offset 524480
+; CHECK-NOFP: llilh [[INDEX:%r[1-5]]], 8
+; CHECK-NOFP: std %f8, 24([[INDEX]],%r15)
+; CHECK-NOFP: std %f9, 16({{%r[1-5]}},%r15)
+; CHECK-NOFP: std %f10, 8({{%r[1-5]}},%r15)
+; CHECK-NOFP: std %f11, 0({{%r[1-5]}},%r15)
+; CHECK-NOFP: stdy %f12, 524280(%r15)
+; CHECK-NOFP: stdy %f13, 524272(%r15)
+; CHECK-NOFP: stdy %f14, 524264(%r15)
+; CHECK-NOFP: stdy %f15, 524256(%r15)
+; CHECK-NOFP: .cfi_offset %f8, -168
+; CHECK-NOFP: .cfi_offset %f9, -176
+; CHECK-NOFP: .cfi_offset %f10, -184
+; CHECK-NOFP: .cfi_offset %f11, -192
+; CHECK-NOFP: .cfi_offset %f12, -200
+; CHECK-NOFP: .cfi_offset %f13, -208
+; CHECK-NOFP: .cfi_offset %f14, -216
+; CHECK-NOFP: .cfi_offset %f15, -224
+; ...main function body...
+; CHECK-NOFP: ld %f8, 24({{%r[1-5]}},%r15)
+; CHECK-NOFP: ld %f9, 16({{%r[1-5]}},%r15)
+; CHECK-NOFP: ld %f10, 8({{%r[1-5]}},%r15)
+; CHECK-NOFP: ld %f11, 0({{%r[1-5]}},%r15)
+; CHECK-NOFP: ldy %f12, 524280(%r15)
+; CHECK-NOFP: ldy %f13, 524272(%r15)
+; CHECK-NOFP: ldy %f14, 524264(%r15)
+; CHECK-NOFP: ldy %f15, 524256(%r15)
+; CHECK-NOFP: agfi %r15, 524320
+; CHECK-NOFP: br %r14
+;
+; CHECK-FP: f2:
+; CHECK-FP: stmg %r11, %r15, 88(%r15)
+; CHECK-FP: agfi %r15, -524320
+; CHECK-FP: .cfi_def_cfa_offset 524480
+; CHECK-FP: llilh [[INDEX:%r[1-5]]], 8
+; CHECK-FP: std %f8, 24([[INDEX]],%r11)
+; CHECK-FP: std %f9, 16({{%r[1-5]}},%r11)
+; CHECK-FP: std %f10, 8({{%r[1-5]}},%r11)
+; CHECK-FP: std %f11, 0({{%r[1-5]}},%r11)
+; CHECK-FP: stdy %f12, 524280(%r11)
+; CHECK-FP: stdy %f13, 524272(%r11)
+; CHECK-FP: stdy %f14, 524264(%r11)
+; CHECK-FP: stdy %f15, 524256(%r11)
+; CHECK-FP: .cfi_offset %f8, -168
+; CHECK-FP: .cfi_offset %f9, -176
+; CHECK-FP: .cfi_offset %f10, -184
+; CHECK-FP: .cfi_offset %f11, -192
+; CHECK-FP: .cfi_offset %f12, -200
+; CHECK-FP: .cfi_offset %f13, -208
+; CHECK-FP: .cfi_offset %f14, -216
+; CHECK-FP: .cfi_offset %f15, -224
+; ...main function body...
+; CHECK-FP: ld %f8, 24({{%r[1-5]}},%r11)
+; CHECK-FP: ld %f9, 16({{%r[1-5]}},%r11)
+; CHECK-FP: ld %f10, 8({{%r[1-5]}},%r11)
+; CHECK-FP: ld %f11, 0({{%r[1-5]}},%r11)
+; CHECK-FP: ldy %f12, 524280(%r11)
+; CHECK-FP: ldy %f13, 524272(%r11)
+; CHECK-FP: ldy %f14, 524264(%r11)
+; CHECK-FP: ldy %f15, 524256(%r11)
+; CHECK-FP: aghi %r11, 128
+; CHECK-FP: lmg %r11, %r15, 524280(%r11)
+; CHECK-FP: br %r14
+  %y = alloca [65511 x i64], align 8
+  %elem = getelementptr inbounds [65511 x i64]* %y, i64 0, i64 0
+  store volatile i64 %x, i64* %elem
+  %l0 = load volatile double *%ptr
+  %l1 = load volatile double *%ptr
+  %l2 = load volatile double *%ptr
+  %l3 = load volatile double *%ptr
+  %l4 = load volatile double *%ptr
+  %l5 = load volatile double *%ptr
+  %l6 = load volatile double *%ptr
+  %l7 = load volatile double *%ptr
+  %l8 = load volatile double *%ptr
+  %l9 = load volatile double *%ptr
+  %l10 = load volatile double *%ptr
+  %l11 = load volatile double *%ptr
+  %l12 = load volatile double *%ptr
+  %l13 = load volatile double *%ptr
+  %l14 = load volatile double *%ptr
+  %l15 = load volatile double *%ptr
+  %add0 = fadd double %l0, %l0
+  %add1 = fadd double %l1, %add0
+  %add2 = fadd double %l2, %add1
+  %add3 = fadd double %l3, %add2
+  %add4 = fadd double %l4, %add3
+  %add5 = fadd double %l5, %add4
+  %add6 = fadd double %l6, %add5
+  %add7 = fadd double %l7, %add6
+  %add8 = fadd double %l8, %add7
+  %add9 = fadd double %l9, %add8
+  %add10 = fadd double %l10, %add9
+  %add11 = fadd double %l11, %add10
+  %add12 = fadd double %l12, %add11
+  %add13 = fadd double %l13, %add12
+  %add14 = fadd double %l14, %add13
+  %add15 = fadd double %l15, %add14
+  store volatile double %add0, double *%ptr
+  store volatile double %add1, double *%ptr
+  store volatile double %add2, double *%ptr
+  store volatile double %add3, double *%ptr
+  store volatile double %add4, double *%ptr
+  store volatile double %add5, double *%ptr
+  store volatile double %add6, double *%ptr
+  store volatile double %add7, double *%ptr
+  store volatile double %add8, double *%ptr
+  store volatile double %add9, double *%ptr
+  store volatile double %add10, double *%ptr
+  store volatile double %add11, double *%ptr
+  store volatile double %add12, double *%ptr
+  store volatile double %add13, double *%ptr
+  store volatile double %add14, double *%ptr
+  store volatile double %add15, double *%ptr
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/frame-08.ll b/test/CodeGen/SystemZ/frame-08.ll
new file mode 100644
index 0000000..6cf6378
--- /dev/null
+++ b/test/CodeGen/SystemZ/frame-08.ll
@@ -0,0 +1,277 @@
+; Test the saving and restoring of GPRs in large frames.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; This is the largest frame size that can use a plain LMG for %r6 and above.
+; It is big enough to require an emergency spill slot at 160(%r15),
+; so get a frame of size 524232 by allocating (524232 - 168) / 8 = 65508
+; extra doublewords.
+define void @f1(i32 *%ptr, i64 %x) {
+; CHECK: f1:
+; CHECK: stmg %r6, %r15, 48(%r15)
+; CHECK: .cfi_offset %r6, -112
+; CHECK: .cfi_offset %r7, -104
+; CHECK: .cfi_offset %r8, -96
+; CHECK: .cfi_offset %r9, -88
+; CHECK: .cfi_offset %r10, -80
+; CHECK: .cfi_offset %r11, -72
+; CHECK: .cfi_offset %r12, -64
+; CHECK: .cfi_offset %r13, -56
+; CHECK: .cfi_offset %r14, -48
+; CHECK: .cfi_offset %r15, -40
+; CHECK: agfi %r15, -524232
+; CHECK: .cfi_def_cfa_offset 524392
+; ...main function body...
+; CHECK-NOT: ag
+; CHECK: lmg %r6, %r15, 524280(%r15)
+; CHECK: br %r14
+  %l0 = load volatile i32 *%ptr
+  %l1 = load volatile i32 *%ptr
+  %l4 = load volatile i32 *%ptr
+  %l5 = load volatile i32 *%ptr
+  %l6 = load volatile i32 *%ptr
+  %l7 = load volatile i32 *%ptr
+  %l8 = load volatile i32 *%ptr
+  %l9 = load volatile i32 *%ptr
+  %l10 = load volatile i32 *%ptr
+  %l11 = load volatile i32 *%ptr
+  %l12 = load volatile i32 *%ptr
+  %l13 = load volatile i32 *%ptr
+  %l14 = load volatile i32 *%ptr
+  %add0 = add i32 %l0, %l0
+  %add1 = add i32 %l1, %add0
+  %add4 = add i32 %l4, %add1
+  %add5 = add i32 %l5, %add4
+  %add6 = add i32 %l6, %add5
+  %add7 = add i32 %l7, %add6
+  %add8 = add i32 %l8, %add7
+  %add9 = add i32 %l9, %add8
+  %add10 = add i32 %l10, %add9
+  %add11 = add i32 %l11, %add10
+  %add12 = add i32 %l12, %add11
+  %add13 = add i32 %l13, %add12
+  %add14 = add i32 %l14, %add13
+  store volatile i32 %add0, i32 *%ptr
+  store volatile i32 %add1, i32 *%ptr
+  store volatile i32 %add4, i32 *%ptr
+  store volatile i32 %add5, i32 *%ptr
+  store volatile i32 %add6, i32 *%ptr
+  store volatile i32 %add7, i32 *%ptr
+  store volatile i32 %add8, i32 *%ptr
+  store volatile i32 %add9, i32 *%ptr
+  store volatile i32 %add10, i32 *%ptr
+  store volatile i32 %add11, i32 *%ptr
+  store volatile i32 %add12, i32 *%ptr
+  store volatile i32 %add13, i32 *%ptr
+  store volatile i32 %add14, i32 *%ptr
+  %y = alloca [65508 x i64], align 8
+  %entry = getelementptr inbounds [65508 x i64]* %y, i64 0, i64 0
+  store volatile i64 %x, i64* %entry
+  ret void
+}
+
+; This is the largest frame size that can use a plain LMG for %r14 and above.
+; It is big enough to require an emergency spill slot at 160(%r15),
+; so get a frame of size 524168 by allocating (524168 - 168) / 8 = 65500
+; extra doublewords.
+define void @f2(i32 *%ptr, i64 %x) {
+; CHECK: f2:
+; CHECK: stmg %r14, %r15, 112(%r15)
+; CHECK: .cfi_offset %r14, -48
+; CHECK: .cfi_offset %r15, -40
+; CHECK: agfi %r15, -524168
+; CHECK: .cfi_def_cfa_offset 524328
+; ...main function body...
+; CHECK-NOT: ag
+; CHECK: lmg %r14, %r15, 524280(%r15)
+; CHECK: br %r14
+  %l0 = load volatile i32 *%ptr
+  %l1 = load volatile i32 *%ptr
+  %l4 = load volatile i32 *%ptr
+  %l5 = load volatile i32 *%ptr
+  %l14 = load volatile i32 *%ptr
+  %add0 = add i32 %l0, %l0
+  %add1 = add i32 %l1, %add0
+  %add4 = add i32 %l4, %add1
+  %add5 = add i32 %l5, %add4
+  %add14 = add i32 %l14, %add5
+  store volatile i32 %add0, i32 *%ptr
+  store volatile i32 %add1, i32 *%ptr
+  store volatile i32 %add4, i32 *%ptr
+  store volatile i32 %add5, i32 *%ptr
+  store volatile i32 %add14, i32 *%ptr
+  %y = alloca [65500 x i64], align 8
+  %entry = getelementptr inbounds [65500 x i64]* %y, i64 0, i64 0
+  store volatile i64 %x, i64* %entry
+  ret void
+}
+
+; Like f1 but with a frame that is 8 bytes bigger.  This is the smallest
+; frame size that needs two instructions to perform the final LMG for
+; %r6 and above.
+define void @f3(i32 *%ptr, i64 %x) {
+; CHECK: f3:
+; CHECK: stmg %r6, %r15, 48(%r15)
+; CHECK: .cfi_offset %r6, -112
+; CHECK: .cfi_offset %r7, -104
+; CHECK: .cfi_offset %r8, -96
+; CHECK: .cfi_offset %r9, -88
+; CHECK: .cfi_offset %r10, -80
+; CHECK: .cfi_offset %r11, -72
+; CHECK: .cfi_offset %r12, -64
+; CHECK: .cfi_offset %r13, -56
+; CHECK: .cfi_offset %r14, -48
+; CHECK: .cfi_offset %r15, -40
+; CHECK: agfi %r15, -524240
+; CHECK: .cfi_def_cfa_offset 524400
+; ...main function body...
+; CHECK: aghi %r15, 8
+; CHECK: lmg %r6, %r15, 524280(%r15)
+; CHECK: br %r14
+  %l0 = load volatile i32 *%ptr
+  %l1 = load volatile i32 *%ptr
+  %l4 = load volatile i32 *%ptr
+  %l5 = load volatile i32 *%ptr
+  %l6 = load volatile i32 *%ptr
+  %l7 = load volatile i32 *%ptr
+  %l8 = load volatile i32 *%ptr
+  %l9 = load volatile i32 *%ptr
+  %l10 = load volatile i32 *%ptr
+  %l11 = load volatile i32 *%ptr
+  %l12 = load volatile i32 *%ptr
+  %l13 = load volatile i32 *%ptr
+  %l14 = load volatile i32 *%ptr
+  %add0 = add i32 %l0, %l0
+  %add1 = add i32 %l1, %add0
+  %add4 = add i32 %l4, %add1
+  %add5 = add i32 %l5, %add4
+  %add6 = add i32 %l6, %add5
+  %add7 = add i32 %l7, %add6
+  %add8 = add i32 %l8, %add7
+  %add9 = add i32 %l9, %add8
+  %add10 = add i32 %l10, %add9
+  %add11 = add i32 %l11, %add10
+  %add12 = add i32 %l12, %add11
+  %add13 = add i32 %l13, %add12
+  %add14 = add i32 %l14, %add13
+  store volatile i32 %add0, i32 *%ptr
+  store volatile i32 %add1, i32 *%ptr
+  store volatile i32 %add4, i32 *%ptr
+  store volatile i32 %add5, i32 *%ptr
+  store volatile i32 %add6, i32 *%ptr
+  store volatile i32 %add7, i32 *%ptr
+  store volatile i32 %add8, i32 *%ptr
+  store volatile i32 %add9, i32 *%ptr
+  store volatile i32 %add10, i32 *%ptr
+  store volatile i32 %add11, i32 *%ptr
+  store volatile i32 %add12, i32 *%ptr
+  store volatile i32 %add13, i32 *%ptr
+  store volatile i32 %add14, i32 *%ptr
+  %y = alloca [65509 x i64], align 8
+  %entry = getelementptr inbounds [65509 x i64]* %y, i64 0, i64 0
+  store volatile i64 %x, i64* %entry
+  ret void
+}
+
+; Like f2 but with a frame that is 8 bytes bigger.  This is the smallest
+; frame size that needs two instructions to perform the final LMG for
+; %r14 and %r15.
+define void @f4(i32 *%ptr, i64 %x) {
+; CHECK: f4:
+; CHECK: stmg %r14, %r15, 112(%r15)
+; CHECK: .cfi_offset %r14, -48
+; CHECK: .cfi_offset %r15, -40
+; CHECK: agfi %r15, -524176
+; CHECK: .cfi_def_cfa_offset 524336
+; ...main function body...
+; CHECK: aghi %r15, 8
+; CHECK: lmg %r14, %r15, 524280(%r15)
+; CHECK: br %r14
+  %l0 = load volatile i32 *%ptr
+  %l1 = load volatile i32 *%ptr
+  %l4 = load volatile i32 *%ptr
+  %l5 = load volatile i32 *%ptr
+  %l14 = load volatile i32 *%ptr
+  %add0 = add i32 %l0, %l0
+  %add1 = add i32 %l1, %add0
+  %add4 = add i32 %l4, %add1
+  %add5 = add i32 %l5, %add4
+  %add14 = add i32 %l14, %add5
+  store volatile i32 %add0, i32 *%ptr
+  store volatile i32 %add1, i32 *%ptr
+  store volatile i32 %add4, i32 *%ptr
+  store volatile i32 %add5, i32 *%ptr
+  store volatile i32 %add14, i32 *%ptr
+  %y = alloca [65501 x i64], align 8
+  %entry = getelementptr inbounds [65501 x i64]* %y, i64 0, i64 0
+  store volatile i64 %x, i64* %entry
+  ret void
+}
+
+; This is the largest frame size for which the preparatory increment for
+; "lmg %r14, %r15, ..." can be done using AGHI.
+define void @f5(i32 *%ptr, i64 %x) {
+; CHECK: f5:
+; CHECK: stmg %r14, %r15, 112(%r15)
+; CHECK: .cfi_offset %r14, -48
+; CHECK: .cfi_offset %r15, -40
+; CHECK: agfi %r15, -556928
+; CHECK: .cfi_def_cfa_offset 557088
+; ...main function body...
+; CHECK: aghi %r15, 32760
+; CHECK: lmg %r14, %r15, 524280(%r15)
+; CHECK: br %r14
+  %l0 = load volatile i32 *%ptr
+  %l1 = load volatile i32 *%ptr
+  %l4 = load volatile i32 *%ptr
+  %l5 = load volatile i32 *%ptr
+  %l14 = load volatile i32 *%ptr
+  %add0 = add i32 %l0, %l0
+  %add1 = add i32 %l1, %add0
+  %add4 = add i32 %l4, %add1
+  %add5 = add i32 %l5, %add4
+  %add14 = add i32 %l14, %add5
+  store volatile i32 %add0, i32 *%ptr
+  store volatile i32 %add1, i32 *%ptr
+  store volatile i32 %add4, i32 *%ptr
+  store volatile i32 %add5, i32 *%ptr
+  store volatile i32 %add14, i32 *%ptr
+  %y = alloca [69595 x i64], align 8
+  %entry = getelementptr inbounds [69595 x i64]* %y, i64 0, i64 0
+  store volatile i64 %x, i64* %entry
+  ret void
+}
+
+; This is the smallest frame size for which the preparatory increment for
+; "lmg %r14, %r15, ..." needs to be done using AGFI.
+define void @f6(i32 *%ptr, i64 %x) {
+; CHECK: f6:
+; CHECK: stmg %r14, %r15, 112(%r15)
+; CHECK: .cfi_offset %r14, -48
+; CHECK: .cfi_offset %r15, -40
+; CHECK: agfi %r15, -556936
+; CHECK: .cfi_def_cfa_offset 557096
+; ...main function body...
+; CHECK: agfi %r15, 32768
+; CHECK: lmg %r14, %r15, 524280(%r15)
+; CHECK: br %r14
+  %l0 = load volatile i32 *%ptr
+  %l1 = load volatile i32 *%ptr
+  %l4 = load volatile i32 *%ptr
+  %l5 = load volatile i32 *%ptr
+  %l14 = load volatile i32 *%ptr
+  %add0 = add i32 %l0, %l0
+  %add1 = add i32 %l1, %add0
+  %add4 = add i32 %l4, %add1
+  %add5 = add i32 %l5, %add4
+  %add14 = add i32 %l14, %add5
+  store volatile i32 %add0, i32 *%ptr
+  store volatile i32 %add1, i32 *%ptr
+  store volatile i32 %add4, i32 *%ptr
+  store volatile i32 %add5, i32 *%ptr
+  store volatile i32 %add14, i32 *%ptr
+  %y = alloca [69596 x i64], align 8
+  %entry = getelementptr inbounds [69596 x i64]* %y, i64 0, i64 0
+  store volatile i64 %x, i64* %entry
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/frame-09.ll b/test/CodeGen/SystemZ/frame-09.ll
new file mode 100644
index 0000000..eac6336
--- /dev/null
+++ b/test/CodeGen/SystemZ/frame-09.ll
@@ -0,0 +1,153 @@
+; Test the handling of the frame pointer (%r11).
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-fp-elim | FileCheck %s
+
+; We should always initialise %r11 when FP elimination is disabled.
+; We don't need to allocate any more than the caller-provided 160-byte
+; area though.
+define i32 @f1(i32 %x) {
+; CHECK: f1:
+; CHECK: stmg %r11, %r15, 88(%r15)
+; CHECK: .cfi_offset %r11, -72
+; CHECK: .cfi_offset %r15, -40
+; CHECK-NOT: ag
+; CHECK: lgr %r11, %r15
+; CHECK: .cfi_def_cfa_register %r11
+; CHECK: lmg %r11, %r15, 88(%r11)
+; CHECK: br %r14
+  %y = add i32 %x, 1
+  ret i32 %y
+}
+
+; Make sure that frame accesses after the initial allocation are relative
+; to %r11 rather than %r15.
+define void @f2(i64 %x) {
+; CHECK: f2:
+; CHECK: stmg %r11, %r15, 88(%r15)
+; CHECK: .cfi_offset %r11, -72
+; CHECK: .cfi_offset %r15, -40
+; CHECK: aghi %r15, -168
+; CHECK: .cfi_def_cfa_offset 328
+; CHECK: lgr %r11, %r15
+; CHECK: .cfi_def_cfa_register %r11
+; CHECK: stg %r2, 160(%r11)
+; CHECK: lmg %r11, %r15, 256(%r11)
+; CHECK: br %r14
+  %y = alloca i64, align 8
+  store volatile i64 %x, i64* %y
+  ret void
+}
+
+; This function should require all GPRs but no other spill slots.
+; It shouldn't need to allocate its own frame.
+define void @f3(i32 *%ptr) {
+; CHECK: f3:
+; CHECK: stmg %r6, %r15, 48(%r15)
+; CHECK-NOT: %r15
+; CHECK-NOT: %r11
+; CHECK: .cfi_offset %r6, -112
+; CHECK: .cfi_offset %r7, -104
+; CHECK: .cfi_offset %r8, -96
+; CHECK: .cfi_offset %r9, -88
+; CHECK: .cfi_offset %r10, -80
+; CHECK: .cfi_offset %r11, -72
+; CHECK: .cfi_offset %r12, -64
+; CHECK: .cfi_offset %r13, -56
+; CHECK: .cfi_offset %r14, -48
+; CHECK: .cfi_offset %r15, -40
+; CHECK-NOT: ag
+; CHECK: lgr %r11, %r15
+; CHECK: .cfi_def_cfa_register %r11
+; ...main function body...
+; CHECK-NOT: %r15
+; CHECK-NOT: %r11
+; CHECK: st {{.*}}, 4(%r2)
+; CHECK: lmg %r6, %r15, 48(%r11)
+; CHECK: br %r14
+  %l0 = load volatile i32 *%ptr
+  %l1 = load volatile i32 *%ptr
+  %l3 = load volatile i32 *%ptr
+  %l4 = load volatile i32 *%ptr
+  %l5 = load volatile i32 *%ptr
+  %l6 = load volatile i32 *%ptr
+  %l7 = load volatile i32 *%ptr
+  %l8 = load volatile i32 *%ptr
+  %l9 = load volatile i32 *%ptr
+  %l10 = load volatile i32 *%ptr
+  %l12 = load volatile i32 *%ptr
+  %l13 = load volatile i32 *%ptr
+  %l14 = load volatile i32 *%ptr
+  %add0 = add i32 %l0, %l0
+  %add1 = add i32 %l1, %add0
+  %add3 = add i32 %l3, %add1
+  %add4 = add i32 %l4, %add3
+  %add5 = add i32 %l5, %add4
+  %add6 = add i32 %l6, %add5
+  %add7 = add i32 %l7, %add6
+  %add8 = add i32 %l8, %add7
+  %add9 = add i32 %l9, %add8
+  %add10 = add i32 %l10, %add9
+  %add12 = add i32 %l12, %add10
+  %add13 = add i32 %l13, %add12
+  %add14 = add i32 %l14, %add13
+  store volatile i32 %add0, i32 *%ptr
+  store volatile i32 %add1, i32 *%ptr
+  store volatile i32 %add3, i32 *%ptr
+  store volatile i32 %add4, i32 *%ptr
+  store volatile i32 %add5, i32 *%ptr
+  store volatile i32 %add6, i32 *%ptr
+  store volatile i32 %add7, i32 *%ptr
+  store volatile i32 %add8, i32 *%ptr
+  store volatile i32 %add9, i32 *%ptr
+  store volatile i32 %add10, i32 *%ptr
+  store volatile i32 %add12, i32 *%ptr
+  store volatile i32 %add13, i32 *%ptr
+  %final = getelementptr i32 *%ptr, i32 1
+  store volatile i32 %add14, i32 *%final
+  ret void
+}
+
+; The largest frame for which the LMG is in range.  This frame has an
+; emergency spill slot at 160(%r11), so create a frame of size 524192
+; by allocating (524192 - 168) / 8 = 65503 doublewords.
+define void @f4(i64 %x) {
+; CHECK: f4:
+; CHECK: stmg %r11, %r15, 88(%r15)
+; CHECK: .cfi_offset %r11, -72
+; CHECK: .cfi_offset %r15, -40
+; CHECK: agfi %r15, -524192
+; CHECK: .cfi_def_cfa_offset 524352
+; CHECK: lgr %r11, %r15
+; CHECK: .cfi_def_cfa_register %r11
+; CHECK: stg %r2, 168(%r11)
+; CHECK-NOT: ag
+; CHECK: lmg %r11, %r15, 524280(%r11)
+; CHECK: br %r14
+  %y = alloca [65503 x i64], align 8
+  %ptr = getelementptr inbounds [65503 x i64]* %y, i64 0, i64 0
+  store volatile i64 %x, i64* %ptr
+  ret void
+}
+
+; The next frame size larger than f4.
+define void @f5(i64 %x) {
+; CHECK: f5:
+; CHECK: stmg %r11, %r15, 88(%r15)
+; CHECK: .cfi_offset %r11, -72
+; CHECK: .cfi_offset %r15, -40
+; CHECK: agfi %r15, -524200
+; CHECK: .cfi_def_cfa_offset 524360
+; CHECK: lgr %r11, %r15
+; CHECK: .cfi_def_cfa_register %r11
+; CHECK: stg %r2, 168(%r11)
+; CHECK: aghi %r11, 8
+; CHECK: lmg %r11, %r15, 524280(%r11)
+; CHECK: br %r14
+  %y = alloca [65504 x i64], align 8
+  %ptr = getelementptr inbounds [65504 x i64]* %y, i64 0, i64 0
+  store volatile i64 %x, i64* %ptr
+  ret void
+}
+
+; The tests above establish that %r11 is handled like %r15 for LMG.
+; Rely on the %r15-based tests in frame-08.ll for other cases.
diff --git a/test/CodeGen/SystemZ/frame-10.ll b/test/CodeGen/SystemZ/frame-10.ll
new file mode 100644
index 0000000..399a412
--- /dev/null
+++ b/test/CodeGen/SystemZ/frame-10.ll
@@ -0,0 +1,14 @@
+; Test the stacksave builtin.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare i8 *@llvm.stacksave()
+
+define void @f1(i8 **%dest) {
+; CHECK: f1:
+; CHECK: stg %r15, 0(%r2)
+; CHECK: br %r14
+  %addr = call i8 *@llvm.stacksave()
+  store volatile i8 *%addr, i8 **%dest
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/frame-11.ll b/test/CodeGen/SystemZ/frame-11.ll
new file mode 100644
index 0000000..8422205
--- /dev/null
+++ b/test/CodeGen/SystemZ/frame-11.ll
@@ -0,0 +1,18 @@
+; Test the stackrestore builtin.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare void @llvm.stackrestore(i8 *)
+
+; Since stackrestore overwrites %r15, we should use a frame pointer and tear
+; down the frame based on %r11 rather than %r15.
+define void @f1(i8 *%src) {
+; CHECK: f1:
+; CHECK: stmg %r11, %r15, 88(%r15)
+; CHECK: lgr %r11, %r15
+; CHECK: lgr %r15, %r2
+; CHECK: lmg %r11, %r15, 88(%r11)
+; CHECK: br %r14
+  call void @llvm.stackrestore(i8 *%src)
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/frame-13.ll b/test/CodeGen/SystemZ/frame-13.ll
new file mode 100644
index 0000000..fa6b845
--- /dev/null
+++ b/test/CodeGen/SystemZ/frame-13.ll
@@ -0,0 +1,299 @@
+; Test the handling of base + 12-bit displacement addresses for large frames,
+; in cases where no 20-bit form exists.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck -check-prefix=CHECK-NOFP %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-fp-elim | FileCheck -check-prefix=CHECK-FP %s
+
+; This file tests what happens when a displacement is converted from
+; being relative to the start of a frame object to being relative to
+; the frame itself.  In some cases the test is only possible if two
+; objects are allocated.
+;
+; Rather than rely on a particular order for those objects, the tests
+; instead allocate two objects of the same size and apply the test to
+; both of them.  For consistency, all tests follow this model, even if
+; one object would actually be enough.
+
+; First check the highest in-range offset after conversion, which is 4092
+; for word-addressing instructions like MVHI.
+;
+; The last in-range doubleword offset is 4088.  Since the frame has an
+; emergency spill slot at 160(%r15), the amount that we need to allocate
+; in order to put another object at offset 4088 is (4088 - 168) / 4 = 980
+; words.
+define void @f1() {
+; CHECK-NOFP: f1:
+; CHECK-NOFP: mvhi 4092(%r15), 42
+; CHECK-NOFP: br %r14
+;
+; CHECK-FP: f1:
+; CHECK-FP: mvhi 4092(%r11), 42
+; CHECK-FP: br %r14
+  %region1 = alloca [980 x i32], align 8
+  %region2 = alloca [980 x i32], align 8
+  %ptr1 = getelementptr inbounds [980 x i32]* %region1, i64 0, i64 1
+  %ptr2 = getelementptr inbounds [980 x i32]* %region2, i64 0, i64 1
+  store volatile i32 42, i32 *%ptr1
+  store volatile i32 42, i32 *%ptr2
+  ret void
+}
+
+; Test the first out-of-range offset.  We cannot use an index register here.
+define void @f2() {
+; CHECK-NOFP: f2:
+; CHECK-NOFP: lay %r1, 4096(%r15)
+; CHECK-NOFP: mvhi 0(%r1), 42
+; CHECK-NOFP: br %r14
+;
+; CHECK-FP: f2:
+; CHECK-FP: lay %r1, 4096(%r11)
+; CHECK-FP: mvhi 0(%r1), 42
+; CHECK-FP: br %r14
+  %region1 = alloca [980 x i32], align 8
+  %region2 = alloca [980 x i32], align 8
+  %ptr1 = getelementptr inbounds [980 x i32]* %region1, i64 0, i64 2
+  %ptr2 = getelementptr inbounds [980 x i32]* %region2, i64 0, i64 2
+  store volatile i32 42, i32 *%ptr1
+  store volatile i32 42, i32 *%ptr2
+  ret void
+}
+
+; Test the next offset after that.
+define void @f3() {
+; CHECK-NOFP: f3:
+; CHECK-NOFP: lay %r1, 4096(%r15)
+; CHECK-NOFP: mvhi 4(%r1), 42
+; CHECK-NOFP: br %r14
+;
+; CHECK-FP: f3:
+; CHECK-FP: lay %r1, 4096(%r11)
+; CHECK-FP: mvhi 4(%r1), 42
+; CHECK-FP: br %r14
+  %region1 = alloca [980 x i32], align 8
+  %region2 = alloca [980 x i32], align 8
+  %ptr1 = getelementptr inbounds [980 x i32]* %region1, i64 0, i64 3
+  %ptr2 = getelementptr inbounds [980 x i32]* %region2, i64 0, i64 3
+  store volatile i32 42, i32 *%ptr1
+  store volatile i32 42, i32 *%ptr2
+  ret void
+}
+
+; Add 4096 bytes (1024 words) to the size of each object and repeat.
+define void @f4() {
+; CHECK-NOFP: f4:
+; CHECK-NOFP: lay %r1, 4096(%r15)
+; CHECK-NOFP: mvhi 4092(%r1), 42
+; CHECK-NOFP: br %r14
+;
+; CHECK-FP: f4:
+; CHECK-FP: lay %r1, 4096(%r11)
+; CHECK-FP: mvhi 4092(%r1), 42
+; CHECK-FP: br %r14
+  %region1 = alloca [2004 x i32], align 8
+  %region2 = alloca [2004 x i32], align 8
+  %ptr1 = getelementptr inbounds [2004 x i32]* %region1, i64 0, i64 1
+  %ptr2 = getelementptr inbounds [2004 x i32]* %region2, i64 0, i64 1
+  store volatile i32 42, i32 *%ptr1
+  store volatile i32 42, i32 *%ptr2
+  ret void
+}
+
+; ...as above.
+define void @f5() {
+; CHECK-NOFP: f5:
+; CHECK-NOFP: lay %r1, 8192(%r15)
+; CHECK-NOFP: mvhi 0(%r1), 42
+; CHECK-NOFP: br %r14
+;
+; CHECK-FP: f5:
+; CHECK-FP: lay %r1, 8192(%r11)
+; CHECK-FP: mvhi 0(%r1), 42
+; CHECK-FP: br %r14
+  %region1 = alloca [2004 x i32], align 8
+  %region2 = alloca [2004 x i32], align 8
+  %ptr1 = getelementptr inbounds [2004 x i32]* %region1, i64 0, i64 2
+  %ptr2 = getelementptr inbounds [2004 x i32]* %region2, i64 0, i64 2
+  store volatile i32 42, i32 *%ptr1
+  store volatile i32 42, i32 *%ptr2
+  ret void
+}
+
+; ...as above.
+define void @f6() {
+; CHECK-NOFP: f6:
+; CHECK-NOFP: lay %r1, 8192(%r15)
+; CHECK-NOFP: mvhi 4(%r1), 42
+; CHECK-NOFP: br %r14
+;
+; CHECK-FP: f6:
+; CHECK-FP: lay %r1, 8192(%r11)
+; CHECK-FP: mvhi 4(%r1), 42
+; CHECK-FP: br %r14
+  %region1 = alloca [2004 x i32], align 8
+  %region2 = alloca [2004 x i32], align 8
+  %ptr1 = getelementptr inbounds [2004 x i32]* %region1, i64 0, i64 3
+  %ptr2 = getelementptr inbounds [2004 x i32]* %region2, i64 0, i64 3
+  store volatile i32 42, i32 *%ptr1
+  store volatile i32 42, i32 *%ptr2
+  ret void
+}
+
+; Now try an offset of 4092 from the start of the object, with the object
+; being at offset 8192.  This time we need objects of (8192 - 168) / 4 = 2006
+; words.
+define void @f7() {
+; CHECK-NOFP: f7:
+; CHECK-NOFP: lay %r1, 8192(%r15)
+; CHECK-NOFP: mvhi 4092(%r1), 42
+; CHECK-NOFP: br %r14
+;
+; CHECK-FP: f7:
+; CHECK-FP: lay %r1, 8192(%r11)
+; CHECK-FP: mvhi 4092(%r1), 42
+; CHECK-FP: br %r14
+  %region1 = alloca [2006 x i32], align 8
+  %region2 = alloca [2006 x i32], align 8
+  %ptr1 = getelementptr inbounds [2006 x i32]* %region1, i64 0, i64 1023
+  %ptr2 = getelementptr inbounds [2006 x i32]* %region2, i64 0, i64 1023
+  store volatile i32 42, i32 *%ptr1
+  store volatile i32 42, i32 *%ptr2
+  ret void
+}
+
+; Keep the object-relative offset the same but bump the size of the
+; objects by one doubleword.
+define void @f8() {
+; CHECK-NOFP: f8:
+; CHECK-NOFP: lay %r1, 12288(%r15)
+; CHECK-NOFP: mvhi 4(%r1), 42
+; CHECK-NOFP: br %r14
+;
+; CHECK-FP: f8:
+; CHECK-FP: lay %r1, 12288(%r11)
+; CHECK-FP: mvhi 4(%r1), 42
+; CHECK-FP: br %r14
+  %region1 = alloca [2008 x i32], align 8
+  %region2 = alloca [2008 x i32], align 8
+  %ptr1 = getelementptr inbounds [2008 x i32]* %region1, i64 0, i64 1023
+  %ptr2 = getelementptr inbounds [2008 x i32]* %region2, i64 0, i64 1023
+  store volatile i32 42, i32 *%ptr1
+  store volatile i32 42, i32 *%ptr2
+  ret void
+}
+
+; Check a case where the original displacement is out of range.  The backend
+; should force an LAY from the outset.  We don't yet do any kind of anchor
+; optimization, so there should be no offset on the MVHI itself.
+define void @f9() {
+; CHECK-NOFP: f9:
+; CHECK-NOFP: lay %r1, 12296(%r15)
+; CHECK-NOFP: mvhi 0(%r1), 42
+; CHECK-NOFP: br %r14
+;
+; CHECK-FP: f9:
+; CHECK-FP: lay %r1, 12296(%r11)
+; CHECK-FP: mvhi 0(%r1), 42
+; CHECK-FP: br %r14
+  %region1 = alloca [2008 x i32], align 8
+  %region2 = alloca [2008 x i32], align 8
+  %ptr1 = getelementptr inbounds [2008 x i32]* %region1, i64 0, i64 1024
+  %ptr2 = getelementptr inbounds [2008 x i32]* %region2, i64 0, i64 1024
+  store volatile i32 42, i32 *%ptr1
+  store volatile i32 42, i32 *%ptr2
+  ret void
+}
+
+; Repeat f2 in a case that needs the emergency spill slot (because all
+; call-clobbered registers are live and no call-saved ones have been
+; allocated).
+define void @f10(i32 *%vptr) {
+; CHECK-NOFP: f10:
+; CHECK-NOFP: stg [[REGISTER:%r[1-9][0-4]?]], 160(%r15)
+; CHECK-NOFP: lay [[REGISTER]], 4096(%r15)
+; CHECK-NOFP: mvhi 0([[REGISTER]]), 42
+; CHECK-NOFP: lg [[REGISTER]], 160(%r15)
+; CHECK-NOFP: br %r14
+;
+; CHECK-FP: f10:
+; CHECK-FP: stg [[REGISTER:%r[1-9][0-4]?]], 160(%r11)
+; CHECK-FP: lay [[REGISTER]], 4096(%r11)
+; CHECK-FP: mvhi 0([[REGISTER]]), 42
+; CHECK-FP: lg [[REGISTER]], 160(%r11)
+; CHECK-FP: br %r14
+  %i0 = load volatile i32 *%vptr
+  %i1 = load volatile i32 *%vptr
+  %i3 = load volatile i32 *%vptr
+  %i4 = load volatile i32 *%vptr
+  %i5 = load volatile i32 *%vptr
+  %region1 = alloca [980 x i32], align 8
+  %region2 = alloca [980 x i32], align 8
+  %ptr1 = getelementptr inbounds [980 x i32]* %region1, i64 0, i64 2
+  %ptr2 = getelementptr inbounds [980 x i32]* %region2, i64 0, i64 2
+  store volatile i32 42, i32 *%ptr1
+  store volatile i32 42, i32 *%ptr2
+  store volatile i32 %i0, i32 *%vptr
+  store volatile i32 %i1, i32 *%vptr
+  store volatile i32 %i3, i32 *%vptr
+  store volatile i32 %i4, i32 *%vptr
+  store volatile i32 %i5, i32 *%vptr
+  ret void
+}
+
+; And again with maximum register pressure.  The only spill slot that the
+; NOFP case needs is the emergency one, so the offsets are the same as for f2.
+; However, the FP case uses %r11 as the frame pointer and must therefore
+; spill a second register.  This leads to an extra displacement of 8.
+define void @f11(i32 *%vptr) {
+; CHECK-NOFP: f11:
+; CHECK-NOFP: stmg %r6, %r15,
+; CHECK-NOFP: stg [[REGISTER:%r[1-9][0-4]?]], 160(%r15)
+; CHECK-NOFP: lay [[REGISTER]], 4096(%r15)
+; CHECK-NOFP: mvhi 0([[REGISTER]]), 42
+; CHECK-NOFP: lg [[REGISTER]], 160(%r15)
+; CHECK-NOFP: lmg %r6, %r15,
+; CHECK-NOFP: br %r14
+;
+; CHECK-FP: f11:
+; CHECK-FP: stmg %r6, %r15,
+; CHECK-FP: stg [[REGISTER:%r[1-9][0-4]?]], 160(%r11)
+; CHECK-FP: lay [[REGISTER]], 4096(%r11)
+; CHECK-FP: mvhi 8([[REGISTER]]), 42
+; CHECK-FP: lg [[REGISTER]], 160(%r11)
+; CHECK-FP: lmg %r6, %r15,
+; CHECK-FP: br %r14
+  %i0 = load volatile i32 *%vptr
+  %i1 = load volatile i32 *%vptr
+  %i3 = load volatile i32 *%vptr
+  %i4 = load volatile i32 *%vptr
+  %i5 = load volatile i32 *%vptr
+  %i6 = load volatile i32 *%vptr
+  %i7 = load volatile i32 *%vptr
+  %i8 = load volatile i32 *%vptr
+  %i9 = load volatile i32 *%vptr
+  %i10 = load volatile i32 *%vptr
+  %i11 = load volatile i32 *%vptr
+  %i12 = load volatile i32 *%vptr
+  %i13 = load volatile i32 *%vptr
+  %i14 = load volatile i32 *%vptr
+  %region1 = alloca [980 x i32], align 8
+  %region2 = alloca [980 x i32], align 8
+  %ptr1 = getelementptr inbounds [980 x i32]* %region1, i64 0, i64 2
+  %ptr2 = getelementptr inbounds [980 x i32]* %region2, i64 0, i64 2
+  store volatile i32 42, i32 *%ptr1
+  store volatile i32 42, i32 *%ptr2
+  store volatile i32 %i0, i32 *%vptr
+  store volatile i32 %i1, i32 *%vptr
+  store volatile i32 %i3, i32 *%vptr
+  store volatile i32 %i4, i32 *%vptr
+  store volatile i32 %i5, i32 *%vptr
+  store volatile i32 %i6, i32 *%vptr
+  store volatile i32 %i7, i32 *%vptr
+  store volatile i32 %i8, i32 *%vptr
+  store volatile i32 %i9, i32 *%vptr
+  store volatile i32 %i10, i32 *%vptr
+  store volatile i32 %i11, i32 *%vptr
+  store volatile i32 %i12, i32 *%vptr
+  store volatile i32 %i13, i32 *%vptr
+  store volatile i32 %i14, i32 *%vptr
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/frame-14.ll b/test/CodeGen/SystemZ/frame-14.ll
new file mode 100644
index 0000000..d8ff0a5
--- /dev/null
+++ b/test/CodeGen/SystemZ/frame-14.ll
@@ -0,0 +1,322 @@
+; Test the handling of base + displacement addresses for large frames,
+; in cases where both 12-bit and 20-bit displacements are allowed.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck -check-prefix=CHECK-NOFP %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-fp-elim | FileCheck -check-prefix=CHECK-FP %s
+
+; This file tests what happens when a displacement is converted from
+; being relative to the start of a frame object to being relative to
+; the frame itself.  In some cases the test is only possible if two
+; objects are allocated.
+;
+; Rather than rely on a particular order for those objects, the tests
+; instead allocate two objects of the same size and apply the test to
+; both of them.  For consistency, all tests follow this model, even if
+; one object would actually be enough.
+
+; First check the highest offset that is in range of the 12-bit form.
+;
+; The last in-range doubleword offset is 4088.  Since the frame has an
+; emergency spill slot at 160(%r15), the amount that we need to allocate
+; in order to put another object at offset 4088 is 4088 - 168 = 3920 bytes.
+define void @f1() {
+; CHECK-NOFP: f1:
+; CHECK-NOFP: mvi 4095(%r15), 42
+; CHECK-NOFP: br %r14
+;
+; CHECK-FP: f1:
+; CHECK-FP: mvi 4095(%r11), 42
+; CHECK-FP: br %r14
+  %region1 = alloca [3920 x i8], align 8
+  %region2 = alloca [3920 x i8], align 8
+  %ptr1 = getelementptr inbounds [3920 x i8]* %region1, i64 0, i64 7
+  %ptr2 = getelementptr inbounds [3920 x i8]* %region2, i64 0, i64 7
+  store volatile i8 42, i8 *%ptr1
+  store volatile i8 42, i8 *%ptr2
+  ret void
+}
+
+; Test the first offset that is out-of-range of the 12-bit form.
+define void @f2() {
+; CHECK-NOFP: f2:
+; CHECK-NOFP: mviy 4096(%r15), 42
+; CHECK-NOFP: br %r14
+;
+; CHECK-FP: f2:
+; CHECK-FP: mviy 4096(%r11), 42
+; CHECK-FP: br %r14
+  %region1 = alloca [3920 x i8], align 8
+  %region2 = alloca [3920 x i8], align 8
+  %ptr1 = getelementptr inbounds [3920 x i8]* %region1, i64 0, i64 8
+  %ptr2 = getelementptr inbounds [3920 x i8]* %region2, i64 0, i64 8
+  store volatile i8 42, i8 *%ptr1
+  store volatile i8 42, i8 *%ptr2
+  ret void
+}
+
+; Test the last offset that is in range of the 20-bit form.
+;
+; The last in-range doubleword offset is 524280, so by the same reasoning
+; as above, we need to allocate objects of 524280 - 168 = 524112 bytes.
+define void @f3() {
+; CHECK-NOFP: f3:
+; CHECK-NOFP: mviy 524287(%r15), 42
+; CHECK-NOFP: br %r14
+;
+; CHECK-FP: f3:
+; CHECK-FP: mviy 524287(%r11), 42
+; CHECK-FP: br %r14
+  %region1 = alloca [524112 x i8], align 8
+  %region2 = alloca [524112 x i8], align 8
+  %ptr1 = getelementptr inbounds [524112 x i8]* %region1, i64 0, i64 7
+  %ptr2 = getelementptr inbounds [524112 x i8]* %region2, i64 0, i64 7
+  store volatile i8 42, i8 *%ptr1
+  store volatile i8 42, i8 *%ptr2
+  ret void
+}
+
+; Test the first out-of-range offset.  We can't use an index register here,
+; and the offset is also out of LAY's range, so expect a constant load
+; followed by an addition.
+define void @f4() {
+; CHECK-NOFP: f4:
+; CHECK-NOFP: llilh %r1, 8
+; CHECK-NOFP: agr %r1, %r15
+; CHECK-NOFP: mvi 0(%r1), 42
+; CHECK-NOFP: br %r14
+;
+; CHECK-FP: f4:
+; CHECK-FP: llilh %r1, 8
+; CHECK-FP: agr %r1, %r11
+; CHECK-FP: mvi 0(%r1), 42
+; CHECK-FP: br %r14
+  %region1 = alloca [524112 x i8], align 8
+  %region2 = alloca [524112 x i8], align 8
+  %ptr1 = getelementptr inbounds [524112 x i8]* %region1, i64 0, i64 8
+  %ptr2 = getelementptr inbounds [524112 x i8]* %region2, i64 0, i64 8
+  store volatile i8 42, i8 *%ptr1
+  store volatile i8 42, i8 *%ptr2
+  ret void
+}
+
+; Add 4095 to the previous offset, to test the other end of the MVI range.
+; The instruction will actually be MVIY before frame lowering.
+define void @f5() {
+; CHECK-NOFP: f5:
+; CHECK-NOFP: llilh %r1, 8
+; CHECK-NOFP: agr %r1, %r15
+; CHECK-NOFP: mvi 4095(%r1), 42
+; CHECK-NOFP: br %r14
+;
+; CHECK-FP: f5:
+; CHECK-FP: llilh %r1, 8
+; CHECK-FP: agr %r1, %r11
+; CHECK-FP: mvi 4095(%r1), 42
+; CHECK-FP: br %r14
+  %region1 = alloca [524112 x i8], align 8
+  %region2 = alloca [524112 x i8], align 8
+  %ptr1 = getelementptr inbounds [524112 x i8]* %region1, i64 0, i64 4103
+  %ptr2 = getelementptr inbounds [524112 x i8]* %region2, i64 0, i64 4103
+  store volatile i8 42, i8 *%ptr1
+  store volatile i8 42, i8 *%ptr2
+  ret void
+}
+
+; Test the next offset after that, which uses MVIY instead of MVI.
+define void @f6() {
+; CHECK-NOFP: f6:
+; CHECK-NOFP: llilh %r1, 8
+; CHECK-NOFP: agr %r1, %r15
+; CHECK-NOFP: mviy 4096(%r1), 42
+; CHECK-NOFP: br %r14
+;
+; CHECK-FP: f6:
+; CHECK-FP: llilh %r1, 8
+; CHECK-FP: agr %r1, %r11
+; CHECK-FP: mviy 4096(%r1), 42
+; CHECK-FP: br %r14
+  %region1 = alloca [524112 x i8], align 8
+  %region2 = alloca [524112 x i8], align 8
+  %ptr1 = getelementptr inbounds [524112 x i8]* %region1, i64 0, i64 4104
+  %ptr2 = getelementptr inbounds [524112 x i8]* %region2, i64 0, i64 4104
+  store volatile i8 42, i8 *%ptr1
+  store volatile i8 42, i8 *%ptr2
+  ret void
+}
+
+; Now try an offset of 524287 from the start of the object, with the
+; object being at offset 1048576 (1 << 20).  The backend prefers to create
+; anchors 0x10000 bytes apart, so that the high part can be loaded using
+; LLILH while still using MVI in more cases than 0x40000 anchors would.
+define void @f7() {
+; CHECK-NOFP: f7:
+; CHECK-NOFP: llilh %r1, 23
+; CHECK-NOFP: agr %r1, %r15
+; CHECK-NOFP: mviy 65535(%r1), 42
+; CHECK-NOFP: br %r14
+;
+; CHECK-FP: f7:
+; CHECK-FP: llilh %r1, 23
+; CHECK-FP: agr %r1, %r11
+; CHECK-FP: mviy 65535(%r1), 42
+; CHECK-FP: br %r14
+  %region1 = alloca [1048408 x i8], align 8
+  %region2 = alloca [1048408 x i8], align 8
+  %ptr1 = getelementptr inbounds [1048408 x i8]* %region1, i64 0, i64 524287
+  %ptr2 = getelementptr inbounds [1048408 x i8]* %region2, i64 0, i64 524287
+  store volatile i8 42, i8 *%ptr1
+  store volatile i8 42, i8 *%ptr2
+  ret void
+}
+
+; Keep the object-relative offset the same but bump the size of the
+; objects by one doubleword.
+define void @f8() {
+; CHECK-NOFP: f8:
+; CHECK-NOFP: llilh %r1, 24
+; CHECK-NOFP: agr %r1, %r15
+; CHECK-NOFP: mvi 7(%r1), 42
+; CHECK-NOFP: br %r14
+;
+; CHECK-FP: f8:
+; CHECK-FP: llilh %r1, 24
+; CHECK-FP: agr %r1, %r11
+; CHECK-FP: mvi 7(%r1), 42
+; CHECK-FP: br %r14
+  %region1 = alloca [1048416 x i8], align 8
+  %region2 = alloca [1048416 x i8], align 8
+  %ptr1 = getelementptr inbounds [1048416 x i8]* %region1, i64 0, i64 524287
+  %ptr2 = getelementptr inbounds [1048416 x i8]* %region2, i64 0, i64 524287
+  store volatile i8 42, i8 *%ptr1
+  store volatile i8 42, i8 *%ptr2
+  ret void
+}
+
+; Check a case where the original displacement is out of range.  The backend
+; should force separate address logic from the outset.  We don't yet do any
+; kind of anchor optimization, so there should be no offset on the MVI itself.
+;
+; Before frame lowering this is an LA followed by the AGFI seen below.
+; The LA then gets lowered into the LLILH/LA form.  The exact sequence
+; isn't that important though.
+define void @f9() {
+; CHECK-NOFP: f9:
+; CHECK-NOFP: llilh [[R1:%r[1-5]]], 16
+; CHECK-NOFP: la [[R2:%r[1-5]]], 8([[R1]],%r15)
+; CHECK-NOFP: agfi [[R2]], 524288
+; CHECK-NOFP: mvi 0([[R2]]), 42
+; CHECK-NOFP: br %r14
+;
+; CHECK-FP: f9:
+; CHECK-FP: llilh [[R1:%r[1-5]]], 16
+; CHECK-FP: la [[R2:%r[1-5]]], 8([[R1]],%r11)
+; CHECK-FP: agfi [[R2]], 524288
+; CHECK-FP: mvi 0([[R2]]), 42
+; CHECK-FP: br %r14
+  %region1 = alloca [1048416 x i8], align 8
+  %region2 = alloca [1048416 x i8], align 8
+  %ptr1 = getelementptr inbounds [1048416 x i8]* %region1, i64 0, i64 524288
+  %ptr2 = getelementptr inbounds [1048416 x i8]* %region2, i64 0, i64 524288
+  store volatile i8 42, i8 *%ptr1
+  store volatile i8 42, i8 *%ptr2
+  ret void
+}
+
+; Repeat f4 in a case that needs the emergency spill slot (because all
+; call-clobbered registers are live and no call-saved ones have been
+; allocated).
+define void @f10(i32 *%vptr) {
+; CHECK-NOFP: f10:
+; CHECK-NOFP: stg [[REGISTER:%r[1-9][0-4]?]], 160(%r15)
+; CHECK-NOFP: llilh [[REGISTER]], 8
+; CHECK-NOFP: agr [[REGISTER]], %r15
+; CHECK-NOFP: mvi 0([[REGISTER]]), 42
+; CHECK-NOFP: lg [[REGISTER]], 160(%r15)
+; CHECK-NOFP: br %r14
+;
+; CHECK-FP: f10:
+; CHECK-FP: stg [[REGISTER:%r[1-9][0-4]?]], 160(%r11)
+; CHECK-FP: llilh [[REGISTER]], 8
+; CHECK-FP: agr [[REGISTER]], %r11
+; CHECK-FP: mvi 0([[REGISTER]]), 42
+; CHECK-FP: lg [[REGISTER]], 160(%r11)
+; CHECK-FP: br %r14
+  %i0 = load volatile i32 *%vptr
+  %i1 = load volatile i32 *%vptr
+  %i3 = load volatile i32 *%vptr
+  %i4 = load volatile i32 *%vptr
+  %i5 = load volatile i32 *%vptr
+  %region1 = alloca [524112 x i8], align 8
+  %region2 = alloca [524112 x i8], align 8
+  %ptr1 = getelementptr inbounds [524112 x i8]* %region1, i64 0, i64 8
+  %ptr2 = getelementptr inbounds [524112 x i8]* %region2, i64 0, i64 8
+  store volatile i8 42, i8 *%ptr1
+  store volatile i8 42, i8 *%ptr2
+  store volatile i32 %i0, i32 *%vptr
+  store volatile i32 %i1, i32 *%vptr
+  store volatile i32 %i3, i32 *%vptr
+  store volatile i32 %i4, i32 *%vptr
+  store volatile i32 %i5, i32 *%vptr
+  ret void
+}
+
+; And again with maximum register pressure.  The only spill slot that the
+; NOFP case needs is the emergency one, so the offsets are the same as for f4.
+; However, the FP case uses %r11 as the frame pointer and must therefore
+; spill a second register.  This leads to an extra displacement of 8.
+define void @f11(i32 *%vptr) {
+; CHECK-NOFP: f11:
+; CHECK-NOFP: stmg %r6, %r15,
+; CHECK-NOFP: stg [[REGISTER:%r[1-9][0-4]?]], 160(%r15)
+; CHECK-NOFP: llilh [[REGISTER]], 8
+; CHECK-NOFP: agr [[REGISTER]], %r15
+; CHECK-NOFP: mvi 0([[REGISTER]]), 42
+; CHECK-NOFP: lg [[REGISTER]], 160(%r15)
+; CHECK-NOFP: lmg %r6, %r15,
+; CHECK-NOFP: br %r14
+;
+; CHECK-FP: f11:
+; CHECK-FP: stmg %r6, %r15,
+; CHECK-FP: stg [[REGISTER:%r[1-9][0-4]?]], 160(%r11)
+; CHECK-FP: llilh [[REGISTER]], 8
+; CHECK-FP: agr [[REGISTER]], %r11
+; CHECK-FP: mvi 8([[REGISTER]]), 42
+; CHECK-FP: lg [[REGISTER]], 160(%r11)
+; CHECK-FP: lmg %r6, %r15,
+; CHECK-FP: br %r14
+  %i0 = load volatile i32 *%vptr
+  %i1 = load volatile i32 *%vptr
+  %i3 = load volatile i32 *%vptr
+  %i4 = load volatile i32 *%vptr
+  %i5 = load volatile i32 *%vptr
+  %i6 = load volatile i32 *%vptr
+  %i7 = load volatile i32 *%vptr
+  %i8 = load volatile i32 *%vptr
+  %i9 = load volatile i32 *%vptr
+  %i10 = load volatile i32 *%vptr
+  %i11 = load volatile i32 *%vptr
+  %i12 = load volatile i32 *%vptr
+  %i13 = load volatile i32 *%vptr
+  %i14 = load volatile i32 *%vptr
+  %region1 = alloca [524112 x i8], align 8
+  %region2 = alloca [524112 x i8], align 8
+  %ptr1 = getelementptr inbounds [524112 x i8]* %region1, i64 0, i64 8
+  %ptr2 = getelementptr inbounds [524112 x i8]* %region2, i64 0, i64 8
+  store volatile i8 42, i8 *%ptr1
+  store volatile i8 42, i8 *%ptr2
+  store volatile i32 %i0, i32 *%vptr
+  store volatile i32 %i1, i32 *%vptr
+  store volatile i32 %i3, i32 *%vptr
+  store volatile i32 %i4, i32 *%vptr
+  store volatile i32 %i5, i32 *%vptr
+  store volatile i32 %i6, i32 *%vptr
+  store volatile i32 %i7, i32 *%vptr
+  store volatile i32 %i8, i32 *%vptr
+  store volatile i32 %i9, i32 *%vptr
+  store volatile i32 %i10, i32 *%vptr
+  store volatile i32 %i11, i32 *%vptr
+  store volatile i32 %i12, i32 *%vptr
+  store volatile i32 %i13, i32 *%vptr
+  store volatile i32 %i14, i32 *%vptr
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/frame-15.ll b/test/CodeGen/SystemZ/frame-15.ll
new file mode 100644
index 0000000..bc87e17
--- /dev/null
+++ b/test/CodeGen/SystemZ/frame-15.ll
@@ -0,0 +1,352 @@
+; Test the handling of base + index + 12-bit displacement addresses for
+; large frames, in cases where no 20-bit form exists.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck -check-prefix=CHECK-NOFP %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-fp-elim | FileCheck -check-prefix=CHECK-FP %s
+
+declare void @foo(float *%ptr1, float *%ptr2)
+
+; This file tests what happens when a displacement is converted from
+; being relative to the start of a frame object to being relative to
+; the frame itself.  In some cases the test is only possible if two
+; objects are allocated.
+;
+; Rather than rely on a particular order for those objects, the tests
+; instead allocate two objects of the same size and apply the test to
+; both of them.  For consistency, all tests follow this model, even if
+; one object would actually be enough.
+
+; First check the highest in-range offset after conversion, which is 4092
+; for word-addressing instructions like LDEB.
+;
+; The last in-range doubleword offset is 4088.  Since the frame has an
+; emergency spill slot at 160(%r15), the amount that we need to allocate
+; in order to put another object at offset 4088 is (4088 - 168) / 4 = 980
+; words.
+define void @f1(double *%dst) {
+; CHECK-NOFP: f1:
+; CHECK-NOFP: ldeb {{%f[0-7]}}, 4092(%r15)
+; CHECK-NOFP: br %r14
+;
+; CHECK-FP: f1:
+; CHECK-FP: ldeb {{%f[0-7]}}, 4092(%r11)
+; CHECK-FP: br %r14
+  %region1 = alloca [980 x float], align 8
+  %region2 = alloca [980 x float], align 8
+  %start1 = getelementptr inbounds [980 x float]* %region1, i64 0, i64 0
+  %start2 = getelementptr inbounds [980 x float]* %region2, i64 0, i64 0
+  call void @foo(float *%start1, float *%start2)
+  %ptr1 = getelementptr inbounds [980 x float]* %region1, i64 0, i64 1
+  %ptr2 = getelementptr inbounds [980 x float]* %region2, i64 0, i64 1
+  %float1 = load float *%ptr1
+  %float2 = load float *%ptr2
+  %double1 = fpext float %float1 to double
+  %double2 = fpext float %float2 to double
+  store volatile double %double1, double *%dst
+  store volatile double %double2, double *%dst
+  ret void
+}
+
+; Test the first out-of-range offset.
+define void @f2(double *%dst) {
+; CHECK-NOFP: f2:
+; CHECK-NOFP: lghi %r1, 4096
+; CHECK-NOFP: ldeb {{%f[0-7]}}, 0(%r1,%r15)
+; CHECK-NOFP: br %r14
+;
+; CHECK-FP: f2:
+; CHECK-FP: lghi %r1, 4096
+; CHECK-FP: ldeb {{%f[0-7]}}, 0(%r1,%r11)
+; CHECK-FP: br %r14
+  %region1 = alloca [980 x float], align 8
+  %region2 = alloca [980 x float], align 8
+  %start1 = getelementptr inbounds [980 x float]* %region1, i64 0, i64 0
+  %start2 = getelementptr inbounds [980 x float]* %region2, i64 0, i64 0
+  call void @foo(float *%start1, float *%start2)
+  %ptr1 = getelementptr inbounds [980 x float]* %region1, i64 0, i64 2
+  %ptr2 = getelementptr inbounds [980 x float]* %region2, i64 0, i64 2
+  %float1 = load float *%ptr1
+  %float2 = load float *%ptr2
+  %double1 = fpext float %float1 to double
+  %double2 = fpext float %float2 to double
+  store volatile double %double1, double *%dst
+  store volatile double %double2, double *%dst
+  ret void
+}
+
+; Test the next offset after that.
+define void @f3(double *%dst) {
+; CHECK-NOFP: f3:
+; CHECK-NOFP: lghi %r1, 4096
+; CHECK-NOFP: ldeb {{%f[0-7]}}, 4(%r1,%r15)
+; CHECK-NOFP: br %r14
+;
+; CHECK-FP: f3:
+; CHECK-FP: lghi %r1, 4096
+; CHECK-FP: ldeb {{%f[0-7]}}, 4(%r1,%r11)
+; CHECK-FP: br %r14
+  %region1 = alloca [980 x float], align 8
+  %region2 = alloca [980 x float], align 8
+  %start1 = getelementptr inbounds [980 x float]* %region1, i64 0, i64 0
+  %start2 = getelementptr inbounds [980 x float]* %region2, i64 0, i64 0
+  call void @foo(float *%start1, float *%start2)
+  %ptr1 = getelementptr inbounds [980 x float]* %region1, i64 0, i64 3
+  %ptr2 = getelementptr inbounds [980 x float]* %region2, i64 0, i64 3
+  %float1 = load float *%ptr1
+  %float2 = load float *%ptr2
+  %double1 = fpext float %float1 to double
+  %double2 = fpext float %float2 to double
+  store volatile double %double1, double *%dst
+  store volatile double %double2, double *%dst
+  ret void
+}
+
+; Add 4096 bytes (1024 words) to the size of each object and repeat.
+define void @f4(double *%dst) {
+; CHECK-NOFP: f4:
+; CHECK-NOFP: lghi %r1, 4096
+; CHECK-NOFP: ldeb {{%f[0-7]}}, 4092(%r1,%r15)
+; CHECK-NOFP: br %r14
+;
+; CHECK-FP: f4:
+; CHECK-FP: lghi %r1, 4096
+; CHECK-FP: ldeb {{%f[0-7]}}, 4092(%r1,%r11)
+; CHECK-FP: br %r14
+  %region1 = alloca [2004 x float], align 8
+  %region2 = alloca [2004 x float], align 8
+  %start1 = getelementptr inbounds [2004 x float]* %region1, i64 0, i64 0
+  %start2 = getelementptr inbounds [2004 x float]* %region2, i64 0, i64 0
+  call void @foo(float *%start1, float *%start2)
+  %ptr1 = getelementptr inbounds [2004 x float]* %region1, i64 0, i64 1
+  %ptr2 = getelementptr inbounds [2004 x float]* %region2, i64 0, i64 1
+  %float1 = load float *%ptr1
+  %float2 = load float *%ptr2
+  %double1 = fpext float %float1 to double
+  %double2 = fpext float %float2 to double
+  store volatile double %double1, double *%dst
+  store volatile double %double2, double *%dst
+  ret void
+}
+
+; ...as above.
+define void @f5(double *%dst) {
+; CHECK-NOFP: f5:
+; CHECK-NOFP: lghi %r1, 8192
+; CHECK-NOFP: ldeb {{%f[0-7]}}, 0(%r1,%r15)
+; CHECK-NOFP: br %r14
+;
+; CHECK-FP: f5:
+; CHECK-FP: lghi %r1, 8192
+; CHECK-FP: ldeb {{%f[0-7]}}, 0(%r1,%r11)
+; CHECK-FP: br %r14
+  %region1 = alloca [2004 x float], align 8
+  %region2 = alloca [2004 x float], align 8
+  %start1 = getelementptr inbounds [2004 x float]* %region1, i64 0, i64 0
+  %start2 = getelementptr inbounds [2004 x float]* %region2, i64 0, i64 0
+  call void @foo(float *%start1, float *%start2)
+  %ptr1 = getelementptr inbounds [2004 x float]* %region1, i64 0, i64 2
+  %ptr2 = getelementptr inbounds [2004 x float]* %region2, i64 0, i64 2
+  %float1 = load float *%ptr1
+  %float2 = load float *%ptr2
+  %double1 = fpext float %float1 to double
+  %double2 = fpext float %float2 to double
+  store volatile double %double1, double *%dst
+  store volatile double %double2, double *%dst
+  ret void
+}
+
+; ...as above.
+define void @f6(double *%dst) {
+; CHECK-NOFP: f6:
+; CHECK-NOFP: lghi %r1, 8192
+; CHECK-NOFP: ldeb {{%f[0-7]}}, 4(%r1,%r15)
+; CHECK-NOFP: br %r14
+;
+; CHECK-FP: f6:
+; CHECK-FP: lghi %r1, 8192
+; CHECK-FP: ldeb {{%f[0-7]}}, 4(%r1,%r11)
+; CHECK-FP: br %r14
+  %region1 = alloca [2004 x float], align 8
+  %region2 = alloca [2004 x float], align 8
+  %start1 = getelementptr inbounds [2004 x float]* %region1, i64 0, i64 0
+  %start2 = getelementptr inbounds [2004 x float]* %region2, i64 0, i64 0
+  call void @foo(float *%start1, float *%start2)
+  %ptr1 = getelementptr inbounds [2004 x float]* %region1, i64 0, i64 3
+  %ptr2 = getelementptr inbounds [2004 x float]* %region2, i64 0, i64 3
+  %float1 = load float *%ptr1
+  %float2 = load float *%ptr2
+  %double1 = fpext float %float1 to double
+  %double2 = fpext float %float2 to double
+  store volatile double %double1, double *%dst
+  store volatile double %double2, double *%dst
+  ret void
+}
+
+; Now try an offset of 4092 from the start of the object, with the object
+; being at offset 8192.  This time we need objects of (8192 - 168) / 4 = 2006
+; words.
+define void @f7(double *%dst) {
+; CHECK-NOFP: f7:
+; CHECK-NOFP: lghi %r1, 8192
+; CHECK-NOFP: ldeb {{%f[0-7]}}, 4092(%r1,%r15)
+; CHECK-NOFP: br %r14
+;
+; CHECK-FP: f7:
+; CHECK-FP: lghi %r1, 8192
+; CHECK-FP: ldeb {{%f[0-7]}}, 4092(%r1,%r11)
+; CHECK-FP: br %r14
+  %region1 = alloca [2006 x float], align 8
+  %region2 = alloca [2006 x float], align 8
+  %start1 = getelementptr inbounds [2006 x float]* %region1, i64 0, i64 0
+  %start2 = getelementptr inbounds [2006 x float]* %region2, i64 0, i64 0
+  call void @foo(float *%start1, float *%start2)
+  %ptr1 = getelementptr inbounds [2006 x float]* %region1, i64 0, i64 1023
+  %ptr2 = getelementptr inbounds [2006 x float]* %region2, i64 0, i64 1023
+  %float1 = load float *%ptr1
+  %float2 = load float *%ptr2
+  %double1 = fpext float %float1 to double
+  %double2 = fpext float %float2 to double
+  store volatile double %double1, double *%dst
+  store volatile double %double2, double *%dst
+  ret void
+}
+
+; Keep the object-relative offset the same but bump the size of the
+; objects by one doubleword.
+define void @f8(double *%dst) {
+; CHECK-NOFP: f8:
+; CHECK-NOFP: lghi %r1, 12288
+; CHECK-NOFP: ldeb {{%f[0-7]}}, 4(%r1,%r15)
+; CHECK-NOFP: br %r14
+;
+; CHECK-FP: f8:
+; CHECK-FP: lghi %r1, 12288
+; CHECK-FP: ldeb {{%f[0-7]}}, 4(%r1,%r11)
+; CHECK-FP: br %r14
+  %region1 = alloca [2008 x float], align 8
+  %region2 = alloca [2008 x float], align 8
+  %start1 = getelementptr inbounds [2008 x float]* %region1, i64 0, i64 0
+  %start2 = getelementptr inbounds [2008 x float]* %region2, i64 0, i64 0
+  call void @foo(float *%start1, float *%start2)
+  %ptr1 = getelementptr inbounds [2008 x float]* %region1, i64 0, i64 1023
+  %ptr2 = getelementptr inbounds [2008 x float]* %region2, i64 0, i64 1023
+  %float1 = load float *%ptr1
+  %float2 = load float *%ptr2
+  %double1 = fpext float %float1 to double
+  %double2 = fpext float %float2 to double
+  store volatile double %double1, double *%dst
+  store volatile double %double2, double *%dst
+  ret void
+}
+
+; Check a case where the original displacement is out of range.  The backend
+; should force an LAY from the outset.  We don't yet do any kind of anchor
+; optimization, so there should be no offset on the LDEB itself.
+define void @f9(double *%dst) {
+; CHECK-NOFP: f9:
+; CHECK-NOFP: lay %r1, 12296(%r15)
+; CHECK-NOFP: ldeb {{%f[0-7]}}, 0(%r1)
+; CHECK-NOFP: br %r14
+;
+; CHECK-FP: f9:
+; CHECK-FP: lay %r1, 12296(%r11)
+; CHECK-FP: ldeb {{%f[0-7]}}, 0(%r1)
+; CHECK-FP: br %r14
+  %region1 = alloca [2008 x float], align 8
+  %region2 = alloca [2008 x float], align 8
+  %start1 = getelementptr inbounds [2008 x float]* %region1, i64 0, i64 0
+  %start2 = getelementptr inbounds [2008 x float]* %region2, i64 0, i64 0
+  call void @foo(float *%start1, float *%start2)
+  %ptr1 = getelementptr inbounds [2008 x float]* %region1, i64 0, i64 1024
+  %ptr2 = getelementptr inbounds [2008 x float]* %region2, i64 0, i64 1024
+  %float1 = load float *%ptr1
+  %float2 = load float *%ptr2
+  %double1 = fpext float %float1 to double
+  %double2 = fpext float %float2 to double
+  store volatile double %double1, double *%dst
+  store volatile double %double2, double *%dst
+  ret void
+}
+
+; Repeat f2 in a case that needs the emergency spill slot, because all
+; call-clobbered and allocated call-saved registers are live.  Note that
+; %vptr and %dst are copied to call-saved registers, freeing up %r2 and
+; %r3 during the main test.
+define void @f10(i32 *%vptr, double *%dst) {
+; CHECK-NOFP: f10:
+; CHECK-NOFP: stg [[REGISTER:%r[1-9][0-4]?]], 160(%r15)
+; CHECK-NOFP: lghi [[REGISTER]], 4096
+; CHECK-NOFP: ldeb {{%f[0-7]}}, 0([[REGISTER]],%r15)
+; CHECK-NOFP: lg [[REGISTER]], 160(%r15)
+; CHECK-NOFP: br %r14
+;
+; CHECK-FP: f10:
+; CHECK-FP: stg [[REGISTER:%r[1-9][0-4]?]], 160(%r11)
+; CHECK-FP: lghi [[REGISTER]], 4096
+; CHECK-FP: ldeb {{%f[0-7]}}, 0([[REGISTER]],%r11)
+; CHECK-FP: lg [[REGISTER]], 160(%r11)
+; CHECK-FP: br %r14
+  %region1 = alloca [980 x float], align 8
+  %region2 = alloca [980 x float], align 8
+  %start1 = getelementptr inbounds [980 x float]* %region1, i64 0, i64 0
+  %start2 = getelementptr inbounds [980 x float]* %region2, i64 0, i64 0
+  call void @foo(float *%start1, float *%start2)
+  %ptr1 = getelementptr inbounds [980 x float]* %region1, i64 0, i64 2
+  %ptr2 = getelementptr inbounds [980 x float]* %region2, i64 0, i64 2
+  %i0 = load volatile i32 *%vptr
+  %i1 = load volatile i32 *%vptr
+  %i2 = load volatile i32 *%vptr
+  %i3 = load volatile i32 *%vptr
+  %i4 = load volatile i32 *%vptr
+  %i5 = load volatile i32 *%vptr
+  %i14 = load volatile i32 *%vptr
+  %float1 = load float *%ptr1
+  %float2 = load float *%ptr2
+  %double1 = fpext float %float1 to double
+  %double2 = fpext float %float2 to double
+  store volatile double %double1, double *%dst
+  store volatile double %double2, double *%dst
+  store volatile i32 %i0, i32 *%vptr
+  store volatile i32 %i1, i32 *%vptr
+  store volatile i32 %i2, i32 *%vptr
+  store volatile i32 %i3, i32 *%vptr
+  store volatile i32 %i4, i32 *%vptr
+  store volatile i32 %i5, i32 *%vptr
+  store volatile i32 %i14, i32 *%vptr
+  ret void
+}
+
+; Repeat f2 in a case where the index register is already occupied.
+define void @f11(double *%dst, i64 %index) {
+; CHECK-NOFP: f11:
+; CHECK-NOFP: lgr [[REGISTER:%r[1-9][0-5]?]], %r3
+; CHECK-NOFP: lay %r1, 4096(%r15)
+; CHECK-NOFP: ldeb {{%f[0-7]}}, 0([[REGISTER]],%r1)
+; CHECK-NOFP: br %r14
+;
+; CHECK-FP: f11:
+; CHECK-FP: lgr [[REGISTER:%r[1-9][0-5]?]], %r3
+; CHECK-FP: lay %r1, 4096(%r11)
+; CHECK-FP: ldeb {{%f[0-7]}}, 0([[REGISTER]],%r1)
+; CHECK-FP: br %r14
+  %region1 = alloca [980 x float], align 8
+  %region2 = alloca [980 x float], align 8
+  %start1 = getelementptr inbounds [980 x float]* %region1, i64 0, i64 0
+  %start2 = getelementptr inbounds [980 x float]* %region2, i64 0, i64 0
+  call void @foo(float *%start1, float *%start2)
+  %elem1 = getelementptr inbounds [980 x float]* %region1, i64 0, i64 2
+  %elem2 = getelementptr inbounds [980 x float]* %region2, i64 0, i64 2
+  %base1 = ptrtoint float *%elem1 to i64
+  %base2 = ptrtoint float *%elem2 to i64
+  %addr1 = add i64 %base1, %index
+  %addr2 = add i64 %base2, %index
+  %ptr1 = inttoptr i64 %addr1 to float *
+  %ptr2 = inttoptr i64 %addr2 to float *
+  %float1 = load float *%ptr1
+  %float2 = load float *%ptr2
+  %double1 = fpext float %float1 to double
+  %double2 = fpext float %float2 to double
+  store volatile double %double1, double *%dst
+  store volatile double %double2, double *%dst
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/frame-16.ll b/test/CodeGen/SystemZ/frame-16.ll
new file mode 100644
index 0000000..cc5529f
--- /dev/null
+++ b/test/CodeGen/SystemZ/frame-16.ll
@@ -0,0 +1,327 @@
+; Test the handling of base + index + displacement addresses for large frames,
+; in cases where both 12-bit and 20-bit displacements are allowed.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck -check-prefix=CHECK-NOFP %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-fp-elim | FileCheck -check-prefix=CHECK-FP %s
+
+; This file tests what happens when a displacement is converted from
+; being relative to the start of a frame object to being relative to
+; the frame itself.  In some cases the test is only possible if two
+; objects are allocated.
+;
+; Rather than rely on a particular order for those objects, the tests
+; instead allocate two objects of the same size and apply the test to
+; both of them.  For consistency, all tests follow this model, even if
+; one object would actually be enough.
+
+; First check the highest offset that is in range of the 12-bit form.
+;
+; The last in-range doubleword offset is 4088.  Since the frame has an
+; emergency spill slot at 160(%r15), the amount that we need to allocate
+; in order to put another object at offset 4088 is 4088 - 168 = 3920 bytes.
+define void @f1(i8 %byte) {
+; CHECK-NOFP: f1:
+; CHECK-NOFP: stc %r2, 4095(%r15)
+; CHECK-NOFP: br %r14
+;
+; CHECK-FP: f1:
+; CHECK-FP: stc %r2, 4095(%r11)
+; CHECK-FP: br %r14
+  %region1 = alloca [3920 x i8], align 8
+  %region2 = alloca [3920 x i8], align 8
+  %ptr1 = getelementptr inbounds [3920 x i8]* %region1, i64 0, i64 7
+  %ptr2 = getelementptr inbounds [3920 x i8]* %region2, i64 0, i64 7
+  store volatile i8 %byte, i8 *%ptr1
+  store volatile i8 %byte, i8 *%ptr2
+  ret void
+}
+
+; Test the first offset that is out-of-range of the 12-bit form.
+define void @f2(i8 %byte) {
+; CHECK-NOFP: f2:
+; CHECK-NOFP: stcy %r2, 4096(%r15)
+; CHECK-NOFP: br %r14
+;
+; CHECK-FP: f2:
+; CHECK-FP: stcy %r2, 4096(%r11)
+; CHECK-FP: br %r14
+  %region1 = alloca [3920 x i8], align 8
+  %region2 = alloca [3920 x i8], align 8
+  %ptr1 = getelementptr inbounds [3920 x i8]* %region1, i64 0, i64 8
+  %ptr2 = getelementptr inbounds [3920 x i8]* %region2, i64 0, i64 8
+  store volatile i8 %byte, i8 *%ptr1
+  store volatile i8 %byte, i8 *%ptr2
+  ret void
+}
+
+; Test the last offset that is in range of the 20-bit form.
+;
+; The last in-range doubleword offset is 524280, so by the same reasoning
+; as above, we need to allocate objects of 524280 - 168 = 524112 bytes.
+define void @f3(i8 %byte) {
+; CHECK-NOFP: f3:
+; CHECK-NOFP: stcy %r2, 524287(%r15)
+; CHECK-NOFP: br %r14
+;
+; CHECK-FP: f3:
+; CHECK-FP: stcy %r2, 524287(%r11)
+; CHECK-FP: br %r14
+  %region1 = alloca [524112 x i8], align 8
+  %region2 = alloca [524112 x i8], align 8
+  %ptr1 = getelementptr inbounds [524112 x i8]* %region1, i64 0, i64 7
+  %ptr2 = getelementptr inbounds [524112 x i8]* %region2, i64 0, i64 7
+  store volatile i8 %byte, i8 *%ptr1
+  store volatile i8 %byte, i8 *%ptr2
+  ret void
+}
+
+; Test the first out-of-range offset.  The offset is also out of LAY's range,
+; but STC has an index register, so expect a constant load whose result is
+; used as the index rather than being added to the base separately.
+define void @f4(i8 %byte) {
+; CHECK-NOFP: f4:
+; CHECK-NOFP: llilh %r1, 8
+; CHECK-NOFP: stc %r2, 0(%r1,%r15)
+; CHECK-NOFP: br %r14
+;
+; CHECK-FP: f4:
+; CHECK-FP: llilh %r1, 8
+; CHECK-FP: stc %r2, 0(%r1,%r11)
+; CHECK-FP: br %r14
+  %region1 = alloca [524112 x i8], align 8
+  %region2 = alloca [524112 x i8], align 8
+  %ptr1 = getelementptr inbounds [524112 x i8]* %region1, i64 0, i64 8
+  %ptr2 = getelementptr inbounds [524112 x i8]* %region2, i64 0, i64 8
+  store volatile i8 %byte, i8 *%ptr1
+  store volatile i8 %byte, i8 *%ptr2
+  ret void
+}
+
+; Add 4095 to the previous offset, to test the other end of the STC range.
+; The instruction will actually be STCY before frame lowering.
+define void @f5(i8 %byte) {
+; CHECK-NOFP: f5:
+; CHECK-NOFP: llilh %r1, 8
+; CHECK-NOFP: stc %r2, 4095(%r1,%r15)
+; CHECK-NOFP: br %r14
+;
+; CHECK-FP: f5:
+; CHECK-FP: llilh %r1, 8
+; CHECK-FP: stc %r2, 4095(%r1,%r11)
+; CHECK-FP: br %r14
+  %region1 = alloca [524112 x i8], align 8
+  %region2 = alloca [524112 x i8], align 8
+  %ptr1 = getelementptr inbounds [524112 x i8]* %region1, i64 0, i64 4103
+  %ptr2 = getelementptr inbounds [524112 x i8]* %region2, i64 0, i64 4103
+  store volatile i8 %byte, i8 *%ptr1
+  store volatile i8 %byte, i8 *%ptr2
+  ret void
+}
+
+; Test the next offset after that, which uses STCY instead of STC.
+define void @f6(i8 %byte) {
+; CHECK-NOFP: f6:
+; CHECK-NOFP: llilh %r1, 8
+; CHECK-NOFP: stcy %r2, 4096(%r1,%r15)
+; CHECK-NOFP: br %r14
+;
+; CHECK-FP: f6:
+; CHECK-FP: llilh %r1, 8
+; CHECK-FP: stcy %r2, 4096(%r1,%r11)
+; CHECK-FP: br %r14
+  %region1 = alloca [524112 x i8], align 8
+  %region2 = alloca [524112 x i8], align 8
+  %ptr1 = getelementptr inbounds [524112 x i8]* %region1, i64 0, i64 4104
+  %ptr2 = getelementptr inbounds [524112 x i8]* %region2, i64 0, i64 4104
+  store volatile i8 %byte, i8 *%ptr1
+  store volatile i8 %byte, i8 *%ptr2
+  ret void
+}
+
+; Now try an offset of 524287 from the start of the object, with the
+; object being at offset 1048576 (1 << 20).  The backend prefers to create
+; anchors 0x10000 bytes apart, so that the high part can be loaded using
+; LLILH while still using STC in more cases than 0x40000 anchors would.
+define void @f7(i8 %byte) {
+; CHECK-NOFP: f7:
+; CHECK-NOFP: llilh %r1, 23
+; CHECK-NOFP: stcy %r2, 65535(%r1,%r15)
+; CHECK-NOFP: br %r14
+;
+; CHECK-FP: f7:
+; CHECK-FP: llilh %r1, 23
+; CHECK-FP: stcy %r2, 65535(%r1,%r11)
+; CHECK-FP: br %r14
+  %region1 = alloca [1048408 x i8], align 8
+  %region2 = alloca [1048408 x i8], align 8
+  %ptr1 = getelementptr inbounds [1048408 x i8]* %region1, i64 0, i64 524287
+  %ptr2 = getelementptr inbounds [1048408 x i8]* %region2, i64 0, i64 524287
+  store volatile i8 %byte, i8 *%ptr1
+  store volatile i8 %byte, i8 *%ptr2
+  ret void
+}
+
+; Keep the object-relative offset the same but bump the size of the
+; objects by one doubleword.
+define void @f8(i8 %byte) {
+; CHECK-NOFP: f8:
+; CHECK-NOFP: llilh %r1, 24
+; CHECK-NOFP: stc %r2, 7(%r1,%r15)
+; CHECK-NOFP: br %r14
+;
+; CHECK-FP: f8:
+; CHECK-FP: llilh %r1, 24
+; CHECK-FP: stc %r2, 7(%r1,%r11)
+; CHECK-FP: br %r14
+  %region1 = alloca [1048416 x i8], align 8
+  %region2 = alloca [1048416 x i8], align 8
+  %ptr1 = getelementptr inbounds [1048416 x i8]* %region1, i64 0, i64 524287
+  %ptr2 = getelementptr inbounds [1048416 x i8]* %region2, i64 0, i64 524287
+  store volatile i8 %byte, i8 *%ptr1
+  store volatile i8 %byte, i8 *%ptr2
+  ret void
+}
+
+; Check a case where the original displacement is out of range.  The backend
+; should force separate address logic from the outset.  We don't yet do any
+; kind of anchor optimization, so there should be no offset on the STC itself.
+;
+; Before frame lowering this is an LA followed by the AGFI seen below.
+; The LA then gets lowered into the LLILH/LA form.  The exact sequence
+; isn't that important though.
+define void @f9(i8 %byte) {
+; CHECK-NOFP: f9:
+; CHECK-NOFP: llilh [[R1:%r[1-5]]], 16
+; CHECK-NOFP: la [[R2:%r[1-5]]], 8([[R1]],%r15)
+; CHECK-NOFP: agfi [[R2]], 524288
+; CHECK-NOFP: stc %r2, 0([[R2]])
+; CHECK-NOFP: br %r14
+;
+; CHECK-FP: f9:
+; CHECK-FP: llilh [[R1:%r[1-5]]], 16
+; CHECK-FP: la [[R2:%r[1-5]]], 8([[R1]],%r11)
+; CHECK-FP: agfi [[R2]], 524288
+; CHECK-FP: stc %r2, 0([[R2]])
+; CHECK-FP: br %r14
+  %region1 = alloca [1048416 x i8], align 8
+  %region2 = alloca [1048416 x i8], align 8
+  %ptr1 = getelementptr inbounds [1048416 x i8]* %region1, i64 0, i64 524288
+  %ptr2 = getelementptr inbounds [1048416 x i8]* %region2, i64 0, i64 524288
+  store volatile i8 %byte, i8 *%ptr1
+  store volatile i8 %byte, i8 *%ptr2
+  ret void
+}
+
+; Repeat f4 in a case that needs the emergency spill slot (because all
+; call-clobbered registers are live and no call-saved ones have been
+; allocated).
+define void @f10(i32 *%vptr, i8 %byte) {
+; CHECK-NOFP: f10:
+; CHECK-NOFP: stg [[REGISTER:%r[1-9][0-4]?]], 160(%r15)
+; CHECK-NOFP: llilh [[REGISTER]], 8
+; CHECK-NOFP: stc %r3, 0([[REGISTER]],%r15)
+; CHECK-NOFP: lg [[REGISTER]], 160(%r15)
+; CHECK-NOFP: br %r14
+;
+; CHECK-FP: f10:
+; CHECK-FP: stg [[REGISTER:%r[1-9][0-4]?]], 160(%r11)
+; CHECK-FP: llilh [[REGISTER]], 8
+; CHECK-FP: stc %r3, 0([[REGISTER]],%r11)
+; CHECK-FP: lg [[REGISTER]], 160(%r11)
+; CHECK-FP: br %r14
+  %i0 = load volatile i32 *%vptr
+  %i1 = load volatile i32 *%vptr
+  %i4 = load volatile i32 *%vptr
+  %i5 = load volatile i32 *%vptr
+  %region1 = alloca [524112 x i8], align 8
+  %region2 = alloca [524112 x i8], align 8
+  %ptr1 = getelementptr inbounds [524112 x i8]* %region1, i64 0, i64 8
+  %ptr2 = getelementptr inbounds [524112 x i8]* %region2, i64 0, i64 8
+  store volatile i8 %byte, i8 *%ptr1
+  store volatile i8 %byte, i8 *%ptr2
+  store volatile i32 %i0, i32 *%vptr
+  store volatile i32 %i1, i32 *%vptr
+  store volatile i32 %i4, i32 *%vptr
+  store volatile i32 %i5, i32 *%vptr
+  ret void
+}
+
+; And again with maximum register pressure.  The only spill slot that the
+; NOFP case needs is the emergency one, so the offsets are the same as for f4.
+; However, the FP case uses %r11 as the frame pointer and must therefore
+; spill a second register.  This leads to an extra displacement of 8.
+define void @f11(i32 *%vptr, i8 %byte) {
+; CHECK-NOFP: f11:
+; CHECK-NOFP: stmg %r6, %r15,
+; CHECK-NOFP: stg [[REGISTER:%r[1-9][0-4]?]], 160(%r15)
+; CHECK-NOFP: llilh [[REGISTER]], 8
+; CHECK-NOFP: stc %r3, 0([[REGISTER]],%r15)
+; CHECK-NOFP: lg [[REGISTER]], 160(%r15)
+; CHECK-NOFP: lmg %r6, %r15,
+; CHECK-NOFP: br %r14
+;
+; CHECK-FP: f11:
+; CHECK-FP: stmg %r6, %r15,
+; CHECK-FP: stg [[REGISTER:%r[1-9][0-4]?]], 160(%r11)
+; CHECK-FP: llilh [[REGISTER]], 8
+; CHECK-FP: stc %r3, 8([[REGISTER]],%r11)
+; CHECK-FP: lg [[REGISTER]], 160(%r11)
+; CHECK-FP: lmg %r6, %r15,
+; CHECK-FP: br %r14
+  %i0 = load volatile i32 *%vptr
+  %i1 = load volatile i32 *%vptr
+  %i4 = load volatile i32 *%vptr
+  %i5 = load volatile i32 *%vptr
+  %i6 = load volatile i32 *%vptr
+  %i7 = load volatile i32 *%vptr
+  %i8 = load volatile i32 *%vptr
+  %i9 = load volatile i32 *%vptr
+  %i10 = load volatile i32 *%vptr
+  %i11 = load volatile i32 *%vptr
+  %i12 = load volatile i32 *%vptr
+  %i13 = load volatile i32 *%vptr
+  %i14 = load volatile i32 *%vptr
+  %region1 = alloca [524112 x i8], align 8
+  %region2 = alloca [524112 x i8], align 8
+  %ptr1 = getelementptr inbounds [524112 x i8]* %region1, i64 0, i64 8
+  %ptr2 = getelementptr inbounds [524112 x i8]* %region2, i64 0, i64 8
+  store volatile i8 %byte, i8 *%ptr1
+  store volatile i8 %byte, i8 *%ptr2
+  store volatile i32 %i0, i32 *%vptr
+  store volatile i32 %i1, i32 *%vptr
+  store volatile i32 %i4, i32 *%vptr
+  store volatile i32 %i5, i32 *%vptr
+  store volatile i32 %i6, i32 *%vptr
+  store volatile i32 %i7, i32 *%vptr
+  store volatile i32 %i8, i32 *%vptr
+  store volatile i32 %i9, i32 *%vptr
+  store volatile i32 %i10, i32 *%vptr
+  store volatile i32 %i11, i32 *%vptr
+  store volatile i32 %i12, i32 *%vptr
+  store volatile i32 %i13, i32 *%vptr
+  store volatile i32 %i14, i32 *%vptr
+  ret void
+}
+
+; Repeat f4 in a case where the index register is already occupied.
+define void @f12(i8 %byte, i64 %index) {
+; CHECK-NOFP: f12:
+; CHECK-NOFP: llilh %r1, 8
+; CHECK-NOFP: agr %r1, %r15
+; CHECK-NOFP: stc %r2, 0(%r3,%r1)
+; CHECK-NOFP: br %r14
+;
+; CHECK-FP: f12:
+; CHECK-FP: llilh %r1, 8
+; CHECK-FP: agr %r1, %r11
+; CHECK-FP: stc %r2, 0(%r3,%r1)
+; CHECK-FP: br %r14
+  %region1 = alloca [524112 x i8], align 8
+  %region2 = alloca [524112 x i8], align 8
+  %index1 = add i64 %index, 8
+  %ptr1 = getelementptr inbounds [524112 x i8]* %region1, i64 0, i64 %index1
+  %ptr2 = getelementptr inbounds [524112 x i8]* %region2, i64 0, i64 %index1
+  store volatile i8 %byte, i8 *%ptr1
+  store volatile i8 %byte, i8 *%ptr2
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/frame-17.ll b/test/CodeGen/SystemZ/frame-17.ll
new file mode 100644
index 0000000..613d9f8
--- /dev/null
+++ b/test/CodeGen/SystemZ/frame-17.ll
@@ -0,0 +1,177 @@
+; Test spilling of FPRs.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; We need to save and restore 8 of the 16 FPRs and allocate an additional
+; 4-byte spill slot, rounded to 8 bytes.  The frame size should be exactly
+; 160 + 8 * 8 + 8 = 232.
+define void @f1(float *%ptr) {
+; CHECK: f1:
+; CHECK: aghi %r15, -232
+; CHECK: std %f8, 224(%r15)
+; CHECK: std %f9, 216(%r15)
+; CHECK: std %f10, 208(%r15)
+; CHECK: std %f11, 200(%r15)
+; CHECK: std %f12, 192(%r15)
+; CHECK: std %f13, 184(%r15)
+; CHECK: std %f14, 176(%r15)
+; CHECK: std %f15, 168(%r15)
+; CHECK-NOT: 160(%r15)
+; CHECK: ste [[REGISTER:%f[0-9]+]], 164(%r15)
+; CHECK-NOT: 160(%r15)
+; CHECK: le [[REGISTER]], 164(%r15)
+; CHECK-NOT: 160(%r15)
+; CHECK: ld %f8, 224(%r15)
+; CHECK: ld %f9, 216(%r15)
+; CHECK: ld %f10, 208(%r15)
+; CHECK: ld %f11, 200(%r15)
+; CHECK: ld %f12, 192(%r15)
+; CHECK: ld %f13, 184(%r15)
+; CHECK: ld %f14, 176(%r15)
+; CHECK: ld %f15, 168(%r15)
+; CHECK: aghi %r15, 232
+; CHECK: br %r14
+  %l0 = load volatile float *%ptr
+  %l1 = load volatile float *%ptr
+  %l2 = load volatile float *%ptr
+  %l3 = load volatile float *%ptr
+  %l4 = load volatile float *%ptr
+  %l5 = load volatile float *%ptr
+  %l6 = load volatile float *%ptr
+  %l7 = load volatile float *%ptr
+  %l8 = load volatile float *%ptr
+  %l9 = load volatile float *%ptr
+  %l10 = load volatile float *%ptr
+  %l11 = load volatile float *%ptr
+  %l12 = load volatile float *%ptr
+  %l13 = load volatile float *%ptr
+  %l14 = load volatile float *%ptr
+  %l15 = load volatile float *%ptr
+  %lx = load volatile float *%ptr
+  store volatile float %lx, float *%ptr
+  store volatile float %l15, float *%ptr
+  store volatile float %l14, float *%ptr
+  store volatile float %l13, float *%ptr
+  store volatile float %l12, float *%ptr
+  store volatile float %l11, float *%ptr
+  store volatile float %l10, float *%ptr
+  store volatile float %l9, float *%ptr
+  store volatile float %l8, float *%ptr
+  store volatile float %l7, float *%ptr
+  store volatile float %l6, float *%ptr
+  store volatile float %l5, float *%ptr
+  store volatile float %l4, float *%ptr
+  store volatile float %l3, float *%ptr
+  store volatile float %l2, float *%ptr
+  store volatile float %l1, float *%ptr
+  store volatile float %l0, float *%ptr
+  ret void
+}
+
+; Same for doubles, except that the full spill slot is used.
+define void @f2(double *%ptr) {
+; CHECK: f2:
+; CHECK: aghi %r15, -232
+; CHECK: std %f8, 224(%r15)
+; CHECK: std %f9, 216(%r15)
+; CHECK: std %f10, 208(%r15)
+; CHECK: std %f11, 200(%r15)
+; CHECK: std %f12, 192(%r15)
+; CHECK: std %f13, 184(%r15)
+; CHECK: std %f14, 176(%r15)
+; CHECK: std %f15, 168(%r15)
+; CHECK: std [[REGISTER:%f[0-9]+]], 160(%r15)
+; CHECK: ld [[REGISTER]], 160(%r15)
+; CHECK: ld %f8, 224(%r15)
+; CHECK: ld %f9, 216(%r15)
+; CHECK: ld %f10, 208(%r15)
+; CHECK: ld %f11, 200(%r15)
+; CHECK: ld %f12, 192(%r15)
+; CHECK: ld %f13, 184(%r15)
+; CHECK: ld %f14, 176(%r15)
+; CHECK: ld %f15, 168(%r15)
+; CHECK: aghi %r15, 232
+; CHECK: br %r14
+  %l0 = load volatile double *%ptr
+  %l1 = load volatile double *%ptr
+  %l2 = load volatile double *%ptr
+  %l3 = load volatile double *%ptr
+  %l4 = load volatile double *%ptr
+  %l5 = load volatile double *%ptr
+  %l6 = load volatile double *%ptr
+  %l7 = load volatile double *%ptr
+  %l8 = load volatile double *%ptr
+  %l9 = load volatile double *%ptr
+  %l10 = load volatile double *%ptr
+  %l11 = load volatile double *%ptr
+  %l12 = load volatile double *%ptr
+  %l13 = load volatile double *%ptr
+  %l14 = load volatile double *%ptr
+  %l15 = load volatile double *%ptr
+  %lx = load volatile double *%ptr
+  store volatile double %lx, double *%ptr
+  store volatile double %l15, double *%ptr
+  store volatile double %l14, double *%ptr
+  store volatile double %l13, double *%ptr
+  store volatile double %l12, double *%ptr
+  store volatile double %l11, double *%ptr
+  store volatile double %l10, double *%ptr
+  store volatile double %l9, double *%ptr
+  store volatile double %l8, double *%ptr
+  store volatile double %l7, double *%ptr
+  store volatile double %l6, double *%ptr
+  store volatile double %l5, double *%ptr
+  store volatile double %l4, double *%ptr
+  store volatile double %l3, double *%ptr
+  store volatile double %l2, double *%ptr
+  store volatile double %l1, double *%ptr
+  store volatile double %l0, double *%ptr
+  ret void
+}
+
+; The long double case needs a 16-byte spill slot.
+define void @f3(fp128 *%ptr) {
+; CHECK: f3:
+; CHECK: aghi %r15, -240
+; CHECK: std %f8, 232(%r15)
+; CHECK: std %f9, 224(%r15)
+; CHECK: std %f10, 216(%r15)
+; CHECK: std %f11, 208(%r15)
+; CHECK: std %f12, 200(%r15)
+; CHECK: std %f13, 192(%r15)
+; CHECK: std %f14, 184(%r15)
+; CHECK: std %f15, 176(%r15)
+; CHECK: std [[REGISTER1:%f[0-9]+]], 160(%r15)
+; CHECK: std [[REGISTER2:%f[0-9]+]], 168(%r15)
+; CHECK: ld [[REGISTER1]], 160(%r15)
+; CHECK: ld [[REGISTER2]], 168(%r15)
+; CHECK: ld %f8, 232(%r15)
+; CHECK: ld %f9, 224(%r15)
+; CHECK: ld %f10, 216(%r15)
+; CHECK: ld %f11, 208(%r15)
+; CHECK: ld %f12, 200(%r15)
+; CHECK: ld %f13, 192(%r15)
+; CHECK: ld %f14, 184(%r15)
+; CHECK: ld %f15, 176(%r15)
+; CHECK: aghi %r15, 240
+; CHECK: br %r14
+  %l0 = load volatile fp128 *%ptr
+  %l1 = load volatile fp128 *%ptr
+  %l4 = load volatile fp128 *%ptr
+  %l5 = load volatile fp128 *%ptr
+  %l8 = load volatile fp128 *%ptr
+  %l9 = load volatile fp128 *%ptr
+  %l12 = load volatile fp128 *%ptr
+  %l13 = load volatile fp128 *%ptr
+  %lx = load volatile fp128 *%ptr
+  store volatile fp128 %lx, fp128 *%ptr
+  store volatile fp128 %l13, fp128 *%ptr
+  store volatile fp128 %l12, fp128 *%ptr
+  store volatile fp128 %l9, fp128 *%ptr
+  store volatile fp128 %l8, fp128 *%ptr
+  store volatile fp128 %l5, fp128 *%ptr
+  store volatile fp128 %l4, fp128 *%ptr
+  store volatile fp128 %l1, fp128 *%ptr
+  store volatile fp128 %l0, fp128 *%ptr
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/frame-18.ll b/test/CodeGen/SystemZ/frame-18.ll
new file mode 100644
index 0000000..a9977ed
--- /dev/null
+++ b/test/CodeGen/SystemZ/frame-18.ll
@@ -0,0 +1,91 @@
+; Test spilling of GPRs.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; We need to allocate a 4-byte spill slot, rounded to 8 bytes.  The frame
+; size should be exactly 160 + 8 = 168.
+define void @f1(i32 *%ptr) {
+; CHECK: f1:
+; CHECK: stmg %r6, %r15, 48(%r15)
+; CHECK: aghi %r15, -168
+; CHECK-NOT: 160(%r15)
+; CHECK: st [[REGISTER:%r[0-9]+]], 164(%r15)
+; CHECK-NOT: 160(%r15)
+; CHECK: l [[REGISTER]], 164(%r15)
+; CHECK-NOT: 160(%r15)
+; CHECK: lmg %r6, %r15, 216(%r15)
+; CHECK: br %r14
+  %l0 = load volatile i32 *%ptr
+  %l1 = load volatile i32 *%ptr
+  %l3 = load volatile i32 *%ptr
+  %l4 = load volatile i32 *%ptr
+  %l5 = load volatile i32 *%ptr
+  %l6 = load volatile i32 *%ptr
+  %l7 = load volatile i32 *%ptr
+  %l8 = load volatile i32 *%ptr
+  %l9 = load volatile i32 *%ptr
+  %l10 = load volatile i32 *%ptr
+  %l11 = load volatile i32 *%ptr
+  %l12 = load volatile i32 *%ptr
+  %l13 = load volatile i32 *%ptr
+  %l14 = load volatile i32 *%ptr
+  %lx = load volatile i32 *%ptr
+  store volatile i32 %lx, i32 *%ptr
+  store volatile i32 %l14, i32 *%ptr
+  store volatile i32 %l13, i32 *%ptr
+  store volatile i32 %l12, i32 *%ptr
+  store volatile i32 %l11, i32 *%ptr
+  store volatile i32 %l10, i32 *%ptr
+  store volatile i32 %l9, i32 *%ptr
+  store volatile i32 %l8, i32 *%ptr
+  store volatile i32 %l7, i32 *%ptr
+  store volatile i32 %l6, i32 *%ptr
+  store volatile i32 %l5, i32 *%ptr
+  store volatile i32 %l4, i32 *%ptr
+  store volatile i32 %l3, i32 *%ptr
+  store volatile i32 %l1, i32 *%ptr
+  store volatile i32 %l0, i32 *%ptr
+  ret void
+}
+
+; Same for i64, except that the full spill slot is used.
+define void @f2(i64 *%ptr) {
+; CHECK: f2:
+; CHECK: stmg %r6, %r15, 48(%r15)
+; CHECK: aghi %r15, -168
+; CHECK: stg [[REGISTER:%r[0-9]+]], 160(%r15)
+; CHECK: lg [[REGISTER]], 160(%r15)
+; CHECK: lmg %r6, %r15, 216(%r15)
+; CHECK: br %r14
+  %l0 = load volatile i64 *%ptr
+  %l1 = load volatile i64 *%ptr
+  %l3 = load volatile i64 *%ptr
+  %l4 = load volatile i64 *%ptr
+  %l5 = load volatile i64 *%ptr
+  %l6 = load volatile i64 *%ptr
+  %l7 = load volatile i64 *%ptr
+  %l8 = load volatile i64 *%ptr
+  %l9 = load volatile i64 *%ptr
+  %l10 = load volatile i64 *%ptr
+  %l11 = load volatile i64 *%ptr
+  %l12 = load volatile i64 *%ptr
+  %l13 = load volatile i64 *%ptr
+  %l14 = load volatile i64 *%ptr
+  %lx = load volatile i64 *%ptr
+  store volatile i64 %lx, i64 *%ptr
+  store volatile i64 %l14, i64 *%ptr
+  store volatile i64 %l13, i64 *%ptr
+  store volatile i64 %l12, i64 *%ptr
+  store volatile i64 %l11, i64 *%ptr
+  store volatile i64 %l10, i64 *%ptr
+  store volatile i64 %l9, i64 *%ptr
+  store volatile i64 %l8, i64 *%ptr
+  store volatile i64 %l7, i64 *%ptr
+  store volatile i64 %l6, i64 *%ptr
+  store volatile i64 %l5, i64 *%ptr
+  store volatile i64 %l4, i64 *%ptr
+  store volatile i64 %l3, i64 *%ptr
+  store volatile i64 %l1, i64 *%ptr
+  store volatile i64 %l0, i64 *%ptr
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/insert-01.ll b/test/CodeGen/SystemZ/insert-01.ll
new file mode 100644
index 0000000..98ddf56
--- /dev/null
+++ b/test/CodeGen/SystemZ/insert-01.ll
@@ -0,0 +1,230 @@
+; Test insertions of memory into the low byte of an i32.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check a plain insertion with (or (and ... -0x100) (zext (load ....))).
+; The whole sequence can be performed by IC.
+define i32 @f1(i32 %orig, i8 *%ptr) {
+; CHECK: f1:
+; CHECK-NOT: ni
+; CHECK: ic %r2, 0(%r3)
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ptr2 = zext i8 %val to i32
+  %ptr1 = and i32 %orig, -256
+  %or = or i32 %ptr1, %ptr2
+  ret i32 %or
+}
+
+; Like f1, but with the operands reversed.
+define i32 @f2(i32 %orig, i8 *%ptr) {
+; CHECK: f2:
+; CHECK-NOT: ni
+; CHECK: ic %r2, 0(%r3)
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ptr2 = zext i8 %val to i32
+  %ptr1 = and i32 %orig, -256
+  %or = or i32 %ptr2, %ptr1
+  ret i32 %or
+}
+
+; Check a case where more bits than the low 8 are masked out of the
+; register value.  We can use IC but must keep the original mask.
+define i32 @f3(i32 %orig, i8 *%ptr) {
+; CHECK: f3:
+; CHECK: nill %r2, 65024
+; CHECK: ic %r2, 0(%r3)
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ptr2 = zext i8 %val to i32
+  %ptr1 = and i32 %orig, -512
+  %or = or i32 %ptr1, %ptr2
+  ret i32 %or
+}
+
+; Like f3, but with the operands reversed.
+define i32 @f4(i32 %orig, i8 *%ptr) {
+; CHECK: f4:
+; CHECK: nill %r2, 65024
+; CHECK: ic %r2, 0(%r3)
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ptr2 = zext i8 %val to i32
+  %ptr1 = and i32 %orig, -512
+  %or = or i32 %ptr2, %ptr1
+  ret i32 %or
+}
+
+; Check a case where the low 8 bits are cleared by a shift left.
+define i32 @f5(i32 %orig, i8 *%ptr) {
+; CHECK: f5:
+; CHECK: sll %r2, 8
+; CHECK: ic %r2, 0(%r3)
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ptr2 = zext i8 %val to i32
+  %ptr1 = shl i32 %orig, 8
+  %or = or i32 %ptr1, %ptr2
+  ret i32 %or
+}
+
+; Like f5, but with the operands reversed.
+define i32 @f6(i32 %orig, i8 *%ptr) {
+; CHECK: f6:
+; CHECK: sll %r2, 8
+; CHECK: ic %r2, 0(%r3)
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ptr2 = zext i8 %val to i32
+  %ptr1 = shl i32 %orig, 8
+  %or = or i32 %ptr2, %ptr1
+  ret i32 %or
+}
+
+; Check insertions into a constant.
+define i32 @f7(i32 %orig, i8 *%ptr) {
+; CHECK: f7:
+; CHECK: lhi %r2, 256
+; CHECK: ic %r2, 0(%r3)
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ptr2 = zext i8 %val to i32
+  %or = or i32 %ptr2, 256
+  ret i32 %or
+}
+
+; Like f7, but with the operands reversed.
+define i32 @f8(i32 %orig, i8 *%ptr) {
+; CHECK: f8:
+; CHECK: lhi %r2, 256
+; CHECK: ic %r2, 0(%r3)
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ptr2 = zext i8 %val to i32
+  %or = or i32 256, %ptr2
+  ret i32 %or
+}
+
+; Check the high end of the IC range.
+define i32 @f9(i32 %orig, i8 *%src) {
+; CHECK: f9:
+; CHECK: ic %r2, 4095(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 4095
+  %val = load i8 *%ptr
+  %src2 = zext i8 %val to i32
+  %src1 = and i32 %orig, -256
+  %or = or i32 %src2, %src1
+  ret i32 %or
+}
+
+; Check the next byte up, which should use ICY instead of IC.
+define i32 @f10(i32 %orig, i8 *%src) {
+; CHECK: f10:
+; CHECK: icy %r2, 4096(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 4096
+  %val = load i8 *%ptr
+  %src2 = zext i8 %val to i32
+  %src1 = and i32 %orig, -256
+  %or = or i32 %src2, %src1
+  ret i32 %or
+}
+
+; Check the high end of the ICY range.
+define i32 @f11(i32 %orig, i8 *%src) {
+; CHECK: f11:
+; CHECK: icy %r2, 524287(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 524287
+  %val = load i8 *%ptr
+  %src2 = zext i8 %val to i32
+  %src1 = and i32 %orig, -256
+  %or = or i32 %src2, %src1
+  ret i32 %or
+}
+
+; Check the next byte up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i32 @f12(i32 %orig, i8 *%src) {
+; CHECK: f12:
+; CHECK: agfi %r3, 524288
+; CHECK: ic %r2, 0(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 524288
+  %val = load i8 *%ptr
+  %src2 = zext i8 %val to i32
+  %src1 = and i32 %orig, -256
+  %or = or i32 %src2, %src1
+  ret i32 %or
+}
+
+; Check the high end of the negative ICY range.
+define i32 @f13(i32 %orig, i8 *%src) {
+; CHECK: f13:
+; CHECK: icy %r2, -1(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 -1
+  %val = load i8 *%ptr
+  %src2 = zext i8 %val to i32
+  %src1 = and i32 %orig, -256
+  %or = or i32 %src2, %src1
+  ret i32 %or
+}
+
+; Check the low end of the ICY range.
+define i32 @f14(i32 %orig, i8 *%src) {
+; CHECK: f14:
+; CHECK: icy %r2, -524288(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 -524288
+  %val = load i8 *%ptr
+  %src2 = zext i8 %val to i32
+  %src1 = and i32 %orig, -256
+  %or = or i32 %src2, %src1
+  ret i32 %or
+}
+
+; Check the next byte down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i32 @f15(i32 %orig, i8 *%src) {
+; CHECK: f15:
+; CHECK: agfi %r3, -524289
+; CHECK: ic %r2, 0(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 -524289
+  %val = load i8 *%ptr
+  %src2 = zext i8 %val to i32
+  %src1 = and i32 %orig, -256
+  %or = or i32 %src2, %src1
+  ret i32 %or
+}
+
+; Check that IC allows an index.
+define i32 @f16(i32 %orig, i8 *%src, i64 %index) {
+; CHECK: f16:
+; CHECK: ic %r2, 4095({{%r4,%r3|%r3,%r4}})
+; CHECK: br %r14
+  %ptr1 = getelementptr i8 *%src, i64 %index
+  %ptr2 = getelementptr i8 *%ptr1, i64 4095
+  %val = load i8 *%ptr2
+  %src2 = zext i8 %val to i32
+  %src1 = and i32 %orig, -256
+  %or = or i32 %src2, %src1
+  ret i32 %or
+}
+
+; Check that ICY allows an index.
+define i32 @f17(i32 %orig, i8 *%src, i64 %index) {
+; CHECK: f17:
+; CHECK: icy %r2, 4096({{%r4,%r3|%r3,%r4}})
+; CHECK: br %r14
+  %ptr1 = getelementptr i8 *%src, i64 %index
+  %ptr2 = getelementptr i8 *%ptr1, i64 4096
+  %val = load i8 *%ptr2
+  %src2 = zext i8 %val to i32
+  %src1 = and i32 %orig, -256
+  %or = or i32 %src2, %src1
+  ret i32 %or
+}
diff --git a/test/CodeGen/SystemZ/insert-02.ll b/test/CodeGen/SystemZ/insert-02.ll
new file mode 100644
index 0000000..471889d
--- /dev/null
+++ b/test/CodeGen/SystemZ/insert-02.ll
@@ -0,0 +1,230 @@
+; Test insertions of memory into the low byte of an i64.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check a plain insertion with (or (and ... -0x100) (zext (load ....))).
+; The whole sequence can be performed by IC.
+define i64 @f1(i64 %orig, i8 *%ptr) {
+; CHECK: f1:
+; CHECK-NOT: ni
+; CHECK: ic %r2, 0(%r3)
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ptr2 = zext i8 %val to i64
+  %ptr1 = and i64 %orig, -256
+  %or = or i64 %ptr1, %ptr2
+  ret i64 %or
+}
+
+; Like f1, but with the operands reversed.
+define i64 @f2(i64 %orig, i8 *%ptr) {
+; CHECK: f2:
+; CHECK-NOT: ni
+; CHECK: ic %r2, 0(%r3)
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ptr2 = zext i8 %val to i64
+  %ptr1 = and i64 %orig, -256
+  %or = or i64 %ptr2, %ptr1
+  ret i64 %or
+}
+
+; Check a case where more bits than the low 8 are masked out of the
+; register value.  We can use IC but must keep the original mask.
+define i64 @f3(i64 %orig, i8 *%ptr) {
+; CHECK: f3:
+; CHECK: nill %r2, 65024
+; CHECK: ic %r2, 0(%r3)
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ptr2 = zext i8 %val to i64
+  %ptr1 = and i64 %orig, -512
+  %or = or i64 %ptr1, %ptr2
+  ret i64 %or
+}
+
+; Like f3, but with the operands reversed.
+define i64 @f4(i64 %orig, i8 *%ptr) {
+; CHECK: f4:
+; CHECK: nill %r2, 65024
+; CHECK: ic %r2, 0(%r3)
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ptr2 = zext i8 %val to i64
+  %ptr1 = and i64 %orig, -512
+  %or = or i64 %ptr2, %ptr1
+  ret i64 %or
+}
+
+; Check a case where the low 8 bits are cleared by a shift left.
+define i64 @f5(i64 %orig, i8 *%ptr) {
+; CHECK: f5:
+; CHECK: sllg %r2, %r2, 8
+; CHECK: ic %r2, 0(%r3)
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ptr2 = zext i8 %val to i64
+  %ptr1 = shl i64 %orig, 8
+  %or = or i64 %ptr1, %ptr2
+  ret i64 %or
+}
+
+; Like f5, but with the operands reversed.
+define i64 @f6(i64 %orig, i8 *%ptr) {
+; CHECK: f6:
+; CHECK: sllg %r2, %r2, 8
+; CHECK: ic %r2, 0(%r3)
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ptr2 = zext i8 %val to i64
+  %ptr1 = shl i64 %orig, 8
+  %or = or i64 %ptr2, %ptr1
+  ret i64 %or
+}
+
+; Check insertions into a constant.
+define i64 @f7(i64 %orig, i8 *%ptr) {
+; CHECK: f7:
+; CHECK: lghi %r2, 256
+; CHECK: ic %r2, 0(%r3)
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ptr2 = zext i8 %val to i64
+  %or = or i64 %ptr2, 256
+  ret i64 %or
+}
+
+; Like f7, but with the operands reversed.
+define i64 @f8(i64 %orig, i8 *%ptr) {
+; CHECK: f8:
+; CHECK: lghi %r2, 256
+; CHECK: ic %r2, 0(%r3)
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ptr2 = zext i8 %val to i64
+  %or = or i64 256, %ptr2
+  ret i64 %or
+}
+
+; Check the high end of the IC range.
+define i64 @f9(i64 %orig, i8 *%src) {
+; CHECK: f9:
+; CHECK: ic %r2, 4095(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 4095
+  %val = load i8 *%ptr
+  %src2 = zext i8 %val to i64
+  %src1 = and i64 %orig, -256
+  %or = or i64 %src2, %src1
+  ret i64 %or
+}
+
+; Check the next byte up, which should use ICY instead of IC.
+define i64 @f10(i64 %orig, i8 *%src) {
+; CHECK: f10:
+; CHECK: icy %r2, 4096(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 4096
+  %val = load i8 *%ptr
+  %src2 = zext i8 %val to i64
+  %src1 = and i64 %orig, -256
+  %or = or i64 %src2, %src1
+  ret i64 %or
+}
+
+; Check the high end of the ICY range.
+define i64 @f11(i64 %orig, i8 *%src) {
+; CHECK: f11:
+; CHECK: icy %r2, 524287(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 524287
+  %val = load i8 *%ptr
+  %src2 = zext i8 %val to i64
+  %src1 = and i64 %orig, -256
+  %or = or i64 %src2, %src1
+  ret i64 %or
+}
+
+; Check the next byte up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f12(i64 %orig, i8 *%src) {
+; CHECK: f12:
+; CHECK: agfi %r3, 524288
+; CHECK: ic %r2, 0(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 524288
+  %val = load i8 *%ptr
+  %src2 = zext i8 %val to i64
+  %src1 = and i64 %orig, -256
+  %or = or i64 %src2, %src1
+  ret i64 %or
+}
+
+; Check the high end of the negative ICY range.
+define i64 @f13(i64 %orig, i8 *%src) {
+; CHECK: f13:
+; CHECK: icy %r2, -1(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 -1
+  %val = load i8 *%ptr
+  %src2 = zext i8 %val to i64
+  %src1 = and i64 %orig, -256
+  %or = or i64 %src2, %src1
+  ret i64 %or
+}
+
+; Check the low end of the ICY range.
+define i64 @f14(i64 %orig, i8 *%src) {
+; CHECK: f14:
+; CHECK: icy %r2, -524288(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 -524288
+  %val = load i8 *%ptr
+  %src2 = zext i8 %val to i64
+  %src1 = and i64 %orig, -256
+  %or = or i64 %src2, %src1
+  ret i64 %or
+}
+
+; Check the next byte down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f15(i64 %orig, i8 *%src) {
+; CHECK: f15:
+; CHECK: agfi %r3, -524289
+; CHECK: ic %r2, 0(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 -524289
+  %val = load i8 *%ptr
+  %src2 = zext i8 %val to i64
+  %src1 = and i64 %orig, -256
+  %or = or i64 %src2, %src1
+  ret i64 %or
+}
+
+; Check that IC allows an index.
+define i64 @f16(i64 %orig, i8 *%src, i64 %index) {
+; CHECK: f16:
+; CHECK: ic %r2, 4095({{%r4,%r3|%r3,%r4}})
+; CHECK: br %r14
+  %ptr1 = getelementptr i8 *%src, i64 %index
+  %ptr2 = getelementptr i8 *%ptr1, i64 4095
+  %val = load i8 *%ptr2
+  %src2 = zext i8 %val to i64
+  %src1 = and i64 %orig, -256
+  %or = or i64 %src2, %src1
+  ret i64 %or
+}
+
+; Check that ICY allows an index.
+define i64 @f17(i64 %orig, i8 *%src, i64 %index) {
+; CHECK: f17:
+; CHECK: icy %r2, 4096({{%r4,%r3|%r3,%r4}})
+; CHECK: br %r14
+  %ptr1 = getelementptr i8 *%src, i64 %index
+  %ptr2 = getelementptr i8 *%ptr1, i64 4096
+  %val = load i8 *%ptr2
+  %src2 = zext i8 %val to i64
+  %src1 = and i64 %orig, -256
+  %or = or i64 %src2, %src1
+  ret i64 %or
+}
diff --git a/test/CodeGen/SystemZ/insert-03.ll b/test/CodeGen/SystemZ/insert-03.ll
new file mode 100644
index 0000000..261eabd
--- /dev/null
+++ b/test/CodeGen/SystemZ/insert-03.ll
@@ -0,0 +1,71 @@
+; Test insertions of 16-bit constants into one half of an i32.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check the lowest useful IILL value.  (We use NILL rather than IILL
+; to clear 16 bits.)
+define i32 @f1(i32 %a) {
+; CHECK: f1:
+; CHECK-NOT: ni
+; CHECK: iill %r2, 1
+; CHECK: br %r14
+  %and = and i32 %a, 4294901760
+  %or = or i32 %and, 1
+  ret i32 %or
+}
+
+; Check a middle value.
+define i32 @f2(i32 %a) {
+; CHECK: f2:
+; CHECK-NOT: ni
+; CHECK: iill %r2, 32769
+; CHECK: br %r14
+  %and = and i32 %a, -65536
+  %or = or i32 %and, 32769
+  ret i32 %or
+}
+
+; Check the highest useful IILL value.  (We use OILL rather than IILL
+; to set 16 bits.)
+define i32 @f3(i32 %a) {
+; CHECK: f3:
+; CHECK-NOT: ni
+; CHECK: iill %r2, 65534
+; CHECK: br %r14
+  %and = and i32 %a, 4294901760
+  %or = or i32 %and, 65534
+  ret i32 %or
+}
+
+; Check the lowest useful IILH value.
+define i32 @f4(i32 %a) {
+; CHECK: f4:
+; CHECK-NOT: ni
+; CHECK: iilh %r2, 1
+; CHECK: br %r14
+  %and = and i32 %a, 65535
+  %or = or i32 %and, 65536
+  ret i32 %or
+}
+
+; Check a middle value.
+define i32 @f5(i32 %a) {
+; CHECK: f5:
+; CHECK-NOT: ni
+; CHECK: iilh %r2, 32767
+; CHECK: br %r14
+  %and = and i32 %a, 65535
+  %or = or i32 %and, 2147418112
+  ret i32 %or
+}
+
+; Check the highest useful IILH value.
+define i32 @f6(i32 %a) {
+; CHECK: f6:
+; CHECK-NOT: ni
+; CHECK: iilh %r2, 65534
+; CHECK: br %r14
+  %and = and i32 %a, 65535
+  %or = or i32 %and, -131072
+  ret i32 %or
+}
diff --git a/test/CodeGen/SystemZ/insert-04.ll b/test/CodeGen/SystemZ/insert-04.ll
new file mode 100644
index 0000000..07f88b9
--- /dev/null
+++ b/test/CodeGen/SystemZ/insert-04.ll
@@ -0,0 +1,137 @@
+; Test insertions of 16-bit constants into an i64.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check the lowest useful IILL value.  (We use NILL rather than IILL
+; to clear 16 bits.)
+define i64 @f1(i64 %a) {
+; CHECK: f1:
+; CHECK-NOT: ni
+; CHECK: iill %r2, 1
+; CHECK: br %r14
+  %and = and i64 %a, 18446744073709486080
+  %or = or i64 %and, 1
+  ret i64 %or
+}
+
+; Check a middle value.
+define i64 @f2(i64 %a) {
+; CHECK: f2:
+; CHECK-NOT: ni
+; CHECK: iill %r2, 32769
+; CHECK: br %r14
+  %and = and i64 %a, -65536
+  %or = or i64 %and, 32769
+  ret i64 %or
+}
+
+; Check the highest useful IILL value.  (We use OILL rather than IILL
+; to set 16 bits.)
+define i64 @f3(i64 %a) {
+; CHECK: f3:
+; CHECK-NOT: ni
+; CHECK: iill %r2, 65534
+; CHECK: br %r14
+  %and = and i64 %a, 18446744073709486080
+  %or = or i64 %and, 65534
+  ret i64 %or
+}
+
+; Check the lowest useful IILH value.
+define i64 @f4(i64 %a) {
+; CHECK: f4:
+; CHECK-NOT: ni
+; CHECK: iilh %r2, 1
+; CHECK: br %r14
+  %and = and i64 %a, 18446744069414649855
+  %or = or i64 %and, 65536
+  ret i64 %or
+}
+
+; Check a middle value.
+define i64 @f5(i64 %a) {
+; CHECK: f5:
+; CHECK-NOT: ni
+; CHECK: iilh %r2, 32767
+; CHECK: br %r14
+  %and = and i64 %a, -4294901761
+  %or = or i64 %and, 2147418112
+  ret i64 %or
+}
+
+; Check the highest useful IILH value.
+define i64 @f6(i64 %a) {
+; CHECK: f6:
+; CHECK-NOT: ni
+; CHECK: iilh %r2, 65534
+; CHECK: br %r14
+  %and = and i64 %a, 18446744069414649855
+  %or = or i64 %and, 4294836224
+  ret i64 %or
+}
+
+; Check the lowest useful IIHL value.
+define i64 @f7(i64 %a) {
+; CHECK: f7:
+; CHECK-NOT: ni
+; CHECK: iihl %r2, 1
+; CHECK: br %r14
+  %and = and i64 %a, 18446462603027808255
+  %or = or i64 %and, 4294967296
+  ret i64 %or
+}
+
+; Check a middle value.
+define i64 @f8(i64 %a) {
+; CHECK: f8:
+; CHECK-NOT: ni
+; CHECK: iihl %r2, 32767
+; CHECK: br %r14
+  %and = and i64 %a, -281470681743361
+  %or = or i64 %and, 140733193388032
+  ret i64 %or
+}
+
+; Check the highest useful IIHL value.
+define i64 @f9(i64 %a) {
+; CHECK: f9:
+; CHECK-NOT: ni
+; CHECK: iihl %r2, 65534
+; CHECK: br %r14
+  %and = and i64 %a, 18446462603027808255
+  %or = or i64 %and, 281466386776064
+  ret i64 %or
+}
+
+; Check the lowest useful IIHH value.
+define i64 @f10(i64 %a) {
+; CHECK: f10:
+; CHECK-NOT: ni
+; CHECK: iihh %r2, 1
+; CHECK: br %r14
+  %and = and i64 %a, 281474976710655
+  %or = or i64 %and, 281474976710656
+  ret i64 %or
+}
+
+; Check a middle value.
+define i64 @f11(i64 %a) {
+; CHECK: f11:
+; CHECK-NOT: ni
+; CHECK: iihh %r2, 32767
+; CHECK: br %r14
+  %and = and i64 %a, 281474976710655
+  %or = or i64 %and, 9223090561878065152
+  ret i64 %or
+}
+
+; Check the highest useful IIHH value.
+define i64 @f12(i64 %a) {
+; CHECK: f12:
+; CHECK-NOT: ni
+; CHECK: iihh %r2, 65534
+; CHECK: br %r14
+  %and = and i64 %a, 281474976710655
+  %or = or i64 %and, 18446181123756130304
+  ret i64 %or
+}
diff --git a/test/CodeGen/SystemZ/insert-05.ll b/test/CodeGen/SystemZ/insert-05.ll
new file mode 100644
index 0000000..da51676
--- /dev/null
+++ b/test/CodeGen/SystemZ/insert-05.ll
@@ -0,0 +1,224 @@
+; Test insertions of 32-bit constants into one half of an i64.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Prefer LHI over IILF for signed 16-bit constants.
+define i64 @f1(i64 %a) {
+; CHECK: f1:
+; CHECK-NOT: ni
+; CHECK: lhi %r2, 1
+; CHECK: br %r14
+  %and = and i64 %a, 18446744069414584320
+  %or = or i64 %and, 1
+  ret i64 %or
+}
+
+; Check the high end of the LHI range.
+define i64 @f2(i64 %a) {
+; CHECK: f2:
+; CHECK-NOT: ni
+; CHECK: lhi %r2, 32767
+; CHECK: br %r14
+  %and = and i64 %a, 18446744069414584320
+  %or = or i64 %and, 32767
+  ret i64 %or
+}
+
+; Check the next value up, which should use IILF instead.
+define i64 @f3(i64 %a) {
+; CHECK: f3:
+; CHECK-NOT: ni
+; CHECK: iilf %r2, 32768
+; CHECK: br %r14
+  %and = and i64 %a, 18446744069414584320
+  %or = or i64 %and, 32768
+  ret i64 %or
+}
+
+; Check a value in which the lower 16 bits are clear.
+define i64 @f4(i64 %a) {
+; CHECK: f4:
+; CHECK-NOT: ni
+; CHECK: iilf %r2, 65536
+; CHECK: br %r14
+  %and = and i64 %a, 18446744069414584320
+  %or = or i64 %and, 65536
+  ret i64 %or
+}
+
+; Check the highest useful IILF value (-0x8001).
+define i64 @f5(i64 %a) {
+; CHECK: f5:
+; CHECK-NOT: ni
+; CHECK: iilf %r2, 4294934527
+; CHECK: br %r14
+  %and = and i64 %a, 18446744069414584320
+  %or = or i64 %and, 4294934527
+  ret i64 %or
+}
+
+; Check the next value up, which should use LHI instead.
+define i64 @f6(i64 %a) {
+; CHECK: f6:
+; CHECK-NOT: ni
+; CHECK: lhi %r2, -32768
+; CHECK: br %r14
+  %and = and i64 %a, 18446744069414584320
+  %or = or i64 %and, 4294934528
+  ret i64 %or
+}
+
+; Check the highest useful LHI value.  (We use OILF for -1 instead, although
+; LHI might be better there too.)
+define i64 @f7(i64 %a) {
+; CHECK: f7:
+; CHECK-NOT: ni
+; CHECK: lhi %r2, -2
+; CHECK: br %r14
+  %and = and i64 %a, 18446744069414584320
+  %or = or i64 %and, 4294967294
+  ret i64 %or
+}
+
+; Check that SRLG is still used if some of the high bits are known to be 0
+; (and so might be removed from the mask).
+define i64 @f8(i64 %a) {
+; CHECK: f8:
+; CHECK: srlg %r2, %r2, 1
+; CHECK-NEXT: iilf %r2, 32768
+; CHECK: br %r14
+  %shifted = lshr i64 %a, 1
+  %and = and i64 %shifted, 18446744069414584320
+  %or = or i64 %and, 32768
+  ret i64 %or
+}
+
+; Repeat f8 with addition, which is known to be equivalent to OR in this case.
+define i64 @f9(i64 %a) {
+; CHECK: f9:
+; CHECK: srlg %r2, %r2, 1
+; CHECK-NEXT: iilf %r2, 32768
+; CHECK: br %r14
+  %shifted = lshr i64 %a, 1
+  %and = and i64 %shifted, 18446744069414584320
+  %or = add i64 %and, 32768
+  ret i64 %or
+}
+
+; Repeat f8 with already-zero bits removed from the mask.
+define i64 @f10(i64 %a) {
+; CHECK: f10:
+; CHECK: srlg %r2, %r2, 1
+; CHECK-NEXT: iilf %r2, 32768
+; CHECK: br %r14
+  %shifted = lshr i64 %a, 1
+  %and = and i64 %shifted, 9223372032559808512
+  %or = or i64 %and, 32768
+  ret i64 %or
+}
+
+; Repeat f10 with addition, which is known to be equivalent to OR in this case.
+define i64 @f11(i64 %a) {
+; CHECK: f11:
+; CHECK: srlg %r2, %r2, 1
+; CHECK-NEXT: iilf %r2, 32768
+; CHECK: br %r14
+  %shifted = lshr i64 %a, 1
+  %and = and i64 %shifted, 9223372032559808512
+  %or = add i64 %and, 32768
+  ret i64 %or
+}
+
+; Check the lowest useful IIHF value.
+define i64 @f12(i64 %a) {
+; CHECK: f12:
+; CHECK-NOT: ni
+; CHECK: iihf %r2, 1
+; CHECK: br %r14
+  %and = and i64 %a, 4294967295
+  %or = or i64 %and, 4294967296
+  ret i64 %or
+}
+
+; Check a value in which the lower 16 bits are clear.
+define i64 @f13(i64 %a) {
+; CHECK: f13:
+; CHECK-NOT: ni
+; CHECK: iihf %r2, 2147483648
+; CHECK: br %r14
+  %and = and i64 %a, 4294967295
+  %or = or i64 %and, 9223372036854775808
+  ret i64 %or
+}
+
+; Check the highest useful IIHF value (0xfffffffe).
+define i64 @f14(i64 %a) {
+; CHECK: f14:
+; CHECK-NOT: ni
+; CHECK: iihf %r2, 4294967294
+; CHECK: br %r14
+  %and = and i64 %a, 4294967295
+  %or = or i64 %and, 18446744065119617024
+  ret i64 %or
+}
+
+; Check a case in which some of the low 32 bits are known to be clear,
+; and so could be removed from the AND mask.
+define i64 @f15(i64 %a) {
+; CHECK: f15:
+; CHECK: sllg %r2, %r2, 1
+; CHECK-NEXT: iihf %r2, 1
+; CHECK: br %r14
+  %shifted = shl i64 %a, 1
+  %and = and i64 %shifted, 4294967295
+  %or = or i64 %and, 4294967296
+  ret i64 %or
+}
+
+; Repeat f15 with the zero bits explicitly removed from the mask.
+define i64 @f16(i64 %a) {
+; CHECK: f16:
+; CHECK: sllg %r2, %r2, 1
+; CHECK-NEXT: iihf %r2, 1
+; CHECK: br %r14
+  %shifted = shl i64 %a, 1
+  %and = and i64 %shifted, 4294967294
+  %or = or i64 %and, 4294967296
+  ret i64 %or
+}
+
+; Check concatenation of two i32s.
+define i64 @f17(i32 %a) {
+; CHECK: f17:
+; CHECK: msr %r2, %r2
+; CHECK-NEXT: iihf %r2, 1
+; CHECK: br %r14
+  %mul = mul i32 %a, %a
+  %ext = zext i32 %mul to i64
+  %or = or i64 %ext, 4294967296
+  ret i64 %or
+}
+
+; Repeat f17 with the operands reversed.
+define i64 @f18(i32 %a) {
+; CHECK: f18:
+; CHECK: msr %r2, %r2
+; CHECK-NEXT: iihf %r2, 1
+; CHECK: br %r14
+  %mul = mul i32 %a, %a
+  %ext = zext i32 %mul to i64
+  %or = or i64 4294967296, %ext
+  ret i64 %or
+}
+
+; The truncation here isn't free; we need an explicit zero extension.
+define i64 @f19(i32 %a) {
+; CHECK: f19:
+; CHECK: llgcr %r2, %r2
+; CHECK: oihl %r2, 1
+; CHECK: br %r14
+  %trunc = trunc i32 %a to i8
+  %ext = zext i8 %trunc to i64
+  %or = or i64 %ext, 4294967296
+  ret i64 %or
+}
diff --git a/test/CodeGen/SystemZ/insert-06.ll b/test/CodeGen/SystemZ/insert-06.ll
new file mode 100644
index 0000000..4a13ef4
--- /dev/null
+++ b/test/CodeGen/SystemZ/insert-06.ll
@@ -0,0 +1,167 @@
+; Test insertions of i32s into the low half of an i64.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Insertion of an i32 can be done using LR.
+define i64 @f1(i64 %a, i32 %b) {
+; CHECK: f1:
+; CHECK-NOT: {{%r[23]}}
+; CHECK: lr %r2, %r3
+; CHECK: br %r14
+  %low = zext i32 %b to i64
+  %high = and i64 %a, -4294967296
+  %res = or i64 %high, %low
+  ret i64 %res
+}
+
+; ... and again with the operands reversed.
+define i64 @f2(i64 %a, i32 %b) {
+; CHECK: f2:
+; CHECK-NOT: {{%r[23]}}
+; CHECK: lr %r2, %r3
+; CHECK: br %r14
+  %low = zext i32 %b to i64
+  %high = and i64 %a, -4294967296
+  %res = or i64 %low, %high
+  ret i64 %res
+}
+
+; Like f1, but with "in register" zero extension.
+define i64 @f3(i64 %a, i64 %b) {
+; CHECK: f3:
+; CHECK-NOT: {{%r[23]}}
+; CHECK: lr %r2, %r3
+; CHECK: br %r14
+  %low = and i64 %b, 4294967295
+  %high = and i64 %a, -4294967296
+  %res = or i64 %high, %low
+  ret i64 %res
+}
+
+; ... and again with the operands reversed.
+define i64 @f4(i64 %a, i64 %b) {
+; CHECK: f4:
+; CHECK-NOT: {{%r[23]}}
+; CHECK: lr %r2, %r3
+; CHECK: br %r14
+  %low = and i64 %b, 4294967295
+  %high = and i64 %a, -4294967296
+  %res = or i64 %low, %high
+  ret i64 %res
+}
+
+; Unary operations can be done directly into the low half.
+define i64 @f5(i64 %a, i32 %b) {
+; CHECK: f5:
+; CHECK-NOT: {{%r[23]}}
+; CHECK: lcr %r2, %r3
+; CHECK: br %r14
+  %neg = sub i32 0, %b
+  %low = zext i32 %neg to i64
+  %high = and i64 %a, -4294967296
+  %res = or i64 %high, %low
+  ret i64 %res
+}
+
+; ...likewise three-operand binary operations like RLL.
+define i64 @f6(i64 %a, i32 %b) {
+; CHECK: f6:
+; CHECK-NOT: {{%r[23]}}
+; CHECK: rll %r2, %r3, 1
+; CHECK: br %r14
+  %parta = shl i32 %b, 1
+  %partb = lshr i32 %b, 31
+  %rot = or i32 %parta, %partb
+  %low = zext i32 %rot to i64
+  %high = and i64 %a, -4294967296
+  %res = or i64 %low, %high
+  ret i64 %res
+}
+
+; Loads can be done directly into the low half.  The range of L is checked
+; in the move tests.
+define i64 @f7(i64 %a, i32 *%src) {
+; CHECK: f7:
+; CHECK-NOT: {{%r[23]}}
+; CHECK: l %r2, 0(%r3)
+; CHECK: br %r14
+  %b = load i32 *%src
+  %low = zext i32 %b to i64
+  %high = and i64 %a, -4294967296
+  %res = or i64 %high, %low
+  ret i64 %res
+}
+
+; ...likewise extending loads.
+define i64 @f8(i64 %a, i8 *%src) {
+; CHECK: f8:
+; CHECK-NOT: {{%r[23]}}
+; CHECK: lb %r2, 0(%r3)
+; CHECK: br %r14
+  %byte = load i8 *%src
+  %b = sext i8 %byte to i32
+  %low = zext i32 %b to i64
+  %high = and i64 %a, -4294967296
+  %res = or i64 %high, %low
+  ret i64 %res
+}
+
+; Check a case like f1 in which there is no AND.  We simply know from context
+; that the upper half of one OR operand and the lower half of the other are
+; both clear.
+define i64 @f9(i64 %a, i32 %b) {
+; CHECK: f9:
+; CHECK: sllg %r2, %r2, 32
+; CHECK: lr %r2, %r3
+; CHECK: br %r14
+  %shift = shl i64 %a, 32
+  %low = zext i32 %b to i64
+  %or = or i64 %shift, %low
+  ret i64 %or
+}
+
+; ...and again with the operands reversed.
+define i64 @f10(i64 %a, i32 %b) {
+; CHECK: f10:
+; CHECK: sllg %r2, %r2, 32
+; CHECK: lr %r2, %r3
+; CHECK: br %r14
+  %shift = shl i64 %a, 32
+  %low = zext i32 %b to i64
+  %or = or i64 %low, %shift
+  ret i64 %or
+}
+
+; Like f9, but with "in register" zero extension.
+define i64 @f11(i64 %a, i64 %b) {
+; CHECK: f11:
+; CHECK: lr %r2, %r3
+; CHECK: br %r14
+  %shift = shl i64 %a, 32
+  %low = and i64 %b, 4294967295
+  %or = or i64 %shift, %low
+  ret i64 %or
+}
+
+; ...and again with the operands reversed.
+define i64 @f12(i64 %a, i64 %b) {
+; CHECK: f12:
+; CHECK: lr %r2, %r3
+; CHECK: br %r14
+  %shift = shl i64 %a, 32
+  %low = and i64 %b, 4294967295
+  %or = or i64 %low, %shift
+  ret i64 %or
+}
+
+; Like f9, but for larger shifts than 32.
+define i64 @f13(i64 %a, i32 %b) {
+; CHECK: f13:
+; CHECK: sllg %r2, %r2, 60
+; CHECK: lr %r2, %r3
+; CHECK: br %r14
+  %shift = shl i64 %a, 60
+  %low = zext i32 %b to i64
+  %or = or i64 %shift, %low
+  ret i64 %or
+}
diff --git a/test/CodeGen/SystemZ/int-add-01.ll b/test/CodeGen/SystemZ/int-add-01.ll
new file mode 100644
index 0000000..d12ac22
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-add-01.ll
@@ -0,0 +1,131 @@
+; Test 32-bit addition in which the second operand is a sign-extended
+; i16 memory value.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check the low end of the AH range.
+define i32 @f1(i32 %lhs, i16 *%src) {
+; CHECK: f1:
+; CHECK: ah %r2, 0(%r3)
+; CHECK: br %r14
+  %half = load i16 *%src
+  %rhs = sext i16 %half to i32
+  %res = add i32 %lhs, %rhs
+  ret i32 %res
+}
+
+; Check the high end of the aligned AH range.
+define i32 @f2(i32 %lhs, i16 *%src) {
+; CHECK: f2:
+; CHECK: ah %r2, 4094(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%src, i64 2047
+  %half = load i16 *%ptr
+  %rhs = sext i16 %half to i32
+  %res = add i32 %lhs, %rhs
+  ret i32 %res
+}
+
+; Check the next halfword up, which should use AHY instead of AH.
+define i32 @f3(i32 %lhs, i16 *%src) {
+; CHECK: f3:
+; CHECK: ahy %r2, 4096(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%src, i64 2048
+  %half = load i16 *%ptr
+  %rhs = sext i16 %half to i32
+  %res = add i32 %lhs, %rhs
+  ret i32 %res
+}
+
+; Check the high end of the aligned AHY range.
+define i32 @f4(i32 %lhs, i16 *%src) {
+; CHECK: f4:
+; CHECK: ahy %r2, 524286(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%src, i64 262143
+  %half = load i16 *%ptr
+  %rhs = sext i16 %half to i32
+  %res = add i32 %lhs, %rhs
+  ret i32 %res
+}
+
+; Check the next halfword up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i32 @f5(i32 %lhs, i16 *%src) {
+; CHECK: f5:
+; CHECK: agfi %r3, 524288
+; CHECK: ah %r2, 0(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%src, i64 262144
+  %half = load i16 *%ptr
+  %rhs = sext i16 %half to i32
+  %res = add i32 %lhs, %rhs
+  ret i32 %res
+}
+
+; Check the high end of the negative aligned AHY range.
+define i32 @f6(i32 %lhs, i16 *%src) {
+; CHECK: f6:
+; CHECK: ahy %r2, -2(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%src, i64 -1
+  %half = load i16 *%ptr
+  %rhs = sext i16 %half to i32
+  %res = add i32 %lhs, %rhs
+  ret i32 %res
+}
+
+; Check the low end of the AHY range.
+define i32 @f7(i32 %lhs, i16 *%src) {
+; CHECK: f7:
+; CHECK: ahy %r2, -524288(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%src, i64 -262144
+  %half = load i16 *%ptr
+  %rhs = sext i16 %half to i32
+  %res = add i32 %lhs, %rhs
+  ret i32 %res
+}
+
+; Check the next halfword down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i32 @f8(i32 %lhs, i16 *%src) {
+; CHECK: f8:
+; CHECK: agfi %r3, -524290
+; CHECK: ah %r2, 0(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%src, i64 -262145
+  %half = load i16 *%ptr
+  %rhs = sext i16 %half to i32
+  %res = add i32 %lhs, %rhs
+  ret i32 %res
+}
+
+; Check that AH allows an index.
+define i32 @f9(i32 %lhs, i64 %src, i64 %index) {
+; CHECK: f9:
+; CHECK: ah %r2, 4094({{%r4,%r3|%r3,%r4}})
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 4094
+  %ptr = inttoptr i64 %add2 to i16 *
+  %half = load i16 *%ptr
+  %rhs = sext i16 %half to i32
+  %res = add i32 %lhs, %rhs
+  ret i32 %res
+}
+
+; Check that AHY allows an index.
+define i32 @f10(i32 %lhs, i64 %src, i64 %index) {
+; CHECK: f10:
+; CHECK: ahy %r2, 4096({{%r4,%r3|%r3,%r4}})
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 4096
+  %ptr = inttoptr i64 %add2 to i16 *
+  %half = load i16 *%ptr
+  %rhs = sext i16 %half to i32
+  %res = add i32 %lhs, %rhs
+  ret i32 %res
+}
diff --git a/test/CodeGen/SystemZ/int-add-02.ll b/test/CodeGen/SystemZ/int-add-02.ll
new file mode 100644
index 0000000..568ad1c
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-add-02.ll
@@ -0,0 +1,129 @@
+; Test 32-bit addition in which the second operand is variable.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check AR.
+define i32 @f1(i32 %a, i32 %b) {
+; CHECK: f1:
+; CHECK: ar %r2, %r3
+; CHECK: br %r14
+  %add = add i32 %a, %b
+  ret i32 %add
+}
+
+; Check the low end of the A range.
+define i32 @f2(i32 %a, i32 *%src) {
+; CHECK: f2:
+; CHECK: a %r2, 0(%r3)
+; CHECK: br %r14
+  %b = load i32 *%src
+  %add = add i32 %a, %b
+  ret i32 %add
+}
+
+; Check the high end of the aligned A range.
+define i32 @f3(i32 %a, i32 *%src) {
+; CHECK: f3:
+; CHECK: a %r2, 4092(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 1023
+  %b = load i32 *%ptr
+  %add = add i32 %a, %b
+  ret i32 %add
+}
+
+; Check the next word up, which should use AY instead of A.
+define i32 @f4(i32 %a, i32 *%src) {
+; CHECK: f4:
+; CHECK: ay %r2, 4096(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 1024
+  %b = load i32 *%ptr
+  %add = add i32 %a, %b
+  ret i32 %add
+}
+
+; Check the high end of the aligned AY range.
+define i32 @f5(i32 %a, i32 *%src) {
+; CHECK: f5:
+; CHECK: ay %r2, 524284(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 131071
+  %b = load i32 *%ptr
+  %add = add i32 %a, %b
+  ret i32 %add
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i32 @f6(i32 %a, i32 *%src) {
+; CHECK: f6:
+; CHECK: agfi %r3, 524288
+; CHECK: a %r2, 0(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 131072
+  %b = load i32 *%ptr
+  %add = add i32 %a, %b
+  ret i32 %add
+}
+
+; Check the high end of the negative aligned AY range.
+define i32 @f7(i32 %a, i32 *%src) {
+; CHECK: f7:
+; CHECK: ay %r2, -4(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 -1
+  %b = load i32 *%ptr
+  %add = add i32 %a, %b
+  ret i32 %add
+}
+
+; Check the low end of the AY range.
+define i32 @f8(i32 %a, i32 *%src) {
+; CHECK: f8:
+; CHECK: ay %r2, -524288(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 -131072
+  %b = load i32 *%ptr
+  %add = add i32 %a, %b
+  ret i32 %add
+}
+
+; Check the next word down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i32 @f9(i32 %a, i32 *%src) {
+; CHECK: f9:
+; CHECK: agfi %r3, -524292
+; CHECK: a %r2, 0(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 -131073
+  %b = load i32 *%ptr
+  %add = add i32 %a, %b
+  ret i32 %add
+}
+
+; Check that A allows an index.
+define i32 @f10(i32 %a, i64 %src, i64 %index) {
+; CHECK: f10:
+; CHECK: a %r2, 4092({{%r4,%r3|%r3,%r4}})
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 4092
+  %ptr = inttoptr i64 %add2 to i32 *
+  %b = load i32 *%ptr
+  %add = add i32 %a, %b
+  ret i32 %add
+}
+
+; Check that AY allows an index.
+define i32 @f11(i32 %a, i64 %src, i64 %index) {
+; CHECK: f11:
+; CHECK: ay %r2, 4096({{%r4,%r3|%r3,%r4}})
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 4096
+  %ptr = inttoptr i64 %add2 to i32 *
+  %b = load i32 *%ptr
+  %add = add i32 %a, %b
+  ret i32 %add
+}
diff --git a/test/CodeGen/SystemZ/int-add-03.ll b/test/CodeGen/SystemZ/int-add-03.ll
new file mode 100644
index 0000000..4610357
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-add-03.ll
@@ -0,0 +1,102 @@
+; Test additions between an i64 and a sign-extended i32.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check AGFR.
+define i64 @f1(i64 %a, i32 %b) {
+; CHECK: f1:
+; CHECK: agfr %r2, %r3
+; CHECK: br %r14
+  %bext = sext i32 %b to i64
+  %add = add i64 %a, %bext
+  ret i64 %add
+}
+
+; Check AGF with no displacement.
+define i64 @f2(i64 %a, i32 *%src) {
+; CHECK: f2:
+; CHECK: agf %r2, 0(%r3)
+; CHECK: br %r14
+  %b = load i32 *%src
+  %bext = sext i32 %b to i64
+  %add = add i64 %a, %bext
+  ret i64 %add
+}
+
+; Check the high end of the aligned AGF range.
+define i64 @f3(i64 %a, i32 *%src) {
+; CHECK: f3:
+; CHECK: agf %r2, 524284(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 131071
+  %b = load i32 *%ptr
+  %bext = sext i32 %b to i64
+  %add = add i64 %a, %bext
+  ret i64 %add
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f4(i64 %a, i32 *%src) {
+; CHECK: f4:
+; CHECK: agfi %r3, 524288
+; CHECK: agf %r2, 0(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 131072
+  %b = load i32 *%ptr
+  %bext = sext i32 %b to i64
+  %add = add i64 %a, %bext
+  ret i64 %add
+}
+
+; Check the high end of the negative aligned AGF range.
+define i64 @f5(i64 %a, i32 *%src) {
+; CHECK: f5:
+; CHECK: agf %r2, -4(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 -1
+  %b = load i32 *%ptr
+  %bext = sext i32 %b to i64
+  %add = add i64 %a, %bext
+  ret i64 %add
+}
+
+; Check the low end of the AGF range.
+define i64 @f6(i64 %a, i32 *%src) {
+; CHECK: f6:
+; CHECK: agf %r2, -524288(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 -131072
+  %b = load i32 *%ptr
+  %bext = sext i32 %b to i64
+  %add = add i64 %a, %bext
+  ret i64 %add
+}
+
+; Check the next word down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f7(i64 %a, i32 *%src) {
+; CHECK: f7:
+; CHECK: agfi %r3, -524292
+; CHECK: agf %r2, 0(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 -131073
+  %b = load i32 *%ptr
+  %bext = sext i32 %b to i64
+  %add = add i64 %a, %bext
+  ret i64 %add
+}
+
+; Check that AGF allows an index.
+define i64 @f8(i64 %a, i64 %src, i64 %index) {
+; CHECK: f8:
+; CHECK: agf %r2, 524284({{%r4,%r3|%r3,%r4}})
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 524284
+  %ptr = inttoptr i64 %add2 to i32 *
+  %b = load i32 *%ptr
+  %bext = sext i32 %b to i64
+  %add = add i64 %a, %bext
+  ret i64 %add
+}
diff --git a/test/CodeGen/SystemZ/int-add-04.ll b/test/CodeGen/SystemZ/int-add-04.ll
new file mode 100644
index 0000000..1c2dc76
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-add-04.ll
@@ -0,0 +1,102 @@
+; Test additions between an i64 and a zero-extended i32.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check ALGFR.
+define i64 @f1(i64 %a, i32 %b) {
+; CHECK: f1:
+; CHECK: algfr %r2, %r3
+; CHECK: br %r14
+  %bext = zext i32 %b to i64
+  %add = add i64 %a, %bext
+  ret i64 %add
+}
+
+; Check ALGF with no displacement.
+define i64 @f2(i64 %a, i32 *%src) {
+; CHECK: f2:
+; CHECK: algf %r2, 0(%r3)
+; CHECK: br %r14
+  %b = load i32 *%src
+  %bext = zext i32 %b to i64
+  %add = add i64 %a, %bext
+  ret i64 %add
+}
+
+; Check the high end of the aligned ALGF range.
+define i64 @f3(i64 %a, i32 *%src) {
+; CHECK: f3:
+; CHECK: algf %r2, 524284(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 131071
+  %b = load i32 *%ptr
+  %bext = zext i32 %b to i64
+  %add = add i64 %a, %bext
+  ret i64 %add
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f4(i64 %a, i32 *%src) {
+; CHECK: f4:
+; CHECK: agfi %r3, 524288
+; CHECK: algf %r2, 0(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 131072
+  %b = load i32 *%ptr
+  %bext = zext i32 %b to i64
+  %add = add i64 %a, %bext
+  ret i64 %add
+}
+
+; Check the high end of the negative aligned ALGF range.
+define i64 @f5(i64 %a, i32 *%src) {
+; CHECK: f5:
+; CHECK: algf %r2, -4(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 -1
+  %b = load i32 *%ptr
+  %bext = zext i32 %b to i64
+  %add = add i64 %a, %bext
+  ret i64 %add
+}
+
+; Check the low end of the ALGF range.
+define i64 @f6(i64 %a, i32 *%src) {
+; CHECK: f6:
+; CHECK: algf %r2, -524288(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 -131072
+  %b = load i32 *%ptr
+  %bext = zext i32 %b to i64
+  %add = add i64 %a, %bext
+  ret i64 %add
+}
+
+; Check the next word down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f7(i64 %a, i32 *%src) {
+; CHECK: f7:
+; CHECK: agfi %r3, -524292
+; CHECK: algf %r2, 0(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 -131073
+  %b = load i32 *%ptr
+  %bext = zext i32 %b to i64
+  %add = add i64 %a, %bext
+  ret i64 %add
+}
+
+; Check that ALGF allows an index.
+define i64 @f8(i64 %a, i64 %src, i64 %index) {
+; CHECK: f8:
+; CHECK: algf %r2, 524284({{%r4,%r3|%r3,%r4}})
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 524284
+  %ptr = inttoptr i64 %add2 to i32 *
+  %b = load i32 *%ptr
+  %bext = zext i32 %b to i64
+  %add = add i64 %a, %bext
+  ret i64 %add
+}
diff --git a/test/CodeGen/SystemZ/int-add-05.ll b/test/CodeGen/SystemZ/int-add-05.ll
new file mode 100644
index 0000000..ae32cc4
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-add-05.ll
@@ -0,0 +1,94 @@
+; Test 64-bit addition in which the second operand is variable.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check AGR.
+define i64 @f1(i64 %a, i64 %b) {
+; CHECK: f1:
+; CHECK: agr %r2, %r3
+; CHECK: br %r14
+  %add = add i64 %a, %b
+  ret i64 %add
+}
+
+; Check AG with no displacement.
+define i64 @f2(i64 %a, i64 *%src) {
+; CHECK: f2:
+; CHECK: ag %r2, 0(%r3)
+; CHECK: br %r14
+  %b = load i64 *%src
+  %add = add i64 %a, %b
+  ret i64 %add
+}
+
+; Check the high end of the aligned AG range.
+define i64 @f3(i64 %a, i64 *%src) {
+; CHECK: f3:
+; CHECK: ag %r2, 524280(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 65535
+  %b = load i64 *%ptr
+  %add = add i64 %a, %b
+  ret i64 %add
+}
+
+; Check the next doubleword up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f4(i64 %a, i64 *%src) {
+; CHECK: f4:
+; CHECK: agfi %r3, 524288
+; CHECK: ag %r2, 0(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 65536
+  %b = load i64 *%ptr
+  %add = add i64 %a, %b
+  ret i64 %add
+}
+
+; Check the high end of the negative aligned AG range.
+define i64 @f5(i64 %a, i64 *%src) {
+; CHECK: f5:
+; CHECK: ag %r2, -8(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 -1
+  %b = load i64 *%ptr
+  %add = add i64 %a, %b
+  ret i64 %add
+}
+
+; Check the low end of the AG range.
+define i64 @f6(i64 %a, i64 *%src) {
+; CHECK: f6:
+; CHECK: ag %r2, -524288(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 -65536
+  %b = load i64 *%ptr
+  %add = add i64 %a, %b
+  ret i64 %add
+}
+
+; Check the next doubleword down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f7(i64 %a, i64 *%src) {
+; CHECK: f7:
+; CHECK: agfi %r3, -524296
+; CHECK: ag %r2, 0(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 -65537
+  %b = load i64 *%ptr
+  %add = add i64 %a, %b
+  ret i64 %add
+}
+
+; Check that AG allows an index.
+define i64 @f8(i64 %a, i64 %src, i64 %index) {
+; CHECK: f8:
+; CHECK: ag %r2, 524280({{%r4,%r3|%r3,%r4}})
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 524280
+  %ptr = inttoptr i64 %add2 to i64 *
+  %b = load i64 *%ptr
+  %add = add i64 %a, %b
+  ret i64 %add
+}
diff --git a/test/CodeGen/SystemZ/int-add-06.ll b/test/CodeGen/SystemZ/int-add-06.ll
new file mode 100644
index 0000000..3a9c698
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-add-06.ll
@@ -0,0 +1,93 @@
+; Test 32-bit addition in which the second operand is constant.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Adding 1 is expected to use AHI.
+define i32 @f1(i32 %a) {
+; CHECK: f1:
+; CHECK: ahi %r2, 1
+; CHECK: br %r14
+  %sum = add i32 %a, 1
+  ret i32 %sum
+}
+
+; 32767 is the top of the AHI range.
+define i32 @f2(i32 %a) {
+; CHECK: f2:
+; CHECK: ahi %r2, 32767
+; CHECK: br %r14
+  %sum = add i32 %a, 32767
+  ret i32 %sum
+}
+
+; The next value up (32768) is out of AHI range and must use AFI.
+define i32 @f3(i32 %a) {
+; CHECK: f3:
+; CHECK: afi %r2, 32768
+; CHECK: br %r14
+  %sum = add i32 %a, 32768
+  ret i32 %sum
+}
+
+; 2147483647 is the top of the signed 32-bit range.
+define i32 @f4(i32 %a) {
+; CHECK: f4:
+; CHECK: afi %r2, 2147483647
+; CHECK: br %r14
+  %sum = add i32 %a, 2147483647
+  ret i32 %sum
+}
+
+; The next value up (2147483648) is treated as a negative AFI immediate.
+define i32 @f5(i32 %a) {
+; CHECK: f5:
+; CHECK: afi %r2, -2147483648
+; CHECK: br %r14
+  %sum = add i32 %a, 2147483648
+  ret i32 %sum
+}
+
+; -1 is the top of the negative AHI range.
+define i32 @f6(i32 %a) {
+; CHECK: f6:
+; CHECK: ahi %r2, -1
+; CHECK: br %r14
+  %sum = add i32 %a, -1
+  ret i32 %sum
+}
+
+; -32768 is the bottom of the AHI range.
+define i32 @f7(i32 %a) {
+; CHECK: f7:
+; CHECK: ahi %r2, -32768
+; CHECK: br %r14
+  %sum = add i32 %a, -32768
+  ret i32 %sum
+}
+
+; The next value down (-32769) is out of AHI range and must use AFI.
+define i32 @f8(i32 %a) {
+; CHECK: f8:
+; CHECK: afi %r2, -32769
+; CHECK: br %r14
+  %sum = add i32 %a, -32769
+  ret i32 %sum
+}
+
+; -2147483648 is the bottom of the signed 32-bit range.
+define i32 @f9(i32 %a) {
+; CHECK: f9:
+; CHECK: afi %r2, -2147483648
+; CHECK: br %r14
+  %sum = add i32 %a, -2147483648
+  ret i32 %sum
+}
+
+; The next value down (-2147483649) is treated as a positive AFI immediate.
+define i32 @f10(i32 %a) {
+; CHECK: f10:
+; CHECK: afi %r2, 2147483647
+; CHECK: br %r14
+  %sum = add i32 %a, -2147483649
+  ret i32 %sum
+}
diff --git a/test/CodeGen/SystemZ/int-add-07.ll b/test/CodeGen/SystemZ/int-add-07.ll
new file mode 100644
index 0000000..a065bb2
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-add-07.ll
@@ -0,0 +1,131 @@
+; Test 64-bit addition in which the second operand is constant.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Adding 1 may use either AGHI or an equivalent LA.
+define i64 @f1(i64 %a) {
+; CHECK: f1:
+; CHECK: {{aghi %r2, 1|la %r[0-5], 1\(%r2\)}}
+; CHECK: br %r14
+  %sum = add i64 %a, 1
+  ret i64 %sum
+}
+
+; 32767 is the top of the AGHI range.
+define i64 @f2(i64 %a) {
+; CHECK: f2:
+; CHECK: aghi %r2, 32767
+; CHECK: br %r14
+  %sum = add i64 %a, 32767
+  ret i64 %sum
+}
+
+; The next value up (32768) needs AGFI; an equivalent LAY is also accepted.
+define i64 @f3(i64 %a) {
+; CHECK: f3:
+; CHECK: {{agfi %r2, 32768|lay %r[0-5], 32768\(%r2\)}}
+; CHECK: br %r14
+  %sum = add i64 %a, 32768
+  ret i64 %sum
+}
+
+; 2147483647 is the top of the AGFI range.
+define i64 @f4(i64 %a) {
+; CHECK: f4:
+; CHECK: agfi %r2, 2147483647
+; CHECK: br %r14
+  %sum = add i64 %a, 2147483647
+  ret i64 %sum
+}
+
+; The next value up (2147483648) must use ALGFI instead.
+define i64 @f5(i64 %a) {
+; CHECK: f5:
+; CHECK: algfi %r2, 2147483648
+; CHECK: br %r14
+  %sum = add i64 %a, 2147483648
+  ret i64 %sum
+}
+
+; 4294967295 is the top of the ALGFI range.
+define i64 @f6(i64 %a) {
+; CHECK: f6:
+; CHECK: algfi %r2, 4294967295
+; CHECK: br %r14
+  %sum = add i64 %a, 4294967295
+  ret i64 %sum
+}
+
+; The next value up (4294967296) must be loaded into a register first.
+define i64 @f7(i64 %a) {
+; CHECK: f7:
+; CHECK: llihl %r0, 1
+; CHECK: agr
+; CHECK: br %r14
+  %sum = add i64 %a, 4294967296
+  ret i64 %sum
+}
+
+; -1 is the top of the negative AGHI range.
+define i64 @f8(i64 %a) {
+; CHECK: f8:
+; CHECK: aghi %r2, -1
+; CHECK: br %r14
+  %sum = add i64 %a, -1
+  ret i64 %sum
+}
+
+; -32768 is the bottom of the AGHI range.
+define i64 @f9(i64 %a) {
+; CHECK: f9:
+; CHECK: aghi %r2, -32768
+; CHECK: br %r14
+  %sum = add i64 %a, -32768
+  ret i64 %sum
+}
+
+; Check the next value down, which must use AGFI (or an equivalent LAY) instead.
+define i64 @f10(i64 %a) {
+; CHECK: f10:
+; CHECK: {{agfi %r2, -32769|lay %r[0-5], -32769\(%r2\)}}
+; CHECK: br %r14
+  %add = add i64 %a, -32769
+  ret i64 %add
+}
+
+; -2147483648 is the bottom of the AGFI range.
+define i64 @f11(i64 %a) {
+; CHECK: f11:
+; CHECK: agfi %r2, -2147483648
+; CHECK: br %r14
+  %sum = add i64 %a, -2147483648
+  ret i64 %sum
+}
+
+; The next value down (-2147483649) must use SLGFI instead.
+define i64 @f12(i64 %a) {
+; CHECK: f12:
+; CHECK: slgfi %r2, 2147483649
+; CHECK: br %r14
+  %sum = add i64 %a, -2147483649
+  ret i64 %sum
+}
+
+; -4294967295 is the bottom of the SLGFI range.
+define i64 @f13(i64 %a) {
+; CHECK: f13:
+; CHECK: slgfi %r2, 4294967295
+; CHECK: br %r14
+  %sum = add i64 %a, -4294967295
+  ret i64 %sum
+}
+
+; The next value down (-4294967296) must use register addition instead.
+define i64 @f14(i64 %a) {
+; CHECK: f14:
+; CHECK: llihf %r0, 4294967295
+; CHECK: agr
+; CHECK: br %r14
+  %sum = add i64 %a, -4294967296
+  ret i64 %sum
+}
diff --git a/test/CodeGen/SystemZ/int-add-08.ll b/test/CodeGen/SystemZ/int-add-08.ll
new file mode 100644
index 0000000..b1f820f
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-add-08.ll
@@ -0,0 +1,110 @@
+; Test 128-bit addition in which the second operand is variable.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test register addition.
+define void @f1(i128 *%ptr) {
+; CHECK: f1:
+; CHECK: algr
+; CHECK: alcgr
+; CHECK: br %r14
+  %value = load i128 *%ptr
+  %add = add i128 %value, %value
+  store i128 %add, i128 *%ptr
+  ret void
+}
+
+; Test memory addition with no offset.  Making the load of %a volatile
+; should force the memory operand to be %b.
+define void @f2(i128 *%aptr, i64 %addr) {
+; CHECK: f2:
+; CHECK: alg {{%r[0-5]}}, 8(%r3)
+; CHECK: alcg {{%r[0-5]}}, 0(%r3)
+; CHECK: br %r14
+  %bptr = inttoptr i64 %addr to i128 *
+  %a = load volatile i128 *%aptr
+  %b = load i128 *%bptr
+  %add = add i128 %a, %b
+  store i128 %add, i128 *%aptr
+  ret void
+}
+
+; Test the highest aligned offset that is in range of both ALG and ALCG.
+define void @f3(i128 *%aptr, i64 %base) {
+; CHECK: f3:
+; CHECK: alg {{%r[0-5]}}, 524280(%r3)
+; CHECK: alcg {{%r[0-5]}}, 524272(%r3)
+; CHECK: br %r14
+  %addr = add i64 %base, 524272
+  %bptr = inttoptr i64 %addr to i128 *
+  %a = load volatile i128 *%aptr
+  %b = load i128 *%bptr
+  %add = add i128 %a, %b
+  store i128 %add, i128 *%aptr
+  ret void
+}
+
+; Test the next doubleword up, which requires separate address logic for ALG.
+define void @f4(i128 *%aptr, i64 %base) {
+; CHECK: f4:
+; CHECK: lgr [[BASE:%r[1-5]]], %r3
+; CHECK: agfi [[BASE]], 524288
+; CHECK: alg {{%r[0-5]}}, 0([[BASE]])
+; CHECK: alcg {{%r[0-5]}}, 524280(%r3)
+; CHECK: br %r14
+  %addr = add i64 %base, 524280
+  %bptr = inttoptr i64 %addr to i128 *
+  %a = load volatile i128 *%aptr
+  %b = load i128 *%bptr
+  %add = add i128 %a, %b
+  store i128 %add, i128 *%aptr
+  ret void
+}
+
+; Test the next doubleword after that, which requires separate logic for
+; both instructions.  It would be better to create an anchor at 524288
+; that both instructions can use, but that isn't implemented yet.
+define void @f5(i128 *%aptr, i64 %base) {
+; CHECK: f5:
+; CHECK: alg {{%r[0-5]}}, 0({{%r[1-5]}})
+; CHECK: alcg {{%r[0-5]}}, 0({{%r[1-5]}})
+; CHECK: br %r14
+  %addr = add i64 %base, 524288
+  %bptr = inttoptr i64 %addr to i128 *
+  %a = load volatile i128 *%aptr
+  %b = load i128 *%bptr
+  %add = add i128 %a, %b
+  store i128 %add, i128 *%aptr
+  ret void
+}
+
+; Test the lowest displacement that is in range of both ALG and ALCG.
+define void @f6(i128 *%aptr, i64 %base) {
+; CHECK: f6:
+; CHECK: alg {{%r[0-5]}}, -524280(%r3)
+; CHECK: alcg {{%r[0-5]}}, -524288(%r3)
+; CHECK: br %r14
+  %addr = add i64 %base, -524288
+  %bptr = inttoptr i64 %addr to i128 *
+  %a = load volatile i128 *%aptr
+  %b = load i128 *%bptr
+  %add = add i128 %a, %b
+  store i128 %add, i128 *%aptr
+  ret void
+}
+
+; Test the next doubleword down, which is out of range of the ALCG.
+define void @f7(i128 *%aptr, i64 %base) {
+; CHECK: f7:
+; CHECK: alg {{%r[0-5]}}, -524288(%r3)
+; CHECK: alcg {{%r[0-5]}}, 0({{%r[1-5]}})
+; CHECK: br %r14
+  %addr = add i64 %base, -524296
+  %bptr = inttoptr i64 %addr to i128 *
+  %a = load volatile i128 *%aptr
+  %b = load i128 *%bptr
+  %add = add i128 %a, %b
+  store i128 %add, i128 *%aptr
+  ret void
+}
+
diff --git a/test/CodeGen/SystemZ/int-add-09.ll b/test/CodeGen/SystemZ/int-add-09.ll
new file mode 100644
index 0000000..bfe6338
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-add-09.ll
@@ -0,0 +1,56 @@
+; Test 128-bit addition in which the second operand is constant.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check additions of 1.  The XOR ensures that we don't instead load the
+; constant into a register and use memory addition.
+define void @f1(i128 *%aptr) {
+; CHECK: f1:
+; CHECK: algfi {{%r[0-5]}}, 1
+; CHECK: alcgr
+; CHECK: br %r14
+  %a = load i128 *%aptr
+  %xor = xor i128 %a, 128
+  %add = add i128 %xor, 1
+  store i128 %add, i128 *%aptr
+  ret void
+}
+
+; Check the high end of the ALGFI range.
+define void @f2(i128 *%aptr) {
+; CHECK: f2:
+; CHECK: algfi {{%r[0-5]}}, 4294967295
+; CHECK: alcgr
+; CHECK: br %r14
+  %a = load i128 *%aptr
+  %xor = xor i128 %a, 128
+  %add = add i128 %xor, 4294967295
+  store i128 %add, i128 *%aptr
+  ret void
+}
+
+; Check the next value up, which must use register addition.
+define void @f3(i128 *%aptr) {
+; CHECK: f3:
+; CHECK: algr
+; CHECK: alcgr
+; CHECK: br %r14
+  %a = load i128 *%aptr
+  %xor = xor i128 %a, 128
+  %add = add i128 %xor, 4294967296
+  store i128 %add, i128 *%aptr
+  ret void
+}
+
+; Check addition of -1, which must also use register addition.
+define void @f4(i128 *%aptr) {
+; CHECK: f4:
+; CHECK: algr
+; CHECK: alcgr
+; CHECK: br %r14
+  %a = load i128 *%aptr
+  %xor = xor i128 %a, 128
+  %add = add i128 %xor, -1
+  store i128 %add, i128 *%aptr
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/int-add-10.ll b/test/CodeGen/SystemZ/int-add-10.ll
new file mode 100644
index 0000000..17cfdbe
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-add-10.ll
@@ -0,0 +1,165 @@
+; Test 128-bit addition in which the second operand is a zero-extended i32.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check register additions.  The XOR ensures that we don't instead zero-extend
+; %b into a register and use memory addition.
+define void @f1(i128 *%aptr, i32 %b) {
+; CHECK: f1:
+; CHECK: algfr {{%r[0-5]}}, %r3
+; CHECK: alcgr
+; CHECK: br %r14
+  %a = load i128 *%aptr
+  %xor = xor i128 %a, 127
+  %bext = zext i32 %b to i128
+  %add = add i128 %xor, %bext
+  store i128 %add, i128 *%aptr
+  ret void
+}
+
+; Like f1, but using an "in-register" extension.
+define void @f2(i128 *%aptr, i64 %b) {
+; CHECK: f2:
+; CHECK: algfr {{%r[0-5]}}, %r3
+; CHECK: alcgr
+; CHECK: br %r14
+  %a = load i128 *%aptr
+  %xor = xor i128 %a, 127
+  %trunc = trunc i64 %b to i32
+  %bext = zext i32 %trunc to i128
+  %add = add i128 %xor, %bext
+  store i128 %add, i128 *%aptr
+  ret void
+}
+
+; Test register addition in cases where the second operand is zero extended
+; from i64 rather than i32, but is later masked to i32 range.
+define void @f3(i128 *%aptr, i64 %b) {
+; CHECK: f3:
+; CHECK: algfr {{%r[0-5]}}, %r3
+; CHECK: alcgr
+; CHECK: br %r14
+  %a = load i128 *%aptr
+  %xor = xor i128 %a, 127
+  %bext = zext i64 %b to i128
+  %and = and i128 %bext, 4294967295
+  %add = add i128 %xor, %and
+  store i128 %add, i128 *%aptr
+  ret void
+}
+
+; Test ALGF with no offset.
+define void @f4(i128 *%aptr, i32 *%bsrc) {
+; CHECK: f4:
+; CHECK: algf {{%r[0-5]}}, 0(%r3)
+; CHECK: alcgr
+; CHECK: br %r14
+  %a = load i128 *%aptr
+  %xor = xor i128 %a, 127
+  %b = load i32 *%bsrc
+  %bext = zext i32 %b to i128
+  %add = add i128 %xor, %bext
+  store i128 %add, i128 *%aptr
+  ret void
+}
+
+; Check the high end of the ALGF range.
+define void @f5(i128 *%aptr, i32 *%bsrc) {
+; CHECK: f5:
+; CHECK: algf {{%r[0-5]}}, 524284(%r3)
+; CHECK: alcgr
+; CHECK: br %r14
+  %a = load i128 *%aptr
+  %xor = xor i128 %a, 127
+  %ptr = getelementptr i32 *%bsrc, i64 131071
+  %b = load i32 *%ptr
+  %bext = zext i32 %b to i128
+  %add = add i128 %xor, %bext
+  store i128 %add, i128 *%aptr
+  ret void
+}
+
+; Check the next word up, which must use separate address logic.
+; Other sequences besides this one would be OK.
+define void @f6(i128 *%aptr, i32 *%bsrc) {
+; CHECK: f6:
+; CHECK: agfi %r3, 524288
+; CHECK: algf {{%r[0-5]}}, 0(%r3)
+; CHECK: alcgr
+; CHECK: br %r14
+  %a = load i128 *%aptr
+  %xor = xor i128 %a, 127
+  %ptr = getelementptr i32 *%bsrc, i64 131072
+  %b = load i32 *%ptr
+  %bext = zext i32 %b to i128
+  %add = add i128 %xor, %bext
+  store i128 %add, i128 *%aptr
+  ret void
+}
+
+; Check the high end of the negative aligned ALGF range.
+define void @f7(i128 *%aptr, i32 *%bsrc) {
+; CHECK: f7:
+; CHECK: algf {{%r[0-5]}}, -4(%r3)
+; CHECK: alcgr
+; CHECK: br %r14
+  %a = load i128 *%aptr
+  %xor = xor i128 %a, 127
+  %ptr = getelementptr i32 *%bsrc, i128 -1
+  %b = load i32 *%ptr
+  %bext = zext i32 %b to i128
+  %add = add i128 %xor, %bext
+  store i128 %add, i128 *%aptr
+  ret void
+}
+
+; Check the low end of the ALGF range.
+define void @f8(i128 *%aptr, i32 *%bsrc) {
+; CHECK: f8:
+; CHECK: algf {{%r[0-5]}}, -524288(%r3)
+; CHECK: alcgr
+; CHECK: br %r14
+  %a = load i128 *%aptr
+  %xor = xor i128 %a, 127
+  %ptr = getelementptr i32 *%bsrc, i128 -131072
+  %b = load i32 *%ptr
+  %bext = zext i32 %b to i128
+  %add = add i128 %xor, %bext
+  store i128 %add, i128 *%aptr
+  ret void
+}
+
+; Check the next word down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define void @f9(i128 *%aptr, i32 *%bsrc) {
+; CHECK: f9:
+; CHECK: agfi %r3, -524292
+; CHECK: algf {{%r[0-5]}}, 0(%r3)
+; CHECK: alcgr
+; CHECK: br %r14
+  %a = load i128 *%aptr
+  %xor = xor i128 %a, 127
+  %ptr = getelementptr i32 *%bsrc, i128 -131073
+  %b = load i32 *%ptr
+  %bext = zext i32 %b to i128
+  %add = add i128 %xor, %bext
+  store i128 %add, i128 *%aptr
+  ret void
+}
+
+; Check that ALGF allows an index.
+define void @f10(i128 *%aptr, i64 %src, i64 %index) {
+; CHECK: f10:
+; CHECK: algf {{%r[0-5]}}, 524284({{%r4,%r3|%r3,%r4}})
+; CHECK: br %r14
+  %a = load i128 *%aptr
+  %xor = xor i128 %a, 127
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 524284
+  %ptr = inttoptr i64 %add2 to i32 *
+  %b = load i32 *%ptr
+  %bext = zext i32 %b to i128
+  %add = add i128 %xor, %bext
+  store i128 %add, i128 *%aptr
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/int-add-11.ll b/test/CodeGen/SystemZ/int-add-11.ll
new file mode 100644
index 0000000..47a776e
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-add-11.ll
@@ -0,0 +1,128 @@
+; Test 32-bit additions of constants to memory.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check additions of 1.
+define void @f1(i32 *%ptr) {
+; CHECK: f1:
+; CHECK: asi 0(%r2), 1
+; CHECK: br %r14
+  %val = load i32 *%ptr
+  %add = add i32 %val, 1
+  store i32 %add, i32 *%ptr
+  ret void
+}
+
+; Check the high end of the constant range.
+define void @f2(i32 *%ptr) {
+; CHECK: f2:
+; CHECK: asi 0(%r2), 127
+; CHECK: br %r14
+  %val = load i32 *%ptr
+  %add = add i32 %val, 127
+  store i32 %add, i32 *%ptr
+  ret void
+}
+
+; Check the next constant up, which must use an addition and a store.
+; Both L/AHI and LHI/A would be OK.
+define void @f3(i32 *%ptr) {
+; CHECK: f3:
+; CHECK-NOT: asi
+; CHECK: st %r0, 0(%r2)
+; CHECK: br %r14
+  %val = load i32 *%ptr
+  %add = add i32 %val, 128
+  store i32 %add, i32 *%ptr
+  ret void
+}
+
+; Check the low end of the constant range.
+define void @f4(i32 *%ptr) {
+; CHECK: f4:
+; CHECK: asi 0(%r2), -128
+; CHECK: br %r14
+  %val = load i32 *%ptr
+  %add = add i32 %val, -128
+  store i32 %add, i32 *%ptr
+  ret void
+}
+
+; Check the next value down, with the same comment as f3.
+define void @f5(i32 *%ptr) {
+; CHECK: f5:
+; CHECK-NOT: asi
+; CHECK: st %r0, 0(%r2)
+; CHECK: br %r14
+  %val = load i32 *%ptr
+  %add = add i32 %val, -129
+  store i32 %add, i32 *%ptr
+  ret void
+}
+
+; Check the high end of the aligned ASI range.
+define void @f6(i32 *%base) {
+; CHECK: f6:
+; CHECK: asi 524284(%r2), 1
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%base, i64 131071
+  %val = load i32 *%ptr
+  %add = add i32 %val, 1
+  store i32 %add, i32 *%ptr
+  ret void
+}
+
+; Check the next word up, which must use separate address logic.
+; Other sequences besides this one would be OK.
+define void @f7(i32 *%base) {
+; CHECK: f7:
+; CHECK: agfi %r2, 524288
+; CHECK: asi 0(%r2), 1
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%base, i64 131072
+  %val = load i32 *%ptr
+  %add = add i32 %val, 1
+  store i32 %add, i32 *%ptr
+  ret void
+}
+
+; Check the low end of the ASI range.
+define void @f8(i32 *%base) {
+; CHECK: f8:
+; CHECK: asi -524288(%r2), 1
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%base, i64 -131072
+  %val = load i32 *%ptr
+  %add = add i32 %val, 1
+  store i32 %add, i32 *%ptr
+  ret void
+}
+
+; Check the next word down, which must use separate address logic.
+; Other sequences besides this one would be OK.
+define void @f9(i32 *%base) {
+; CHECK: f9:
+; CHECK: agfi %r2, -524292
+; CHECK: asi 0(%r2), 1
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%base, i64 -131073
+  %val = load i32 *%ptr
+  %add = add i32 %val, 1
+  store i32 %add, i32 *%ptr
+  ret void
+}
+
+; Check that ASI does not allow indices.
+define void @f10(i64 %base, i64 %index) {
+; CHECK: f10:
+; CHECK: agr %r2, %r3
+; CHECK: asi 4(%r2), 1
+; CHECK: br %r14
+  %add1 = add i64 %base, %index
+  %add2 = add i64 %add1, 4
+  %ptr = inttoptr i64 %add2 to i32 *
+  %val = load i32 *%ptr
+  %add = add i32 %val, 1
+  store i32 %add, i32 *%ptr
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/int-add-12.ll b/test/CodeGen/SystemZ/int-add-12.ll
new file mode 100644
index 0000000..ae1c1f7
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-add-12.ll
@@ -0,0 +1,128 @@
+; Test 64-bit additions of constants to memory.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check additions of 1.
+define void @f1(i64 *%ptr) {
+; CHECK: f1:
+; CHECK: agsi 0(%r2), 1
+; CHECK: br %r14
+  %val = load i64 *%ptr
+  %add = add i64 %val, 1
+  store i64 %add, i64 *%ptr
+  ret void
+}
+
+; Check the high end of the constant range.
+define void @f2(i64 *%ptr) {
+; CHECK: f2:
+; CHECK: agsi 0(%r2), 127
+; CHECK: br %r14
+  %val = load i64 *%ptr
+  %add = add i64 %val, 127
+  store i64 %add, i64 *%ptr
+  ret void
+}
+
+; Check the next constant up, which must use an addition and a store.
+; Both LG/AGHI and LGHI/AG would be OK.
+define void @f3(i64 *%ptr) {
+; CHECK: f3:
+; CHECK-NOT: agsi
+; CHECK: stg %r0, 0(%r2)
+; CHECK: br %r14
+  %val = load i64 *%ptr
+  %add = add i64 %val, 128
+  store i64 %add, i64 *%ptr
+  ret void
+}
+
+; Check the low end of the constant range.
+define void @f4(i64 *%ptr) {
+; CHECK: f4:
+; CHECK: agsi 0(%r2), -128
+; CHECK: br %r14
+  %val = load i64 *%ptr
+  %add = add i64 %val, -128
+  store i64 %add, i64 *%ptr
+  ret void
+}
+
+; Check the next value down, with the same comment as f3.
+define void @f5(i64 *%ptr) {
+; CHECK: f5:
+; CHECK-NOT: agsi
+; CHECK: stg %r0, 0(%r2)
+; CHECK: br %r14
+  %val = load i64 *%ptr
+  %add = add i64 %val, -129
+  store i64 %add, i64 *%ptr
+  ret void
+}
+
+; Check the high end of the aligned AGSI range.
+define void @f6(i64 *%base) {
+; CHECK: f6:
+; CHECK: agsi 524280(%r2), 1
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%base, i64 65535
+  %val = load i64 *%ptr
+  %add = add i64 %val, 1
+  store i64 %add, i64 *%ptr
+  ret void
+}
+
+; Check the next doubleword up, which must use separate address logic.
+; Other sequences besides this one would be OK.
+define void @f7(i64 *%base) {
+; CHECK: f7:
+; CHECK: agfi %r2, 524288
+; CHECK: agsi 0(%r2), 1
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%base, i64 65536
+  %val = load i64 *%ptr
+  %add = add i64 %val, 1
+  store i64 %add, i64 *%ptr
+  ret void
+}
+
+; Check the low end of the AGSI range.
+define void @f8(i64 *%base) {
+; CHECK: f8:
+; CHECK: agsi -524288(%r2), 1
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%base, i64 -65536
+  %val = load i64 *%ptr
+  %add = add i64 %val, 1
+  store i64 %add, i64 *%ptr
+  ret void
+}
+
+; Check the next doubleword down, which must use separate address logic.
+; Other sequences besides this one would be OK.
+define void @f9(i64 *%base) {
+; CHECK: f9:
+; CHECK: agfi %r2, -524296
+; CHECK: agsi 0(%r2), 1
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%base, i64 -65537
+  %val = load i64 *%ptr
+  %add = add i64 %val, 1
+  store i64 %add, i64 *%ptr
+  ret void
+}
+
+; Check that AGSI does not allow indices.
+define void @f10(i64 %base, i64 %index) {
+; CHECK: f10:
+; CHECK: agr %r2, %r3
+; CHECK: agsi 8(%r2), 1
+; CHECK: br %r14
+  %add1 = add i64 %base, %index
+  %add2 = add i64 %add1, 8
+  %ptr = inttoptr i64 %add2 to i64 *
+  %val = load i64 *%ptr
+  %add = add i64 %val, 1
+  store i64 %add, i64 *%ptr
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-01.ll b/test/CodeGen/SystemZ/int-cmp-01.ll
new file mode 100644
index 0000000..aa432f0
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-cmp-01.ll
@@ -0,0 +1,151 @@
+; Test 32-bit signed comparison in which the second operand is sign-extended
+; from an i16 memory value.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check the low end of the CH range.
+define void @f1(i32 %lhs, i16 *%src, i32 *%dst) {
+; CHECK: f1:
+; CHECK: ch %r2, 0(%r3)
+; CHECK: br %r14
+  %half = load i16 *%src
+  %rhs = sext i16 %half to i32
+  %cond = icmp slt i32 %lhs, %rhs
+  %res = select i1 %cond, i32 100, i32 200
+  store i32 %res, i32 *%dst
+  ret void
+}
+
+; Check the high end of the aligned CH range.
+define void @f2(i32 %lhs, i16 *%src, i32 *%dst) {
+; CHECK: f2:
+; CHECK: ch %r2, 4094(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%src, i64 2047
+  %half = load i16 *%ptr
+  %rhs = sext i16 %half to i32
+  %cond = icmp slt i32 %lhs, %rhs
+  %res = select i1 %cond, i32 100, i32 200
+  store i32 %res, i32 *%dst
+  ret void
+}
+
+; Check the next halfword up, which should use CHY instead of CH.
+define void @f3(i32 %lhs, i16 *%src, i32 *%dst) {
+; CHECK: f3:
+; CHECK: chy %r2, 4096(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%src, i64 2048
+  %half = load i16 *%ptr
+  %rhs = sext i16 %half to i32
+  %cond = icmp slt i32 %lhs, %rhs
+  %res = select i1 %cond, i32 100, i32 200
+  store i32 %res, i32 *%dst
+  ret void
+}
+
+; Check the high end of the aligned CHY range.
+define void @f4(i32 %lhs, i16 *%src, i32 *%dst) {
+; CHECK: f4:
+; CHECK: chy %r2, 524286(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%src, i64 262143
+  %half = load i16 *%ptr
+  %rhs = sext i16 %half to i32
+  %cond = icmp slt i32 %lhs, %rhs
+  %res = select i1 %cond, i32 100, i32 200
+  store i32 %res, i32 *%dst
+  ret void
+}
+
+; Check the next halfword up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define void @f5(i32 %lhs, i16 *%src, i32 *%dst) {
+; CHECK: f5:
+; CHECK: agfi %r3, 524288
+; CHECK: ch %r2, 0(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%src, i64 262144
+  %half = load i16 *%ptr
+  %rhs = sext i16 %half to i32
+  %cond = icmp slt i32 %lhs, %rhs
+  %res = select i1 %cond, i32 100, i32 200
+  store i32 %res, i32 *%dst
+  ret void
+}
+
+; Check the high end of the negative aligned CHY range.
+define void @f6(i32 %lhs, i16 *%src, i32 *%dst) {
+; CHECK: f6:
+; CHECK: chy %r2, -2(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%src, i64 -1
+  %half = load i16 *%ptr
+  %rhs = sext i16 %half to i32
+  %cond = icmp slt i32 %lhs, %rhs
+  %res = select i1 %cond, i32 100, i32 200
+  store i32 %res, i32 *%dst
+  ret void
+}
+
+; Check the low end of the CHY range.
+define void @f7(i32 %lhs, i16 *%src, i32 *%dst) {
+; CHECK: f7:
+; CHECK: chy %r2, -524288(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%src, i64 -262144
+  %half = load i16 *%ptr
+  %rhs = sext i16 %half to i32
+  %cond = icmp slt i32 %lhs, %rhs
+  %res = select i1 %cond, i32 100, i32 200
+  store i32 %res, i32 *%dst
+  ret void
+}
+
+; Check the next halfword down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define void @f8(i32 %lhs, i16 *%src, i32 *%dst) {
+; CHECK: f8:
+; CHECK: agfi %r3, -524290
+; CHECK: ch %r2, 0(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%src, i64 -262145
+  %half = load i16 *%ptr
+  %rhs = sext i16 %half to i32
+  %cond = icmp slt i32 %lhs, %rhs
+  %res = select i1 %cond, i32 100, i32 200
+  store i32 %res, i32 *%dst
+  ret void
+}
+
+; Check that CH allows an index.
+define void @f9(i32 %lhs, i64 %base, i64 %index, i32 *%dst) {
+; CHECK: f9:
+; CHECK: ch %r2, 4094({{%r4,%r3|%r3,%r4}})
+; CHECK: br %r14
+  %add1 = add i64 %base, %index
+  %add2 = add i64 %add1, 4094
+  %ptr = inttoptr i64 %add2 to i16 *
+  %half = load i16 *%ptr
+  %rhs = sext i16 %half to i32
+  %cond = icmp slt i32 %lhs, %rhs
+  %res = select i1 %cond, i32 100, i32 200
+  store i32 %res, i32 *%dst
+  ret void
+}
+
+; Check that CHY allows an index.
+define void @f10(i32 %lhs, i64 %base, i64 %index, i32 *%dst) {
+; CHECK: f10:
+; CHECK: chy %r2, 4096({{%r4,%r3|%r3,%r4}})
+; CHECK: br %r14
+  %add1 = add i64 %base, %index
+  %add2 = add i64 %add1, 4096
+  %ptr = inttoptr i64 %add2 to i16 *
+  %half = load i16 *%ptr
+  %rhs = sext i16 %half to i32
+  %cond = icmp slt i32 %lhs, %rhs
+  %res = select i1 %cond, i32 100, i32 200
+  store i32 %res, i32 *%dst
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-02.ll b/test/CodeGen/SystemZ/int-cmp-02.ll
new file mode 100644
index 0000000..c158fb4
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-cmp-02.ll
@@ -0,0 +1,162 @@
+; Test 32-bit signed comparison in which the second operand is a variable.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check register comparison.
+define double @f1(double %a, double %b, i32 %i1, i32 %i2) {
+; CHECK: f1:
+; CHECK: cr %r2, %r3
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp slt i32 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the low end of the C range.
+define double @f2(double %a, double %b, i32 %i1, i32 *%ptr) {
+; CHECK: f2:
+; CHECK: c %r2, 0(%r3)
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %i2 = load i32 *%ptr
+  %cond = icmp slt i32 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the high end of the aligned C range.
+define double @f3(double %a, double %b, i32 %i1, i32 *%base) {
+; CHECK: f3:
+; CHECK: c %r2, 4092(%r3)
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%base, i64 1023
+  %i2 = load i32 *%ptr
+  %cond = icmp slt i32 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next word up, which should use CY instead of C.
+define double @f4(double %a, double %b, i32 %i1, i32 *%base) {
+; CHECK: f4:
+; CHECK: cy %r2, 4096(%r3)
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%base, i64 1024
+  %i2 = load i32 *%ptr
+  %cond = icmp slt i32 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the high end of the aligned CY range.
+define double @f5(double %a, double %b, i32 %i1, i32 *%base) {
+; CHECK: f5:
+; CHECK: cy %r2, 524284(%r3)
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%base, i64 131071
+  %i2 = load i32 *%ptr
+  %cond = icmp slt i32 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define double @f6(double %a, double %b, i32 %i1, i32 *%base) {
+; CHECK: f6:
+; CHECK: agfi %r3, 524288
+; CHECK: c %r2, 0(%r3)
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%base, i64 131072
+  %i2 = load i32 *%ptr
+  %cond = icmp slt i32 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the high end of the negative aligned CY range.
+define double @f7(double %a, double %b, i32 %i1, i32 *%base) {
+; CHECK: f7:
+; CHECK: cy %r2, -4(%r3)
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%base, i64 -1
+  %i2 = load i32 *%ptr
+  %cond = icmp slt i32 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the low end of the CY range.
+define double @f8(double %a, double %b, i32 %i1, i32 *%base) {
+; CHECK: f8:
+; CHECK: cy %r2, -524288(%r3)
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%base, i64 -131072
+  %i2 = load i32 *%ptr
+  %cond = icmp slt i32 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next word down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define double @f9(double %a, double %b, i32 %i1, i32 *%base) {
+; CHECK: f9:
+; CHECK: agfi %r3, -524292
+; CHECK: c %r2, 0(%r3)
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%base, i64 -131073
+  %i2 = load i32 *%ptr
+  %cond = icmp slt i32 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check that C allows an index.
+define double @f10(double %a, double %b, i32 %i1, i64 %base, i64 %index) {
+; CHECK: f10:
+; CHECK: c %r2, 4092({{%r4,%r3|%r3,%r4}})
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %add1 = add i64 %base, %index
+  %add2 = add i64 %add1, 4092
+  %ptr = inttoptr i64 %add2 to i32 *
+  %i2 = load i32 *%ptr
+  %cond = icmp slt i32 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check that CY allows an index.
+define double @f11(double %a, double %b, i32 %i1, i64 %base, i64 %index) {
+; CHECK: f11:
+; CHECK: cy %r2, 4096({{%r4,%r3|%r3,%r4}})
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %add1 = add i64 %base, %index
+  %add2 = add i64 %add1, 4096
+  %ptr = inttoptr i64 %add2 to i32 *
+  %i2 = load i32 *%ptr
+  %cond = icmp slt i32 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-03.ll b/test/CodeGen/SystemZ/int-cmp-03.ll
new file mode 100644
index 0000000..4203bee
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-cmp-03.ll
@@ -0,0 +1,162 @@
+; Test 32-bit unsigned comparison in which the second operand is a variable.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check register comparison.
+define double @f1(double %a, double %b, i32 %i1, i32 %i2) {
+; CHECK: f1:
+; CHECK: clr %r2, %r3
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp ult i32 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the low end of the CL range.
+define double @f2(double %a, double %b, i32 %i1, i32 *%ptr) {
+; CHECK: f2:
+; CHECK: cl %r2, 0(%r3)
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %i2 = load i32 *%ptr
+  %cond = icmp ult i32 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the high end of the aligned CL range.
+define double @f3(double %a, double %b, i32 %i1, i32 *%base) {
+; CHECK: f3:
+; CHECK: cl %r2, 4092(%r3)
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%base, i64 1023
+  %i2 = load i32 *%ptr
+  %cond = icmp ult i32 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next word up, which should use CLY instead of CL.
+define double @f4(double %a, double %b, i32 %i1, i32 *%base) {
+; CHECK: f4:
+; CHECK: cly %r2, 4096(%r3)
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%base, i64 1024
+  %i2 = load i32 *%ptr
+  %cond = icmp ult i32 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the high end of the aligned CLY range.
+define double @f5(double %a, double %b, i32 %i1, i32 *%base) {
+; CHECK: f5:
+; CHECK: cly %r2, 524284(%r3)
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%base, i64 131071
+  %i2 = load i32 *%ptr
+  %cond = icmp ult i32 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define double @f6(double %a, double %b, i32 %i1, i32 *%base) {
+; CHECK: f6:
+; CHECK: agfi %r3, 524288
+; CHECK: cl %r2, 0(%r3)
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%base, i64 131072
+  %i2 = load i32 *%ptr
+  %cond = icmp ult i32 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the high end of the negative aligned CLY range.
+define double @f7(double %a, double %b, i32 %i1, i32 *%base) {
+; CHECK: f7:
+; CHECK: cly %r2, -4(%r3)
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%base, i64 -1
+  %i2 = load i32 *%ptr
+  %cond = icmp ult i32 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the low end of the CLY range.
+define double @f8(double %a, double %b, i32 %i1, i32 *%base) {
+; CHECK: f8:
+; CHECK: cly %r2, -524288(%r3)
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%base, i64 -131072
+  %i2 = load i32 *%ptr
+  %cond = icmp ult i32 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next word down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define double @f9(double %a, double %b, i32 %i1, i32 *%base) {
+; CHECK: f9:
+; CHECK: agfi %r3, -524292
+; CHECK: cl %r2, 0(%r3)
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%base, i64 -131073
+  %i2 = load i32 *%ptr
+  %cond = icmp ult i32 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check that CL allows an index.
+define double @f10(double %a, double %b, i32 %i1, i64 %base, i64 %index) {
+; CHECK: f10:
+; CHECK: cl %r2, 4092({{%r4,%r3|%r3,%r4}})
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %add1 = add i64 %base, %index
+  %add2 = add i64 %add1, 4092
+  %ptr = inttoptr i64 %add2 to i32 *
+  %i2 = load i32 *%ptr
+  %cond = icmp ult i32 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check that CLY allows an index.
+define double @f11(double %a, double %b, i32 %i1, i64 %base, i64 %index) {
+; CHECK: f11:
+; CHECK: cly %r2, 4096({{%r4,%r3|%r3,%r4}})
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %add1 = add i64 %base, %index
+  %add2 = add i64 %add1, 4096
+  %ptr = inttoptr i64 %add2 to i32 *
+  %i2 = load i32 *%ptr
+  %cond = icmp ult i32 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-04.ll b/test/CodeGen/SystemZ/int-cmp-04.ll
new file mode 100644
index 0000000..d0625fb
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-cmp-04.ll
@@ -0,0 +1,107 @@
+; Test 64-bit signed comparison in which the second operand is sign-extended
+; from an i16 memory value.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check CGH with no displacement.
+define void @f1(i64 %lhs, i16 *%src, i64 *%dst) {
+; CHECK: f1:
+; CHECK: cgh %r2, 0(%r3)
+; CHECK: br %r14
+  %half = load i16 *%src
+  %rhs = sext i16 %half to i64
+  %cond = icmp slt i64 %lhs, %rhs
+  %res = select i1 %cond, i64 100, i64 200
+  store i64 %res, i64 *%dst
+  ret void
+}
+
+; Check the high end of the aligned CGH range.
+define void @f2(i64 %lhs, i16 *%src, i64 *%dst) {
+; CHECK: f2:
+; CHECK: cgh %r2, 524286(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%src, i64 262143
+  %half = load i16 *%ptr
+  %rhs = sext i16 %half to i64
+  %cond = icmp slt i64 %lhs, %rhs
+  %res = select i1 %cond, i64 100, i64 200
+  store i64 %res, i64 *%dst
+  ret void
+}
+
+; Check the next halfword up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define void @f3(i64 %lhs, i16 *%src, i64 *%dst) {
+; CHECK: f3:
+; CHECK: agfi %r3, 524288
+; CHECK: cgh %r2, 0(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%src, i64 262144
+  %half = load i16 *%ptr
+  %rhs = sext i16 %half to i64
+  %cond = icmp slt i64 %lhs, %rhs
+  %res = select i1 %cond, i64 100, i64 200
+  store i64 %res, i64 *%dst
+  ret void
+}
+
+; Check the high end of the negative aligned CGH range.
+define void @f4(i64 %lhs, i16 *%src, i64 *%dst) {
+; CHECK: f4:
+; CHECK: cgh %r2, -2(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%src, i64 -1
+  %half = load i16 *%ptr
+  %rhs = sext i16 %half to i64
+  %cond = icmp slt i64 %lhs, %rhs
+  %res = select i1 %cond, i64 100, i64 200
+  store i64 %res, i64 *%dst
+  ret void
+}
+
+; Check the low end of the CGH range.
+define void @f5(i64 %lhs, i16 *%src, i64 *%dst) {
+; CHECK: f5:
+; CHECK: cgh %r2, -524288(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%src, i64 -262144
+  %half = load i16 *%ptr
+  %rhs = sext i16 %half to i64
+  %cond = icmp slt i64 %lhs, %rhs
+  %res = select i1 %cond, i64 100, i64 200
+  store i64 %res, i64 *%dst
+  ret void
+}
+
+; Check the next halfword down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define void @f6(i64 %lhs, i16 *%src, i64 *%dst) {
+; CHECK: f6:
+; CHECK: agfi %r3, -524290
+; CHECK: cgh %r2, 0(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%src, i64 -262145
+  %half = load i16 *%ptr
+  %rhs = sext i16 %half to i64
+  %cond = icmp slt i64 %lhs, %rhs
+  %res = select i1 %cond, i64 100, i64 200
+  store i64 %res, i64 *%dst
+  ret void
+}
+
+; Check that CGH allows an index.
+define void @f7(i64 %lhs, i64 %base, i64 %index, i64 *%dst) {
+; CHECK: f7:
+; CHECK: cgh %r2, 4096({{%r4,%r3|%r3,%r4}})
+; CHECK: br %r14
+  %add1 = add i64 %base, %index
+  %add2 = add i64 %add1, 4096
+  %ptr = inttoptr i64 %add2 to i16 *
+  %half = load i16 *%ptr
+  %rhs = sext i16 %half to i64
+  %cond = icmp slt i64 %lhs, %rhs
+  %res = select i1 %cond, i64 100, i64 200
+  store i64 %res, i64 *%dst
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-05.ll b/test/CodeGen/SystemZ/int-cmp-05.ll
new file mode 100644
index 0000000..2ab64d5
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-cmp-05.ll
@@ -0,0 +1,203 @@
+; Test 64-bit comparison in which the second operand is a sign-extended i32.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check signed register comparison.
+define double @f1(double %a, double %b, i64 %i1, i32 %unext) {
+; CHECK: f1:
+; CHECK: cgfr %r2, %r3
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %i2 = sext i32 %unext to i64
+  %cond = icmp slt i64 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check unsigned register comparison, which can't use CGFR.
+define double @f2(double %a, double %b, i64 %i1, i32 %unext) {
+; CHECK: f2:
+; CHECK-NOT: cgfr
+; CHECK: br %r14
+  %i2 = sext i32 %unext to i64
+  %cond = icmp ult i64 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check register equality.
+define double @f3(double %a, double %b, i64 %i1, i32 %unext) {
+; CHECK: f3:
+; CHECK: cgfr %r2, %r3
+; CHECK-NEXT: j{{g?}}e
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %i2 = sext i32 %unext to i64
+  %cond = icmp eq i64 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check register inequality.
+define double @f4(double %a, double %b, i64 %i1, i32 %unext) {
+; CHECK: f4:
+; CHECK: cgfr %r2, %r3
+; CHECK-NEXT: j{{g?}}lh
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %i2 = sext i32 %unext to i64
+  %cond = icmp ne i64 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check signed comparison with memory.
+define double @f5(double %a, double %b, i64 %i1, i32 *%ptr) {
+; CHECK: f5:
+; CHECK: cgf %r2, 0(%r3)
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %unext = load i32 *%ptr
+  %i2 = sext i32 %unext to i64
+  %cond = icmp slt i64 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check unsigned comparison with memory.
+define double @f6(double %a, double %b, i64 %i1, i32 *%ptr) {
+; CHECK: f6:
+; CHECK-NOT: cgf
+; CHECK: br %r14
+  %unext = load i32 *%ptr
+  %i2 = sext i32 %unext to i64
+  %cond = icmp ult i64 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check memory equality.
+define double @f7(double %a, double %b, i64 %i1, i32 *%ptr) {
+; CHECK: f7:
+; CHECK: cgf %r2, 0(%r3)
+; CHECK-NEXT: j{{g?}}e
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %unext = load i32 *%ptr
+  %i2 = sext i32 %unext to i64
+  %cond = icmp eq i64 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check memory inequality.
+define double @f8(double %a, double %b, i64 %i1, i32 *%ptr) {
+; CHECK: f8:
+; CHECK: cgf %r2, 0(%r3)
+; CHECK-NEXT: j{{g?}}lh
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %unext = load i32 *%ptr
+  %i2 = sext i32 %unext to i64
+  %cond = icmp ne i64 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the high end of the aligned CGF range.
+define double @f9(double %a, double %b, i64 %i1, i32 *%base) {
+; CHECK: f9:
+; CHECK: cgf %r2, 524284(%r3)
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%base, i64 131071
+  %unext = load i32 *%ptr
+  %i2 = sext i32 %unext to i64
+  %cond = icmp slt i64 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define double @f10(double %a, double %b, i64 %i1, i32 *%base) {
+; CHECK: f10:
+; CHECK: agfi %r3, 524288
+; CHECK: cgf %r2, 0(%r3)
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%base, i64 131072
+  %unext = load i32 *%ptr
+  %i2 = sext i32 %unext to i64
+  %cond = icmp slt i64 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the high end of the negative aligned CGF range.
+define double @f11(double %a, double %b, i64 %i1, i32 *%base) {
+; CHECK: f11:
+; CHECK: cgf %r2, -4(%r3)
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%base, i64 -1
+  %unext = load i32 *%ptr
+  %i2 = sext i32 %unext to i64
+  %cond = icmp slt i64 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the low end of the CGF range.
+define double @f12(double %a, double %b, i64 %i1, i32 *%base) {
+; CHECK: f12:
+; CHECK: cgf %r2, -524288(%r3)
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%base, i64 -131072
+  %unext = load i32 *%ptr
+  %i2 = sext i32 %unext to i64
+  %cond = icmp slt i64 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next word down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define double @f13(double %a, double %b, i64 %i1, i32 *%base) {
+; CHECK: f13:
+; CHECK: agfi %r3, -524292
+; CHECK: cgf %r2, 0(%r3)
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%base, i64 -131073
+  %unext = load i32 *%ptr
+  %i2 = sext i32 %unext to i64
+  %cond = icmp slt i64 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check that CGF allows an index.
+define double @f14(double %a, double %b, i64 %i1, i64 %base, i64 %index) {
+; CHECK: f14:
+; CHECK: cgf %r2, 524284({{%r4,%r3|%r3,%r4}})
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %add1 = add i64 %base, %index
+  %add2 = add i64 %add1, 524284
+  %ptr = inttoptr i64 %add2 to i32 *
+  %unext = load i32 *%ptr
+  %i2 = sext i32 %unext to i64
+  %cond = icmp slt i64 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-06.ll b/test/CodeGen/SystemZ/int-cmp-06.ll
new file mode 100644
index 0000000..26f6dbf
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-cmp-06.ll
@@ -0,0 +1,253 @@
+; Test 64-bit comparison in which the second operand is a zero-extended i32.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check unsigned register comparison.
+define double @f1(double %a, double %b, i64 %i1, i32 %unext) {
+; CHECK: f1:
+; CHECK: clgfr %r2, %r3
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %i2 = zext i32 %unext to i64
+  %cond = icmp ult i64 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; ...and again with a different representation.
+define double @f2(double %a, double %b, i64 %i1, i64 %unext) {
+; CHECK: f2:
+; CHECK: clgfr %r2, %r3
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %i2 = and i64 %unext, 4294967295
+  %cond = icmp ult i64 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check signed register comparison, which can't use CLGFR.
+define double @f3(double %a, double %b, i64 %i1, i32 %unext) {
+; CHECK: f3:
+; CHECK-NOT: clgfr
+; CHECK: br %r14
+  %i2 = zext i32 %unext to i64
+  %cond = icmp slt i64 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; ...and again with a different representation.
+define double @f4(double %a, double %b, i64 %i1, i64 %unext) {
+; CHECK: f4:
+; CHECK-NOT: clgfr
+; CHECK: br %r14
+  %i2 = and i64 %unext, 4294967295
+  %cond = icmp slt i64 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check register equality.
+define double @f5(double %a, double %b, i64 %i1, i32 %unext) {
+; CHECK: f5:
+; CHECK: clgfr %r2, %r3
+; CHECK-NEXT: j{{g?}}e
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %i2 = zext i32 %unext to i64
+  %cond = icmp eq i64 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; ...and again with a different representation.
+define double @f6(double %a, double %b, i64 %i1, i64 %unext) {
+; CHECK: f6:
+; CHECK: clgfr %r2, %r3
+; CHECK-NEXT: j{{g?}}e
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %i2 = and i64 %unext, 4294967295
+  %cond = icmp eq i64 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check register inequality.
+define double @f7(double %a, double %b, i64 %i1, i32 %unext) {
+; CHECK: f7:
+; CHECK: clgfr %r2, %r3
+; CHECK-NEXT: j{{g?}}lh
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %i2 = zext i32 %unext to i64
+  %cond = icmp ne i64 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; ...and again with a different representation.
+define double @f8(double %a, double %b, i64 %i1, i64 %unext) {
+; CHECK: f8:
+; CHECK: clgfr %r2, %r3
+; CHECK-NEXT: j{{g?}}lh
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %i2 = and i64 %unext, 4294967295
+  %cond = icmp ne i64 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check unsigned comparison with memory.
+define double @f9(double %a, double %b, i64 %i1, i32 *%ptr) {
+; CHECK: f9:
+; CHECK: clgf %r2, 0(%r3)
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %unext = load i32 *%ptr
+  %i2 = zext i32 %unext to i64
+  %cond = icmp ult i64 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check signed comparison with memory.
+define double @f10(double %a, double %b, i64 %i1, i32 *%ptr) {
+; CHECK: f10:
+; CHECK-NOT: clgf
+; CHECK: br %r14
+  %unext = load i32 *%ptr
+  %i2 = zext i32 %unext to i64
+  %cond = icmp slt i64 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check memory equality.
+define double @f11(double %a, double %b, i64 %i1, i32 *%ptr) {
+; CHECK: f11:
+; CHECK: clgf %r2, 0(%r3)
+; CHECK-NEXT: j{{g?}}e
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %unext = load i32 *%ptr
+  %i2 = zext i32 %unext to i64
+  %cond = icmp eq i64 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check memory inequality.
+define double @f12(double %a, double %b, i64 %i1, i32 *%ptr) {
+; CHECK: f12:
+; CHECK: clgf %r2, 0(%r3)
+; CHECK-NEXT: j{{g?}}lh
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %unext = load i32 *%ptr
+  %i2 = zext i32 %unext to i64
+  %cond = icmp ne i64 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the high end of the aligned CLGF range.
+define double @f13(double %a, double %b, i64 %i1, i32 *%base) {
+; CHECK: f13:
+; CHECK: clgf %r2, 524284(%r3)
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%base, i64 131071
+  %unext = load i32 *%ptr
+  %i2 = zext i32 %unext to i64
+  %cond = icmp ult i64 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define double @f14(double %a, double %b, i64 %i1, i32 *%base) {
+; CHECK: f14:
+; CHECK: agfi %r3, 524288
+; CHECK: clgf %r2, 0(%r3)
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%base, i64 131072
+  %unext = load i32 *%ptr
+  %i2 = zext i32 %unext to i64
+  %cond = icmp ult i64 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the high end of the negative aligned CLGF range.
+define double @f15(double %a, double %b, i64 %i1, i32 *%base) {
+; CHECK: f15:
+; CHECK: clgf %r2, -4(%r3)
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%base, i64 -1
+  %unext = load i32 *%ptr
+  %i2 = zext i32 %unext to i64
+  %cond = icmp ult i64 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the low end of the CLGF range.
+define double @f16(double %a, double %b, i64 %i1, i32 *%base) {
+; CHECK: f16:
+; CHECK: clgf %r2, -524288(%r3)
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%base, i64 -131072
+  %unext = load i32 *%ptr
+  %i2 = zext i32 %unext to i64
+  %cond = icmp ult i64 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next word down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define double @f17(double %a, double %b, i64 %i1, i32 *%base) {
+; CHECK: f17:
+; CHECK: agfi %r3, -524292
+; CHECK: clgf %r2, 0(%r3)
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%base, i64 -131073
+  %unext = load i32 *%ptr
+  %i2 = zext i32 %unext to i64
+  %cond = icmp ult i64 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check that CLGF allows an index.
+define double @f18(double %a, double %b, i64 %i1, i64 %base, i64 %index) {
+; CHECK: f18:
+; CHECK: clgf %r2, 524284({{%r4,%r3|%r3,%r4}})
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %add1 = add i64 %base, %index
+  %add2 = add i64 %add1, 524284
+  %ptr = inttoptr i64 %add2 to i32 *
+  %unext = load i32 *%ptr
+  %i2 = zext i32 %unext to i64
+  %cond = icmp ult i64 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-07.ll b/test/CodeGen/SystemZ/int-cmp-07.ll
new file mode 100644
index 0000000..1a6f622
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-cmp-07.ll
@@ -0,0 +1,118 @@
+; Test 64-bit signed comparison in which the second operand is a variable.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check CGR.
+define double @f1(double %a, double %b, i64 %i1, i64 %i2) {
+; CHECK: f1:
+; CHECK: cgr %r2, %r3
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp slt i64 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check CG with no displacement.
+define double @f2(double %a, double %b, i64 %i1, i64 *%ptr) {
+; CHECK: f2:
+; CHECK: cg %r2, 0(%r3)
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %i2 = load i64 *%ptr
+  %cond = icmp slt i64 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the high end of the aligned CG range.
+define double @f3(double %a, double %b, i64 %i1, i64 *%base) {
+; CHECK: f3:
+; CHECK: cg %r2, 524280(%r3)
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%base, i64 65535
+  %i2 = load i64 *%ptr
+  %cond = icmp slt i64 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next doubleword up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define double @f4(double %a, double %b, i64 %i1, i64 *%base) {
+; CHECK: f4:
+; CHECK: agfi %r3, 524288
+; CHECK: cg %r2, 0(%r3)
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%base, i64 65536
+  %i2 = load i64 *%ptr
+  %cond = icmp slt i64 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the high end of the negative aligned CG range.
+define double @f5(double %a, double %b, i64 %i1, i64 *%base) {
+; CHECK: f5:
+; CHECK: cg %r2, -8(%r3)
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%base, i64 -1
+  %i2 = load i64 *%ptr
+  %cond = icmp slt i64 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the low end of the CG range.
+define double @f6(double %a, double %b, i64 %i1, i64 *%base) {
+; CHECK: f6:
+; CHECK: cg %r2, -524288(%r3)
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%base, i64 -65536
+  %i2 = load i64 *%ptr
+  %cond = icmp slt i64 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next doubleword down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define double @f7(double %a, double %b, i64 %i1, i64 *%base) {
+; CHECK: f7:
+; CHECK: agfi %r3, -524296
+; CHECK: cg %r2, 0(%r3)
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%base, i64 -65537
+  %i2 = load i64 *%ptr
+  %cond = icmp slt i64 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check that CG allows an index.
+define double @f8(double %a, double %b, i64 %i1, i64 %base, i64 %index) {
+; CHECK: f8:
+; CHECK: cg %r2, 524280({{%r4,%r3|%r3,%r4}})
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %add1 = add i64 %base, %index
+  %add2 = add i64 %add1, 524280
+  %ptr = inttoptr i64 %add2 to i64 *
+  %i2 = load i64 *%ptr
+  %cond = icmp slt i64 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-08.ll b/test/CodeGen/SystemZ/int-cmp-08.ll
new file mode 100644
index 0000000..6e9a13e
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-cmp-08.ll
@@ -0,0 +1,118 @@
+; Test 64-bit unsigned comparison in which the second operand is a variable.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check CLGR.
+define double @f1(double %a, double %b, i64 %i1, i64 %i2) {
+; CHECK: f1:
+; CHECK: clgr %r2, %r3
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp ult i64 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check CLG with no displacement.
+define double @f2(double %a, double %b, i64 %i1, i64 *%ptr) {
+; CHECK: f2:
+; CHECK: clg %r2, 0(%r3)
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %i2 = load i64 *%ptr
+  %cond = icmp ult i64 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the high end of the aligned CLG range.
+define double @f3(double %a, double %b, i64 %i1, i64 *%base) {
+; CHECK: f3:
+; CHECK: clg %r2, 524280(%r3)
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%base, i64 65535
+  %i2 = load i64 *%ptr
+  %cond = icmp ult i64 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next doubleword up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define double @f4(double %a, double %b, i64 %i1, i64 *%base) {
+; CHECK: f4:
+; CHECK: agfi %r3, 524288
+; CHECK: clg %r2, 0(%r3)
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%base, i64 65536
+  %i2 = load i64 *%ptr
+  %cond = icmp ult i64 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the high end of the negative aligned CLG range.
+define double @f5(double %a, double %b, i64 %i1, i64 *%base) {
+; CHECK: f5:
+; CHECK: clg %r2, -8(%r3)
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%base, i64 -1
+  %i2 = load i64 *%ptr
+  %cond = icmp ult i64 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the low end of the CLG range.
+define double @f6(double %a, double %b, i64 %i1, i64 *%base) {
+; CHECK: f6:
+; CHECK: clg %r2, -524288(%r3)
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%base, i64 -65536
+  %i2 = load i64 *%ptr
+  %cond = icmp ult i64 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next doubleword down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define double @f7(double %a, double %b, i64 %i1, i64 *%base) {
+; CHECK: f7:
+; CHECK: agfi %r3, -524296
+; CHECK: clg %r2, 0(%r3)
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%base, i64 -65537
+  %i2 = load i64 *%ptr
+  %cond = icmp ult i64 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check that CLG allows an index.
+define double @f8(double %a, double %b, i64 %i1, i64 %base, i64 %index) {
+; CHECK: f8:
+; CHECK: clg %r2, 524280({{%r4,%r3|%r3,%r4}})
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %add1 = add i64 %base, %index
+  %add2 = add i64 %add1, 524280
+  %ptr = inttoptr i64 %add2 to i64 *
+  %i2 = load i64 *%ptr
+  %cond = icmp ult i64 %i1, %i2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-09.ll b/test/CodeGen/SystemZ/int-cmp-09.ll
new file mode 100644
index 0000000..bb7213c
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-cmp-09.ll
@@ -0,0 +1,135 @@
+; Test 32-bit signed comparison in which the second operand is constant.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check comparisons with 0.
+define double @f1(double %a, double %b, i32 %i1) {
+; CHECK: f1:
+; CHECK: chi %r2, 0
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp slt i32 %i1, 0
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check comparisons with 1.
+define double @f2(double %a, double %b, i32 %i1) {
+; CHECK: f2:
+; CHECK: chi %r2, 1
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp slt i32 %i1, 1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the high end of the CHI range.
+define double @f3(double %a, double %b, i32 %i1) {
+; CHECK: f3:
+; CHECK: chi %r2, 32767
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp slt i32 %i1, 32767
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value up, which must use CFI.
+define double @f4(double %a, double %b, i32 %i1) {
+; CHECK: f4:
+; CHECK: cfi %r2, 32768
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp slt i32 %i1, 32768
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the high end of the signed 32-bit range.
+define double @f5(double %a, double %b, i32 %i1) {
+; CHECK: f5:
+; CHECK: cfi %r2, 2147483647
+; CHECK-NEXT: j{{g?}}e
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp eq i32 %i1, 2147483647
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value up, which should be treated as a negative value.
+define double @f6(double %a, double %b, i32 %i1) {
+; CHECK: f6:
+; CHECK: cfi %r2, -2147483648
+; CHECK-NEXT: j{{g?}}e
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp eq i32 %i1, 2147483648
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the high end of the negative CHI range.
+define double @f7(double %a, double %b, i32 %i1) {
+; CHECK: f7:
+; CHECK: chi %r2, -1
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp slt i32 %i1, -1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the low end of the CHI range.
+define double @f8(double %a, double %b, i32 %i1) {
+; CHECK: f8:
+; CHECK: chi %r2, -32768
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp slt i32 %i1, -32768
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value down, which must use CFI instead.
+define double @f9(double %a, double %b, i32 %i1) {
+; CHECK: f9:
+; CHECK: cfi %r2, -32769
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp slt i32 %i1, -32769
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the low end of the signed 32-bit range.
+define double @f10(double %a, double %b, i32 %i1) {
+; CHECK: f10:
+; CHECK: cfi %r2, -2147483648
+; CHECK-NEXT: j{{g?}}e
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp eq i32 %i1, -2147483648
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value down, which should be treated as a positive value.
+define double @f11(double %a, double %b, i32 %i1) {
+; CHECK: f11:
+; CHECK: cfi %r2, 2147483647
+; CHECK-NEXT: j{{g?}}e
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp eq i32 %i1, -2147483649
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-10.ll b/test/CodeGen/SystemZ/int-cmp-10.ll
new file mode 100644
index 0000000..f2d3ccd
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-cmp-10.ll
@@ -0,0 +1,28 @@
+; Test 32-bit unsigned comparisons in which the second operand is constant.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check a value near the low end of the range.  We use signed forms for
+; comparisons with zero, or things that are equivalent to them.
+define double @f1(double %a, double %b, i32 %i1) {
+; CHECK: f1:
+; CHECK: clfi %r2, 1
+; CHECK-NEXT: j{{g?}}h
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp ugt i32 %i1, 1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check a value near the high end of the range.
+define double @f2(double %a, double %b, i32 %i1) {
+; CHECK: f2:
+; CHECK: clfi %r2, 4294967280
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp ult i32 %i1, 4294967280
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-11.ll b/test/CodeGen/SystemZ/int-cmp-11.ll
new file mode 100644
index 0000000..1bfb0c6
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-cmp-11.ll
@@ -0,0 +1,135 @@
+; Test 64-bit signed comparisons in which the second operand is a constant.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check comparisons with 0.
+define double @f1(double %a, double %b, i64 %i1) {
+; CHECK: f1:
+; CHECK: cghi %r2, 0
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp slt i64 %i1, 0
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check comparisons with 1.
+define double @f2(double %a, double %b, i64 %i1) {
+; CHECK: f2:
+; CHECK: cghi %r2, 1
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp slt i64 %i1, 1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the high end of the CGHI range.
+define double @f3(double %a, double %b, i64 %i1) {
+; CHECK: f3:
+; CHECK: cghi %r2, 32767
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp slt i64 %i1, 32767
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value up, which must use CGFI.
+define double @f4(double %a, double %b, i64 %i1) {
+; CHECK: f4:
+; CHECK: cgfi %r2, 32768
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp slt i64 %i1, 32768
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the high end of the CGFI range.
+define double @f5(double %a, double %b, i64 %i1) {
+; CHECK: f5:
+; CHECK: cgfi %r2, 2147483647
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp slt i64 %i1, 2147483647
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value up, which must use register comparison.
+define double @f6(double %a, double %b, i64 %i1) {
+; CHECK: f6:
+; CHECK: cgr
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp slt i64 %i1, 2147483648
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the high end of the negative CGHI range.
+define double @f7(double %a, double %b, i64 %i1) {
+; CHECK: f7:
+; CHECK: cghi %r2, -1
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp slt i64 %i1, -1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the low end of the CGHI range.
+define double @f8(double %a, double %b, i64 %i1) {
+; CHECK: f8:
+; CHECK: cghi %r2, -32768
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp slt i64 %i1, -32768
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value down, which must use CGFI instead.
+define double @f9(double %a, double %b, i64 %i1) {
+; CHECK: f9:
+; CHECK: cgfi %r2, -32769
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp slt i64 %i1, -32769
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the low end of the CGFI range.
+define double @f10(double %a, double %b, i64 %i1) {
+; CHECK: f10:
+; CHECK: cgfi %r2, -2147483648
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp slt i64 %i1, -2147483648
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value down, which must use register comparison.
+define double @f11(double %a, double %b, i64 %i1) {
+; CHECK: f11:
+; CHECK: cgr
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp slt i64 %i1, -2147483649
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-12.ll b/test/CodeGen/SystemZ/int-cmp-12.ll
new file mode 100644
index 0000000..0288730
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-cmp-12.ll
@@ -0,0 +1,40 @@
+; Test 64-bit unsigned comparisons in which the second operand is constant.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check a value near the low end of the range.  We use signed forms for
+; comparisons with zero, or things that are equivalent to them.
+define double @f1(double %a, double %b, i64 %i1) {
+; CHECK: f1:
+; CHECK: clgfi %r2, 1
+; CHECK-NEXT: j{{g?}}h
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp ugt i64 %i1, 1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the high end of the CLGFI range.
+define double @f2(double %a, double %b, i64 %i1) {
+; CHECK: f2:
+; CHECK: clgfi %r2, 4294967295
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp ult i64 %i1, 4294967295
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value up, which must use a register comparison.
+define double @f3(double %a, double %b, i64 %i1) {
+; CHECK: f3:
+; CHECK: clgr %r2,
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp ult i64 %i1, 4294967296
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-13.ll b/test/CodeGen/SystemZ/int-cmp-13.ll
new file mode 100644
index 0000000..c180831
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-cmp-13.ll
@@ -0,0 +1,147 @@
+; Test 64-bit equality comparisons in which the second operand is a constant.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check comparisons with 0.
+define double @f1(double %a, double %b, i64 %i1) {
+; CHECK: f1:
+; CHECK: cghi %r2, 0
+; CHECK-NEXT: j{{g?}}e
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp eq i64 %i1, 0
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the high end of the CGHI range.
+define double @f2(double %a, double %b, i64 %i1) {
+; CHECK: f2:
+; CHECK: cghi %r2, 32767
+; CHECK-NEXT: j{{g?}}e
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp eq i64 %i1, 32767
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value up, which must use CGFI.
+define double @f3(double %a, double %b, i64 %i1) {
+; CHECK: f3:
+; CHECK: cgfi %r2, 32768
+; CHECK-NEXT: j{{g?}}e
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp eq i64 %i1, 32768
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the high end of the CGFI range.
+define double @f4(double %a, double %b, i64 %i1) {
+; CHECK: f4:
+; CHECK: cgfi %r2, 2147483647
+; CHECK-NEXT: j{{g?}}e
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp eq i64 %i1, 2147483647
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value up, which should use CLGFI instead.
+define double @f5(double %a, double %b, i64 %i1) {
+; CHECK: f5:
+; CHECK: clgfi %r2, 2147483648
+; CHECK-NEXT: j{{g?}}e
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp eq i64 %i1, 2147483648
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the high end of the CLGFI range.
+define double @f6(double %a, double %b, i64 %i1) {
+; CHECK: f6:
+; CHECK: clgfi %r2, 4294967295
+; CHECK-NEXT: j{{g?}}e
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp eq i64 %i1, 4294967295
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value up, which must use a register comparison.
+define double @f7(double %a, double %b, i64 %i1) {
+; CHECK: f7:
+; CHECK: cgr %r2,
+; CHECK-NEXT: j{{g?}}e
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp eq i64 %i1, 4294967296
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the high end of the negative CGHI range.
+define double @f8(double %a, double %b, i64 %i1) {
+; CHECK: f8:
+; CHECK: cghi %r2, -1
+; CHECK-NEXT: j{{g?}}e
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp eq i64 %i1, -1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the low end of the CGHI range.
+define double @f9(double %a, double %b, i64 %i1) {
+; CHECK: f9:
+; CHECK: cghi %r2, -32768
+; CHECK-NEXT: j{{g?}}e
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp eq i64 %i1, -32768
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value down, which must use CGFI instead.
+define double @f10(double %a, double %b, i64 %i1) {
+; CHECK: f10:
+; CHECK: cgfi %r2, -32769
+; CHECK-NEXT: j{{g?}}e
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp eq i64 %i1, -32769
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the low end of the CGFI range.
+define double @f11(double %a, double %b, i64 %i1) {
+; CHECK: f11:
+; CHECK: cgfi %r2, -2147483648
+; CHECK-NEXT: j{{g?}}e
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp eq i64 %i1, -2147483648
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value down, which must use a register comparison.
+define double @f12(double %a, double %b, i64 %i1) {
+; CHECK: f12:
+; CHECK: cgr
+; CHECK-NEXT: j{{g?}}e
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp eq i64 %i1, -2147483649
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-14.ll b/test/CodeGen/SystemZ/int-cmp-14.ll
new file mode 100644
index 0000000..6a7e0e6
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-cmp-14.ll
@@ -0,0 +1,147 @@
+; Test 64-bit inequality comparisons in which the second operand is a constant.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check comparisons with 0.
+define double @f1(double %a, double %b, i64 %i1) {
+; CHECK: f1:
+; CHECK: cghi %r2, 0
+; CHECK-NEXT: j{{g?}}lh
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp ne i64 %i1, 0
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the high end of the CGHI range.
+define double @f2(double %a, double %b, i64 %i1) {
+; CHECK: f2:
+; CHECK: cghi %r2, 32767
+; CHECK-NEXT: j{{g?}}lh
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp ne i64 %i1, 32767
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value up, which must use CGFI.
+define double @f3(double %a, double %b, i64 %i1) {
+; CHECK: f3:
+; CHECK: cgfi %r2, 32768
+; CHECK-NEXT: j{{g?}}lh
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp ne i64 %i1, 32768
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the high end of the CGFI range.
+define double @f4(double %a, double %b, i64 %i1) {
+; CHECK: f4:
+; CHECK: cgfi %r2, 2147483647
+; CHECK-NEXT: j{{g?}}lh
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp ne i64 %i1, 2147483647
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value up, which should use CLGFI instead.
+define double @f5(double %a, double %b, i64 %i1) {
+; CHECK: f5:
+; CHECK: clgfi %r2, 2147483648
+; CHECK-NEXT: j{{g?}}lh
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp ne i64 %i1, 2147483648
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the high end of the CLGFI range.
+define double @f6(double %a, double %b, i64 %i1) {
+; CHECK: f6:
+; CHECK: clgfi %r2, 4294967295
+; CHECK-NEXT: j{{g?}}lh
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp ne i64 %i1, 4294967295
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value up, which must use a register comparison.
+define double @f7(double %a, double %b, i64 %i1) {
+; CHECK: f7:
+; CHECK: cgr %r2,
+; CHECK-NEXT: j{{g?}}lh
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp ne i64 %i1, 4294967296
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the high end of the negative CGHI range.
+define double @f8(double %a, double %b, i64 %i1) {
+; CHECK: f8:
+; CHECK: cghi %r2, -1
+; CHECK-NEXT: j{{g?}}lh
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp ne i64 %i1, -1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the low end of the CGHI range.
+define double @f9(double %a, double %b, i64 %i1) {
+; CHECK: f9:
+; CHECK: cghi %r2, -32768
+; CHECK-NEXT: j{{g?}}lh
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp ne i64 %i1, -32768
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value down, which must use CGFI instead.
+define double @f10(double %a, double %b, i64 %i1) {
+; CHECK: f10:
+; CHECK: cgfi %r2, -32769
+; CHECK-NEXT: j{{g?}}lh
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp ne i64 %i1, -32769
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the low end of the CGFI range.
+define double @f11(double %a, double %b, i64 %i1) {
+; CHECK: f11:
+; CHECK: cgfi %r2, -2147483648
+; CHECK-NEXT: j{{g?}}lh
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp ne i64 %i1, -2147483648
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value down, which must use a register comparison.
+define double @f12(double %a, double %b, i64 %i1) {
+; CHECK: f12:
+; CHECK: cgr
+; CHECK-NEXT: j{{g?}}lh
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %cond = icmp ne i64 %i1, -2147483649
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-15.ll b/test/CodeGen/SystemZ/int-cmp-15.ll
new file mode 100644
index 0000000..6bb7e2b3
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-cmp-15.ll
@@ -0,0 +1,241 @@
+; Test 8-bit unsigned comparisons between memory and constants.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check ordered comparisons near the low end of the unsigned 8-bit range.
+define double @f1(double %a, double %b, i8 *%ptr) {
+; CHECK: f1:
+; CHECK: cli 0(%r2), 1
+; CHECK-NEXT: j{{g?}}h
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %cond = icmp ugt i8 %val, 1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check ordered comparisons near the high end of the unsigned 8-bit range.
+define double @f2(double %a, double %b, i8 *%ptr) {
+; CHECK: f2:
+; CHECK: cli 0(%r2), 254
+; CHECK-NEXT: j{{g?}}l
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %cond = icmp ult i8 %val, 254
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check tests for negative bytes.
+define double @f3(double %a, double %b, i8 *%ptr) {
+; CHECK: f3:
+; CHECK: cli 0(%r2), 127
+; CHECK-NEXT: j{{g?}}h
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %cond = icmp slt i8 %val, 0
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; ...and an alternative form.
+define double @f4(double %a, double %b, i8 *%ptr) {
+; CHECK: f4:
+; CHECK: cli 0(%r2), 127
+; CHECK-NEXT: j{{g?}}h
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %cond = icmp sle i8 %val, -1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check tests for non-negative bytes.
+define double @f5(double %a, double %b, i8 *%ptr) {
+; CHECK: f5:
+; CHECK: cli 0(%r2), 128
+; CHECK-NEXT: j{{g?}}l
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %cond = icmp sge i8 %val, 0
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; ...and an alternative form.
+define double @f6(double %a, double %b, i8 *%ptr) {
+; CHECK: f6:
+; CHECK: cli 0(%r2), 128
+; CHECK-NEXT: j{{g?}}l
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %cond = icmp sgt i8 %val, -1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check equality comparisons at the low end of the signed 8-bit range.
+define double @f7(double %a, double %b, i8 *%ptr) {
+; CHECK: f7:
+; CHECK: cli 0(%r2), 128
+; CHECK-NEXT: j{{g?}}e
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %cond = icmp eq i8 %val, -128
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check equality comparisons at the low end of the unsigned 8-bit range.
+define double @f8(double %a, double %b, i8 *%ptr) {
+; CHECK: f8:
+; CHECK: cli 0(%r2), 0
+; CHECK-NEXT: j{{g?}}e
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %cond = icmp eq i8 %val, 0
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check equality comparisons at the high end of the signed 8-bit range.
+define double @f9(double %a, double %b, i8 *%ptr) {
+; CHECK: f9:
+; CHECK: cli 0(%r2), 127
+; CHECK-NEXT: j{{g?}}e
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %cond = icmp eq i8 %val, 127
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check equality comparisons at the high end of the unsigned 8-bit range.
+define double @f10(double %a, double %b, i8 *%ptr) {
+; CHECK: f10:
+; CHECK: cli 0(%r2), 255
+; CHECK-NEXT: j{{g?}}e
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %cond = icmp eq i8 %val, 255
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the high end of the CLI range.
+define double @f11(double %a, double %b, i8 *%src) {
+; CHECK: f11:
+; CHECK: cli 4095(%r2), 127
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 4095
+  %val = load i8 *%ptr
+  %cond = icmp ult i8 %val, 127
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next byte up, which should use CLIY instead of CLI.
+define double @f12(double %a, double %b, i8 *%src) {
+; CHECK: f12:
+; CHECK: cliy 4096(%r2), 127
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 4096
+  %val = load i8 *%ptr
+  %cond = icmp ult i8 %val, 127
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the high end of the CLIY range.
+define double @f13(double %a, double %b, i8 *%src) {
+; CHECK: f13:
+; CHECK: cliy 524287(%r2), 127
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 524287
+  %val = load i8 *%ptr
+  %cond = icmp ult i8 %val, 127
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next byte up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define double @f14(double %a, double %b, i8 *%src) {
+; CHECK: f14:
+; CHECK: agfi %r2, 524288
+; CHECK: cli 0(%r2), 127
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 524288
+  %val = load i8 *%ptr
+  %cond = icmp ult i8 %val, 127
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the high end of the negative CLIY range.
+define double @f15(double %a, double %b, i8 *%src) {
+; CHECK: f15:
+; CHECK: cliy -1(%r2), 127
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 -1
+  %val = load i8 *%ptr
+  %cond = icmp ult i8 %val, 127
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the low end of the CLIY range.
+define double @f16(double %a, double %b, i8 *%src) {
+; CHECK: f16:
+; CHECK: cliy -524288(%r2), 127
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 -524288
+  %val = load i8 *%ptr
+  %cond = icmp ult i8 %val, 127
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next byte down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define double @f17(double %a, double %b, i8 *%src) {
+; CHECK: f17:
+; CHECK: agfi %r2, -524289
+; CHECK: cli 0(%r2), 127
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 -524289
+  %val = load i8 *%ptr
+  %cond = icmp ult i8 %val, 127
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check that CLI does not allow an index.
+define double @f18(double %a, double %b, i64 %base, i64 %index) {
+; CHECK: f18:
+; CHECK: agr %r2, %r3
+; CHECK: cli 4095(%r2), 127
+; CHECK: br %r14
+  %add1 = add i64 %base, %index
+  %add2 = add i64 %add1, 4095
+  %ptr = inttoptr i64 %add2 to i8 *
+  %val = load i8 *%ptr
+  %cond = icmp ult i8 %val, 127
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check that CLIY does not allow an index.
+define double @f19(double %a, double %b, i64 %base, i64 %index) {
+; CHECK: f19:
+; CHECK: agr %r2, %r3
+; CHECK: cliy 4096(%r2), 127
+; CHECK: br %r14
+  %add1 = add i64 %base, %index
+  %add2 = add i64 %add1, 4096
+  %ptr = inttoptr i64 %add2 to i8 *
+  %val = load i8 *%ptr
+  %cond = icmp ult i8 %val, 127
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-16.ll b/test/CodeGen/SystemZ/int-cmp-16.ll
new file mode 100644
index 0000000..8af854e
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-cmp-16.ll
@@ -0,0 +1,133 @@
+; Test 32-bit equality comparisons that are really between a memory byte
+; and a constant.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check the low end of the 8-bit unsigned range, with zero extension.
+define double @f1(double %a, double %b, i8 *%ptr) {
+; CHECK: f1:
+; CHECK: cli 0(%r2), 0
+; CHECK-NEXT: j{{g?}}e
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = zext i8 %val to i32
+  %cond = icmp eq i32 %ext, 0
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the high end of the 8-bit unsigned range, with zero extension.
+define double @f2(double %a, double %b, i8 *%ptr) {
+; CHECK: f2:
+; CHECK: cli 0(%r2), 255
+; CHECK-NEXT: j{{g?}}e
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = zext i8 %val to i32
+  %cond = icmp eq i32 %ext, 255
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value up, with zero extension.  The condition is always false.
+define double @f3(double %a, double %b, i8 *%ptr) {
+; CHECK: f3:
+; CHECK-NOT: cli
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = zext i8 %val to i32
+  %cond = icmp eq i32 %ext, 256
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check comparisons with -1, with zero extension.
+; This condition is also always false.
+define double @f4(double %a, double %b, i8 *%ptr) {
+; CHECK: f4:
+; CHECK-NOT: cli
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = zext i8 %val to i32
+  %cond = icmp eq i32 %ext, -1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check comparisons with 0, using sign extension.
+define double @f5(double %a, double %b, i8 *%ptr) {
+; CHECK: f5:
+; CHECK: cli 0(%r2), 0
+; CHECK-NEXT: j{{g?}}e
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = sext i8 %val to i32
+  %cond = icmp eq i32 %ext, 0
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the high end of the signed 8-bit range, using sign extension.
+define double @f6(double %a, double %b, i8 *%ptr) {
+; CHECK: f6:
+; CHECK: cli 0(%r2), 127
+; CHECK-NEXT: j{{g?}}e
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = sext i8 %val to i32
+  %cond = icmp eq i32 %ext, 127
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value up, using sign extension.
+; The condition is always false.
+define double @f7(double %a, double %b, i8 *%ptr) {
+; CHECK: f7:
+; CHECK-NOT: cli
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = sext i8 %val to i32
+  %cond = icmp eq i32 %ext, 128
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check comparisons with -1, using sign extension.
+define double @f8(double %a, double %b, i8 *%ptr) {
+; CHECK: f8:
+; CHECK: cli 0(%r2), 255
+; CHECK-NEXT: j{{g?}}e
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = sext i8 %val to i32
+  %cond = icmp eq i32 %ext, -1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the low end of the signed 8-bit range, using sign extension.
+define double @f9(double %a, double %b, i8 *%ptr) {
+; CHECK: f9:
+; CHECK: cli 0(%r2), 128
+; CHECK-NEXT: j{{g?}}e
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = sext i8 %val to i32
+  %cond = icmp eq i32 %ext, -128
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value down, using sign extension.
+; The condition is always false.
+define double @f10(double %a, double %b, i8 *%ptr) {
+; CHECK: f10:
+; CHECK-NOT: cli
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = sext i8 %val to i32
+  %cond = icmp eq i32 %ext, -129
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-17.ll b/test/CodeGen/SystemZ/int-cmp-17.ll
new file mode 100644
index 0000000..d4d5e98
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-cmp-17.ll
@@ -0,0 +1,133 @@
+; Test 32-bit inequality comparisons that are really between a memory byte
+; and a constant.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check the low end of the 8-bit unsigned range, with zero extension.
+define double @f1(double %a, double %b, i8 *%ptr) {
+; CHECK: f1:
+; CHECK: cli 0(%r2), 0
+; CHECK-NEXT: j{{g?}}lh
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = zext i8 %val to i32
+  %cond = icmp ne i32 %ext, 0
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the high end of the 8-bit unsigned range, with zero extension.
+define double @f2(double %a, double %b, i8 *%ptr) {
+; CHECK: f2:
+; CHECK: cli 0(%r2), 255
+; CHECK-NEXT: j{{g?}}lh
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = zext i8 %val to i32
+  %cond = icmp ne i32 %ext, 255
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value up, with zero extension.  The condition is always true.
+define double @f3(double %a, double %b, i8 *%ptr) {
+; CHECK: f3:
+; CHECK-NOT: cli
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = zext i8 %val to i32
+  %cond = icmp ne i32 %ext, 256
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check comparisons with -1, with zero extension.
+; This condition is also always true.
+define double @f4(double %a, double %b, i8 *%ptr) {
+; CHECK: f4:
+; CHECK-NOT: cli
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = zext i8 %val to i32
+  %cond = icmp ne i32 %ext, -1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check comparisons with 0, using sign extension.
+define double @f5(double %a, double %b, i8 *%ptr) {
+; CHECK: f5:
+; CHECK: cli 0(%r2), 0
+; CHECK-NEXT: j{{g?}}lh
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = sext i8 %val to i32
+  %cond = icmp ne i32 %ext, 0
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the high end of the signed 8-bit range, using sign extension.
+define double @f6(double %a, double %b, i8 *%ptr) {
+; CHECK: f6:
+; CHECK: cli 0(%r2), 127
+; CHECK-NEXT: j{{g?}}lh
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = sext i8 %val to i32
+  %cond = icmp ne i32 %ext, 127
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value up, using sign extension.
+; The condition is always true.
+define double @f7(double %a, double %b, i8 *%ptr) {
+; CHECK: f7:
+; CHECK-NOT: cli
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = sext i8 %val to i32
+  %cond = icmp ne i32 %ext, 128
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check comparisons with -1, using sign extension.
+define double @f8(double %a, double %b, i8 *%ptr) {
+; CHECK: f8:
+; CHECK: cli 0(%r2), 255
+; CHECK-NEXT: j{{g?}}lh
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = sext i8 %val to i32
+  %cond = icmp ne i32 %ext, -1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the low end of the signed 8-bit range, using sign extension.
+define double @f9(double %a, double %b, i8 *%ptr) {
+; CHECK: f9:
+; CHECK: cli 0(%r2), 128
+; CHECK-NEXT: j{{g?}}lh
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = sext i8 %val to i32
+  %cond = icmp ne i32 %ext, -128
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value down, using sign extension.
+; The condition is always true.
+define double @f10(double %a, double %b, i8 *%ptr) {
+; CHECK: f10:
+; CHECK-NOT: cli
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = sext i8 %val to i32
+  %cond = icmp ne i32 %ext, -129
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-18.ll b/test/CodeGen/SystemZ/int-cmp-18.ll
new file mode 100644
index 0000000..9822dc2
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-cmp-18.ll
@@ -0,0 +1,133 @@
+; Test 64-bit equality comparisons that are really between a memory byte
+; and a constant.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check the low end of the 8-bit unsigned range, with zero extension.
+define double @f1(double %a, double %b, i8 *%ptr) {
+; CHECK: f1:
+; CHECK: cli 0(%r2), 0
+; CHECK-NEXT: j{{g?}}e
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = zext i8 %val to i64
+  %cond = icmp eq i64 %ext, 0
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the high end of the 8-bit unsigned range, with zero extension.
+define double @f2(double %a, double %b, i8 *%ptr) {
+; CHECK: f2:
+; CHECK: cli 0(%r2), 255
+; CHECK-NEXT: j{{g?}}e
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = zext i8 %val to i64
+  %cond = icmp eq i64 %ext, 255
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value up, with zero extension.  The condition is always false.
+define double @f3(double %a, double %b, i8 *%ptr) {
+; CHECK: f3:
+; CHECK-NOT: cli
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = zext i8 %val to i64
+  %cond = icmp eq i64 %ext, 256
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check comparisons with -1, with zero extension.
+; This condition is also always false.
+define double @f4(double %a, double %b, i8 *%ptr) {
+; CHECK: f4:
+; CHECK-NOT: cli
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = zext i8 %val to i64
+  %cond = icmp eq i64 %ext, -1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check comparisons with 0, using sign extension.
+define double @f5(double %a, double %b, i8 *%ptr) {
+; CHECK: f5:
+; CHECK: cli 0(%r2), 0
+; CHECK-NEXT: j{{g?}}e
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = sext i8 %val to i64
+  %cond = icmp eq i64 %ext, 0
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the high end of the signed 8-bit range, using sign extension.
+define double @f6(double %a, double %b, i8 *%ptr) {
+; CHECK: f6:
+; CHECK: cli 0(%r2), 127
+; CHECK-NEXT: j{{g?}}e
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = sext i8 %val to i64
+  %cond = icmp eq i64 %ext, 127
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value up, using sign extension.
+; The condition is always false.
+define double @f7(double %a, double %b, i8 *%ptr) {
+; CHECK: f7:
+; CHECK-NOT: cli
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = sext i8 %val to i64
+  %cond = icmp eq i64 %ext, 128
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check comparisons with -1, using sign extension.
+define double @f8(double %a, double %b, i8 *%ptr) {
+; CHECK: f8:
+; CHECK: cli 0(%r2), 255
+; CHECK-NEXT: j{{g?}}e
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = sext i8 %val to i64
+  %cond = icmp eq i64 %ext, -1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the low end of the signed 8-bit range, using sign extension.
+define double @f9(double %a, double %b, i8 *%ptr) {
+; CHECK: f9:
+; CHECK: cli 0(%r2), 128
+; CHECK-NEXT: j{{g?}}e
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = sext i8 %val to i64
+  %cond = icmp eq i64 %ext, -128
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value down, using sign extension.
+; The condition is always false.
+define double @f10(double %a, double %b, i8 *%ptr) {
+; CHECK: f10:
+; CHECK-NOT: cli
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = sext i8 %val to i64
+  %cond = icmp eq i64 %ext, -129
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-19.ll b/test/CodeGen/SystemZ/int-cmp-19.ll
new file mode 100644
index 0000000..7d29dbc
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-cmp-19.ll
@@ -0,0 +1,133 @@
+; Test 64-bit inequality comparisons that are really between a memory byte
+; and a constant.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check the low end of the 8-bit unsigned range, with zero extension.
+define double @f1(double %a, double %b, i8 *%ptr) {
+; CHECK: f1:
+; CHECK: cli 0(%r2), 0
+; CHECK-NEXT: j{{g?}}lh
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = zext i8 %val to i64
+  %cond = icmp ne i64 %ext, 0
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the high end of the 8-bit unsigned range, with zero extension.
+define double @f2(double %a, double %b, i8 *%ptr) {
+; CHECK: f2:
+; CHECK: cli 0(%r2), 255
+; CHECK-NEXT: j{{g?}}lh
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = zext i8 %val to i64
+  %cond = icmp ne i64 %ext, 255
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value up, with zero extension.  The condition is always true.
+define double @f3(double %a, double %b, i8 *%ptr) {
+; CHECK: f3:
+; CHECK-NOT: cli
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = zext i8 %val to i64
+  %cond = icmp ne i64 %ext, 256
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check comparisons with -1, with zero extension.
+; This condition is also always true.
+define double @f4(double %a, double %b, i8 *%ptr) {
+; CHECK: f4:
+; CHECK-NOT: cli
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = zext i8 %val to i64
+  %cond = icmp ne i64 %ext, -1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check comparisons with 0, using sign extension.
+define double @f5(double %a, double %b, i8 *%ptr) {
+; CHECK: f5:
+; CHECK: cli 0(%r2), 0
+; CHECK-NEXT: j{{g?}}lh
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = sext i8 %val to i64
+  %cond = icmp ne i64 %ext, 0
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the high end of the signed 8-bit range, using sign extension.
+define double @f6(double %a, double %b, i8 *%ptr) {
+; CHECK: f6:
+; CHECK: cli 0(%r2), 127
+; CHECK-NEXT: j{{g?}}lh
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = sext i8 %val to i64
+  %cond = icmp ne i64 %ext, 127
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value up, using sign extension.
+; The condition is always true.
+define double @f7(double %a, double %b, i8 *%ptr) {
+; CHECK: f7:
+; CHECK-NOT: cli
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = sext i8 %val to i64
+  %cond = icmp ne i64 %ext, 128
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check comparisons with -1, using sign extension.
+define double @f8(double %a, double %b, i8 *%ptr) {
+; CHECK: f8:
+; CHECK: cli 0(%r2), 255
+; CHECK-NEXT: j{{g?}}lh
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = sext i8 %val to i64
+  %cond = icmp ne i64 %ext, -1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the low end of the signed 8-bit range, using sign extension.
+define double @f9(double %a, double %b, i8 *%ptr) {
+; CHECK: f9:
+; CHECK: cli 0(%r2), 128
+; CHECK-NEXT: j{{g?}}lh
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = sext i8 %val to i64
+  %cond = icmp ne i64 %ext, -128
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value down, using sign extension.
+; The condition is always true.
+define double @f10(double %a, double %b, i8 *%ptr) {
+; CHECK: f10:
+; CHECK-NOT: cli
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = sext i8 %val to i64
+  %cond = icmp ne i64 %ext, -129
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-20.ll b/test/CodeGen/SystemZ/int-cmp-20.ll
new file mode 100644
index 0000000..8fffbc8
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-cmp-20.ll
@@ -0,0 +1,220 @@
+; Test 32-bit ordered comparisons that are really between a memory byte
+; and a constant.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check unsigned comparison near the low end of the CLI range, using zero
+; extension.
+define double @f1(double %a, double %b, i8 *%ptr) {
+; CHECK: f1:
+; CHECK: cli 0(%r2), 1
+; CHECK-NEXT: j{{g?}}h
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = zext i8 %val to i32
+  %cond = icmp ugt i32 %ext, 1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check unsigned comparison near the low end of the CLI range, using sign
+; extension.
+define double @f2(double %a, double %b, i8 *%ptr) {
+; CHECK: f2:
+; CHECK: cli 0(%r2), 1
+; CHECK-NEXT: j{{g?}}h
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = sext i8 %val to i32
+  %cond = icmp ugt i32 %ext, 1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check unsigned comparison near the high end of the CLI range, using zero
+; extension.
+define double @f3(double %a, double %b, i8 *%ptr) {
+; CHECK: f3:
+; CHECK: cli 0(%r2), 254
+; CHECK-NEXT: j{{g?}}l
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = zext i8 %val to i32
+  %cond = icmp ult i32 %ext, 254
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check unsigned comparison near the high end of the CLI range, using sign
+; extension.
+define double @f4(double %a, double %b, i8 *%ptr) {
+; CHECK: f4:
+; CHECK: cli 0(%r2), 254
+; CHECK-NEXT: j{{g?}}l
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = sext i8 %val to i32
+  %cond = icmp ult i32 %ext, -2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check unsigned comparison above the high end of the CLI range, using zero
+; extension.  The condition is always true.
+define double @f5(double %a, double %b, i8 *%ptr) {
+; CHECK: f5:
+; CHECK-NOT: cli
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = zext i8 %val to i32
+  %cond = icmp ult i32 %ext, 256
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; When using unsigned comparison with sign extension, equality with values
+; in the range [128, MAX-129] is impossible, and ordered comparisons with
+; those values are effectively sign tests.  Since such comparisons are
+; unlikely to occur in practice, we don't bother optimizing the second case,
+; and simply ignore CLI for this range.  First check the low end of the range.
+define double @f6(double %a, double %b, i8 *%ptr) {
+; CHECK: f6:
+; CHECK-NOT: cli
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = sext i8 %val to i32
+  %cond = icmp ult i32 %ext, 128
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; ...and then the high end.
+define double @f7(double %a, double %b, i8 *%ptr) {
+; CHECK: f7:
+; CHECK-NOT: cli
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = sext i8 %val to i32
+  %cond = icmp ult i32 %ext, -129
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check signed comparison near the low end of the CLI range, using zero
+; extension.  This is equivalent to unsigned comparison.
+define double @f8(double %a, double %b, i8 *%ptr) {
+; CHECK: f8:
+; CHECK: cli 0(%r2), 1
+; CHECK-NEXT: j{{g?}}h
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = zext i8 %val to i32
+  %cond = icmp sgt i32 %ext, 1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check signed comparison near the low end of the CLI range, using sign
+; extension.  This cannot use CLI.
+define double @f9(double %a, double %b, i8 *%ptr) {
+; CHECK: f9:
+; CHECK-NOT: cli
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = sext i8 %val to i32
+  %cond = icmp sgt i32 %ext, 1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check signed comparison near the high end of the CLI range, using zero
+; extension.  This is equivalent to unsigned comparison.
+define double @f10(double %a, double %b, i8 *%ptr) {
+; CHECK: f10:
+; CHECK: cli 0(%r2), 254
+; CHECK-NEXT: j{{g?}}l
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = zext i8 %val to i32
+  %cond = icmp slt i32 %ext, 254
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check signed comparison near the high end of the CLI range, using sign
+; extension.  This cannot use CLI.
+define double @f11(double %a, double %b, i8 *%ptr) {
+; CHECK: f11:
+; CHECK-NOT: cli
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = sext i8 %val to i32
+  %cond = icmp slt i32 %ext, -2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check signed comparison above the high end of the CLI range, using zero
+; extension.  The condition is always true.
+define double @f12(double %a, double %b, i8 *%ptr) {
+; CHECK: f12:
+; CHECK-NOT: cli
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = zext i8 %val to i32
+  %cond = icmp slt i32 %ext, 256
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check tests for nonnegative values.
+define double @f13(double %a, double %b, i8 *%ptr) {
+; CHECK: f13:
+; CHECK: cli 0(%r2), 128
+; CHECK-NEXT: j{{g?}}l
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = sext i8 %val to i32
+  %cond = icmp sge i32 %ext, 0
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; ...and another form
+define double @f14(double %a, double %b, i8 *%ptr) {
+; CHECK: f14:
+; CHECK: cli 0(%r2), 128
+; CHECK-NEXT: j{{g?}}l
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = sext i8 %val to i32
+  %cond = icmp sgt i32 %ext, -1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check tests for negative values.
+define double @f15(double %a, double %b, i8 *%ptr) {
+; CHECK: f15:
+; CHECK: cli 0(%r2), 127
+; CHECK-NEXT: j{{g?}}h
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = sext i8 %val to i32
+  %cond = icmp slt i32 %ext, 0
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; ...and another form
+define double @f16(double %a, double %b, i8 *%ptr) {
+; CHECK: f16:
+; CHECK: cli 0(%r2), 127
+; CHECK-NEXT: j{{g?}}h
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = sext i8 %val to i32
+  %cond = icmp sle i32 %ext, -1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-21.ll b/test/CodeGen/SystemZ/int-cmp-21.ll
new file mode 100644
index 0000000..43447b8
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-cmp-21.ll
@@ -0,0 +1,220 @@
+; Test 64-bit ordered comparisons that are really between a memory byte
+; and a constant.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check unsigned comparison near the low end of the CLI range, using zero
+; extension.
+define double @f1(double %a, double %b, i8 *%ptr) {
+; CHECK: f1:
+; CHECK: cli 0(%r2), 1
+; CHECK-NEXT: j{{g?}}h
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = zext i8 %val to i64
+  %cond = icmp ugt i64 %ext, 1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check unsigned comparison near the low end of the CLI range, using sign
+; extension.
+define double @f2(double %a, double %b, i8 *%ptr) {
+; CHECK: f2:
+; CHECK: cli 0(%r2), 1
+; CHECK-NEXT: j{{g?}}h
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = sext i8 %val to i64
+  %cond = icmp ugt i64 %ext, 1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check unsigned comparison near the high end of the CLI range, using zero
+; extension.
+define double @f3(double %a, double %b, i8 *%ptr) {
+; CHECK: f3:
+; CHECK: cli 0(%r2), 254
+; CHECK-NEXT: j{{g?}}l
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = zext i8 %val to i64
+  %cond = icmp ult i64 %ext, 254
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check unsigned comparison near the high end of the CLI range, using sign
+; extension.
+define double @f4(double %a, double %b, i8 *%ptr) {
+; CHECK: f4:
+; CHECK: cli 0(%r2), 254
+; CHECK-NEXT: j{{g?}}l
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = sext i8 %val to i64
+  %cond = icmp ult i64 %ext, -2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check unsigned comparison above the high end of the CLI range, using zero
+; extension.  The condition is always true.
+define double @f5(double %a, double %b, i8 *%ptr) {
+; CHECK: f5:
+; CHECK-NOT: cli
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = zext i8 %val to i64
+  %cond = icmp ult i64 %ext, 256
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; When using unsigned comparison with sign extension, equality with values
+; in the range [128, MAX-129] is impossible, and ordered comparisons with
+; those values are effectively sign tests.  Since such comparisons are
+; unlikely to occur in practice, we don't bother optimizing the second case,
+; and simply ignore CLI for this range.  First check the low end of the range.
+define double @f6(double %a, double %b, i8 *%ptr) {
+; CHECK: f6:
+; CHECK-NOT: cli
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = sext i8 %val to i64
+  %cond = icmp ult i64 %ext, 128
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; ...and then the high end.
+define double @f7(double %a, double %b, i8 *%ptr) {
+; CHECK: f7:
+; CHECK-NOT: cli
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = sext i8 %val to i64
+  %cond = icmp ult i64 %ext, -129
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check signed comparison near the low end of the CLI range, using zero
+; extension.  This is equivalent to unsigned comparison.
+define double @f8(double %a, double %b, i8 *%ptr) {
+; CHECK: f8:
+; CHECK: cli 0(%r2), 1
+; CHECK-NEXT: j{{g?}}h
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = zext i8 %val to i64
+  %cond = icmp sgt i64 %ext, 1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check signed comparison near the low end of the CLI range, using sign
+; extension.  This cannot use CLI.
+define double @f9(double %a, double %b, i8 *%ptr) {
+; CHECK: f9:
+; CHECK-NOT: cli
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = sext i8 %val to i64
+  %cond = icmp sgt i64 %ext, 1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check signed comparison near the high end of the CLI range, using zero
+; extension.  This is equivalent to unsigned comparison.
+define double @f10(double %a, double %b, i8 *%ptr) {
+; CHECK: f10:
+; CHECK: cli 0(%r2), 254
+; CHECK-NEXT: j{{g?}}l
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = zext i8 %val to i64
+  %cond = icmp slt i64 %ext, 254
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check signed comparison near the high end of the CLI range, using sign
+; extension.  This cannot use CLI.
+define double @f11(double %a, double %b, i8 *%ptr) {
+; CHECK: f11:
+; CHECK-NOT: cli
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = sext i8 %val to i64
+  %cond = icmp slt i64 %ext, -2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check signed comparison above the high end of the CLI range, using zero
+; extension.  The condition is always true.
+define double @f12(double %a, double %b, i8 *%ptr) {
+; CHECK: f12:
+; CHECK-NOT: cli
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = zext i8 %val to i64
+  %cond = icmp slt i64 %ext, 256
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check tests for nonnegative values.
+define double @f13(double %a, double %b, i8 *%ptr) {
+; CHECK: f13:
+; CHECK: cli 0(%r2), 128
+; CHECK-NEXT: j{{g?}}l
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = sext i8 %val to i64
+  %cond = icmp sge i64 %ext, 0
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; ...and another form
+define double @f14(double %a, double %b, i8 *%ptr) {
+; CHECK: f14:
+; CHECK: cli 0(%r2), 128
+; CHECK-NEXT: j{{g?}}l
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = sext i8 %val to i64
+  %cond = icmp sgt i64 %ext, -1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check tests for negative values.
+define double @f15(double %a, double %b, i8 *%ptr) {
+; CHECK: f15:
+; CHECK: cli 0(%r2), 127
+; CHECK-NEXT: j{{g?}}h
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = sext i8 %val to i64
+  %cond = icmp slt i64 %ext, 0
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; ...and another form
+define double @f16(double %a, double %b, i8 *%ptr) {
+; CHECK: f16:
+; CHECK: cli 0(%r2), 127
+; CHECK-NEXT: j{{g?}}h
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = sext i8 %val to i64
+  %cond = icmp sle i64 %ext, -1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-22.ll b/test/CodeGen/SystemZ/int-cmp-22.ll
new file mode 100644
index 0000000..513d4be
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-cmp-22.ll
@@ -0,0 +1,128 @@
+; Test 16-bit signed ordered comparisons between memory and a constant.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check comparisons with 0.
+define double @f1(double %a, double %b, i16 *%ptr) {
+; CHECK: f1:
+; CHECK: chhsi 0(%r2), 0
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %cond = icmp slt i16 %val, 0
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check comparisons with 1.
+define double @f2(double %a, double %b, i16 *%ptr) {
+; CHECK: f2:
+; CHECK: chhsi 0(%r2), 1
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %cond = icmp slt i16 %val, 1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check a value near the high end of the signed 16-bit range.
+define double @f3(double %a, double %b, i16 *%ptr) {
+; CHECK: f3:
+; CHECK: chhsi 0(%r2), 32766
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %cond = icmp slt i16 %val, 32766
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check comparisons with -1.
+define double @f4(double %a, double %b, i16 *%ptr) {
+; CHECK: f4:
+; CHECK: chhsi 0(%r2), -1
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %cond = icmp slt i16 %val, -1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check a value near the low end of the 16-bit signed range.
+define double @f5(double %a, double %b, i16 *%ptr) {
+; CHECK: f5:
+; CHECK: chhsi 0(%r2), -32766
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %cond = icmp slt i16 %val, -32766
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the high end of the CHHSI range.
+define double @f6(double %a, double %b, i16 %i1, i16 *%base) {
+; CHECK: f6:
+; CHECK: chhsi 4094(%r3), 0
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%base, i64 2047
+  %val = load i16 *%ptr
+  %cond = icmp slt i16 %val, 0
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next halfword up, which needs separate address logic.
+define double @f7(double %a, double %b, i16 *%base) {
+; CHECK: f7:
+; CHECK: aghi %r2, 4096
+; CHECK: chhsi 0(%r2), 0
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%base, i64 2048
+  %val = load i16 *%ptr
+  %cond = icmp slt i16 %val, 0
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check negative offsets, which also need separate address logic.
+define double @f8(double %a, double %b, i16 *%base) {
+; CHECK: f8:
+; CHECK: aghi %r2, -2
+; CHECK: chhsi 0(%r2), 0
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%base, i64 -1
+  %val = load i16 *%ptr
+  %cond = icmp slt i16 %val, 0
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check that CHHSI does not allow indices.
+define double @f9(double %a, double %b, i64 %base, i64 %index) {
+; CHECK: f9:
+; CHECK: agr {{%r2, %r3|%r3, %r2}}
+; CHECK: chhsi 0({{%r[23]}}), 0
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %add = add i64 %base, %index
+  %ptr = inttoptr i64 %add to i16 *
+  %val = load i16 *%ptr
+  %cond = icmp slt i16 %val, 0
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-23.ll b/test/CodeGen/SystemZ/int-cmp-23.ll
new file mode 100644
index 0000000..40e1331
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-cmp-23.ll
@@ -0,0 +1,89 @@
+; Test 16-bit unsigned comparisons between memory and a constant.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check a value near the low end of the unsigned 16-bit range.
+define double @f1(double %a, double %b, i16 *%ptr) {
+; CHECK: f1:
+; CHECK: clhhsi 0(%r2), 1
+; CHECK-NEXT: j{{g?}}h
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %cond = icmp ugt i16 %val, 1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check a value near the high end of the unsigned 16-bit range.
+define double @f2(double %a, double %b, i16 *%ptr) {
+; CHECK: f2:
+; CHECK: clhhsi 0(%r2), 65534
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %cond = icmp ult i16 %val, 65534
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the high end of the CLHHSI range.
+define double @f3(double %a, double %b, i16 %i1, i16 *%base) {
+; CHECK: f3:
+; CHECK: clhhsi 4094(%r3), 1
+; CHECK-NEXT: j{{g?}}h
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%base, i64 2047
+  %val = load i16 *%ptr
+  %cond = icmp ugt i16 %val, 1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next halfword up, which needs separate address logic.
+define double @f4(double %a, double %b, i16 *%base) {
+; CHECK: f4:
+; CHECK: aghi %r2, 4096
+; CHECK: clhhsi 0(%r2), 1
+; CHECK-NEXT: j{{g?}}h
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%base, i64 2048
+  %val = load i16 *%ptr
+  %cond = icmp ugt i16 %val, 1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check negative offsets, which also need separate address logic.
+define double @f5(double %a, double %b, i16 *%base) {
+; CHECK: f5:
+; CHECK: aghi %r2, -2
+; CHECK: clhhsi 0(%r2), 1
+; CHECK-NEXT: j{{g?}}h
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%base, i64 -1
+  %val = load i16 *%ptr
+  %cond = icmp ugt i16 %val, 1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check that CLHHSI does not allow indices.
+define double @f6(double %a, double %b, i64 %base, i64 %index) {
+; CHECK: f6:
+; CHECK: agr {{%r2, %r3|%r3, %r2}}
+; CHECK: clhhsi 0({{%r[23]}}), 1
+; CHECK-NEXT: j{{g?}}h
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %add = add i64 %base, %index
+  %ptr = inttoptr i64 %add to i16 *
+  %val = load i16 *%ptr
+  %cond = icmp ugt i16 %val, 1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-24.ll b/test/CodeGen/SystemZ/int-cmp-24.ll
new file mode 100644
index 0000000..46186cd
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-cmp-24.ll
@@ -0,0 +1,55 @@
+; Test 16-bit equality comparisons between memory and a constant.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check the low end of the unsigned 16-bit range.
+define double @f1(double %a, double %b, i16 *%ptr) {
+; CHECK: f1:
+; CHECK: clhhsi 0(%r2), 0
+; CHECK-NEXT: j{{g?}}e
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %cond = icmp eq i16 %val, 0
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the high end of the unsigned 16-bit range.
+define double @f2(double %a, double %b, i16 *%ptr) {
+; CHECK: f2:
+; CHECK: clhhsi 0(%r2), 65535
+; CHECK-NEXT: j{{g?}}e
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %cond = icmp eq i16 %val, 65535
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the low end of the signed 16-bit range.
+define double @f3(double %a, double %b, i16 *%ptr) {
+; CHECK: f3:
+; CHECK: clhhsi 0(%r2), 32768
+; CHECK-NEXT: j{{g?}}e
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %cond = icmp eq i16 %val, -32768
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the high end of the signed 16-bit range.
+define double @f4(double %a, double %b, i16 *%ptr) {
+; CHECK: f4:
+; CHECK: clhhsi 0(%r2), 32767
+; CHECK-NEXT: j{{g?}}e
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %cond = icmp eq i16 %val, 32767
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-25.ll b/test/CodeGen/SystemZ/int-cmp-25.ll
new file mode 100644
index 0000000..a3a223f
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-cmp-25.ll
@@ -0,0 +1,55 @@
+; Test 16-bit inequality comparisons between memory and a constant.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check the low end of the unsigned 16-bit range.
+define double @f1(double %a, double %b, i16 *%ptr) {
+; CHECK: f1:
+; CHECK: clhhsi 0(%r2), 0
+; CHECK-NEXT: j{{g?}}lh
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %cond = icmp ne i16 %val, 0
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the high end of the unsigned 16-bit range.
+define double @f2(double %a, double %b, i16 *%ptr) {
+; CHECK: f2:
+; CHECK: clhhsi 0(%r2), 65535
+; CHECK-NEXT: j{{g?}}lh
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %cond = icmp ne i16 %val, 65535
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the low end of the signed 16-bit range.
+define double @f3(double %a, double %b, i16 *%ptr) {
+; CHECK: f3:
+; CHECK: clhhsi 0(%r2), 32768
+; CHECK-NEXT: j{{g?}}lh
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %cond = icmp ne i16 %val, -32768
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the high end of the signed 16-bit range.
+define double @f4(double %a, double %b, i16 *%ptr) {
+; CHECK: f4:
+; CHECK: clhhsi 0(%r2), 32767
+; CHECK-NEXT: j{{g?}}lh
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %cond = icmp ne i16 %val, 32767
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-26.ll b/test/CodeGen/SystemZ/int-cmp-26.ll
new file mode 100644
index 0000000..31330b2
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-cmp-26.ll
@@ -0,0 +1,133 @@
+; Test 32-bit equality comparisons that are really between a memory halfword
+; and a constant.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check the low end of the 16-bit unsigned range, with zero extension.
+define double @f1(double %a, double %b, i16 *%ptr) {
+; CHECK: f1:
+; CHECK: clhhsi 0(%r2), 0
+; CHECK-NEXT: j{{g?}}e
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = zext i16 %val to i32
+  %cond = icmp eq i32 %ext, 0
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the high end of the 16-bit unsigned range, with zero extension.
+define double @f2(double %a, double %b, i16 *%ptr) {
+; CHECK: f2:
+; CHECK: clhhsi 0(%r2), 65535
+; CHECK-NEXT: j{{g?}}e
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = zext i16 %val to i32
+  %cond = icmp eq i32 %ext, 65535
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value up, with zero extension.  The condition is always false.
+define double @f3(double %a, double %b, i16 *%ptr) {
+; CHECK: f3:
+; CHECK-NOT: clhhsi
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = zext i16 %val to i32
+  %cond = icmp eq i32 %ext, 65536
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check comparisons with -1, with zero extension.
+; This condition is also always false.
+define double @f4(double %a, double %b, i16 *%ptr) {
+; CHECK: f4:
+; CHECK-NOT: clhhsi
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = zext i16 %val to i32
+  %cond = icmp eq i32 %ext, -1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check comparisons with 0, using sign extension.
+define double @f5(double %a, double %b, i16 *%ptr) {
+; CHECK: f5:
+; CHECK: clhhsi 0(%r2), 0
+; CHECK-NEXT: j{{g?}}e
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = sext i16 %val to i32
+  %cond = icmp eq i32 %ext, 0
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the high end of the signed 16-bit range, using sign extension.
+define double @f6(double %a, double %b, i16 *%ptr) {
+; CHECK: f6:
+; CHECK: clhhsi 0(%r2), 32767
+; CHECK-NEXT: j{{g?}}e
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = sext i16 %val to i32
+  %cond = icmp eq i32 %ext, 32767
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value up, using sign extension.
+; The condition is always false.
+define double @f7(double %a, double %b, i16 *%ptr) {
+; CHECK: f7:
+; CHECK-NOT: clhhsi
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = sext i16 %val to i32
+  %cond = icmp eq i32 %ext, 32768
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check comparisons with -1, using sign extension.
+define double @f8(double %a, double %b, i16 *%ptr) {
+; CHECK: f8:
+; CHECK: clhhsi 0(%r2), 65535
+; CHECK-NEXT: j{{g?}}e
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = sext i16 %val to i32
+  %cond = icmp eq i32 %ext, -1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the low end of the signed 16-bit range, using sign extension.
+define double @f9(double %a, double %b, i16 *%ptr) {
+; CHECK: f9:
+; CHECK: clhhsi 0(%r2), 32768
+; CHECK-NEXT: j{{g?}}e
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = sext i16 %val to i32
+  %cond = icmp eq i32 %ext, -32768
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value down, using sign extension.
+; The condition is always false.
+define double @f10(double %a, double %b, i16 *%ptr) {
+; CHECK: f10:
+; CHECK-NOT: clhhsi
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = sext i16 %val to i32
+  %cond = icmp eq i32 %ext, -32769
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-27.ll b/test/CodeGen/SystemZ/int-cmp-27.ll
new file mode 100644
index 0000000..7cbea3d
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-cmp-27.ll
@@ -0,0 +1,133 @@
+; Test 32-bit inequality comparisons that are really between a memory halfword
+; and a constant.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check the low end of the 16-bit unsigned range, with zero extension.
+define double @f1(double %a, double %b, i16 *%ptr) {
+; CHECK: f1:
+; CHECK: clhhsi 0(%r2), 0
+; CHECK-NEXT: j{{g?}}lh
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = zext i16 %val to i32
+  %cond = icmp ne i32 %ext, 0
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the high end of the 16-bit unsigned range, with zero extension.
+define double @f2(double %a, double %b, i16 *%ptr) {
+; CHECK: f2:
+; CHECK: clhhsi 0(%r2), 65535
+; CHECK-NEXT: j{{g?}}lh
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = zext i16 %val to i32
+  %cond = icmp ne i32 %ext, 65535
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value up, with zero extension.  The condition is always true.
+define double @f3(double %a, double %b, i16 *%ptr) {
+; CHECK: f3:
+; CHECK-NOT: clhhsi
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = zext i16 %val to i32
+  %cond = icmp ne i32 %ext, 65536
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check comparisons with -1, with zero extension.
+; This condition is also always true.
+define double @f4(double %a, double %b, i16 *%ptr) {
+; CHECK: f4:
+; CHECK-NOT: clhhsi
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = zext i16 %val to i32
+  %cond = icmp ne i32 %ext, -1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check comparisons with 0, using sign extension.
+define double @f5(double %a, double %b, i16 *%ptr) {
+; CHECK: f5:
+; CHECK: clhhsi 0(%r2), 0
+; CHECK-NEXT: j{{g?}}lh
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = sext i16 %val to i32
+  %cond = icmp ne i32 %ext, 0
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the high end of the signed 16-bit range, using sign extension.
+define double @f6(double %a, double %b, i16 *%ptr) {
+; CHECK: f6:
+; CHECK: clhhsi 0(%r2), 32767
+; CHECK-NEXT: j{{g?}}lh
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = sext i16 %val to i32
+  %cond = icmp ne i32 %ext, 32767
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value up, using sign extension.
+; The condition is always true.
+define double @f7(double %a, double %b, i16 *%ptr) {
+; CHECK: f7:
+; CHECK-NOT: clhhsi
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = sext i16 %val to i32
+  %cond = icmp ne i32 %ext, 32768
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check comparisons with -1, using sign extension.
+define double @f8(double %a, double %b, i16 *%ptr) {
+; CHECK: f8:
+; CHECK: clhhsi 0(%r2), 65535
+; CHECK-NEXT: j{{g?}}lh
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = sext i16 %val to i32
+  %cond = icmp ne i32 %ext, -1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the low end of the signed 16-bit range, using sign extension.
+define double @f9(double %a, double %b, i16 *%ptr) {
+; CHECK: f9:
+; CHECK: clhhsi 0(%r2), 32768
+; CHECK-NEXT: j{{g?}}lh
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = sext i16 %val to i32
+  %cond = icmp ne i32 %ext, -32768
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value down, using sign extension.
+; The condition is always true.
+define double @f10(double %a, double %b, i16 *%ptr) {
+; CHECK: f10:
+; CHECK-NOT: clhhsi
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = sext i16 %val to i32
+  %cond = icmp ne i32 %ext, -32769
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-28.ll b/test/CodeGen/SystemZ/int-cmp-28.ll
new file mode 100644
index 0000000..629eb4f
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-cmp-28.ll
@@ -0,0 +1,133 @@
+; Test 64-bit equality comparisons that are really between a memory halfword
+; and a constant.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check the low end of the 16-bit unsigned range, with zero extension.
+define double @f1(double %a, double %b, i16 *%ptr) {
+; CHECK: f1:
+; CHECK: clhhsi 0(%r2), 0
+; CHECK-NEXT: j{{g?}}e
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = zext i16 %val to i64
+  %cond = icmp eq i64 %ext, 0
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the high end of the 16-bit unsigned range, with zero extension.
+define double @f2(double %a, double %b, i16 *%ptr) {
+; CHECK: f2:
+; CHECK: clhhsi 0(%r2), 65535
+; CHECK-NEXT: j{{g?}}e
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = zext i16 %val to i64
+  %cond = icmp eq i64 %ext, 65535
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value up, with zero extension.  The condition is always false.
+define double @f3(double %a, double %b, i16 *%ptr) {
+; CHECK: f3:
+; CHECK-NOT: clhhsi
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = zext i16 %val to i64
+  %cond = icmp eq i64 %ext, 65536
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check comparisons with -1, with zero extension.
+; This condition is also always false.
+define double @f4(double %a, double %b, i16 *%ptr) {
+; CHECK: f4:
+; CHECK-NOT: clhhsi
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = zext i16 %val to i64
+  %cond = icmp eq i64 %ext, -1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check comparisons with 0, using sign extension.
+define double @f5(double %a, double %b, i16 *%ptr) {
+; CHECK: f5:
+; CHECK: clhhsi 0(%r2), 0
+; CHECK-NEXT: j{{g?}}e
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = sext i16 %val to i64
+  %cond = icmp eq i64 %ext, 0
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the high end of the signed 16-bit range, using sign extension.
+define double @f6(double %a, double %b, i16 *%ptr) {
+; CHECK: f6:
+; CHECK: clhhsi 0(%r2), 32767
+; CHECK-NEXT: j{{g?}}e
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = sext i16 %val to i64
+  %cond = icmp eq i64 %ext, 32767
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value up, using sign extension.
+; The condition is always false.
+define double @f7(double %a, double %b, i16 *%ptr) {
+; CHECK: f7:
+; CHECK-NOT: clhhsi
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = sext i16 %val to i64
+  %cond = icmp eq i64 %ext, 32768
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check comparisons with -1, using sign extension.
+define double @f8(double %a, double %b, i16 *%ptr) {
+; CHECK: f8:
+; CHECK: clhhsi 0(%r2), 65535
+; CHECK-NEXT: j{{g?}}e
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = sext i16 %val to i64
+  %cond = icmp eq i64 %ext, -1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the low end of the signed 16-bit range, using sign extension.
+define double @f9(double %a, double %b, i16 *%ptr) {
+; CHECK: f9:
+; CHECK: clhhsi 0(%r2), 32768
+; CHECK-NEXT: j{{g?}}e
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = sext i16 %val to i64
+  %cond = icmp eq i64 %ext, -32768
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value down, using sign extension.
+; The condition is always false.
+define double @f10(double %a, double %b, i16 *%ptr) {
+; CHECK: f10:
+; CHECK-NOT: clhhsi
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = sext i16 %val to i64
+  %cond = icmp eq i64 %ext, -32769
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-29.ll b/test/CodeGen/SystemZ/int-cmp-29.ll
new file mode 100644
index 0000000..de41dd7
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-cmp-29.ll
@@ -0,0 +1,133 @@
+; Test 64-bit inequality comparisons that are really between a memory halfword
+; and a constant.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check the low end of the 16-bit unsigned range, with zero extension.
+define double @f1(double %a, double %b, i16 *%ptr) {
+; CHECK: f1:
+; CHECK: clhhsi 0(%r2), 0
+; CHECK-NEXT: j{{g?}}lh
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = zext i16 %val to i64
+  %cond = icmp ne i64 %ext, 0
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the high end of the 16-bit unsigned range, with zero extension.
+define double @f2(double %a, double %b, i16 *%ptr) {
+; CHECK: f2:
+; CHECK: clhhsi 0(%r2), 65535
+; CHECK-NEXT: j{{g?}}lh
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = zext i16 %val to i64
+  %cond = icmp ne i64 %ext, 65535
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value up, with zero extension.  The condition is always true.
+define double @f3(double %a, double %b, i16 *%ptr) {
+; CHECK: f3:
+; CHECK-NOT: clhhsi
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = zext i16 %val to i64
+  %cond = icmp ne i64 %ext, 65536
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check comparisons with -1, with zero extension.
+; This condition is also always true.
+define double @f4(double %a, double %b, i16 *%ptr) {
+; CHECK: f4:
+; CHECK-NOT: clhhsi
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = zext i16 %val to i64
+  %cond = icmp ne i64 %ext, -1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check comparisons with 0, using sign extension.
+define double @f5(double %a, double %b, i16 *%ptr) {
+; CHECK: f5:
+; CHECK: clhhsi 0(%r2), 0
+; CHECK-NEXT: j{{g?}}lh
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = sext i16 %val to i64
+  %cond = icmp ne i64 %ext, 0
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the high end of the signed 16-bit range, using sign extension.
+define double @f6(double %a, double %b, i16 *%ptr) {
+; CHECK: f6:
+; CHECK: clhhsi 0(%r2), 32767
+; CHECK-NEXT: j{{g?}}lh
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = sext i16 %val to i64
+  %cond = icmp ne i64 %ext, 32767
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value up, using sign extension.
+; The condition is always true: a sign-extended i16 can never equal 32768.
+define double @f7(double %a, double %b, i16 *%ptr) {
+; CHECK: f7:
+; CHECK-NOT: clhhsi
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = sext i16 %val to i64
+  %cond = icmp ne i64 %ext, 32768
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check comparisons with -1, using sign extension.
+define double @f8(double %a, double %b, i16 *%ptr) {
+; CHECK: f8:
+; CHECK: clhhsi 0(%r2), 65535
+; CHECK-NEXT: j{{g?}}lh
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = sext i16 %val to i64
+  %cond = icmp ne i64 %ext, -1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the low end of the signed 16-bit range, using sign extension.
+define double @f9(double %a, double %b, i16 *%ptr) {
+; CHECK: f9:
+; CHECK: clhhsi 0(%r2), 32768
+; CHECK-NEXT: j{{g?}}lh
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = sext i16 %val to i64
+  %cond = icmp ne i64 %ext, -32768
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value down, using sign extension.
+; The condition is always true: a sign-extended i16 can never equal -32769.
+define double @f10(double %a, double %b, i16 *%ptr) {
+; CHECK: f10:
+; CHECK-NOT: clhhsi
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = sext i16 %val to i64
+  %cond = icmp ne i64 %ext, -32769
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-30.ll b/test/CodeGen/SystemZ/int-cmp-30.ll
new file mode 100644
index 0000000..713ad8e
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-cmp-30.ll
@@ -0,0 +1,225 @@
+; Test 32-bit ordered comparisons that are really between a memory halfword
+; and a constant.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check unsigned comparison near the low end of the CLHHSI range, using zero
+; extension.
+define double @f1(double %a, double %b, i16 *%ptr) {
+; CHECK: f1:
+; CHECK: clhhsi 0(%r2), 1
+; CHECK-NEXT: j{{g?}}h
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = zext i16 %val to i32
+  %cond = icmp ugt i32 %ext, 1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check unsigned comparison near the low end of the CLHHSI range, using sign
+; extension.
+define double @f2(double %a, double %b, i16 *%ptr) {
+; CHECK: f2:
+; CHECK: clhhsi 0(%r2), 1
+; CHECK-NEXT: j{{g?}}h
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = sext i16 %val to i32
+  %cond = icmp ugt i32 %ext, 1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check unsigned comparison near the high end of the CLHHSI range, using zero
+; extension.
+define double @f3(double %a, double %b, i16 *%ptr) {
+; CHECK: f3:
+; CHECK: clhhsi 0(%r2), 65534
+; CHECK-NEXT: j{{g?}}l
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = zext i16 %val to i32
+  %cond = icmp ult i32 %ext, 65534
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check unsigned comparison near the high end of the CLHHSI range, using sign
+; extension.
+define double @f4(double %a, double %b, i16 *%ptr) {
+; CHECK: f4:
+; CHECK: clhhsi 0(%r2), 65534
+; CHECK-NEXT: j{{g?}}l
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = sext i16 %val to i32
+  %cond = icmp ult i32 %ext, -2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check unsigned comparison above the high end of the CLHHSI range, using zero
+; extension.  The condition is always true.
+define double @f5(double %a, double %b, i16 *%ptr) {
+; CHECK: f5:
+; CHECK-NOT: clhhsi
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = zext i16 %val to i32
+  %cond = icmp ult i32 %ext, 65536
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; When using unsigned comparison with sign extension, equality with values
+; in the range [32768, MAX-32769] is impossible, and ordered comparisons with
+; those values are effectively sign tests.  Since such comparisons are
+; unlikely to occur in practice, we don't bother optimizing the second case,
+; and simply ignore CLHHSI for this range.  First check the low end of the
+; range.
+define double @f6(double %a, double %b, i16 *%ptr) {
+; CHECK: f6:
+; CHECK-NOT: clhhsi
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = sext i16 %val to i32
+  %cond = icmp ult i32 %ext, 32768
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; ...and then the high end.
+define double @f7(double %a, double %b, i16 *%ptr) {
+; CHECK: f7:
+; CHECK-NOT: clhhsi
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = sext i16 %val to i32
+  %cond = icmp ult i32 %ext, -32769
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check signed comparison near the low end of the CLHHSI range, using zero
+; extension.  This is equivalent to unsigned comparison.
+define double @f8(double %a, double %b, i16 *%ptr) {
+; CHECK: f8:
+; CHECK: clhhsi 0(%r2), 1
+; CHECK-NEXT: j{{g?}}h
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = zext i16 %val to i32
+  %cond = icmp sgt i32 %ext, 1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check signed comparison near the low end of the CLHHSI range, using sign
+; extension.  This should use CHHSI instead.
+define double @f9(double %a, double %b, i16 *%ptr) {
+; CHECK: f9:
+; CHECK: chhsi 0(%r2), 1
+; CHECK-NEXT: j{{g?}}h
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = sext i16 %val to i32
+  %cond = icmp sgt i32 %ext, 1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check signed comparison near the high end of the CLHHSI range, using zero
+; extension.  This is equivalent to unsigned comparison.
+define double @f10(double %a, double %b, i16 *%ptr) {
+; CHECK: f10:
+; CHECK: clhhsi 0(%r2), 65534
+; CHECK-NEXT: j{{g?}}l
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = zext i16 %val to i32
+  %cond = icmp slt i32 %ext, 65534
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check signed comparison near the high end of the CLHHSI range, using sign
+; extension.  This should use CHHSI instead.
+define double @f11(double %a, double %b, i16 *%ptr) {
+; CHECK: f11:
+; CHECK: chhsi 0(%r2), -2
+; CHECK-NEXT: j{{g?}}l
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = sext i16 %val to i32
+  %cond = icmp slt i32 %ext, -2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check signed comparison above the high end of the CLHHSI range, using zero
+; extension.  The condition is always true, so no CLHHSI should be emitted.
+define double @f12(double %a, double %b, i16 *%ptr) {
+; CHECK: f12:
+; CHECK-NOT: clhhsi
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = zext i16 %val to i32
+  %cond = icmp slt i32 %ext, 65536
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check signed comparison near the high end of the CHHSI range, using sign
+; extension.
+define double @f13(double %a, double %b, i16 *%ptr) {
+; CHECK: f13:
+; CHECK: chhsi 0(%r2), 32766
+; CHECK-NEXT: j{{g?}}l
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = sext i16 %val to i32
+  %cond = icmp slt i32 %ext, 32766
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check signed comparison above the high end of the CHHSI range, using sign
+; extension.  This condition is always true.
+define double @f14(double %a, double %b, i16 *%ptr) {
+; CHECK: f14:
+; CHECK-NOT: chhsi
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = sext i16 %val to i32
+  %cond = icmp slt i32 %ext, 32768
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check signed comparison near the low end of the CHHSI range, using sign
+; extension.
+define double @f15(double %a, double %b, i16 *%ptr) {
+; CHECK: f15:
+; CHECK: chhsi 0(%r2), -32767
+; CHECK-NEXT: j{{g?}}g
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = sext i16 %val to i32
+  %cond = icmp sgt i32 %ext, -32767
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check signed comparison below the low end of the CHHSI range, using sign
+; extension.  This condition is always true.
+define double @f16(double %a, double %b, i16 *%ptr) {
+; CHECK: f16:
+; CHECK-NOT: chhsi
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = sext i16 %val to i32
+  %cond = icmp sgt i32 %ext, -32769
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-31.ll b/test/CodeGen/SystemZ/int-cmp-31.ll
new file mode 100644
index 0000000..cabe9b8
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-cmp-31.ll
@@ -0,0 +1,225 @@
+; Test 64-bit ordered comparisons that are really between a memory halfword
+; and a constant.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check unsigned comparison near the low end of the CLHHSI range, using zero
+; extension.
+define double @f1(double %a, double %b, i16 *%ptr) {
+; CHECK: f1:
+; CHECK: clhhsi 0(%r2), 1
+; CHECK-NEXT: j{{g?}}h
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = zext i16 %val to i64
+  %cond = icmp ugt i64 %ext, 1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check unsigned comparison near the low end of the CLHHSI range, using sign
+; extension.
+define double @f2(double %a, double %b, i16 *%ptr) {
+; CHECK: f2:
+; CHECK: clhhsi 0(%r2), 1
+; CHECK-NEXT: j{{g?}}h
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = sext i16 %val to i64
+  %cond = icmp ugt i64 %ext, 1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check unsigned comparison near the high end of the CLHHSI range, using zero
+; extension.
+define double @f3(double %a, double %b, i16 *%ptr) {
+; CHECK: f3:
+; CHECK: clhhsi 0(%r2), 65534
+; CHECK-NEXT: j{{g?}}l
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = zext i16 %val to i64
+  %cond = icmp ult i64 %ext, 65534
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check unsigned comparison near the high end of the CLHHSI range, using sign
+; extension.
+define double @f4(double %a, double %b, i16 *%ptr) {
+; CHECK: f4:
+; CHECK: clhhsi 0(%r2), 65534
+; CHECK-NEXT: j{{g?}}l
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = sext i16 %val to i64
+  %cond = icmp ult i64 %ext, -2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check unsigned comparison above the high end of the CLHHSI range, using zero
+; extension.  The condition is always true.
+define double @f5(double %a, double %b, i16 *%ptr) {
+; CHECK: f5:
+; CHECK-NOT: clhhsi
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = zext i16 %val to i64
+  %cond = icmp ult i64 %ext, 65536
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; When using unsigned comparison with sign extension, equality with values
+; in the range [32768, MAX-32769] is impossible, and ordered comparisons with
+; those values are effectively sign tests.  Since such comparisons are
+; unlikely to occur in practice, we don't bother optimizing the second case,
+; and simply ignore CLHHSI for this range.  First check the low end of the
+; range.
+define double @f6(double %a, double %b, i16 *%ptr) {
+; CHECK: f6:
+; CHECK-NOT: clhhsi
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = sext i16 %val to i64
+  %cond = icmp ult i64 %ext, 32768
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; ...and then the high end.
+define double @f7(double %a, double %b, i16 *%ptr) {
+; CHECK: f7:
+; CHECK-NOT: clhhsi
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = sext i16 %val to i64
+  %cond = icmp ult i64 %ext, -32769
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check signed comparison near the low end of the CLHHSI range, using zero
+; extension.  This is equivalent to unsigned comparison.
+define double @f8(double %a, double %b, i16 *%ptr) {
+; CHECK: f8:
+; CHECK: clhhsi 0(%r2), 1
+; CHECK-NEXT: j{{g?}}h
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = zext i16 %val to i64
+  %cond = icmp sgt i64 %ext, 1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check signed comparison near the low end of the CLHHSI range, using sign
+; extension.  This should use CHHSI instead.
+define double @f9(double %a, double %b, i16 *%ptr) {
+; CHECK: f9:
+; CHECK: chhsi 0(%r2), 1
+; CHECK-NEXT: j{{g?}}h
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = sext i16 %val to i64
+  %cond = icmp sgt i64 %ext, 1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check signed comparison near the high end of the CLHHSI range, using zero
+; extension.  This is equivalent to unsigned comparison.
+define double @f10(double %a, double %b, i16 *%ptr) {
+; CHECK: f10:
+; CHECK: clhhsi 0(%r2), 65534
+; CHECK-NEXT: j{{g?}}l
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = zext i16 %val to i64
+  %cond = icmp slt i64 %ext, 65534
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check signed comparison near the high end of the CLHHSI range, using sign
+; extension.  This should use CHHSI instead.
+define double @f11(double %a, double %b, i16 *%ptr) {
+; CHECK: f11:
+; CHECK: chhsi 0(%r2), -2
+; CHECK-NEXT: j{{g?}}l
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = sext i16 %val to i64
+  %cond = icmp slt i64 %ext, -2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check signed comparison above the high end of the CLHHSI range, using zero
+; extension.  The condition is always true, so no CLHHSI should be emitted.
+define double @f12(double %a, double %b, i16 *%ptr) {
+; CHECK: f12:
+; CHECK-NOT: clhhsi
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = zext i16 %val to i64
+  %cond = icmp slt i64 %ext, 65536
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check signed comparison near the high end of the CHHSI range, using sign
+; extension.
+define double @f13(double %a, double %b, i16 *%ptr) {
+; CHECK: f13:
+; CHECK: chhsi 0(%r2), 32766
+; CHECK-NEXT: j{{g?}}l
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = sext i16 %val to i64
+  %cond = icmp slt i64 %ext, 32766
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check signed comparison above the high end of the CHHSI range, using sign
+; extension.  This condition is always true.
+define double @f14(double %a, double %b, i16 *%ptr) {
+; CHECK: f14:
+; CHECK-NOT: chhsi
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = sext i16 %val to i64
+  %cond = icmp slt i64 %ext, 32768
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check signed comparison near the low end of the CHHSI range, using sign
+; extension.
+define double @f15(double %a, double %b, i16 *%ptr) {
+; CHECK: f15:
+; CHECK: chhsi 0(%r2), -32767
+; CHECK-NEXT: j{{g?}}g
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = sext i16 %val to i64
+  %cond = icmp sgt i64 %ext, -32767
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check signed comparison below the low end of the CHHSI range, using sign
+; extension.  This condition is always true.
+define double @f16(double %a, double %b, i16 *%ptr) {
+; CHECK: f16:
+; CHECK-NOT: chhsi
+; CHECK: br %r14
+  %val = load i16 *%ptr
+  %ext = sext i16 %val to i64
+  %cond = icmp sgt i64 %ext, -32769
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-32.ll b/test/CodeGen/SystemZ/int-cmp-32.ll
new file mode 100644
index 0000000..4bdeebb
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-cmp-32.ll
@@ -0,0 +1,237 @@
+; Test 32-bit signed comparisons between memory and a constant.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check ordered comparisons with 0.
+define double @f1(double %a, double %b, i32 *%ptr) {
+; CHECK: f1:
+; CHECK: chsi 0(%r2), 0
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %val = load i32 *%ptr
+  %cond = icmp slt i32 %val, 0
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check ordered comparisons with 1.
+define double @f2(double %a, double %b, i32 *%ptr) {
+; CHECK: f2:
+; CHECK: chsi 0(%r2), 1
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %val = load i32 *%ptr
+  %cond = icmp slt i32 %val, 1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check ordered comparisons with the high end of the signed 16-bit range.
+define double @f3(double %a, double %b, i32 *%ptr) {
+; CHECK: f3:
+; CHECK: chsi 0(%r2), 32767
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %val = load i32 *%ptr
+  %cond = icmp slt i32 %val, 32767
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value up, which can't use CHSI.
+define double @f4(double %a, double %b, i32 *%ptr) {
+; CHECK: f4:
+; CHECK-NOT: chsi
+; CHECK: br %r14
+  %val = load i32 *%ptr
+  %cond = icmp slt i32 %val, 32768
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check ordered comparisons with -1.
+define double @f5(double %a, double %b, i32 *%ptr) {
+; CHECK: f5:
+; CHECK: chsi 0(%r2), -1
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %val = load i32 *%ptr
+  %cond = icmp slt i32 %val, -1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check ordered comparisons with the low end of the 16-bit signed range.
+define double @f6(double %a, double %b, i32 *%ptr) {
+; CHECK: f6:
+; CHECK: chsi 0(%r2), -32768
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %val = load i32 *%ptr
+  %cond = icmp slt i32 %val, -32768
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value down, which can't use CHSI.
+define double @f7(double %a, double %b, i32 *%ptr) {
+; CHECK: f7:
+; CHECK-NOT: chsi
+; CHECK: br %r14
+  %val = load i32 *%ptr
+  %cond = icmp slt i32 %val, -32769
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check equality comparisons with 0.
+define double @f8(double %a, double %b, i32 *%ptr) {
+; CHECK: f8:
+; CHECK: chsi 0(%r2), 0
+; CHECK-NEXT: j{{g?}}e
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %val = load i32 *%ptr
+  %cond = icmp eq i32 %val, 0
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check equality comparisons with 1.
+define double @f9(double %a, double %b, i32 *%ptr) {
+; CHECK: f9:
+; CHECK: chsi 0(%r2), 1
+; CHECK-NEXT: j{{g?}}e
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %val = load i32 *%ptr
+  %cond = icmp eq i32 %val, 1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check equality comparisons with the high end of the signed 16-bit range.
+define double @f10(double %a, double %b, i32 *%ptr) {
+; CHECK: f10:
+; CHECK: chsi 0(%r2), 32767
+; CHECK-NEXT: j{{g?}}e
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %val = load i32 *%ptr
+  %cond = icmp eq i32 %val, 32767
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value up, which can't use CHSI.
+define double @f11(double %a, double %b, i32 *%ptr) {
+; CHECK: f11:
+; CHECK-NOT: chsi
+; CHECK: br %r14
+  %val = load i32 *%ptr
+  %cond = icmp eq i32 %val, 32768
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check equality comparisons with -1.
+define double @f12(double %a, double %b, i32 *%ptr) {
+; CHECK: f12:
+; CHECK: chsi 0(%r2), -1
+; CHECK-NEXT: j{{g?}}e
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %val = load i32 *%ptr
+  %cond = icmp eq i32 %val, -1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check equality comparisons with the low end of the 16-bit signed range.
+define double @f13(double %a, double %b, i32 *%ptr) {
+; CHECK: f13:
+; CHECK: chsi 0(%r2), -32768
+; CHECK-NEXT: j{{g?}}e
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %val = load i32 *%ptr
+  %cond = icmp eq i32 %val, -32768
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value down, which should be treated as a positive value.
+define double @f14(double %a, double %b, i32 *%ptr) {
+; CHECK: f14:
+; CHECK-NOT: chsi
+; CHECK: br %r14
+  %val = load i32 *%ptr
+  %cond = icmp eq i32 %val, -32769
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the high end of the CHSI range.
+define double @f15(double %a, double %b, i32 %i1, i32 *%base) {
+; CHECK: f15:
+; CHECK: chsi 4092(%r3), 0
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%base, i64 1023
+  %val = load i32 *%ptr
+  %cond = icmp slt i32 %val, 0
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next word up, which needs separate address logic.
+define double @f16(double %a, double %b, i32 *%base) {
+; CHECK: f16:
+; CHECK: aghi %r2, 4096
+; CHECK: chsi 0(%r2), 0
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%base, i64 1024
+  %val = load i32 *%ptr
+  %cond = icmp slt i32 %val, 0
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check negative offsets, which also need separate address logic.
+define double @f17(double %a, double %b, i32 *%base) {
+; CHECK: f17:
+; CHECK: aghi %r2, -4
+; CHECK: chsi 0(%r2), 0
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%base, i64 -1
+  %val = load i32 *%ptr
+  %cond = icmp slt i32 %val, 0
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check that CHSI does not allow indices.
+define double @f18(double %a, double %b, i64 %base, i64 %index) {
+; CHECK: f18:
+; CHECK: agr {{%r2, %r3|%r3, %r2}}
+; CHECK: chsi 0({{%r[23]}}), 0
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %add = add i64 %base, %index
+  %ptr = inttoptr i64 %add to i32 *
+  %val = load i32 *%ptr
+  %cond = icmp slt i32 %val, 0
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-33.ll b/test/CodeGen/SystemZ/int-cmp-33.ll
new file mode 100644
index 0000000..0144806
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-cmp-33.ll
@@ -0,0 +1,139 @@
+; Test 32-bit unsigned comparisons between memory and a constant.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check ordered comparisons with a constant near the low end of the unsigned
+; 16-bit range.
+define double @f1(double %a, double %b, i32 *%ptr) {
+; CHECK: f1:
+; CHECK: clfhsi 0(%r2), 1
+; CHECK-NEXT: j{{g?}}h
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %val = load i32 *%ptr
+  %cond = icmp ugt i32 %val, 1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check ordered comparisons with the high end of the unsigned 16-bit range.
+define double @f2(double %a, double %b, i32 *%ptr) {
+; CHECK: f2:
+; CHECK: clfhsi 0(%r2), 65535
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %val = load i32 *%ptr
+  %cond = icmp ult i32 %val, 65535
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value up, which can't use CLFHSI.
+define double @f3(double %a, double %b, i32 *%ptr) {
+; CHECK: f3:
+; CHECK-NOT: clfhsi
+; CHECK: br %r14
+  %val = load i32 *%ptr
+  %cond = icmp ult i32 %val, 65536
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check equality comparisons with 32768, the lowest value for which
+; we prefer CLFHSI to CHSI.
+define double @f4(double %a, double %b, i32 *%ptr) {
+; CHECK: f4:
+; CHECK: clfhsi 0(%r2), 32768
+; CHECK-NEXT: j{{g?}}e
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %val = load i32 *%ptr
+  %cond = icmp eq i32 %val, 32768
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check equality comparisons with the high end of the unsigned 16-bit range.
+define double @f5(double %a, double %b, i32 *%ptr) {
+; CHECK: f5:
+; CHECK: clfhsi 0(%r2), 65535
+; CHECK-NEXT: j{{g?}}e
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %val = load i32 *%ptr
+  %cond = icmp eq i32 %val, 65535
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value up, which can't use CLFHSI.
+define double @f6(double %a, double %b, i32 *%ptr) {
+; CHECK: f6:
+; CHECK-NOT: clfhsi
+; CHECK: br %r14
+  %val = load i32 *%ptr
+  %cond = icmp eq i32 %val, 65536
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the high end of the CLFHSI range.
+define double @f7(double %a, double %b, i32 %i1, i32 *%base) {
+; CHECK: f7:
+; CHECK: clfhsi 4092(%r3), 1
+; CHECK-NEXT: j{{g?}}h
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%base, i64 1023
+  %val = load i32 *%ptr
+  %cond = icmp ugt i32 %val, 1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next word up, which needs separate address logic.
+define double @f8(double %a, double %b, i32 *%base) {
+; CHECK: f8:
+; CHECK: aghi %r2, 4096
+; CHECK: clfhsi 0(%r2), 1
+; CHECK-NEXT: j{{g?}}h
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%base, i64 1024
+  %val = load i32 *%ptr
+  %cond = icmp ugt i32 %val, 1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check negative offsets, which also need separate address logic.
+define double @f9(double %a, double %b, i32 *%base) {
+; CHECK: f9:
+; CHECK: aghi %r2, -4
+; CHECK: clfhsi 0(%r2), 1
+; CHECK-NEXT: j{{g?}}h
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%base, i64 -1
+  %val = load i32 *%ptr
+  %cond = icmp ugt i32 %val, 1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check that CLFHSI does not allow indices.
+define double @f10(double %a, double %b, i64 %base, i64 %index) {
+; CHECK: f10:
+; CHECK: agr {{%r2, %r3|%r3, %r2}}
+; CHECK: clfhsi 0({{%r[23]}}), 1
+; CHECK-NEXT: j{{g?}}h
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %add = add i64 %base, %index
+  %ptr = inttoptr i64 %add to i32 *
+  %val = load i32 *%ptr
+  %cond = icmp ugt i32 %val, 1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-34.ll b/test/CodeGen/SystemZ/int-cmp-34.ll
new file mode 100644
index 0000000..b10bd4e
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-cmp-34.ll
@@ -0,0 +1,237 @@
+; Test 64-bit signed comparisons between memory and a constant.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check ordered comparisons with 0.
+define double @f1(double %a, double %b, i64 *%ptr) {
+; CHECK: f1:
+; CHECK: cghsi 0(%r2), 0
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %val = load i64 *%ptr
+  %cond = icmp slt i64 %val, 0
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check ordered comparisons with 1.
+define double @f2(double %a, double %b, i64 *%ptr) {
+; CHECK: f2:
+; CHECK: cghsi 0(%r2), 1
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %val = load i64 *%ptr
+  %cond = icmp slt i64 %val, 1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check ordered comparisons with the high end of the signed 16-bit range.
+define double @f3(double %a, double %b, i64 *%ptr) {
+; CHECK: f3:
+; CHECK: cghsi 0(%r2), 32767
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %val = load i64 *%ptr
+  %cond = icmp slt i64 %val, 32767
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value up, which can't use CGHSI.
+define double @f4(double %a, double %b, i64 *%ptr) {
+; CHECK: f4:
+; CHECK-NOT: cghsi
+; CHECK: br %r14
+  %val = load i64 *%ptr
+  %cond = icmp slt i64 %val, 32768
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check ordered comparisons with -1.
+define double @f5(double %a, double %b, i64 *%ptr) {
+; CHECK: f5:
+; CHECK: cghsi 0(%r2), -1
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %val = load i64 *%ptr
+  %cond = icmp slt i64 %val, -1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check ordered comparisons with the low end of the 16-bit signed range.
+define double @f6(double %a, double %b, i64 *%ptr) {
+; CHECK: f6:
+; CHECK: cghsi 0(%r2), -32768
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %val = load i64 *%ptr
+  %cond = icmp slt i64 %val, -32768
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value down, which is out of range for CGHSI.
+define double @f7(double %a, double %b, i64 *%ptr) {
+; CHECK: f7:
+; CHECK-NOT: cghsi
+; CHECK: br %r14
+  %val = load i64 *%ptr
+  %cond = icmp slt i64 %val, -32769
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check equality comparisons with 0.
+define double @f8(double %a, double %b, i64 *%ptr) {
+; CHECK: f8:
+; CHECK: cghsi 0(%r2), 0
+; CHECK-NEXT: j{{g?}}e
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %val = load i64 *%ptr
+  %cond = icmp eq i64 %val, 0
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check equality comparisons with 1.
+define double @f9(double %a, double %b, i64 *%ptr) {
+; CHECK: f9:
+; CHECK: cghsi 0(%r2), 1
+; CHECK-NEXT: j{{g?}}e
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %val = load i64 *%ptr
+  %cond = icmp eq i64 %val, 1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check equality comparisons with the high end of the signed 16-bit range.
+define double @f10(double %a, double %b, i64 *%ptr) {
+; CHECK: f10:
+; CHECK: cghsi 0(%r2), 32767
+; CHECK-NEXT: j{{g?}}e
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %val = load i64 *%ptr
+  %cond = icmp eq i64 %val, 32767
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value up, which can't use CGHSI.
+define double @f11(double %a, double %b, i64 *%ptr) {
+; CHECK: f11:
+; CHECK-NOT: cghsi
+; CHECK: br %r14
+  %val = load i64 *%ptr
+  %cond = icmp eq i64 %val, 32768
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check equality comparisons with -1.
+define double @f12(double %a, double %b, i64 *%ptr) {
+; CHECK: f12:
+; CHECK: cghsi 0(%r2), -1
+; CHECK-NEXT: j{{g?}}e
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %val = load i64 *%ptr
+  %cond = icmp eq i64 %val, -1
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check equality comparisons with the low end of the 16-bit signed range.
+define double @f13(double %a, double %b, i64 *%ptr) {
+; CHECK: f13:
+; CHECK: cghsi 0(%r2), -32768
+; CHECK-NEXT: j{{g?}}e
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %val = load i64 *%ptr
+  %cond = icmp eq i64 %val, -32768
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value down, which is out of range for CGHSI.
+define double @f14(double %a, double %b, i64 *%ptr) {
+; CHECK: f14:
+; CHECK-NOT: cghsi
+; CHECK: br %r14
+  %val = load i64 *%ptr
+  %cond = icmp eq i64 %val, -32769
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the high end of the CGHSI range.
+define double @f15(double %a, double %b, i64 %i1, i64 *%base) {
+; CHECK: f15:
+; CHECK: cghsi 4088(%r3), 0
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%base, i64 511
+  %val = load i64 *%ptr
+  %cond = icmp slt i64 %val, 0
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next doubleword up, which needs separate address logic.
+define double @f16(double %a, double %b, i64 *%base) {
+; CHECK: f16:
+; CHECK: aghi %r2, 4096
+; CHECK: cghsi 0(%r2), 0
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%base, i64 512
+  %val = load i64 *%ptr
+  %cond = icmp slt i64 %val, 0
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check negative offsets, which also need separate address logic.
+define double @f17(double %a, double %b, i64 *%base) {
+; CHECK: f17:
+; CHECK: aghi %r2, -8
+; CHECK: cghsi 0(%r2), 0
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%base, i64 -1
+  %val = load i64 *%ptr
+  %cond = icmp slt i64 %val, 0
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check that CGHSI does not allow indices.
+define double @f18(double %a, double %b, i64 %base, i64 %index) {
+; CHECK: f18:
+; CHECK: agr {{%r2, %r3|%r3, %r2}}
+; CHECK: cghsi 0({{%r[23]}}), 0
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %add = add i64 %base, %index
+  %ptr = inttoptr i64 %add to i64 *
+  %val = load i64 *%ptr
+  %cond = icmp slt i64 %val, 0
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-35.ll b/test/CodeGen/SystemZ/int-cmp-35.ll
new file mode 100644
index 0000000..9934906
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-cmp-35.ll
@@ -0,0 +1,139 @@
+; Test 64-bit unsigned comparisons between memory and a constant.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check ordered comparisons with a constant near the low end of the unsigned
+; 16-bit range.
+define double @f1(double %a, double %b, i64 *%ptr) {
+; CHECK: f1:
+; CHECK: clghsi 0(%r2), 2
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %val = load i64 *%ptr
+  %cond = icmp ult i64 %val, 2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check ordered comparisons with the high end of the unsigned 16-bit range.
+define double @f2(double %a, double %b, i64 *%ptr) {
+; CHECK: f2:
+; CHECK: clghsi 0(%r2), 65535
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %val = load i64 *%ptr
+  %cond = icmp ult i64 %val, 65535
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value up, which can't use CLGHSI.
+define double @f3(double %a, double %b, i64 *%ptr) {
+; CHECK: f3:
+; CHECK-NOT: clghsi
+; CHECK: br %r14
+  %val = load i64 *%ptr
+  %cond = icmp ult i64 %val, 65536
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check equality comparisons with 32768, the lowest value for which
+; we prefer CLGHSI to CGHSI.
+define double @f4(double %a, double %b, i64 *%ptr) {
+; CHECK: f4:
+; CHECK: clghsi 0(%r2), 32768
+; CHECK-NEXT: j{{g?}}e
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %val = load i64 *%ptr
+  %cond = icmp eq i64 %val, 32768
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check equality comparisons with the high end of the unsigned 16-bit range.
+define double @f5(double %a, double %b, i64 *%ptr) {
+; CHECK: f5:
+; CHECK: clghsi 0(%r2), 65535
+; CHECK-NEXT: j{{g?}}e
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %val = load i64 *%ptr
+  %cond = icmp eq i64 %val, 65535
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next value up, which can't use CLGHSI.
+define double @f6(double %a, double %b, i64 *%ptr) {
+; CHECK: f6:
+; CHECK-NOT: clghsi
+; CHECK: br %r14
+  %val = load i64 *%ptr
+  %cond = icmp eq i64 %val, 65536
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the high end of the CLGHSI range.
+define double @f7(double %a, double %b, i64 %i1, i64 *%base) {
+; CHECK: f7:
+; CHECK: clghsi 4088(%r3), 2
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%base, i64 511
+  %val = load i64 *%ptr
+  %cond = icmp ult i64 %val, 2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check the next doubleword up, which needs separate address logic.
+define double @f8(double %a, double %b, i64 *%base) {
+; CHECK: f8:
+; CHECK: aghi %r2, 4096
+; CHECK: clghsi 0(%r2), 2
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%base, i64 512
+  %val = load i64 *%ptr
+  %cond = icmp ult i64 %val, 2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check negative offsets, which also need separate address logic.
+define double @f9(double %a, double %b, i64 *%base) {
+; CHECK: f9:
+; CHECK: aghi %r2, -8
+; CHECK: clghsi 0(%r2), 2
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%base, i64 -1
+  %val = load i64 *%ptr
+  %cond = icmp ult i64 %val, 2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
+
+; Check that CLGHSI does not allow indices.
+define double @f10(double %a, double %b, i64 %base, i64 %index) {
+; CHECK: f10:
+; CHECK: agr {{%r2, %r3|%r3, %r2}}
+; CHECK: clghsi 0({{%r[23]}}), 2
+; CHECK-NEXT: j{{g?}}l
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %add = add i64 %base, %index
+  %ptr = inttoptr i64 %add to i64 *
+  %val = load i64 *%ptr
+  %cond = icmp ult i64 %val, 2
+  %res = select i1 %cond, double %a, double %b
+  ret double %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-36.ll b/test/CodeGen/SystemZ/int-cmp-36.ll
new file mode 100644
index 0000000..0813594
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-cmp-36.ll
@@ -0,0 +1,81 @@
+; Test 32-bit comparisons in which the second operand is sign-extended
+; from a PC-relative i16.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+@g = global i16 1
+
+; Check signed comparison.
+define i32 @f1(i32 %src1) {
+; CHECK: f1:
+; CHECK: chrl %r2, g
+; CHECK-NEXT: j{{g?}}l
+; CHECK: br %r14
+entry:
+  %val = load i16 *@g
+  %src2 = sext i16 %val to i32
+  %cond = icmp slt i32 %src1, %src2
+  br i1 %cond, label %exit, label %mulb
+mulb:
+  %mul = mul i32 %src1, %src1
+  br label %exit
+exit:
+  %res = phi i32 [ %src1, %entry ], [ %mul, %mulb ]
+  ret i32 %res
+}
+
+; Check unsigned comparison, which cannot use CHRL.
+define i32 @f2(i32 %src1) {
+; CHECK: f2:
+; CHECK-NOT: chrl
+; CHECK: br %r14
+entry:
+  %val = load i16 *@g
+  %src2 = sext i16 %val to i32
+  %cond = icmp ult i32 %src1, %src2
+  br i1 %cond, label %exit, label %mulb
+mulb:
+  %mul = mul i32 %src1, %src1
+  br label %exit
+exit:
+  %res = phi i32 [ %src1, %entry ], [ %mul, %mulb ]
+  ret i32 %res
+}
+
+; Check equality.
+define i32 @f3(i32 %src1) {
+; CHECK: f3:
+; CHECK: chrl %r2, g
+; CHECK-NEXT: j{{g?}}e
+; CHECK: br %r14
+entry:
+  %val = load i16 *@g
+  %src2 = sext i16 %val to i32
+  %cond = icmp eq i32 %src1, %src2
+  br i1 %cond, label %exit, label %mulb
+mulb:
+  %mul = mul i32 %src1, %src1
+  br label %exit
+exit:
+  %res = phi i32 [ %src1, %entry ], [ %mul, %mulb ]
+  ret i32 %res
+}
+
+; Check inequality.
+define i32 @f4(i32 %src1) {
+; CHECK: f4:
+; CHECK: chrl %r2, g
+; CHECK-NEXT: j{{g?}}lh
+; CHECK: br %r14
+entry:
+  %val = load i16 *@g
+  %src2 = sext i16 %val to i32
+  %cond = icmp ne i32 %src1, %src2
+  br i1 %cond, label %exit, label %mulb
+mulb:
+  %mul = mul i32 %src1, %src1
+  br label %exit
+exit:
+  %res = phi i32 [ %src1, %entry ], [ %mul, %mulb ]
+  ret i32 %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-37.ll b/test/CodeGen/SystemZ/int-cmp-37.ll
new file mode 100644
index 0000000..aebd1f6
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-cmp-37.ll
@@ -0,0 +1,81 @@
+; Test 32-bit comparisons in which the second operand is zero-extended
+; from a PC-relative i16.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+@g = global i16 1
+
+; Check unsigned comparison.
+define i32 @f1(i32 %src1) {
+; CHECK: f1:
+; CHECK: clhrl %r2, g
+; CHECK-NEXT: j{{g?}}l
+; CHECK: br %r14
+entry:
+  %val = load i16 *@g
+  %src2 = zext i16 %val to i32
+  %cond = icmp ult i32 %src1, %src2
+  br i1 %cond, label %exit, label %mulb
+mulb:
+  %mul = mul i32 %src1, %src1
+  br label %exit
+exit:
+  %res = phi i32 [ %src1, %entry ], [ %mul, %mulb ]
+  ret i32 %res
+}
+
+; Check signed comparison.
+define i32 @f2(i32 %src1) {
+; CHECK: f2:
+; CHECK-NOT: clhrl
+; CHECK: br %r14
+entry:
+  %val = load i16 *@g
+  %src2 = zext i16 %val to i32
+  %cond = icmp slt i32 %src1, %src2
+  br i1 %cond, label %exit, label %mulb
+mulb:
+  %mul = mul i32 %src1, %src1
+  br label %exit
+exit:
+  %res = phi i32 [ %src1, %entry ], [ %mul, %mulb ]
+  ret i32 %res
+}
+
+; Check equality.
+define i32 @f3(i32 %src1) {
+; CHECK: f3:
+; CHECK: clhrl %r2, g
+; CHECK-NEXT: j{{g?}}e
+; CHECK: br %r14
+entry:
+  %val = load i16 *@g
+  %src2 = zext i16 %val to i32
+  %cond = icmp eq i32 %src1, %src2
+  br i1 %cond, label %exit, label %mulb
+mulb:
+  %mul = mul i32 %src1, %src1
+  br label %exit
+exit:
+  %res = phi i32 [ %src1, %entry ], [ %mul, %mulb ]
+  ret i32 %res
+}
+
+; Check inequality.
+define i32 @f4(i32 %src1) {
+; CHECK: f4:
+; CHECK: clhrl %r2, g
+; CHECK-NEXT: j{{g?}}lh
+; CHECK: br %r14
+entry:
+  %val = load i16 *@g
+  %src2 = zext i16 %val to i32
+  %cond = icmp ne i32 %src1, %src2
+  br i1 %cond, label %exit, label %mulb
+mulb:
+  %mul = mul i32 %src1, %src1
+  br label %exit
+exit:
+  %res = phi i32 [ %src1, %entry ], [ %mul, %mulb ]
+  ret i32 %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-38.ll b/test/CodeGen/SystemZ/int-cmp-38.ll
new file mode 100644
index 0000000..3470730
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-cmp-38.ll
@@ -0,0 +1,78 @@
+; Test 32-bit comparisons in which the second operand is a PC-relative
+; variable.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+@g = global i32 1
+
+; Check signed comparisons.
+define i32 @f1(i32 %src1) {
+; CHECK: f1:
+; CHECK: crl %r2, g
+; CHECK-NEXT: j{{g?}}l
+; CHECK: br %r14
+entry:
+  %src2 = load i32 *@g
+  %cond = icmp slt i32 %src1, %src2
+  br i1 %cond, label %exit, label %mulb
+mulb:
+  %mul = mul i32 %src1, %src1
+  br label %exit
+exit:
+  %res = phi i32 [ %src1, %entry ], [ %mul, %mulb ]
+  ret i32 %res
+}
+
+; Check unsigned comparisons.
+define i32 @f2(i32 %src1) {
+; CHECK: f2:
+; CHECK: clrl %r2, g
+; CHECK-NEXT: j{{g?}}l
+; CHECK: br %r14
+entry:
+  %src2 = load i32 *@g
+  %cond = icmp ult i32 %src1, %src2
+  br i1 %cond, label %exit, label %mulb
+mulb:
+  %mul = mul i32 %src1, %src1
+  br label %exit
+exit:
+  %res = phi i32 [ %src1, %entry ], [ %mul, %mulb ]
+  ret i32 %res
+}
+
+; Check equality, which can use CRL or CLRL.
+define i32 @f3(i32 %src1) {
+; CHECK: f3:
+; CHECK: c{{l?}}rl %r2, g
+; CHECK-NEXT: j{{g?}}e
+; CHECK: br %r14
+entry:
+  %src2 = load i32 *@g
+  %cond = icmp eq i32 %src1, %src2
+  br i1 %cond, label %exit, label %mulb
+mulb:
+  %mul = mul i32 %src1, %src1
+  br label %exit
+exit:
+  %res = phi i32 [ %src1, %entry ], [ %mul, %mulb ]
+  ret i32 %res
+}
+
+; ...likewise inequality.
+define i32 @f4(i32 %src1) {
+; CHECK: f4:
+; CHECK: c{{l?}}rl %r2, g
+; CHECK-NEXT: j{{g?}}lh
+; CHECK: br %r14
+entry:
+  %src2 = load i32 *@g
+  %cond = icmp ne i32 %src1, %src2
+  br i1 %cond, label %exit, label %mulb
+mulb:
+  %mul = mul i32 %src1, %src1
+  br label %exit
+exit:
+  %res = phi i32 [ %src1, %entry ], [ %mul, %mulb ]
+  ret i32 %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-39.ll b/test/CodeGen/SystemZ/int-cmp-39.ll
new file mode 100644
index 0000000..1129dce
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-cmp-39.ll
@@ -0,0 +1,81 @@
+; Test 64-bit comparisons in which the second operand is sign-extended
+; from a PC-relative i16.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+@g = global i16 1
+
+; Check signed comparison.
+define i64 @f1(i64 %src1) {
+; CHECK: f1:
+; CHECK: cghrl %r2, g
+; CHECK-NEXT: j{{g?}}l
+; CHECK: br %r14
+entry:
+  %val = load i16 *@g
+  %src2 = sext i16 %val to i64
+  %cond = icmp slt i64 %src1, %src2
+  br i1 %cond, label %exit, label %mulb
+mulb:
+  %mul = mul i64 %src1, %src1
+  br label %exit
+exit:
+  %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ]
+  ret i64 %res
+}
+
+; Check unsigned comparison, which cannot use CGHRL.
+define i64 @f2(i64 %src1) {
+; CHECK: f2:
+; CHECK-NOT: cghrl
+; CHECK: br %r14
+entry:
+  %val = load i16 *@g
+  %src2 = sext i16 %val to i64
+  %cond = icmp ult i64 %src1, %src2
+  br i1 %cond, label %exit, label %mulb
+mulb:
+  %mul = mul i64 %src1, %src1
+  br label %exit
+exit:
+  %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ]
+  ret i64 %res
+}
+
+; Check equality.
+define i64 @f3(i64 %src1) {
+; CHECK: f3:
+; CHECK: cghrl %r2, g
+; CHECK-NEXT: j{{g?}}e
+; CHECK: br %r14
+entry:
+  %val = load i16 *@g
+  %src2 = sext i16 %val to i64
+  %cond = icmp eq i64 %src1, %src2
+  br i1 %cond, label %exit, label %mulb
+mulb:
+  %mul = mul i64 %src1, %src1
+  br label %exit
+exit:
+  %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ]
+  ret i64 %res
+}
+
+; Check inequality.
+define i64 @f4(i64 %src1) {
+; CHECK: f4:
+; CHECK: cghrl %r2, g
+; CHECK-NEXT: j{{g?}}lh
+; CHECK: br %r14
+entry:
+  %val = load i16 *@g
+  %src2 = sext i16 %val to i64
+  %cond = icmp ne i64 %src1, %src2
+  br i1 %cond, label %exit, label %mulb
+mulb:
+  %mul = mul i64 %src1, %src1
+  br label %exit
+exit:
+  %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ]
+  ret i64 %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-40.ll b/test/CodeGen/SystemZ/int-cmp-40.ll
new file mode 100644
index 0000000..8d9fd9a
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-cmp-40.ll
@@ -0,0 +1,81 @@
+; Test 64-bit comparisons in which the second operand is zero-extended
+; from a PC-relative i16.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+@g = global i16 1
+
+; Check unsigned comparison.
+define i64 @f1(i64 %src1) {
+; CHECK: f1:
+; CHECK: clghrl %r2, g
+; CHECK-NEXT: j{{g?}}l
+; CHECK: br %r14
+entry:
+  %val = load i16 *@g
+  %src2 = zext i16 %val to i64
+  %cond = icmp ult i64 %src1, %src2
+  br i1 %cond, label %exit, label %mulb
+mulb:
+  %mul = mul i64 %src1, %src1
+  br label %exit
+exit:
+  %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ]
+  ret i64 %res
+}
+
+; Check signed comparison.
+define i64 @f2(i64 %src1) {
+; CHECK: f2:
+; CHECK-NOT: clghrl
+; CHECK: br %r14
+entry:
+  %val = load i16 *@g
+  %src2 = zext i16 %val to i64
+  %cond = icmp slt i64 %src1, %src2
+  br i1 %cond, label %exit, label %mulb
+mulb:
+  %mul = mul i64 %src1, %src1
+  br label %exit
+exit:
+  %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ]
+  ret i64 %res
+}
+
+; Check equality.
+define i64 @f3(i64 %src1) {
+; CHECK: f3:
+; CHECK: clghrl %r2, g
+; CHECK-NEXT: j{{g?}}e
+; CHECK: br %r14
+entry:
+  %val = load i16 *@g
+  %src2 = zext i16 %val to i64
+  %cond = icmp eq i64 %src1, %src2
+  br i1 %cond, label %exit, label %mulb
+mulb:
+  %mul = mul i64 %src1, %src1
+  br label %exit
+exit:
+  %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ]
+  ret i64 %res
+}
+
+; Check inequality.
+define i64 @f4(i64 %src1) {
+; CHECK: f4:
+; CHECK: clghrl %r2, g
+; CHECK-NEXT: j{{g?}}lh
+; CHECK: br %r14
+entry:
+  %val = load i16 *@g
+  %src2 = zext i16 %val to i64
+  %cond = icmp ne i64 %src1, %src2
+  br i1 %cond, label %exit, label %mulb
+mulb:
+  %mul = mul i64 %src1, %src1
+  br label %exit
+exit:
+  %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ]
+  ret i64 %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-41.ll b/test/CodeGen/SystemZ/int-cmp-41.ll
new file mode 100644
index 0000000..0808bff
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-cmp-41.ll
@@ -0,0 +1,81 @@
+; Test 64-bit comparisons in which the second operand is sign-extended
+; from a PC-relative i32.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+@g = global i32 1
+
+; Check signed comparison.
+define i64 @f1(i64 %src1) {
+; CHECK: f1:
+; CHECK: cgfrl %r2, g
+; CHECK-NEXT: j{{g?}}l
+; CHECK: br %r14
+entry:
+  %val = load i32 *@g
+  %src2 = sext i32 %val to i64
+  %cond = icmp slt i64 %src1, %src2
+  br i1 %cond, label %exit, label %mulb
+mulb:
+  %mul = mul i64 %src1, %src1
+  br label %exit
+exit:
+  %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ]
+  ret i64 %res
+}
+
+; Check unsigned comparison, which cannot use CGFRL.
+define i64 @f2(i64 %src1) {
+; CHECK: f2:
+; CHECK-NOT: cgfrl
+; CHECK: br %r14
+entry:
+  %val = load i32 *@g
+  %src2 = sext i32 %val to i64
+  %cond = icmp ult i64 %src1, %src2
+  br i1 %cond, label %exit, label %mulb
+mulb:
+  %mul = mul i64 %src1, %src1
+  br label %exit
+exit:
+  %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ]
+  ret i64 %res
+}
+
+; Check equality.
+define i64 @f3(i64 %src1) {
+; CHECK: f3:
+; CHECK: cgfrl %r2, g
+; CHECK-NEXT: j{{g?}}e
+; CHECK: br %r14
+entry:
+  %val = load i32 *@g
+  %src2 = sext i32 %val to i64
+  %cond = icmp eq i64 %src1, %src2
+  br i1 %cond, label %exit, label %mulb
+mulb:
+  %mul = mul i64 %src1, %src1
+  br label %exit
+exit:
+  %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ]
+  ret i64 %res
+}
+
+; Check inequality.
+define i64 @f4(i64 %src1) {
+; CHECK: f4:
+; CHECK: cgfrl %r2, g
+; CHECK-NEXT: j{{g?}}lh
+; CHECK: br %r14
+entry:
+  %val = load i32 *@g
+  %src2 = sext i32 %val to i64
+  %cond = icmp ne i64 %src1, %src2
+  br i1 %cond, label %exit, label %mulb
+mulb:
+  %mul = mul i64 %src1, %src1
+  br label %exit
+exit:
+  %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ]
+  ret i64 %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-42.ll b/test/CodeGen/SystemZ/int-cmp-42.ll
new file mode 100644
index 0000000..5c67581
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-cmp-42.ll
@@ -0,0 +1,81 @@
+; Test 64-bit comparisons in which the second operand is zero-extended
+; from a PC-relative i32.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+@g = global i32 1
+
+; Check unsigned comparison.
+define i64 @f1(i64 %src1) {
+; CHECK: f1:
+; CHECK: clgfrl %r2, g
+; CHECK-NEXT: j{{g?}}l
+; CHECK: br %r14
+entry:
+  %val = load i32 *@g
+  %src2 = zext i32 %val to i64
+  %cond = icmp ult i64 %src1, %src2
+  br i1 %cond, label %exit, label %mulb
+mulb:
+  %mul = mul i64 %src1, %src1
+  br label %exit
+exit:
+  %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ]
+  ret i64 %res
+}
+
+; Check signed comparison.
+define i64 @f2(i64 %src1) {
+; CHECK: f2:
+; CHECK-NOT: clgfrl
+; CHECK: br %r14
+entry:
+  %val = load i32 *@g
+  %src2 = zext i32 %val to i64
+  %cond = icmp slt i64 %src1, %src2
+  br i1 %cond, label %exit, label %mulb
+mulb:
+  %mul = mul i64 %src1, %src1
+  br label %exit
+exit:
+  %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ]
+  ret i64 %res
+}
+
+; Check equality.
+define i64 @f3(i64 %src1) {
+; CHECK: f3:
+; CHECK: clgfrl %r2, g
+; CHECK-NEXT: j{{g?}}e
+; CHECK: br %r14
+entry:
+  %val = load i32 *@g
+  %src2 = zext i32 %val to i64
+  %cond = icmp eq i64 %src1, %src2
+  br i1 %cond, label %exit, label %mulb
+mulb:
+  %mul = mul i64 %src1, %src1
+  br label %exit
+exit:
+  %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ]
+  ret i64 %res
+}
+
+; Check inequality.
+define i64 @f4(i64 %src1) {
+; CHECK: f4:
+; CHECK: clgfrl %r2, g
+; CHECK-NEXT: j{{g?}}lh
+; CHECK: br %r14
+entry:
+  %val = load i32 *@g
+  %src2 = zext i32 %val to i64
+  %cond = icmp ne i64 %src1, %src2
+  br i1 %cond, label %exit, label %mulb
+mulb:
+  %mul = mul i64 %src1, %src1
+  br label %exit
+exit:
+  %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ]
+  ret i64 %res
+}
diff --git a/test/CodeGen/SystemZ/int-cmp-43.ll b/test/CodeGen/SystemZ/int-cmp-43.ll
new file mode 100644
index 0000000..f387293
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-cmp-43.ll
@@ -0,0 +1,78 @@
+; Test 64-bit comparisons in which the second operand is a PC-relative
+; variable.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+@g = global i64 1
+
+; Check signed comparisons.
+define i64 @f1(i64 %src1) {
+; CHECK: f1:
+; CHECK: cgrl %r2, g
+; CHECK-NEXT: j{{g?}}l
+; CHECK: br %r14
+entry:
+  %src2 = load i64 *@g
+  %cond = icmp slt i64 %src1, %src2
+  br i1 %cond, label %exit, label %mulb
+mulb:
+  %mul = mul i64 %src1, %src1
+  br label %exit
+exit:
+  %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ]
+  ret i64 %res
+}
+
+; Check unsigned comparisons.
+define i64 @f2(i64 %src1) {
+; CHECK: f2:
+; CHECK: clgrl %r2, g
+; CHECK-NEXT: j{{g?}}l
+; CHECK: br %r14
+entry:
+  %src2 = load i64 *@g
+  %cond = icmp ult i64 %src1, %src2
+  br i1 %cond, label %exit, label %mulb
+mulb:
+  %mul = mul i64 %src1, %src1
+  br label %exit
+exit:
+  %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ]
+  ret i64 %res
+}
+
+; Check equality, which can use CGRL or CLGRL.
+define i64 @f3(i64 %src1) {
+; CHECK: f3:
+; CHECK: c{{l?}}grl %r2, g
+; CHECK-NEXT: j{{g?}}e
+; CHECK: br %r14
+entry:
+  %src2 = load i64 *@g
+  %cond = icmp eq i64 %src1, %src2
+  br i1 %cond, label %exit, label %mulb
+mulb:
+  %mul = mul i64 %src1, %src1
+  br label %exit
+exit:
+  %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ]
+  ret i64 %res
+}
+
+; ...likewise inequality.
+define i64 @f4(i64 %src1) {
+; CHECK: f4:
+; CHECK: c{{l?}}grl %r2, g
+; CHECK-NEXT: j{{g?}}lh
+; CHECK: br %r14
+entry:
+  %src2 = load i64 *@g
+  %cond = icmp ne i64 %src1, %src2
+  br i1 %cond, label %exit, label %mulb
+mulb:
+  %mul = mul i64 %src1, %src1
+  br label %exit
+exit:
+  %res = phi i64 [ %src1, %entry ], [ %mul, %mulb ]
+  ret i64 %res
+}
diff --git a/test/CodeGen/SystemZ/int-const-01.ll b/test/CodeGen/SystemZ/int-const-01.ll
new file mode 100644
index 0000000..a580154
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-const-01.ll
@@ -0,0 +1,91 @@
+; Test loading of 32-bit constants.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check 0.
+define i32 @f1() {
+; CHECK: f1:
+; CHECK: lhi %r2, 0
+; CHECK: br %r14
+  ret i32 0
+}
+
+; Check the high end of the LHI range.
+define i32 @f2() {
+; CHECK: f2:
+; CHECK: lhi %r2, 32767
+; CHECK: br %r14
+  ret i32 32767
+}
+
+; Check the next value up, which must use LLILL instead.
+define i32 @f3() {
+; CHECK: f3:
+; CHECK: llill %r2, 32768
+; CHECK: br %r14
+  ret i32 32768
+}
+
+; Check the high end of the LLILL range.
+define i32 @f4() {
+; CHECK: f4:
+; CHECK: llill %r2, 65535
+; CHECK: br %r14
+  ret i32 65535
+}
+
+; Check the first useful LLILH value, which is the next one up.
+define i32 @f5() {
+; CHECK: f5:
+; CHECK: llilh %r2, 1
+; CHECK: br %r14
+  ret i32 65536
+}
+
+; Check the first useful IILF value, which is the next one up again.
+define i32 @f6() {
+; CHECK: f6:
+; CHECK: iilf %r2, 65537
+; CHECK: br %r14
+  ret i32 65537
+}
+
+; Check the high end of the LLILH range.
+define i32 @f7() {
+; CHECK: f7:
+; CHECK: llilh %r2, 65535
+; CHECK: br %r14
+  ret i32 -65536
+}
+
+; Check the next value up, which must use IILF.
+define i32 @f8() {
+; CHECK: f8:
+; CHECK: iilf %r2, 4294901761
+; CHECK: br %r14
+  ret i32 -65535
+}
+
+; Check the highest useful IILF value, 0xffff7fff.
+define i32 @f9() {
+; CHECK: f9:
+; CHECK: iilf %r2, 4294934527
+; CHECK: br %r14
+  ret i32 -32769
+}
+
+; Check the next value up, which should use LHI.
+define i32 @f10() {
+; CHECK: f10:
+; CHECK: lhi %r2, -32768
+; CHECK: br %r14
+  ret i32 -32768
+}
+
+; Check -1.
+define i32 @f11() {
+; CHECK: f11:
+; CHECK: lhi %r2, -1
+; CHECK: br %r14
+  ret i32 -1
+}
diff --git a/test/CodeGen/SystemZ/int-const-02.ll b/test/CodeGen/SystemZ/int-const-02.ll
new file mode 100644
index 0000000..b345e3f
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-const-02.ll
@@ -0,0 +1,251 @@
+; Test loading of 64-bit constants.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check 0.
+define i64 @f1() {
+; CHECK: f1:
+; CHECK: lghi %r2, 0
+; CHECK-NEXT: br %r14
+  ret i64 0
+}
+
+; Check the high end of the LGHI range.
+define i64 @f2() {
+; CHECK: f2:
+; CHECK: lghi %r2, 32767
+; CHECK-NEXT: br %r14
+  ret i64 32767
+}
+
+; Check the next value up, which must use LLILL instead.
+define i64 @f3() {
+; CHECK: f3:
+; CHECK: llill %r2, 32768
+; CHECK-NEXT: br %r14
+  ret i64 32768
+}
+
+; Check the high end of the LLILL range.
+define i64 @f4() {
+; CHECK: f4:
+; CHECK: llill %r2, 65535
+; CHECK-NEXT: br %r14
+  ret i64 65535
+}
+
+; Check the first useful LLILH value, which is the next one up.
+define i64 @f5() {
+; CHECK: f5:
+; CHECK: llilh %r2, 1
+; CHECK-NEXT: br %r14
+  ret i64 65536
+}
+
+; Check the first useful LGFI value, which is the next one up again.
+define i64 @f6() {
+; CHECK: f6:
+; CHECK: lgfi %r2, 65537
+; CHECK-NEXT: br %r14
+  ret i64 65537
+}
+
+; Check the high end of the LGFI range.
+define i64 @f7() {
+; CHECK: f7:
+; CHECK: lgfi %r2, 2147483647
+; CHECK-NEXT: br %r14
+  ret i64 2147483647
+}
+
+; Check the next value up, which should use LLILH instead.
+define i64 @f8() {
+; CHECK: f8:
+; CHECK: llilh %r2, 32768
+; CHECK-NEXT: br %r14
+  ret i64 2147483648
+}
+
+; Check the next value up again, which should use LLILF.
+define i64 @f9() {
+; CHECK: f9:
+; CHECK: llilf %r2, 2147483649
+; CHECK-NEXT: br %r14
+  ret i64 2147483649
+}
+
+; Check the high end of the LLILH range.
+define i64 @f10() {
+; CHECK: f10:
+; CHECK: llilh %r2, 65535
+; CHECK-NEXT: br %r14
+  ret i64 4294901760
+}
+
+; Check the next value up, which must use LLILF.
+define i64 @f11() {
+; CHECK: f11:
+; CHECK: llilf %r2, 4294901761
+; CHECK-NEXT: br %r14
+  ret i64 4294901761
+}
+
+; Check the high end of the LLILF range.
+define i64 @f12() {
+; CHECK: f12:
+; CHECK: llilf %r2, 4294967295
+; CHECK-NEXT: br %r14
+  ret i64 4294967295
+}
+
+; Check the lowest useful LLIHL value, which is the next one up.
+define i64 @f13() {
+; CHECK: f13:
+; CHECK: llihl %r2, 1
+; CHECK-NEXT: br %r14
+  ret i64 4294967296
+}
+
+; Check the next value up, which must use a combination of two instructions.
+define i64 @f14() {
+; CHECK: f14:
+; CHECK: llihl %r2, 1
+; CHECK-NEXT: oill %r2, 1
+; CHECK-NEXT: br %r14
+  ret i64 4294967297
+}
+
+; Check the high end of the OILL range.
+define i64 @f15() {
+; CHECK: f15:
+; CHECK: llihl %r2, 1
+; CHECK-NEXT: oill %r2, 65535
+; CHECK-NEXT: br %r14
+  ret i64 4295032831
+}
+
+; Check the next value up, which should use OILH instead.
+define i64 @f16() {
+; CHECK: f16:
+; CHECK: llihl %r2, 1
+; CHECK-NEXT: oilh %r2, 1
+; CHECK-NEXT: br %r14
+  ret i64 4295032832
+}
+
+; Check the next value up again, which should use OILF.
+define i64 @f17() {
+; CHECK: f17:
+; CHECK: llihl %r2, 1
+; CHECK-NEXT: oilf %r2, 65537
+; CHECK-NEXT: br %r14
+  ret i64 4295032833
+}
+
+; Check the high end of the OILH range.
+define i64 @f18() {
+; CHECK: f18:
+; CHECK: llihl %r2, 1
+; CHECK-NEXT: oilh %r2, 65535
+; CHECK-NEXT: br %r14
+  ret i64 8589869056
+}
+
+; Check the high end of the OILF range.
+define i64 @f19() {
+; CHECK: f19:
+; CHECK: llihl %r2, 1
+; CHECK-NEXT: oilf %r2, 4294967295
+; CHECK-NEXT: br %r14
+  ret i64 8589934591
+}
+
+; Check the high end of the LLIHL range.
+define i64 @f20() {
+; CHECK: f20:
+; CHECK: llihl %r2, 65535
+; CHECK-NEXT: br %r14
+  ret i64 281470681743360
+}
+
+; Check the lowest useful LLIHH value, which is 1<<32 greater than the above.
+define i64 @f21() {
+; CHECK: f21:
+; CHECK: llihh %r2, 1
+; CHECK-NEXT: br %r14
+  ret i64 281474976710656
+}
+
+; Check the lowest useful LLIHF value, which is 1<<32 greater again.
+define i64 @f22() {
+; CHECK: f22:
+; CHECK: llihf %r2, 65537
+; CHECK-NEXT: br %r14
+  ret i64 281479271677952
+}
+
+; Check the highest end of the LLIHH range.
+define i64 @f23() {
+; CHECK: f23:
+; CHECK: llihh %r2, 65535
+; CHECK-NEXT: br %r14
+  ret i64 -281474976710656
+}
+
+; Check the next value up, which must use OILL too.
+define i64 @f24() {
+; CHECK: f24:
+; CHECK: llihh %r2, 65535
+; CHECK-NEXT: oill %r2, 1
+; CHECK-NEXT: br %r14
+  ret i64 -281474976710655
+}
+
+; Check the high end of the LLIHF range.
+define i64 @f25() {
+; CHECK: f25:
+; CHECK: llihf %r2, 4294967295
+; CHECK-NEXT: br %r14
+  ret i64 -4294967296
+}
+
+; Check -1.
+define i64 @f26() {
+; CHECK: f26:
+; CHECK: lghi %r2, -1
+; CHECK-NEXT: br %r14
+  ret i64 -1
+}
+
+; Check the low end of the LGHI range.
+define i64 @f27() {
+; CHECK: f27:
+; CHECK: lghi %r2, -32768
+; CHECK-NEXT: br %r14
+  ret i64 -32768
+}
+
+; Check the next value down, which must use LGFI instead.
+define i64 @f28() {
+; CHECK: f28:
+; CHECK: lgfi %r2, -32769
+; CHECK-NEXT: br %r14
+  ret i64 -32769
+}
+
+; Check the low end of the LGFI range.
+define i64 @f29() {
+; CHECK: f29:
+; CHECK: lgfi %r2, -2147483648
+; CHECK-NEXT: br %r14
+  ret i64 -2147483648
+}
+
+; Check the next value down, which needs a two-instruction sequence.
+define i64 @f30() {
+; CHECK: f30:
+; CHECK: llihf %r2, 4294967295
+; CHECK-NEXT: oilf %r2, 2147483647
+; CHECK-NEXT: br %r14
+  ret i64 -2147483649
+}
diff --git a/test/CodeGen/SystemZ/int-const-03.ll b/test/CodeGen/SystemZ/int-const-03.ll
new file mode 100644
index 0000000..807b7e4
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-const-03.ll
@@ -0,0 +1,166 @@
+; Test moves of integers to byte memory locations.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check the low end of the unsigned range.
+define void @f1(i8 *%ptr) {
+; CHECK: f1:
+; CHECK: mvi 0(%r2), 0
+; CHECK: br %r14
+  store i8 0, i8 *%ptr
+  ret void
+}
+
+; Check the high end of the signed range.
+define void @f2(i8 *%ptr) {
+; CHECK: f2:
+; CHECK: mvi 0(%r2), 127
+; CHECK: br %r14
+  store i8 127, i8 *%ptr
+  ret void
+}
+
+; Check the next value up, which only fits as an unsigned byte.
+define void @f3(i8 *%ptr) {
+; CHECK: f3:
+; CHECK: mvi 0(%r2), 128
+; CHECK: br %r14
+  store i8 128, i8 *%ptr
+  ret void
+}
+
+; Check the high end of the unsigned range.
+define void @f4(i8 *%ptr) {
+; CHECK: f4:
+; CHECK: mvi 0(%r2), 255
+; CHECK: br %r14
+  store i8 255, i8 *%ptr
+  ret void
+}
+
+; Check -1.
+define void @f5(i8 *%ptr) {
+; CHECK: f5:
+; CHECK: mvi 0(%r2), 255
+; CHECK: br %r14
+  store i8 -1, i8 *%ptr
+  ret void
+}
+
+; Check the low end of the signed range.
+define void @f6(i8 *%ptr) {
+; CHECK: f6:
+; CHECK: mvi 0(%r2), 128
+; CHECK: br %r14
+  store i8 -128, i8 *%ptr
+  ret void
+}
+
+; Check the next value down.
+define void @f7(i8 *%ptr) {
+; CHECK: f7:
+; CHECK: mvi 0(%r2), 127
+; CHECK: br %r14
+  store i8 -129, i8 *%ptr
+  ret void
+}
+
+; Check the high end of the MVI range.
+define void @f8(i8 *%src) {
+; CHECK: f8:
+; CHECK: mvi 4095(%r2), 42
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 4095
+  store i8 42, i8 *%ptr
+  ret void
+}
+
+; Check the next byte up, which should use MVIY instead of MVI.
+define void @f9(i8 *%src) {
+; CHECK: f9:
+; CHECK: mviy 4096(%r2), 42
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 4096
+  store i8 42, i8 *%ptr
+  ret void
+}
+
+; Check the high end of the MVIY range.
+define void @f10(i8 *%src) {
+; CHECK: f10:
+; CHECK: mviy 524287(%r2), 42
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 524287
+  store i8 42, i8 *%ptr
+  ret void
+}
+
+; Check the next byte up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define void @f11(i8 *%src) {
+; CHECK: f11:
+; CHECK: agfi %r2, 524288
+; CHECK: mvi 0(%r2), 42
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 524288
+  store i8 42, i8 *%ptr
+  ret void
+}
+
+; Check the high end of the negative MVIY range.
+define void @f12(i8 *%src) {
+; CHECK: f12:
+; CHECK: mviy -1(%r2), 42
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 -1
+  store i8 42, i8 *%ptr
+  ret void
+}
+
+; Check the low end of the MVIY range.
+define void @f13(i8 *%src) {
+; CHECK: f13:
+; CHECK: mviy -524288(%r2), 42
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 -524288
+  store i8 42, i8 *%ptr
+  ret void
+}
+
+; Check the next byte down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define void @f14(i8 *%src) {
+; CHECK: f14:
+; CHECK: agfi %r2, -524289
+; CHECK: mvi 0(%r2), 42
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 -524289
+  store i8 42, i8 *%ptr
+  ret void
+}
+
+; Check that MVI does not allow an index
+define void @f15(i64 %src, i64 %index) {
+; CHECK: f15:
+; CHECK: agr %r2, %r3
+; CHECK: mvi 4095(%r2), 42
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 4095
+  %ptr = inttoptr i64 %add2 to i8 *
+  store i8 42, i8 *%ptr
+  ret void
+}
+
+; Check that MVIY does not allow an index
+define void @f16(i64 %src, i64 %index) {
+; CHECK: f16:
+; CHECK: agr %r2, %r3
+; CHECK: mviy 4096(%r2), 42
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 4096
+  %ptr = inttoptr i64 %add2 to i8 *
+  store i8 42, i8 *%ptr
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/int-const-04.ll b/test/CodeGen/SystemZ/int-const-04.ll
new file mode 100644
index 0000000..41c7306
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-const-04.ll
@@ -0,0 +1,111 @@
+; Test moves of integers to 2-byte memory locations.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check the low end of the unsigned range.
+define void @f1(i16 *%ptr) {
+; CHECK: f1:
+; CHECK: mvhhi 0(%r2), 0
+; CHECK: br %r14
+  store i16 0, i16 *%ptr
+  ret void
+}
+
+; Check the high end of the signed range.
+define void @f2(i16 *%ptr) {
+; CHECK: f2:
+; CHECK: mvhhi 0(%r2), 32767
+; CHECK: br %r14
+  store i16 32767, i16 *%ptr
+  ret void
+}
+
+; Check the next value up, which only fits as an unsigned halfword.
+define void @f3(i16 *%ptr) {
+; CHECK: f3:
+; CHECK: mvhhi 0(%r2), -32768
+; CHECK: br %r14
+  store i16 32768, i16 *%ptr
+  ret void
+}
+
+; Check the high end of the unsigned range.
+define void @f4(i16 *%ptr) {
+; CHECK: f4:
+; CHECK: mvhhi 0(%r2), -1
+; CHECK: br %r14
+  store i16 65535, i16 *%ptr
+  ret void
+}
+
+; Check -1.
+define void @f5(i16 *%ptr) {
+; CHECK: f5:
+; CHECK: mvhhi 0(%r2), -1
+; CHECK: br %r14
+  store i16 -1, i16 *%ptr
+  ret void
+}
+
+; Check the low end of the signed range.
+define void @f6(i16 *%ptr) {
+; CHECK: f6:
+; CHECK: mvhhi 0(%r2), -32768
+; CHECK: br %r14
+  store i16 -32768, i16 *%ptr
+  ret void
+}
+
+; Check the next value down.
+define void @f7(i16 *%ptr) {
+; CHECK: f7:
+; CHECK: mvhhi 0(%r2), 32767
+; CHECK: br %r14
+  store i16 -32769, i16 *%ptr
+  ret void
+}
+
+; Check the high end of the MVHHI range.
+define void @f8(i16 *%a) {
+; CHECK: f8:
+; CHECK: mvhhi 4094(%r2), 42
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%a, i64 2047
+  store i16 42, i16 *%ptr
+  ret void
+}
+
+; Check the next halfword up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define void @f9(i16 *%a) {
+; CHECK: f9:
+; CHECK: aghi %r2, 4096
+; CHECK: mvhhi 0(%r2), 42
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%a, i64 2048
+  store i16 42, i16 *%ptr
+  ret void
+}
+
+; Check negative displacements, which also need separate address logic.
+define void @f10(i16 *%a) {
+; CHECK: f10:
+; CHECK: aghi %r2, -2
+; CHECK: mvhhi 0(%r2), 42
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%a, i64 -1
+  store i16 42, i16 *%ptr
+  ret void
+}
+
+; Check that MVHHI does not allow an index
+define void @f11(i64 %src, i64 %index) {
+; CHECK: f11:
+; CHECK: agr %r2, %r3
+; CHECK: mvhhi 0(%r2), 42
+; CHECK: br %r14
+  %add = add i64 %src, %index
+  %ptr = inttoptr i64 %add to i16 *
+  store i16 42, i16 *%ptr
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/int-const-05.ll b/test/CodeGen/SystemZ/int-const-05.ll
new file mode 100644
index 0000000..b85fd6b
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-const-05.ll
@@ -0,0 +1,102 @@
+; Test moves of integers to 4-byte memory locations.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check moves of zero.
+define void @f1(i32 *%a) {
+; CHECK: f1:
+; CHECK: mvhi 0(%r2), 0
+; CHECK: br %r14
+  store i32 0, i32 *%a
+  ret void
+}
+
+; Check the high end of the signed 16-bit range.
+define void @f2(i32 *%a) {
+; CHECK: f2:
+; CHECK: mvhi 0(%r2), 32767
+; CHECK: br %r14
+  store i32 32767, i32 *%a
+  ret void
+}
+
+; Check the next value up, which can't use MVHI.
+define void @f3(i32 *%a) {
+; CHECK: f3:
+; CHECK-NOT: mvhi
+; CHECK: br %r14
+  store i32 32768, i32 *%a
+  ret void
+}
+
+; Check moves of -1.
+define void @f4(i32 *%a) {
+; CHECK: f4:
+; CHECK: mvhi 0(%r2), -1
+; CHECK: br %r14
+  store i32 -1, i32 *%a
+  ret void
+}
+
+; Check the low end of the MVHI range.
+define void @f5(i32 *%a) {
+; CHECK: f5:
+; CHECK: mvhi 0(%r2), -32768
+; CHECK: br %r14
+  store i32 -32768, i32 *%a
+  ret void
+}
+
+; Check the next value down, which can't use MVHI.
+define void @f6(i32 *%a) {
+; CHECK: f6:
+; CHECK-NOT: mvhi
+; CHECK: br %r14
+  store i32 -32769, i32 *%a
+  ret void
+}
+
+; Check the high end of the MVHI range.
+define void @f7(i32 *%a) {
+; CHECK: f7:
+; CHECK: mvhi 4092(%r2), 42
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%a, i64 1023
+  store i32 42, i32 *%ptr
+  ret void
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define void @f8(i32 *%a) {
+; CHECK: f8:
+; CHECK: aghi %r2, 4096
+; CHECK: mvhi 0(%r2), 42
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%a, i64 1024
+  store i32 42, i32 *%ptr
+  ret void
+}
+
+; Check negative displacements, which also need separate address logic.
+define void @f9(i32 *%a) {
+; CHECK: f9:
+; CHECK: aghi %r2, -4
+; CHECK: mvhi 0(%r2), 42
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%a, i64 -1
+  store i32 42, i32 *%ptr
+  ret void
+}
+
+; Check that MVHI does not allow an index
+define void @f10(i64 %src, i64 %index) {
+; CHECK: f10:
+; CHECK: agr %r2, %r3
+; CHECK: mvhi 0(%r2), 42
+; CHECK: br %r14
+  %add = add i64 %src, %index
+  %ptr = inttoptr i64 %add to i32 *
+  store i32 42, i32 *%ptr
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/int-const-06.ll b/test/CodeGen/SystemZ/int-const-06.ll
new file mode 100644
index 0000000..9f14347
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-const-06.ll
@@ -0,0 +1,102 @@
+; Test moves of integers to 8-byte memory locations.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check moves of zero.
+define void @f1(i64 *%a) {
+; CHECK: f1:
+; CHECK: mvghi 0(%r2), 0
+; CHECK: br %r14
+  store i64 0, i64 *%a
+  ret void
+}
+
+; Check the high end of the signed 16-bit range.
+define void @f2(i64 *%a) {
+; CHECK: f2:
+; CHECK: mvghi 0(%r2), 32767
+; CHECK: br %r14
+  store i64 32767, i64 *%a
+  ret void
+}
+
+; Check the next value up, which can't use MVGHI.
+define void @f3(i64 *%a) {
+; CHECK: f3:
+; CHECK-NOT: mvghi
+; CHECK: br %r14
+  store i64 32768, i64 *%a
+  ret void
+}
+
+; Check moves of -1.
+define void @f4(i64 *%a) {
+; CHECK: f4:
+; CHECK: mvghi 0(%r2), -1
+; CHECK: br %r14
+  store i64 -1, i64 *%a
+  ret void
+}
+
+; Check the low end of the MVGHI range.
+define void @f5(i64 *%a) {
+; CHECK: f5:
+; CHECK: mvghi 0(%r2), -32768
+; CHECK: br %r14
+  store i64 -32768, i64 *%a
+  ret void
+}
+
+; Check the next value down, which can't use MVGHI.
+define void @f6(i64 *%a) {
+; CHECK: f6:
+; CHECK-NOT: mvghi
+; CHECK: br %r14
+  store i64 -32769, i64 *%a
+  ret void
+}
+
+; Check the high end of the MVGHI range.
+define void @f7(i64 *%a) {
+; CHECK: f7:
+; CHECK: mvghi 4088(%r2), 42
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%a, i64 511
+  store i64 42, i64 *%ptr
+  ret void
+}
+
+; Check the next doubleword up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define void @f8(i64 *%a) {
+; CHECK: f8:
+; CHECK: aghi %r2, 4096
+; CHECK: mvghi 0(%r2), 42
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%a, i64 512
+  store i64 42, i64 *%ptr
+  ret void
+}
+
+; Check negative displacements, which also need separate address logic.
+define void @f9(i64 *%a) {
+; CHECK: f9:
+; CHECK: aghi %r2, -8
+; CHECK: mvghi 0(%r2), 42
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%a, i64 -1
+  store i64 42, i64 *%ptr
+  ret void
+}
+
+; Check that MVGHI does not allow an index
+define void @f10(i64 %src, i64 %index) {
+; CHECK: f10:
+; CHECK: agr %r2, %r3
+; CHECK: mvghi 0(%r2), 42
+; CHECK: br %r14
+  %add = add i64 %src, %index
+  %ptr = inttoptr i64 %add to i64 *
+  store i64 42, i64 *%ptr
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/int-conv-01.ll b/test/CodeGen/SystemZ/int-conv-01.ll
new file mode 100644
index 0000000..643ac6a
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-conv-01.ll
@@ -0,0 +1,105 @@
+; Test sign extensions from a byte to an i32.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test register extension, starting with an i32.
+define i32 @f1(i32 %a) {
+; CHECK: f1:
+; CHECK: lbr %r2, %r2
+; CHECK: br %r14
+  %byte = trunc i32 %a to i8
+  %ext = sext i8 %byte to i32
+  ret i32 %ext
+}
+
+; ...and again with an i64.
+define i32 @f2(i64 %a) {
+; CHECK: f2:
+; CHECK: lbr %r2, %r2
+; CHECK: br %r14
+  %byte = trunc i64 %a to i8
+  %ext = sext i8 %byte to i32
+  ret i32 %ext
+}
+
+; Check LB with no displacement.
+define i32 @f3(i8 *%src) {
+; CHECK: f3:
+; CHECK: lb %r2, 0(%r2)
+; CHECK: br %r14
+  %byte = load i8 *%src
+  %ext = sext i8 %byte to i32
+  ret i32 %ext
+}
+
+; Check the high end of the LB range.
+define i32 @f4(i8 *%src) {
+; CHECK: f4:
+; CHECK: lb %r2, 524287(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 524287
+  %byte = load i8 *%ptr
+  %ext = sext i8 %byte to i32
+  ret i32 %ext
+}
+
+; Check the next byte up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i32 @f5(i8 *%src) {
+; CHECK: f5:
+; CHECK: agfi %r2, 524288
+; CHECK: lb %r2, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 524288
+  %byte = load i8 *%ptr
+  %ext = sext i8 %byte to i32
+  ret i32 %ext
+}
+
+; Check the high end of the negative LB range.
+define i32 @f6(i8 *%src) {
+; CHECK: f6:
+; CHECK: lb %r2, -1(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 -1
+  %byte = load i8 *%ptr
+  %ext = sext i8 %byte to i32
+  ret i32 %ext
+}
+
+; Check the low end of the LB range.
+define i32 @f7(i8 *%src) {
+; CHECK: f7:
+; CHECK: lb %r2, -524288(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 -524288
+  %byte = load i8 *%ptr
+  %ext = sext i8 %byte to i32
+  ret i32 %ext
+}
+
+; Check the next byte down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i32 @f8(i8 *%src) {
+; CHECK: f8:
+; CHECK: agfi %r2, -524289
+; CHECK: lb %r2, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 -524289
+  %byte = load i8 *%ptr
+  %ext = sext i8 %byte to i32
+  ret i32 %ext
+}
+
+; Check that LB allows an index
+define i32 @f9(i64 %src, i64 %index) {
+; CHECK: f9:
+; CHECK: lb %r2, 524287(%r3,%r2)
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 524287
+  %ptr = inttoptr i64 %add2 to i8 *
+  %byte = load i8 *%ptr
+  %ext = sext i8 %byte to i32
+  ret i32 %ext
+}
diff --git a/test/CodeGen/SystemZ/int-conv-02.ll b/test/CodeGen/SystemZ/int-conv-02.ll
new file mode 100644
index 0000000..86144d3
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-conv-02.ll
@@ -0,0 +1,114 @@
+; Test zero extensions from a byte to an i32.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test register extension, starting with an i32.
+define i32 @f1(i32 %a) {
+; CHECK: f1:
+; CHECK: llcr %r2, %r2
+; CHECK: br %r14
+  %byte = trunc i32 %a to i8
+  %ext = zext i8 %byte to i32
+  ret i32 %ext
+}
+
+; ...and again with an i64.
+define i32 @f2(i64 %a) {
+; CHECK: f2:
+; CHECK: llcr %r2, %r2
+; CHECK: br %r14
+  %byte = trunc i64 %a to i8
+  %ext = zext i8 %byte to i32
+  ret i32 %ext
+}
+
+; Check ANDs that are equivalent to zero extension.
+define i32 @f3(i32 %a) {
+; CHECK: f3:
+; CHECK: llcr %r2, %r2
+; CHECK: br %r14
+  %ext = and i32 %a, 255
+  ret i32 %ext
+}
+
+; Check LLC with no displacement.
+define i32 @f4(i8 *%src) {
+; CHECK: f4:
+; CHECK: llc %r2, 0(%r2)
+; CHECK: br %r14
+  %byte = load i8 *%src
+  %ext = zext i8 %byte to i32
+  ret i32 %ext
+}
+
+; Check the high end of the LLC range.
+define i32 @f5(i8 *%src) {
+; CHECK: f5:
+; CHECK: llc %r2, 524287(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 524287
+  %byte = load i8 *%ptr
+  %ext = zext i8 %byte to i32
+  ret i32 %ext
+}
+
+; Check the next byte up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i32 @f6(i8 *%src) {
+; CHECK: f6:
+; CHECK: agfi %r2, 524288
+; CHECK: llc %r2, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 524288
+  %byte = load i8 *%ptr
+  %ext = zext i8 %byte to i32
+  ret i32 %ext
+}
+
+; Check the high end of the negative LLC range.
+define i32 @f7(i8 *%src) {
+; CHECK: f7:
+; CHECK: llc %r2, -1(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 -1
+  %byte = load i8 *%ptr
+  %ext = zext i8 %byte to i32
+  ret i32 %ext
+}
+
+; Check the low end of the LLC range.
+define i32 @f8(i8 *%src) {
+; CHECK: f8:
+; CHECK: llc %r2, -524288(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 -524288
+  %byte = load i8 *%ptr
+  %ext = zext i8 %byte to i32
+  ret i32 %ext
+}
+
+; Check the next byte down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i32 @f9(i8 *%src) {
+; CHECK: f9:
+; CHECK: agfi %r2, -524289
+; CHECK: llc %r2, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 -524289
+  %byte = load i8 *%ptr
+  %ext = zext i8 %byte to i32
+  ret i32 %ext
+}
+
+; Check that LLC allows an index
+define i32 @f10(i64 %src, i64 %index) {
+; CHECK: f10:
+; CHECK: llc %r2, 524287(%r3,%r2)
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 524287
+  %ptr = inttoptr i64 %add2 to i8 *
+  %byte = load i8 *%ptr
+  %ext = zext i8 %byte to i32
+  ret i32 %ext
+}
diff --git a/test/CodeGen/SystemZ/int-conv-03.ll b/test/CodeGen/SystemZ/int-conv-03.ll
new file mode 100644
index 0000000..73b8dbb
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-conv-03.ll
@@ -0,0 +1,105 @@
+; Test sign extensions from a byte to an i64.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test register extension, starting with an i32.
+define i64 @f1(i32 %a) {
+; CHECK: f1:
+; CHECK: lgbr %r2, %r2
+; CHECK: br %r14
+  %byte = trunc i32 %a to i8
+  %ext = sext i8 %byte to i64
+  ret i64 %ext
+}
+
+; ...and again with an i64.
+define i64 @f2(i64 %a) {
+; CHECK: f2:
+; CHECK: lgbr %r2, %r2
+; CHECK: br %r14
+  %byte = trunc i64 %a to i8
+  %ext = sext i8 %byte to i64
+  ret i64 %ext
+}
+
+; Check LGB with no displacement.
+define i64 @f3(i8 *%src) {
+; CHECK: f3:
+; CHECK: lgb %r2, 0(%r2)
+; CHECK: br %r14
+  %byte = load i8 *%src
+  %ext = sext i8 %byte to i64
+  ret i64 %ext
+}
+
+; Check the high end of the LGB range.
+define i64 @f4(i8 *%src) {
+; CHECK: f4:
+; CHECK: lgb %r2, 524287(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 524287
+  %byte = load i8 *%ptr
+  %ext = sext i8 %byte to i64
+  ret i64 %ext
+}
+
+; Check the next byte up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f5(i8 *%src) {
+; CHECK: f5:
+; CHECK: agfi %r2, 524288
+; CHECK: lgb %r2, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 524288
+  %byte = load i8 *%ptr
+  %ext = sext i8 %byte to i64
+  ret i64 %ext
+}
+
+; Check the high end of the negative LGB range.
+define i64 @f6(i8 *%src) {
+; CHECK: f6:
+; CHECK: lgb %r2, -1(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 -1
+  %byte = load i8 *%ptr
+  %ext = sext i8 %byte to i64
+  ret i64 %ext
+}
+
+; Check the low end of the LGB range.
+define i64 @f7(i8 *%src) {
+; CHECK: f7:
+; CHECK: lgb %r2, -524288(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 -524288
+  %byte = load i8 *%ptr
+  %ext = sext i8 %byte to i64
+  ret i64 %ext
+}
+
+; Check the next byte down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f8(i8 *%src) {
+; CHECK: f8:
+; CHECK: agfi %r2, -524289
+; CHECK: lgb %r2, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 -524289
+  %byte = load i8 *%ptr
+  %ext = sext i8 %byte to i64
+  ret i64 %ext
+}
+
+; Check that LGB allows an index
+define i64 @f9(i64 %src, i64 %index) {
+; CHECK: f9:
+; CHECK: lgb %r2, 524287(%r3,%r2)
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 524287
+  %ptr = inttoptr i64 %add2 to i8 *
+  %byte = load i8 *%ptr
+  %ext = sext i8 %byte to i64
+  ret i64 %ext
+}
diff --git a/test/CodeGen/SystemZ/int-conv-04.ll b/test/CodeGen/SystemZ/int-conv-04.ll
new file mode 100644
index 0000000..4cec524
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-conv-04.ll
@@ -0,0 +1,114 @@
+; Test zero extensions from a byte to an i64.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test register extension, starting with an i32.
+define i64 @f1(i32 %a) {
+; CHECK: f1:
+; CHECK: llgcr %r2, %r2
+; CHECK: br %r14
+  %byte = trunc i32 %a to i8
+  %ext = zext i8 %byte to i64
+  ret i64 %ext
+}
+
+; ...and again with an i64.
+define i64 @f2(i64 %a) {
+; CHECK: f2:
+; CHECK: llgcr %r2, %r2
+; CHECK: br %r14
+  %byte = trunc i64 %a to i8
+  %ext = zext i8 %byte to i64
+  ret i64 %ext
+}
+
+; Check ANDs that are equivalent to zero extension.
+define i64 @f3(i64 %a) {
+; CHECK: f3:
+; CHECK: llgcr %r2, %r2
+; CHECK: br %r14
+  %ext = and i64 %a, 255
+  ret i64 %ext
+}
+
+; Check LLGC with no displacement.
+define i64 @f4(i8 *%src) {
+; CHECK: f4:
+; CHECK: llgc %r2, 0(%r2)
+; CHECK: br %r14
+  %byte = load i8 *%src
+  %ext = zext i8 %byte to i64
+  ret i64 %ext
+}
+
+; Check the high end of the LLGC range.
+define i64 @f5(i8 *%src) {
+; CHECK: f5:
+; CHECK: llgc %r2, 524287(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 524287
+  %byte = load i8 *%ptr
+  %ext = zext i8 %byte to i64
+  ret i64 %ext
+}
+
+; Check the next byte up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f6(i8 *%src) {
+; CHECK: f6:
+; CHECK: agfi %r2, 524288
+; CHECK: llgc %r2, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 524288
+  %byte = load i8 *%ptr
+  %ext = zext i8 %byte to i64
+  ret i64 %ext
+}
+
+; Check the high end of the negative LLGC range.
+define i64 @f7(i8 *%src) {
+; CHECK: f7:
+; CHECK: llgc %r2, -1(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 -1
+  %byte = load i8 *%ptr
+  %ext = zext i8 %byte to i64
+  ret i64 %ext
+}
+
+; Check the low end of the LLGC range.
+define i64 @f8(i8 *%src) {
+; CHECK: f8:
+; CHECK: llgc %r2, -524288(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 -524288
+  %byte = load i8 *%ptr
+  %ext = zext i8 %byte to i64
+  ret i64 %ext
+}
+
+; Check the next byte down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f9(i8 *%src) {
+; CHECK: f9:
+; CHECK: agfi %r2, -524289
+; CHECK: llgc %r2, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 -524289
+  %byte = load i8 *%ptr
+  %ext = zext i8 %byte to i64
+  ret i64 %ext
+}
+
+; Check that LLGC allows an index
+define i64 @f10(i64 %src, i64 %index) {
+; CHECK: f10:
+; CHECK: llgc %r2, 524287(%r3,%r2)
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 524287
+  %ptr = inttoptr i64 %add2 to i8 *
+  %byte = load i8 *%ptr
+  %ext = zext i8 %byte to i64
+  ret i64 %ext
+}
diff --git a/test/CodeGen/SystemZ/int-conv-05.ll b/test/CodeGen/SystemZ/int-conv-05.ll
new file mode 100644
index 0000000..5358f7d
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-conv-05.ll
@@ -0,0 +1,140 @@
+; Test sign extensions from a halfword to an i32.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test register extension, starting with an i32.
+define i32 @f1(i32 %a) {
+; CHECK: f1:
+; CHECK: lhr %r2, %r2
+; CHECK: br %r14
+  %half = trunc i32 %a to i16
+  %ext = sext i16 %half to i32
+  ret i32 %ext
+}
+
+; ...and again with an i64.
+define i32 @f2(i64 %a) {
+; CHECK: f2:
+; CHECK: lhr %r2, %r2
+; CHECK: br %r14
+  %half = trunc i64 %a to i16
+  %ext = sext i16 %half to i32
+  ret i32 %ext
+}
+
+; Check the low end of the LH range.
+define i32 @f3(i16 *%src) {
+; CHECK: f3:
+; CHECK: lh %r2, 0(%r2)
+; CHECK: br %r14
+  %half = load i16 *%src
+  %ext = sext i16 %half to i32
+  ret i32 %ext
+}
+
+; Check the high end of the LH range.
+define i32 @f4(i16 *%src) {
+; CHECK: f4:
+; CHECK: lh %r2, 4094(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%src, i64 2047
+  %half = load i16 *%ptr
+  %ext = sext i16 %half to i32
+  ret i32 %ext
+}
+
+; Check the next halfword up, which needs LHY rather than LH.
+define i32 @f5(i16 *%src) {
+; CHECK: f5:
+; CHECK: lhy %r2, 4096(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%src, i64 2048
+  %half = load i16 *%ptr
+  %ext = sext i16 %half to i32
+  ret i32 %ext
+}
+
+; Check the high end of the LHY range.
+define i32 @f6(i16 *%src) {
+; CHECK: f6:
+; CHECK: lhy %r2, 524286(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%src, i64 262143
+  %half = load i16 *%ptr
+  %ext = sext i16 %half to i32
+  ret i32 %ext
+}
+
+; Check the next halfword up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i32 @f7(i16 *%src) {
+; CHECK: f7:
+; CHECK: agfi %r2, 524288
+; CHECK: lh %r2, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%src, i64 262144
+  %half = load i16 *%ptr
+  %ext = sext i16 %half to i32
+  ret i32 %ext
+}
+
+; Check the high end of the negative LHY range.
+define i32 @f8(i16 *%src) {
+; CHECK: f8:
+; CHECK: lhy %r2, -2(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%src, i64 -1
+  %half = load i16 *%ptr
+  %ext = sext i16 %half to i32
+  ret i32 %ext
+}
+
+; Check the low end of the LHY range.
+define i32 @f9(i16 *%src) {
+; CHECK: f9:
+; CHECK: lhy %r2, -524288(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%src, i64 -262144
+  %half = load i16 *%ptr
+  %ext = sext i16 %half to i32
+  ret i32 %ext
+}
+
+; Check the next halfword down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i32 @f10(i16 *%src) {
+; CHECK: f10:
+; CHECK: agfi %r2, -524290
+; CHECK: lh %r2, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%src, i64 -262145
+  %half = load i16 *%ptr
+  %ext = sext i16 %half to i32
+  ret i32 %ext
+}
+
+; Check that LH allows an index
+define i32 @f11(i64 %src, i64 %index) {
+; CHECK: f11:
+; CHECK: lh %r2, 4094(%r3,%r2)
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 4094
+  %ptr = inttoptr i64 %add2 to i16 *
+  %half = load i16 *%ptr
+  %ext = sext i16 %half to i32
+  ret i32 %ext
+}
+
+; Check that LHY allows an index
+define i32 @f12(i64 %src, i64 %index) {
+; CHECK: f12:
+; CHECK: lhy %r2, 4096(%r3,%r2)
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 4096
+  %ptr = inttoptr i64 %add2 to i16 *
+  %half = load i16 *%ptr
+  %ext = sext i16 %half to i32
+  ret i32 %ext
+}
diff --git a/test/CodeGen/SystemZ/int-conv-06.ll b/test/CodeGen/SystemZ/int-conv-06.ll
new file mode 100644
index 0000000..64af612d
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-conv-06.ll
@@ -0,0 +1,114 @@
+; Test zero extensions from a halfword to an i32.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test register extension, starting with an i32.
+define i32 @f1(i32 %a) {
+; CHECK: f1:
+; CHECK: llhr %r2, %r2
+; CHECK: br %r14
+  %half = trunc i32 %a to i16
+  %ext = zext i16 %half to i32
+  ret i32 %ext
+}
+
+; ...and again with an i64.
+define i32 @f2(i64 %a) {
+; CHECK: f2:
+; CHECK: llhr %r2, %r2
+; CHECK: br %r14
+  %half = trunc i64 %a to i16
+  %ext = zext i16 %half to i32
+  ret i32 %ext
+}
+
+; Check ANDs that are equivalent to zero extension.
+define i32 @f3(i32 %a) {
+; CHECK: f3:
+; CHECK: llhr %r2, %r2
+; CHECK: br %r14
+  %ext = and i32 %a, 65535
+  ret i32 %ext
+}
+
+; Check LLH with no displacement.
+define i32 @f4(i16 *%src) {
+; CHECK: f4:
+; CHECK: llh %r2, 0(%r2)
+; CHECK: br %r14
+  %half = load i16 *%src
+  %ext = zext i16 %half to i32
+  ret i32 %ext
+}
+
+; Check the high end of the LLH range.
+define i32 @f5(i16 *%src) {
+; CHECK: f5:
+; CHECK: llh %r2, 524286(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%src, i64 262143
+  %half = load i16 *%ptr
+  %ext = zext i16 %half to i32
+  ret i32 %ext
+}
+
+; Check the next halfword up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i32 @f6(i16 *%src) {
+; CHECK: f6:
+; CHECK: agfi %r2, 524288
+; CHECK: llh %r2, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%src, i64 262144
+  %half = load i16 *%ptr
+  %ext = zext i16 %half to i32
+  ret i32 %ext
+}
+
+; Check the high end of the negative LLH range.
+define i32 @f7(i16 *%src) {
+; CHECK: f7:
+; CHECK: llh %r2, -2(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%src, i64 -1
+  %half = load i16 *%ptr
+  %ext = zext i16 %half to i32
+  ret i32 %ext
+}
+
+; Check the low end of the LLH range.
+define i32 @f8(i16 *%src) {
+; CHECK: f8:
+; CHECK: llh %r2, -524288(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%src, i64 -262144
+  %half = load i16 *%ptr
+  %ext = zext i16 %half to i32
+  ret i32 %ext
+}
+
+; Check the next halfword down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i32 @f9(i16 *%src) {
+; CHECK: f9:
+; CHECK: agfi %r2, -524290
+; CHECK: llh %r2, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%src, i64 -262145
+  %half = load i16 *%ptr
+  %ext = zext i16 %half to i32
+  ret i32 %ext
+}
+
+; Check that LLH allows an index
+define i32 @f10(i64 %src, i64 %index) {
+; CHECK: f10:
+; CHECK: llh %r2, 524287(%r3,%r2)
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 524287
+  %ptr = inttoptr i64 %add2 to i16 *
+  %half = load i16 *%ptr
+  %ext = zext i16 %half to i32
+  ret i32 %ext
+}
diff --git a/test/CodeGen/SystemZ/int-conv-07.ll b/test/CodeGen/SystemZ/int-conv-07.ll
new file mode 100644
index 0000000..041caa2
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-conv-07.ll
@@ -0,0 +1,105 @@
+; Test sign extensions from a halfword to an i64.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test register extension, starting with an i64.
+define i64 @f1(i64 %a) {
+; CHECK: f1:
+; CHECK: lghr %r2, %r2
+; CHECK: br %r14
+  %half = trunc i64 %a to i16
+  %ext = sext i16 %half to i64
+  ret i64 %ext
+}
+
+; ...and again with an i32.
+define i64 @f2(i32 %a) {
+; CHECK: f2:
+; CHECK: lghr %r2, %r2
+; CHECK: br %r14
+  %half = trunc i32 %a to i16
+  %ext = sext i16 %half to i64
+  ret i64 %ext
+}
+
+; Check LGH with no displacement.
+define i64 @f3(i16 *%src) {
+; CHECK: f3:
+; CHECK: lgh %r2, 0(%r2)
+; CHECK: br %r14
+  %half = load i16 *%src
+  %ext = sext i16 %half to i64
+  ret i64 %ext
+}
+
+; Check the high end of the LGH range.
+define i64 @f4(i16 *%src) {
+; CHECK: f4:
+; CHECK: lgh %r2, 524286(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%src, i64 262143
+  %half = load i16 *%ptr
+  %ext = sext i16 %half to i64
+  ret i64 %ext
+}
+
+; Check the next halfword up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f5(i16 *%src) {
+; CHECK: f5:
+; CHECK: agfi %r2, 524288
+; CHECK: lgh %r2, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%src, i64 262144
+  %half = load i16 *%ptr
+  %ext = sext i16 %half to i64
+  ret i64 %ext
+}
+
+; Check the high end of the negative LGH range.
+define i64 @f6(i16 *%src) {
+; CHECK: f6:
+; CHECK: lgh %r2, -2(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%src, i64 -1
+  %half = load i16 *%ptr
+  %ext = sext i16 %half to i64
+  ret i64 %ext
+}
+
+; Check the low end of the LGH range.
+define i64 @f7(i16 *%src) {
+; CHECK: f7:
+; CHECK: lgh %r2, -524288(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%src, i64 -262144
+  %half = load i16 *%ptr
+  %ext = sext i16 %half to i64
+  ret i64 %ext
+}
+
+; Check the next halfword down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f8(i16 *%src) {
+; CHECK: f8:
+; CHECK: agfi %r2, -524290
+; CHECK: lgh %r2, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%src, i64 -262145
+  %half = load i16 *%ptr
+  %ext = sext i16 %half to i64
+  ret i64 %ext
+}
+
+; Check that LGH allows an index.
+define i64 @f9(i64 %src, i64 %index) {
+; CHECK: f9:
+; CHECK: lgh %r2, 524287(%r3,%r2)
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 524287
+  %ptr = inttoptr i64 %add2 to i16 *
+  %half = load i16 *%ptr
+  %ext = sext i16 %half to i64
+  ret i64 %ext
+}
diff --git a/test/CodeGen/SystemZ/int-conv-08.ll b/test/CodeGen/SystemZ/int-conv-08.ll
new file mode 100644
index 0000000..3d7f966
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-conv-08.ll
@@ -0,0 +1,114 @@
+; Test zero extensions from a halfword to an i64.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test register extension, starting with an i32.
+define i64 @f1(i32 %a) {
+; CHECK: f1:
+; CHECK: llghr %r2, %r2
+; CHECK: br %r14
+  %half = trunc i32 %a to i16
+  %ext = zext i16 %half to i64
+  ret i64 %ext
+}
+
+; ...and again with an i64.
+define i64 @f2(i64 %a) {
+; CHECK: f2:
+; CHECK: llghr %r2, %r2
+; CHECK: br %r14
+  %half = trunc i64 %a to i16
+  %ext = zext i16 %half to i64
+  ret i64 %ext
+}
+
+; Check ANDs that are equivalent to zero extension.
+define i64 @f3(i64 %a) {
+; CHECK: f3:
+; CHECK: llghr %r2, %r2
+; CHECK: br %r14
+  %ext = and i64 %a, 65535
+  ret i64 %ext
+}
+
+; Check LLGH with no displacement.
+define i64 @f4(i16 *%src) {
+; CHECK: f4:
+; CHECK: llgh %r2, 0(%r2)
+; CHECK: br %r14
+  %half = load i16 *%src
+  %ext = zext i16 %half to i64
+  ret i64 %ext
+}
+
+; Check the high end of the LLGH range.
+define i64 @f5(i16 *%src) {
+; CHECK: f5:
+; CHECK: llgh %r2, 524286(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%src, i64 262143
+  %half = load i16 *%ptr
+  %ext = zext i16 %half to i64
+  ret i64 %ext
+}
+
+; Check the next halfword up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f6(i16 *%src) {
+; CHECK: f6:
+; CHECK: agfi %r2, 524288
+; CHECK: llgh %r2, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%src, i64 262144
+  %half = load i16 *%ptr
+  %ext = zext i16 %half to i64
+  ret i64 %ext
+}
+
+; Check the high end of the negative LLGH range.
+define i64 @f7(i16 *%src) {
+; CHECK: f7:
+; CHECK: llgh %r2, -2(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%src, i64 -1
+  %half = load i16 *%ptr
+  %ext = zext i16 %half to i64
+  ret i64 %ext
+}
+
+; Check the low end of the LLGH range.
+define i64 @f8(i16 *%src) {
+; CHECK: f8:
+; CHECK: llgh %r2, -524288(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%src, i64 -262144
+  %half = load i16 *%ptr
+  %ext = zext i16 %half to i64
+  ret i64 %ext
+}
+
+; Check the next halfword down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f9(i16 *%src) {
+; CHECK: f9:
+; CHECK: agfi %r2, -524290
+; CHECK: llgh %r2, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%src, i64 -262145
+  %half = load i16 *%ptr
+  %ext = zext i16 %half to i64
+  ret i64 %ext
+}
+
+; Check that LLGH allows an index.
+define i64 @f10(i64 %src, i64 %index) {
+; CHECK: f10:
+; CHECK: llgh %r2, 524287(%r3,%r2)
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 524287
+  %ptr = inttoptr i64 %add2 to i16 *
+  %half = load i16 *%ptr
+  %ext = zext i16 %half to i64
+  ret i64 %ext
+}
diff --git a/test/CodeGen/SystemZ/int-conv-09.ll b/test/CodeGen/SystemZ/int-conv-09.ll
new file mode 100644
index 0000000..6e93886
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-conv-09.ll
@@ -0,0 +1,104 @@
+; Test sign extensions from an i32 to an i64.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test register extension, starting with an i32.
+define i64 @f1(i32 %a) {
+; CHECK: f1:
+; CHECK: lgfr %r2, %r2
+; CHECK: br %r14
+  %ext = sext i32 %a to i64
+  ret i64 %ext
+}
+
+; ...and again with an i64.
+define i64 @f2(i64 %a) {
+; CHECK: f2:
+; CHECK: lgfr %r2, %r2
+; CHECK: br %r14
+  %word = trunc i64 %a to i32
+  %ext = sext i32 %word to i64
+  ret i64 %ext
+}
+
+; Check LGF with no displacement.
+define i64 @f3(i32 *%src) {
+; CHECK: f3:
+; CHECK: lgf %r2, 0(%r2)
+; CHECK: br %r14
+  %word = load i32 *%src
+  %ext = sext i32 %word to i64
+  ret i64 %ext
+}
+
+; Check the high end of the LGF range.
+define i64 @f4(i32 *%src) {
+; CHECK: f4:
+; CHECK: lgf %r2, 524284(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 131071
+  %word = load i32 *%ptr
+  %ext = sext i32 %word to i64
+  ret i64 %ext
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f5(i32 *%src) {
+; CHECK: f5:
+; CHECK: agfi %r2, 524288
+; CHECK: lgf %r2, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 131072
+  %word = load i32 *%ptr
+  %ext = sext i32 %word to i64
+  ret i64 %ext
+}
+
+; Check the high end of the negative LGF range.
+define i64 @f6(i32 *%src) {
+; CHECK: f6:
+; CHECK: lgf %r2, -4(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 -1
+  %word = load i32 *%ptr
+  %ext = sext i32 %word to i64
+  ret i64 %ext
+}
+
+; Check the low end of the LGF range.
+define i64 @f7(i32 *%src) {
+; CHECK: f7:
+; CHECK: lgf %r2, -524288(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 -131072
+  %word = load i32 *%ptr
+  %ext = sext i32 %word to i64
+  ret i64 %ext
+}
+
+; Check the next word down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f8(i32 *%src) {
+; CHECK: f8:
+; CHECK: agfi %r2, -524292
+; CHECK: lgf %r2, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 -131073
+  %word = load i32 *%ptr
+  %ext = sext i32 %word to i64
+  ret i64 %ext
+}
+
+; Check that LGF allows an index.
+define i64 @f9(i64 %src, i64 %index) {
+; CHECK: f9:
+; CHECK: lgf %r2, 524287(%r3,%r2)
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 524287
+  %ptr = inttoptr i64 %add2 to i32 *
+  %word = load i32 *%ptr
+  %ext = sext i32 %word to i64
+  ret i64 %ext
+}
diff --git a/test/CodeGen/SystemZ/int-conv-10.ll b/test/CodeGen/SystemZ/int-conv-10.ll
new file mode 100644
index 0000000..918bc1d
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-conv-10.ll
@@ -0,0 +1,113 @@
+; Test zero extensions from an i32 to an i64.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test register extension, starting with an i32.
+define i64 @f1(i32 %a) {
+; CHECK: f1:
+; CHECK: llgfr %r2, %r2
+; CHECK: br %r14
+  %ext = zext i32 %a to i64
+  ret i64 %ext
+}
+
+; ...and again with an i64.
+define i64 @f2(i64 %a) {
+; CHECK: f2:
+; CHECK: llgfr %r2, %r2
+; CHECK: br %r14
+  %word = trunc i64 %a to i32
+  %ext = zext i32 %word to i64
+  ret i64 %ext
+}
+
+; Check ANDs that are equivalent to zero extension.
+define i64 @f3(i64 %a) {
+; CHECK: f3:
+; CHECK: llgfr %r2, %r2
+; CHECK: br %r14
+  %ext = and i64 %a, 4294967295
+  ret i64 %ext
+}
+
+; Check LLGF with no displacement.
+define i64 @f4(i32 *%src) {
+; CHECK: f4:
+; CHECK: llgf %r2, 0(%r2)
+; CHECK: br %r14
+  %word = load i32 *%src
+  %ext = zext i32 %word to i64
+  ret i64 %ext
+}
+
+; Check the high end of the LLGF range.
+define i64 @f5(i32 *%src) {
+; CHECK: f5:
+; CHECK: llgf %r2, 524284(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 131071
+  %word = load i32 *%ptr
+  %ext = zext i32 %word to i64
+  ret i64 %ext
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f6(i32 *%src) {
+; CHECK: f6:
+; CHECK: agfi %r2, 524288
+; CHECK: llgf %r2, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 131072
+  %word = load i32 *%ptr
+  %ext = zext i32 %word to i64
+  ret i64 %ext
+}
+
+; Check the high end of the negative LLGF range.
+define i64 @f7(i32 *%src) {
+; CHECK: f7:
+; CHECK: llgf %r2, -4(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 -1
+  %word = load i32 *%ptr
+  %ext = zext i32 %word to i64
+  ret i64 %ext
+}
+
+; Check the low end of the LLGF range.
+define i64 @f8(i32 *%src) {
+; CHECK: f8:
+; CHECK: llgf %r2, -524288(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 -131072
+  %word = load i32 *%ptr
+  %ext = zext i32 %word to i64
+  ret i64 %ext
+}
+
+; Check the next word down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f9(i32 *%src) {
+; CHECK: f9:
+; CHECK: agfi %r2, -524292
+; CHECK: llgf %r2, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 -131073
+  %word = load i32 *%ptr
+  %ext = zext i32 %word to i64
+  ret i64 %ext
+}
+
+; Check that LLGF allows an index.
+define i64 @f10(i64 %src, i64 %index) {
+; CHECK: f10:
+; CHECK: llgf %r2, 524287(%r3,%r2)
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 524287
+  %ptr = inttoptr i64 %add2 to i32 *
+  %word = load i32 *%ptr
+  %ext = zext i32 %word to i64
+  ret i64 %ext
+}
diff --git a/test/CodeGen/SystemZ/int-div-01.ll b/test/CodeGen/SystemZ/int-div-01.ll
new file mode 100644
index 0000000..492ece9
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-div-01.ll
@@ -0,0 +1,190 @@
+; Test 32-bit signed division and remainder.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test register division.  The result is in the second of the two registers.
+define void @f1(i32 *%dest, i32 %a, i32 %b) {
+; CHECK: f1:
+; CHECK: lgfr %r1, %r3
+; CHECK: dsgfr %r0, %r4
+; CHECK: st %r1, 0(%r2)
+; CHECK: br %r14
+  %div = sdiv i32 %a, %b
+  store i32 %div, i32 *%dest
+  ret void
+}
+
+; Test register remainder.  The result is in the first of the two registers.
+define void @f2(i32 *%dest, i32 %a, i32 %b) {
+; CHECK: f2:
+; CHECK: lgfr %r1, %r3
+; CHECK: dsgfr %r0, %r4
+; CHECK: st %r0, 0(%r2)
+; CHECK: br %r14
+  %rem = srem i32 %a, %b
+  store i32 %rem, i32 *%dest
+  ret void
+}
+
+; Test that division and remainder use a single instruction.
+define i32 @f3(i32 %dummy, i32 %a, i32 %b) {
+; CHECK: f3:
+; CHECK-NOT: %r2
+; CHECK: lgfr %r3, %r3
+; CHECK-NOT: %r2
+; CHECK: dsgfr %r2, %r4
+; CHECK-NOT: dsgfr
+; CHECK: or %r2, %r3
+; CHECK: br %r14
+  %div = sdiv i32 %a, %b
+  %rem = srem i32 %a, %b
+  %or = or i32 %rem, %div
+  ret i32 %or
+}
+
+; Check that the sign extension of the dividend is elided when the argument
+; is already sign-extended.
+define i32 @f4(i32 %dummy, i32 signext %a, i32 %b) {
+; CHECK: f4:
+; CHECK-NOT: {{%r[234]}}
+; CHECK: dsgfr %r2, %r4
+; CHECK-NOT: dsgfr
+; CHECK: or %r2, %r3
+; CHECK: br %r14
+  %div = sdiv i32 %a, %b
+  %rem = srem i32 %a, %b
+  %or = or i32 %rem, %div
+  ret i32 %or
+}
+
+; Test that memory dividends are loaded using sign extension (LGF).
+define i32 @f5(i32 %dummy, i32 *%src, i32 %b) {
+; CHECK: f5:
+; CHECK-NOT: %r2
+; CHECK: lgf %r3, 0(%r3)
+; CHECK-NOT: %r2
+; CHECK: dsgfr %r2, %r4
+; CHECK-NOT: dsgfr
+; CHECK: or %r2, %r3
+; CHECK: br %r14
+  %a = load i32 *%src
+  %div = sdiv i32 %a, %b
+  %rem = srem i32 %a, %b
+  %or = or i32 %rem, %div
+  ret i32 %or
+}
+
+; Test memory division with no displacement.
+define void @f6(i32 *%dest, i32 %a, i32 *%src) {
+; CHECK: f6:
+; CHECK: lgfr %r1, %r3
+; CHECK: dsgf %r0, 0(%r4)
+; CHECK: st %r1, 0(%r2)
+; CHECK: br %r14
+  %b = load i32 *%src
+  %div = sdiv i32 %a, %b
+  store i32 %div, i32 *%dest
+  ret void
+}
+
+; Test memory remainder with no displacement.
+define void @f7(i32 *%dest, i32 %a, i32 *%src) {
+; CHECK: f7:
+; CHECK: lgfr %r1, %r3
+; CHECK: dsgf %r0, 0(%r4)
+; CHECK: st %r0, 0(%r2)
+; CHECK: br %r14
+  %b = load i32 *%src
+  %rem = srem i32 %a, %b
+  store i32 %rem, i32 *%dest
+  ret void
+}
+
+; Test both memory division and memory remainder.
+define i32 @f8(i32 %dummy, i32 %a, i32 *%src) {
+; CHECK: f8:
+; CHECK-NOT: %r2
+; CHECK: lgfr %r3, %r3
+; CHECK-NOT: %r2
+; CHECK: dsgf %r2, 0(%r4)
+; CHECK-NOT: {{dsgf|dsgfr}}
+; CHECK: or %r2, %r3
+; CHECK: br %r14
+  %b = load i32 *%src
+  %div = sdiv i32 %a, %b
+  %rem = srem i32 %a, %b
+  %or = or i32 %rem, %div
+  ret i32 %or
+}
+
+; Check the high end of the DSGF range.
+define i32 @f9(i32 %dummy, i32 %a, i32 *%src) {
+; CHECK: f9:
+; CHECK: dsgf %r2, 524284(%r4)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 131071
+  %b = load i32 *%ptr
+  %rem = srem i32 %a, %b
+  ret i32 %rem
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i32 @f10(i32 %dummy, i32 %a, i32 *%src) {
+; CHECK: f10:
+; CHECK: agfi %r4, 524288
+; CHECK: dsgf %r2, 0(%r4)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 131072
+  %b = load i32 *%ptr
+  %rem = srem i32 %a, %b
+  ret i32 %rem
+}
+
+; Check the high end of the negative aligned DSGF range.
+define i32 @f11(i32 %dummy, i32 %a, i32 *%src) {
+; CHECK: f11:
+; CHECK: dsgf %r2, -4(%r4)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 -1
+  %b = load i32 *%ptr
+  %rem = srem i32 %a, %b
+  ret i32 %rem
+}
+
+; Check the low end of the DSGF range.
+define i32 @f12(i32 %dummy, i32 %a, i32 *%src) {
+; CHECK: f12:
+; CHECK: dsgf %r2, -524288(%r4)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 -131072
+  %b = load i32 *%ptr
+  %rem = srem i32 %a, %b
+  ret i32 %rem
+}
+
+; Check the next word down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i32 @f13(i32 %dummy, i32 %a, i32 *%src) {
+; CHECK: f13:
+; CHECK: agfi %r4, -524292
+; CHECK: dsgf %r2, 0(%r4)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 -131073
+  %b = load i32 *%ptr
+  %rem = srem i32 %a, %b
+  ret i32 %rem
+}
+
+; Check that DSGF allows an index.
+define i32 @f14(i32 %dummy, i32 %a, i64 %src, i64 %index) {
+; CHECK: f14:
+; CHECK: dsgf %r2, 524287(%r5,%r4)
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 524287
+  %ptr = inttoptr i64 %add2 to i32 *
+  %b = load i32 *%ptr
+  %rem = srem i32 %a, %b
+  ret i32 %rem
+}
diff --git a/test/CodeGen/SystemZ/int-div-02.ll b/test/CodeGen/SystemZ/int-div-02.ll
new file mode 100644
index 0000000..7954384
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-div-02.ll
@@ -0,0 +1,166 @@
+; Test 32-bit unsigned division and remainder.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test register division.  The result is in the second of the two registers.
+define void @f1(i32 %dummy, i32 %a, i32 %b, i32 *%dest) {
+; CHECK: f1:
+; CHECK-NOT: %r3
+; CHECK: {{llill|lhi}} %r2, 0
+; CHECK-NOT: %r3
+; CHECK: dlr %r2, %r4
+; CHECK: st %r3, 0(%r5)
+; CHECK: br %r14
+  %div = udiv i32 %a, %b
+  store i32 %div, i32 *%dest
+  ret void
+}
+
+; Test register remainder.  The result is in the first of the two registers.
+define void @f2(i32 %dummy, i32 %a, i32 %b, i32 *%dest) {
+; CHECK: f2:
+; CHECK-NOT: %r3
+; CHECK: {{llill|lhi}} %r2, 0
+; CHECK-NOT: %r3
+; CHECK: dlr %r2, %r4
+; CHECK: st %r2, 0(%r5)
+; CHECK: br %r14
+  %rem = urem i32 %a, %b
+  store i32 %rem, i32 *%dest
+  ret void
+}
+
+; Test that division and remainder use a single instruction.
+define i32 @f3(i32 %dummy1, i32 %a, i32 %b) {
+; CHECK: f3:
+; CHECK-NOT: %r3
+; CHECK: {{llill|lhi}} %r2, 0
+; CHECK-NOT: %r3
+; CHECK: dlr %r2, %r4
+; CHECK-NOT: dlr
+; CHECK: or %r2, %r3
+; CHECK: br %r14
+  %div = udiv i32 %a, %b
+  %rem = urem i32 %a, %b
+  %or = or i32 %rem, %div
+  ret i32 %or
+}
+
+; Test memory division with no displacement.
+define void @f4(i32 %dummy, i32 %a, i32 *%src, i32 *%dest) {
+; CHECK: f4:
+; CHECK-NOT: %r3
+; CHECK: {{llill|lhi}} %r2, 0
+; CHECK-NOT: %r3
+; CHECK: dl %r2, 0(%r4)
+; CHECK: st %r3, 0(%r5)
+; CHECK: br %r14
+  %b = load i32 *%src
+  %div = udiv i32 %a, %b
+  store i32 %div, i32 *%dest
+  ret void
+}
+
+; Test memory remainder with no displacement.
+define void @f5(i32 %dummy, i32 %a, i32 *%src, i32 *%dest) {
+; CHECK: f5:
+; CHECK-NOT: %r3
+; CHECK: {{llill|lhi}} %r2, 0
+; CHECK-NOT: %r3
+; CHECK: dl %r2, 0(%r4)
+; CHECK: st %r2, 0(%r5)
+; CHECK: br %r14
+  %b = load i32 *%src
+  %rem = urem i32 %a, %b
+  store i32 %rem, i32 *%dest
+  ret void
+}
+
+; Test both memory division and memory remainder.
+define i32 @f6(i32 %dummy, i32 %a, i32 *%src) {
+; CHECK: f6:
+; CHECK-NOT: %r3
+; CHECK: {{llill|lhi}} %r2, 0
+; CHECK-NOT: %r3
+; CHECK: dl %r2, 0(%r4)
+; CHECK-NOT: {{dl|dlr}}
+; CHECK: or %r2, %r3
+; CHECK: br %r14
+  %b = load i32 *%src
+  %div = udiv i32 %a, %b
+  %rem = urem i32 %a, %b
+  %or = or i32 %rem, %div
+  ret i32 %or
+}
+
+; Check the high end of the DL range.
+define i32 @f7(i32 %dummy, i32 %a, i32 *%src) {
+; CHECK: f7:
+; CHECK: dl %r2, 524284(%r4)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 131071
+  %b = load i32 *%ptr
+  %rem = urem i32 %a, %b
+  ret i32 %rem
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i32 @f8(i32 %dummy, i32 %a, i32 *%src) {
+; CHECK: f8:
+; CHECK: agfi %r4, 524288
+; CHECK: dl %r2, 0(%r4)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 131072
+  %b = load i32 *%ptr
+  %rem = urem i32 %a, %b
+  ret i32 %rem
+}
+
+; Check the high end of the negative aligned DL range.
+define i32 @f9(i32 %dummy, i32 %a, i32 *%src) {
+; CHECK: f9:
+; CHECK: dl %r2, -4(%r4)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 -1
+  %b = load i32 *%ptr
+  %rem = urem i32 %a, %b
+  ret i32 %rem
+}
+
+; Check the low end of the DL range.
+define i32 @f10(i32 %dummy, i32 %a, i32 *%src) {
+; CHECK: f10:
+; CHECK: dl %r2, -524288(%r4)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 -131072
+  %b = load i32 *%ptr
+  %rem = urem i32 %a, %b
+  ret i32 %rem
+}
+
+; Check the next word down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i32 @f11(i32 %dummy, i32 %a, i32 *%src) {
+; CHECK: f11:
+; CHECK: agfi %r4, -524292
+; CHECK: dl %r2, 0(%r4)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 -131073
+  %b = load i32 *%ptr
+  %rem = urem i32 %a, %b
+  ret i32 %rem
+}
+
+; Check that DL allows an index.
+define i32 @f12(i32 %dummy, i32 %a, i64 %src, i64 %index) {
+; CHECK: f12:
+; CHECK: dl %r2, 524287(%r5,%r4)
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 524287
+  %ptr = inttoptr i64 %add2 to i32 *
+  %b = load i32 *%ptr
+  %rem = urem i32 %a, %b
+  ret i32 %rem
+}
diff --git a/test/CodeGen/SystemZ/int-div-03.ll b/test/CodeGen/SystemZ/int-div-03.ll
new file mode 100644
index 0000000..b950f2b
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-div-03.ll
@@ -0,0 +1,189 @@
+; Test 64-bit signed division and remainder when the divisor is
+; a sign-extended i32.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test register division.  The result is in the second of the two registers.
+define void @f1(i64 %dummy, i64 %a, i32 %b, i64 *%dest) {
+; CHECK: f1:
+; CHECK-NOT: {{%r[234]}}
+; CHECK: dsgfr %r2, %r4
+; CHECK: stg %r3, 0(%r5)
+; CHECK: br %r14
+  %bext = sext i32 %b to i64
+  %div = sdiv i64 %a, %bext
+  store i64 %div, i64 *%dest
+  ret void
+}
+
+; Test register remainder.  The result is in the first of the two registers.
+define void @f2(i64 %dummy, i64 %a, i32 %b, i64 *%dest) {
+; CHECK: f2:
+; CHECK-NOT: {{%r[234]}}
+; CHECK: dsgfr %r2, %r4
+; CHECK: stg %r2, 0(%r5)
+; CHECK: br %r14
+  %bext = sext i32 %b to i64
+  %rem = srem i64 %a, %bext
+  store i64 %rem, i64 *%dest
+  ret void
+}
+
+; Test that division and remainder use a single instruction.
+define i64 @f3(i64 %dummy, i64 %a, i32 %b) {
+; CHECK: f3:
+; CHECK-NOT: {{%r[234]}}
+; CHECK: dsgfr %r2, %r4
+; CHECK: ogr %r2, %r3
+; CHECK: br %r14
+  %bext = sext i32 %b to i64
+  %div = sdiv i64 %a, %bext
+  %rem = srem i64 %a, %bext
+  %or = or i64 %rem, %div
+  ret i64 %or
+}
+
+; Test register division when the divisor is zero-extended rather than
+; sign-extended.  We can't use dsgfr here.
+define void @f4(i64 %dummy, i64 %a, i32 %b, i64 *%dest) {
+; CHECK: f4:
+; CHECK-NOT: dsgfr
+; CHECK: br %r14
+  %bext = zext i32 %b to i64
+  %div = sdiv i64 %a, %bext
+  store i64 %div, i64 *%dest
+  ret void
+}
+
+; ...likewise remainder.
+define void @f5(i64 %dummy, i64 %a, i32 %b, i64 *%dest) {
+; CHECK: f5:
+; CHECK-NOT: dsgfr
+; CHECK: br %r14
+  %bext = zext i32 %b to i64
+  %rem = srem i64 %a, %bext
+  store i64 %rem, i64 *%dest
+  ret void
+}
+
+; Test memory division with no displacement.
+define void @f6(i64 %dummy, i64 %a, i32 *%src, i64 *%dest) {
+; CHECK: f6:
+; CHECK-NOT: {{%r[234]}}
+; CHECK: dsgf %r2, 0(%r4)
+; CHECK: stg %r3, 0(%r5)
+; CHECK: br %r14
+  %b = load i32 *%src
+  %bext = sext i32 %b to i64
+  %div = sdiv i64 %a, %bext
+  store i64 %div, i64 *%dest
+  ret void
+}
+
+; Test memory remainder with no displacement.
+define void @f7(i64 %dummy, i64 %a, i32 *%src, i64 *%dest) {
+; CHECK: f7:
+; CHECK-NOT: {{%r[234]}}
+; CHECK: dsgf %r2, 0(%r4)
+; CHECK: stg %r2, 0(%r5)
+; CHECK: br %r14
+  %b = load i32 *%src
+  %bext = sext i32 %b to i64
+  %rem = srem i64 %a, %bext
+  store i64 %rem, i64 *%dest
+  ret void
+}
+
+; Test both memory division and memory remainder.
+define i64 @f8(i64 %dummy, i64 %a, i32 *%src) {
+; CHECK: f8:
+; CHECK-NOT: {{%r[234]}}
+; CHECK: dsgf %r2, 0(%r4)
+; CHECK-NOT: {{dsgf|dsgfr}}
+; CHECK: ogr %r2, %r3
+; CHECK: br %r14
+  %b = load i32 *%src
+  %bext = sext i32 %b to i64
+  %div = sdiv i64 %a, %bext
+  %rem = srem i64 %a, %bext
+  %or = or i64 %rem, %div
+  ret i64 %or
+}
+
+; Check the high end of the DSGF range.
+define i64 @f9(i64 %dummy, i64 %a, i32 *%src) {
+; CHECK: f9:
+; CHECK: dsgf %r2, 524284(%r4)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 131071
+  %b = load i32 *%ptr
+  %bext = sext i32 %b to i64
+  %rem = srem i64 %a, %bext
+  ret i64 %rem
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f10(i64 %dummy, i64 %a, i32 *%src) {
+; CHECK: f10:
+; CHECK: agfi %r4, 524288
+; CHECK: dsgf %r2, 0(%r4)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 131072
+  %b = load i32 *%ptr
+  %bext = sext i32 %b to i64
+  %rem = srem i64 %a, %bext
+  ret i64 %rem
+}
+
+; Check the high end of the negative aligned DSGF range.
+define i64 @f11(i64 %dummy, i64 %a, i32 *%src) {
+; CHECK: f11:
+; CHECK: dsgf %r2, -4(%r4)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 -1
+  %b = load i32 *%ptr
+  %bext = sext i32 %b to i64
+  %rem = srem i64 %a, %bext
+  ret i64 %rem
+}
+
+; Check the low end of the DSGF range.
+define i64 @f12(i64 %dummy, i64 %a, i32 *%src) {
+; CHECK: f12:
+; CHECK: dsgf %r2, -524288(%r4)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 -131072
+  %b = load i32 *%ptr
+  %bext = sext i32 %b to i64
+  %rem = srem i64 %a, %bext
+  ret i64 %rem
+}
+
+; Check the next word down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f13(i64 %dummy, i64 %a, i32 *%src) {
+; CHECK: f13:
+; CHECK: agfi %r4, -524292
+; CHECK: dsgf %r2, 0(%r4)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 -131073
+  %b = load i32 *%ptr
+  %bext = sext i32 %b to i64
+  %rem = srem i64 %a, %bext
+  ret i64 %rem
+}
+
+; Check that DSGF allows an index.
+define i64 @f14(i64 %dummy, i64 %a, i64 %src, i64 %index) {
+; CHECK: f14:
+; CHECK: dsgf %r2, 524287(%r5,%r4)
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 524287
+  %ptr = inttoptr i64 %add2 to i32 *
+  %b = load i32 *%ptr
+  %bext = sext i32 %b to i64
+  %rem = srem i64 %a, %bext
+  ret i64 %rem
+}
diff --git a/test/CodeGen/SystemZ/int-div-04.ll b/test/CodeGen/SystemZ/int-div-04.ll
new file mode 100644
index 0000000..3f72be9
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-div-04.ll
@@ -0,0 +1,154 @@
+; Test 64-bit signed division and remainder.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test register division.  The result is in the second of the two registers.
+define void @f1(i64 %dummy, i64 %a, i64 %b, i64 *%dest) {
+; CHECK: f1:
+; CHECK-NOT: {{%r[234]}}
+; CHECK: dsgr %r2, %r4
+; CHECK: stg %r3, 0(%r5)
+; CHECK: br %r14
+  %div = sdiv i64 %a, %b
+  store i64 %div, i64 *%dest
+  ret void
+}
+
+; Test register remainder.  The result is in the first of the two registers.
+define void @f2(i64 %dummy, i64 %a, i64 %b, i64 *%dest) {
+; CHECK: f2:
+; CHECK-NOT: {{%r[234]}}
+; CHECK: dsgr %r2, %r4
+; CHECK: stg %r2, 0(%r5)
+; CHECK: br %r14
+  %rem = srem i64 %a, %b
+  store i64 %rem, i64 *%dest
+  ret void
+}
+
+; Test that division and remainder use a single instruction.
+define i64 @f3(i64 %dummy1, i64 %a, i64 %b) {
+; CHECK: f3:
+; CHECK-NOT: {{%r[234]}}
+; CHECK: dsgr %r2, %r4
+; CHECK-NOT: dsgr
+; CHECK: ogr %r2, %r3
+; CHECK: br %r14
+  %div = sdiv i64 %a, %b
+  %rem = srem i64 %a, %b
+  %or = or i64 %rem, %div
+  ret i64 %or
+}
+
+; Test memory division with no displacement.
+define void @f4(i64 %dummy, i64 %a, i64 *%src, i64 *%dest) {
+; CHECK: f4:
+; CHECK-NOT: {{%r[234]}}
+; CHECK: dsg %r2, 0(%r4)
+; CHECK: stg %r3, 0(%r5)
+; CHECK: br %r14
+  %b = load i64 *%src
+  %div = sdiv i64 %a, %b
+  store i64 %div, i64 *%dest
+  ret void
+}
+
+; Test memory remainder with no displacement.
+define void @f5(i64 %dummy, i64 %a, i64 *%src, i64 *%dest) {
+; CHECK: f5:
+; CHECK-NOT: {{%r[234]}}
+; CHECK: dsg %r2, 0(%r4)
+; CHECK: stg %r2, 0(%r5)
+; CHECK: br %r14
+  %b = load i64 *%src
+  %rem = srem i64 %a, %b
+  store i64 %rem, i64 *%dest
+  ret void
+}
+
+; Test both memory division and memory remainder.
+define i64 @f6(i64 %dummy, i64 %a, i64 *%src) {
+; CHECK: f6:
+; CHECK-NOT: {{%r[234]}}
+; CHECK: dsg %r2, 0(%r4)
+; CHECK-NOT: {{dsg|dsgr}}
+; CHECK: ogr %r2, %r3
+; CHECK: br %r14
+  %b = load i64 *%src
+  %div = sdiv i64 %a, %b
+  %rem = srem i64 %a, %b
+  %or = or i64 %rem, %div
+  ret i64 %or
+}
+
+; Check the high end of the DSG range.
+define i64 @f7(i64 %dummy, i64 %a, i64 *%src) {
+; CHECK: f7:
+; CHECK: dsg %r2, 524280(%r4)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 65535
+  %b = load i64 *%ptr
+  %rem = srem i64 %a, %b
+  ret i64 %rem
+}
+
+; Check the next doubleword up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f8(i64 %dummy, i64 %a, i64 *%src) {
+; CHECK: f8:
+; CHECK: agfi %r4, 524288
+; CHECK: dsg %r2, 0(%r4)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 65536
+  %b = load i64 *%ptr
+  %rem = srem i64 %a, %b
+  ret i64 %rem
+}
+
+; Check the high end of the negative aligned DSG range.
+define i64 @f9(i64 %dummy, i64 %a, i64 *%src) {
+; CHECK: f9:
+; CHECK: dsg %r2, -8(%r4)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 -1
+  %b = load i64 *%ptr
+  %rem = srem i64 %a, %b
+  ret i64 %rem
+}
+
+; Check the low end of the DSG range.
+define i64 @f10(i64 %dummy, i64 %a, i64 *%src) {
+; CHECK: f10:
+; CHECK: dsg %r2, -524288(%r4)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 -65536
+  %b = load i64 *%ptr
+  %rem = srem i64 %a, %b
+  ret i64 %rem
+}
+
+; Check the next doubleword down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f11(i64 %dummy, i64 %a, i64 *%src) {
+; CHECK: f11:
+; CHECK: agfi %r4, -524296
+; CHECK: dsg %r2, 0(%r4)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 -65537
+  %b = load i64 *%ptr
+  %rem = srem i64 %a, %b
+  ret i64 %rem
+}
+
+; Check that DSG allows an index.
+define i64 @f12(i64 %dummy, i64 %a, i64 %src, i64 %index) {
+; CHECK: f12:
+; CHECK: dsg %r2, 524287(%r5,%r4)
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 524287
+  %ptr = inttoptr i64 %add2 to i64 *
+  %b = load i64 *%ptr
+  %rem = srem i64 %a, %b
+  ret i64 %rem
+}
diff --git a/test/CodeGen/SystemZ/int-div-05.ll b/test/CodeGen/SystemZ/int-div-05.ll
new file mode 100644
index 0000000..04f622b
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-div-05.ll
@@ -0,0 +1,166 @@
+; Test 64-bit unsigned division and remainder.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test register division.  The result is in the second of the two registers.
+define void @f1(i64 %dummy, i64 %a, i64 %b, i64 *%dest) {
+; CHECK: f1:
+; CHECK-NOT: %r3
+; CHECK: {{llill|lghi}} %r2, 0
+; CHECK-NOT: %r3
+; CHECK: dlgr %r2, %r4
+; CHECK: stg %r3, 0(%r5)
+; CHECK: br %r14
+  %div = udiv i64 %a, %b
+  store i64 %div, i64 *%dest
+  ret void
+}
+
+; Test register remainder.  The result is in the first of the two registers.
+define void @f2(i64 %dummy, i64 %a, i64 %b, i64 *%dest) {
+; CHECK: f2:
+; CHECK-NOT: %r3
+; CHECK: {{llill|lghi}} %r2, 0
+; CHECK-NOT: %r3
+; CHECK: dlgr %r2, %r4
+; CHECK: stg %r2, 0(%r5)
+; CHECK: br %r14
+  %rem = urem i64 %a, %b
+  store i64 %rem, i64 *%dest
+  ret void
+}
+
+; Test that division and remainder use a single instruction.
+define i64 @f3(i64 %dummy1, i64 %a, i64 %b) {
+; CHECK: f3:
+; CHECK-NOT: %r3
+; CHECK: {{llill|lghi}} %r2, 0
+; CHECK-NOT: %r3
+; CHECK: dlgr %r2, %r4
+; CHECK-NOT: dlgr
+; CHECK: ogr %r2, %r3
+; CHECK: br %r14
+  %div = udiv i64 %a, %b
+  %rem = urem i64 %a, %b
+  %or = or i64 %rem, %div
+  ret i64 %or
+}
+
+; Test memory division with no displacement.
+define void @f4(i64 %dummy, i64 %a, i64 *%src, i64 *%dest) {
+; CHECK: f4:
+; CHECK-NOT: %r3
+; CHECK: {{llill|lghi}} %r2, 0
+; CHECK-NOT: %r3
+; CHECK: dlg %r2, 0(%r4)
+; CHECK: stg %r3, 0(%r5)
+; CHECK: br %r14
+  %b = load i64 *%src
+  %div = udiv i64 %a, %b
+  store i64 %div, i64 *%dest
+  ret void
+}
+
+; Test memory remainder with no displacement.
+define void @f5(i64 %dummy, i64 %a, i64 *%src, i64 *%dest) {
+; CHECK: f5:
+; CHECK-NOT: %r3
+; CHECK: {{llill|lghi}} %r2, 0
+; CHECK-NOT: %r3
+; CHECK: dlg %r2, 0(%r4)
+; CHECK: stg %r2, 0(%r5)
+; CHECK: br %r14
+  %b = load i64 *%src
+  %rem = urem i64 %a, %b
+  store i64 %rem, i64 *%dest
+  ret void
+}
+
+; Test both memory division and memory remainder.
+define i64 @f6(i64 %dummy, i64 %a, i64 *%src) {
+; CHECK: f6:
+; CHECK-NOT: %r3
+; CHECK: {{llill|lghi}} %r2, 0
+; CHECK-NOT: %r3
+; CHECK: dlg %r2, 0(%r4)
+; CHECK-NOT: {{dlg|dlgr}}
+; CHECK: ogr %r2, %r3
+; CHECK: br %r14
+  %b = load i64 *%src
+  %div = udiv i64 %a, %b
+  %rem = urem i64 %a, %b
+  %or = or i64 %rem, %div
+  ret i64 %or
+}
+
+; Check the high end of the DLG range.
+define i64 @f7(i64 %dummy, i64 %a, i64 *%src) {
+; CHECK: f7:
+; CHECK: dlg %r2, 524280(%r4)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 65535
+  %b = load i64 *%ptr
+  %rem = urem i64 %a, %b
+  ret i64 %rem
+}
+
+; Check the next doubleword up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f8(i64 %dummy, i64 %a, i64 *%src) {
+; CHECK: f8:
+; CHECK: agfi %r4, 524288
+; CHECK: dlg %r2, 0(%r4)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 65536
+  %b = load i64 *%ptr
+  %rem = urem i64 %a, %b
+  ret i64 %rem
+}
+
+; Check the high end of the negative aligned DLG range.
+define i64 @f9(i64 %dummy, i64 %a, i64 *%src) {
+; CHECK: f9:
+; CHECK: dlg %r2, -8(%r4)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 -1
+  %b = load i64 *%ptr
+  %rem = urem i64 %a, %b
+  ret i64 %rem
+}
+
+; Check the low end of the DLG range.
+define i64 @f10(i64 %dummy, i64 %a, i64 *%src) {
+; CHECK: f10:
+; CHECK: dlg %r2, -524288(%r4)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 -65536
+  %b = load i64 *%ptr
+  %rem = urem i64 %a, %b
+  ret i64 %rem
+}
+
+; Check the next doubleword down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f11(i64 %dummy, i64 %a, i64 *%src) {
+; CHECK: f11:
+; CHECK: agfi %r4, -524296
+; CHECK: dlg %r2, 0(%r4)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 -65537
+  %b = load i64 *%ptr
+  %rem = urem i64 %a, %b
+  ret i64 %rem
+}
+
+; Check that DLG allows an index.
+define i64 @f12(i64 %dummy, i64 %a, i64 %src, i64 %index) {
+; CHECK: f12:
+; CHECK: dlg %r2, 524287(%r5,%r4)
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 524287
+  %ptr = inttoptr i64 %add2 to i64 *
+  %b = load i64 *%ptr
+  %rem = urem i64 %a, %b
+  ret i64 %rem
+}
diff --git a/test/CodeGen/SystemZ/int-move-01.ll b/test/CodeGen/SystemZ/int-move-01.ll
new file mode 100644
index 0000000..ae890ad
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-move-01.ll
@@ -0,0 +1,35 @@
+; Test moves between GPRs.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test 8-bit moves, which should get promoted to i32.
+define i8 @f1(i8 %a, i8 %b) {
+; CHECK: f1:
+; CHECK: lr %r2, %r3
+; CHECK: br %r14
+  ret i8 %b
+}
+
+; Test 16-bit moves, which again should get promoted to i32.
+define i16 @f2(i16 %a, i16 %b) {
+; CHECK: f2:
+; CHECK: lr %r2, %r3
+; CHECK: br %r14
+  ret i16 %b
+}
+
+; Test 32-bit moves.
+define i32 @f3(i32 %a, i32 %b) {
+; CHECK: f3:
+; CHECK: lr %r2, %r3
+; CHECK: br %r14
+  ret i32 %b
+}
+
+; Test 64-bit moves.
+define i64 @f4(i64 %a, i64 %b) {
+; CHECK: f4:
+; CHECK: lgr %r2, %r3
+; CHECK: br %r14
+  ret i64 %b
+}
diff --git a/test/CodeGen/SystemZ/int-move-02.ll b/test/CodeGen/SystemZ/int-move-02.ll
new file mode 100644
index 0000000..467e22d
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-move-02.ll
@@ -0,0 +1,110 @@
+; Test 32-bit GPR loads.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check the low end of the L range.
+define i32 @f1(i32 *%src) {
+; CHECK: f1:
+; CHECK: l %r2, 0(%r2)
+; CHECK: br %r14
+  %val = load i32 *%src
+  ret i32 %val
+}
+
+; Check the high end of the aligned L range.
+define i32 @f2(i32 *%src) {
+; CHECK: f2:
+; CHECK: l %r2, 4092(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 1023
+  %val = load i32 *%ptr
+  ret i32 %val
+}
+
+; Check the next word up, which should use LY instead of L.
+define i32 @f3(i32 *%src) {
+; CHECK: f3:
+; CHECK: ly %r2, 4096(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 1024
+  %val = load i32 *%ptr
+  ret i32 %val
+}
+
+; Check the high end of the aligned LY range.
+define i32 @f4(i32 *%src) {
+; CHECK: f4:
+; CHECK: ly %r2, 524284(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 131071
+  %val = load i32 *%ptr
+  ret i32 %val
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i32 @f5(i32 *%src) {
+; CHECK: f5:
+; CHECK: agfi %r2, 524288
+; CHECK: l %r2, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 131072
+  %val = load i32 *%ptr
+  ret i32 %val
+}
+
+; Check the high end of the negative aligned LY range.
+define i32 @f6(i32 *%src) {
+; CHECK: f6:
+; CHECK: ly %r2, -4(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 -1
+  %val = load i32 *%ptr
+  ret i32 %val
+}
+
+; Check the low end of the LY range.
+define i32 @f7(i32 *%src) {
+; CHECK: f7:
+; CHECK: ly %r2, -524288(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 -131072
+  %val = load i32 *%ptr
+  ret i32 %val
+}
+
+; Check the next word down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i32 @f8(i32 *%src) {
+; CHECK: f8:
+; CHECK: agfi %r2, -524292
+; CHECK: l %r2, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 -131073
+  %val = load i32 *%ptr
+  ret i32 %val
+}
+
+; Check that L allows an index.
+define i32 @f9(i64 %src, i64 %index) {
+; CHECK: f9:
+; CHECK: l %r2, 4095({{%r3,%r2|%r2,%r3}})
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 4095
+  %ptr = inttoptr i64 %add2 to i32 *
+  %val = load i32 *%ptr
+  ret i32 %val
+}
+
+; Check that LY allows an index.
+define i32 @f10(i64 %src, i64 %index) {
+; CHECK: f10:
+; CHECK: ly %r2, 4096({{%r3,%r2|%r2,%r3}})
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 4096
+  %ptr = inttoptr i64 %add2 to i32 *
+  %val = load i32 *%ptr
+  ret i32 %val
+}
diff --git a/test/CodeGen/SystemZ/int-move-03.ll b/test/CodeGen/SystemZ/int-move-03.ll
new file mode 100644
index 0000000..97c70a2
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-move-03.ll
@@ -0,0 +1,78 @@
+; Test 64-bit GPR loads.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check LG with no displacement.
+define i64 @f1(i64 *%src) {
+; CHECK: f1:
+; CHECK: lg %r2, 0(%r2)
+; CHECK: br %r14
+  %val = load i64 *%src
+  ret i64 %val
+}
+
+; Check the high end of the aligned LG range.
+define i64 @f2(i64 *%src) {
+; CHECK: f2:
+; CHECK: lg %r2, 524280(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 65535
+  %val = load i64 *%ptr
+  ret i64 %val
+}
+
+; Check the next doubleword up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f3(i64 *%src) {
+; CHECK: f3:
+; CHECK: agfi %r2, 524288
+; CHECK: lg %r2, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 65536
+  %val = load i64 *%ptr
+  ret i64 %val
+}
+
+; Check the high end of the negative aligned LG range.
+define i64 @f4(i64 *%src) {
+; CHECK: f4:
+; CHECK: lg %r2, -8(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 -1
+  %val = load i64 *%ptr
+  ret i64 %val
+}
+
+; Check the low end of the LG range.
+define i64 @f5(i64 *%src) {
+; CHECK: f5:
+; CHECK: lg %r2, -524288(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 -65536
+  %val = load i64 *%ptr
+  ret i64 %val
+}
+
+; Check the next doubleword down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f6(i64 *%src) {
+; CHECK: f6:
+; CHECK: agfi %r2, -524296
+; CHECK: lg %r2, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 -65537
+  %val = load i64 *%ptr
+  ret i64 %val
+}
+
+; Check that LG allows an index.
+define i64 @f7(i64 %src, i64 %index) {
+; CHECK: f7:
+; CHECK: lg %r2, 524287({{%r3,%r2|%r2,%r3}})
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 524287
+  %ptr = inttoptr i64 %add2 to i64 *
+  %val = load i64 *%ptr
+  ret i64 %val
+}
diff --git a/test/CodeGen/SystemZ/int-move-04.ll b/test/CodeGen/SystemZ/int-move-04.ll
new file mode 100644
index 0000000..9736657
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-move-04.ll
@@ -0,0 +1,130 @@
+; Test 8-bit GPR stores.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test an i8 store, which should get converted into an i32 truncation.
+define void @f1(i8 *%dst, i8 %val) {
+; CHECK: f1:
+; CHECK: stc %r3, 0(%r2)
+; CHECK: br %r14
+  store i8 %val, i8 *%dst
+  ret void
+}
+
+; Test an i32 truncating store.
+define void @f2(i8 *%dst, i32 %val) {
+; CHECK: f2:
+; CHECK: stc %r3, 0(%r2)
+; CHECK: br %r14
+  %trunc = trunc i32 %val to i8
+  store i8 %trunc, i8 *%dst
+  ret void
+}
+
+; Test an i64 truncating store.
+define void @f3(i8 *%dst, i64 %val) {
+; CHECK: f3:
+; CHECK: stc %r3, 0(%r2)
+; CHECK: br %r14
+  %trunc = trunc i64 %val to i8
+  store i8 %trunc, i8 *%dst
+  ret void
+}
+
+; Check the high end of the STC range.
+define void @f4(i8 *%dst, i8 %val) {
+; CHECK: f4:
+; CHECK: stc %r3, 4095(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%dst, i64 4095
+  store i8 %val, i8 *%ptr
+  ret void
+}
+
+; Check the next byte up, which should use STCY instead of STC.
+define void @f5(i8 *%dst, i8 %val) {
+; CHECK: f5:
+; CHECK: stcy %r3, 4096(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%dst, i64 4096
+  store i8 %val, i8 *%ptr
+  ret void
+}
+
+; Check the high end of the STCY range.
+define void @f6(i8 *%dst, i8 %val) {
+; CHECK: f6:
+; CHECK: stcy %r3, 524287(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%dst, i64 524287
+  store i8 %val, i8 *%ptr
+  ret void
+}
+
+; Check the next byte up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define void @f7(i8 *%dst, i8 %val) {
+; CHECK: f7:
+; CHECK: agfi %r2, 524288
+; CHECK: stc %r3, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%dst, i64 524288
+  store i8 %val, i8 *%ptr
+  ret void
+}
+
+; Check the high end of the negative STCY range.
+define void @f8(i8 *%dst, i8 %val) {
+; CHECK: f8:
+; CHECK: stcy %r3, -1(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%dst, i64 -1
+  store i8 %val, i8 *%ptr
+  ret void
+}
+
+; Check the low end of the STCY range.
+define void @f9(i8 *%dst, i8 %val) {
+; CHECK: f9:
+; CHECK: stcy %r3, -524288(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%dst, i64 -524288
+  store i8 %val, i8 *%ptr
+  ret void
+}
+
+; Check the next byte down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define void @f10(i8 *%dst, i8 %val) {
+; CHECK: f10:
+; CHECK: agfi %r2, -524289
+; CHECK: stc %r3, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%dst, i64 -524289
+  store i8 %val, i8 *%ptr
+  ret void
+}
+
+; Check that STC allows an index (either base/index operand order is fine).
+define void @f11(i64 %dst, i64 %index, i8 %val) {
+; CHECK: f11:
+; CHECK: stc %r4, 4095({{%r3,%r2|%r2,%r3}})
+; CHECK: br %r14
+  %add1 = add i64 %dst, %index
+  %add2 = add i64 %add1, 4095
+  %ptr = inttoptr i64 %add2 to i8 *
+  store i8 %val, i8 *%ptr
+  ret void
+}
+
+; Check that STCY allows an index (either base/index operand order is fine).
+define void @f12(i64 %dst, i64 %index, i8 %val) {
+; CHECK: f12:
+; CHECK: stcy %r4, 4096({{%r3,%r2|%r2,%r3}})
+; CHECK: br %r14
+  %add1 = add i64 %dst, %index
+  %add2 = add i64 %add1, 4096
+  %ptr = inttoptr i64 %add2 to i8 *
+  store i8 %val, i8 *%ptr
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/int-move-05.ll b/test/CodeGen/SystemZ/int-move-05.ll
new file mode 100644
index 0000000..f61477e
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-move-05.ll
@@ -0,0 +1,130 @@
+; Test 16-bit GPR stores.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test an i16 store, which should get converted into an i32 truncation.
+define void @f1(i16 *%dst, i16 %val) {
+; CHECK: f1:
+; CHECK: sth %r3, 0(%r2)
+; CHECK: br %r14
+  store i16 %val, i16 *%dst
+  ret void
+}
+
+; Test an i32 truncating store.
+define void @f2(i16 *%dst, i32 %val) {
+; CHECK: f2:
+; CHECK: sth %r3, 0(%r2)
+; CHECK: br %r14
+  %trunc = trunc i32 %val to i16
+  store i16 %trunc, i16 *%dst
+  ret void
+}
+
+; Test an i64 truncating store.
+define void @f3(i16 *%dst, i64 %val) {
+; CHECK: f3:
+; CHECK: sth %r3, 0(%r2)
+; CHECK: br %r14
+  %trunc = trunc i64 %val to i16
+  store i16 %trunc, i16 *%dst
+  ret void
+}
+
+; Check the high end of the STH range.
+define void @f4(i16 *%dst, i16 %val) {
+; CHECK: f4:
+; CHECK: sth %r3, 4094(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%dst, i64 2047
+  store i16 %val, i16 *%ptr
+  ret void
+}
+
+; Check the next halfword up, which should use STHY instead of STH.
+define void @f5(i16 *%dst, i16 %val) {
+; CHECK: f5:
+; CHECK: sthy %r3, 4096(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%dst, i64 2048
+  store i16 %val, i16 *%ptr
+  ret void
+}
+
+; Check the high end of the aligned STHY range.
+define void @f6(i16 *%dst, i16 %val) {
+; CHECK: f6:
+; CHECK: sthy %r3, 524286(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%dst, i64 262143
+  store i16 %val, i16 *%ptr
+  ret void
+}
+
+; Check the next halfword up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define void @f7(i16 *%dst, i16 %val) {
+; CHECK: f7:
+; CHECK: agfi %r2, 524288
+; CHECK: sth %r3, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%dst, i64 262144
+  store i16 %val, i16 *%ptr
+  ret void
+}
+
+; Check the high end of the negative aligned STHY range.
+define void @f8(i16 *%dst, i16 %val) {
+; CHECK: f8:
+; CHECK: sthy %r3, -2(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%dst, i64 -1
+  store i16 %val, i16 *%ptr
+  ret void
+}
+
+; Check the low end of the STHY range.
+define void @f9(i16 *%dst, i16 %val) {
+; CHECK: f9:
+; CHECK: sthy %r3, -524288(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%dst, i64 -262144
+  store i16 %val, i16 *%ptr
+  ret void
+}
+
+; Check the next halfword down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define void @f10(i16 *%dst, i16 %val) {
+; CHECK: f10:
+; CHECK: agfi %r2, -524290
+; CHECK: sth %r3, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%dst, i64 -262145
+  store i16 %val, i16 *%ptr
+  ret void
+}
+
+; Check that STH allows an index.
+define void @f11(i64 %dst, i64 %index, i16 %val) {
+; CHECK: f11:
+; CHECK: sth %r4, 4094({{%r3,%r2|%r2,%r3}})
+; CHECK: br %r14
+  %add1 = add i64 %dst, %index
+  %add2 = add i64 %add1, 4094
+  %ptr = inttoptr i64 %add2 to i16 *
+  store i16 %val, i16 *%ptr
+  ret void
+}
+
+; Check that STHY allows an index.
+define void @f12(i64 %dst, i64 %index, i16 %val) {
+; CHECK: f12:
+; CHECK: sthy %r4, 4096({{%r3,%r2|%r2,%r3}})
+; CHECK: br %r14
+  %add1 = add i64 %dst, %index
+  %add2 = add i64 %add1, 4096
+  %ptr = inttoptr i64 %add2 to i16 *
+  store i16 %val, i16 *%ptr
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/int-move-06.ll b/test/CodeGen/SystemZ/int-move-06.ll
new file mode 100644
index 0000000..5b35a32
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-move-06.ll
@@ -0,0 +1,120 @@
+; Test 32-bit GPR stores.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test an i32 store.
+define void @f1(i32 *%dst, i32 %val) {
+; CHECK: f1:
+; CHECK: st %r3, 0(%r2)
+; CHECK: br %r14
+  store i32 %val, i32 *%dst
+  ret void
+}
+
+; Test a truncating i64 store.
+define void @f2(i32 *%dst, i64 %val) {
+; CHECK: f2:
+; CHECK: st %r3, 0(%r2)
+; CHECK: br %r14
+  %word = trunc i64 %val to i32
+  store i32 %word, i32 *%dst
+  ret void
+}
+
+; Check the high end of the aligned ST range.
+define void @f3(i32 *%dst, i32 %val) {
+; CHECK: f3:
+; CHECK: st %r3, 4092(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%dst, i64 1023
+  store i32 %val, i32 *%ptr
+  ret void
+}
+
+; Check the next word up, which should use STY instead of ST.
+define void @f4(i32 *%dst, i32 %val) {
+; CHECK: f4:
+; CHECK: sty %r3, 4096(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%dst, i64 1024
+  store i32 %val, i32 *%ptr
+  ret void
+}
+
+; Check the high end of the aligned STY range.
+define void @f5(i32 *%dst, i32 %val) {
+; CHECK: f5:
+; CHECK: sty %r3, 524284(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%dst, i64 131071
+  store i32 %val, i32 *%ptr
+  ret void
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define void @f6(i32 *%dst, i32 %val) {
+; CHECK: f6:
+; CHECK: agfi %r2, 524288
+; CHECK: st %r3, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%dst, i64 131072
+  store i32 %val, i32 *%ptr
+  ret void
+}
+
+; Check the high end of the negative aligned STY range.
+define void @f7(i32 *%dst, i32 %val) {
+; CHECK: f7:
+; CHECK: sty %r3, -4(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%dst, i64 -1
+  store i32 %val, i32 *%ptr
+  ret void
+}
+
+; Check the low end of the STY range.
+define void @f8(i32 *%dst, i32 %val) {
+; CHECK: f8:
+; CHECK: sty %r3, -524288(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%dst, i64 -131072
+  store i32 %val, i32 *%ptr
+  ret void
+}
+
+; Check the next word down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define void @f9(i32 *%dst, i32 %val) {
+; CHECK: f9:
+; CHECK: agfi %r2, -524292
+; CHECK: st %r3, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%dst, i64 -131073
+  store i32 %val, i32 *%ptr
+  ret void
+}
+
+; Check that ST allows an index (either base/index operand order is fine).
+define void @f10(i64 %dst, i64 %index, i32 %val) {
+; CHECK: f10:
+; CHECK: st %r4, 4095({{%r3,%r2|%r2,%r3}})
+; CHECK: br %r14
+  %add1 = add i64 %dst, %index
+  %add2 = add i64 %add1, 4095
+  %ptr = inttoptr i64 %add2 to i32 *
+  store i32 %val, i32 *%ptr
+  ret void
+}
+
+; Check that STY allows an index (either base/index operand order is fine).
+define void @f11(i64 %dst, i64 %index, i32 %val) {
+; CHECK: f11:
+; CHECK: sty %r4, 4096({{%r3,%r2|%r2,%r3}})
+; CHECK: br %r14
+  %add1 = add i64 %dst, %index
+  %add2 = add i64 %add1, 4096
+  %ptr = inttoptr i64 %add2 to i32 *
+  store i32 %val, i32 *%ptr
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/int-move-07.ll b/test/CodeGen/SystemZ/int-move-07.ll
new file mode 100644
index 0000000..ab21ab0
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-move-07.ll
@@ -0,0 +1,78 @@
+; Test 64-bit GPR stores.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check STG with no displacement.
+define void @f1(i64 *%dst, i64 %val) {
+; CHECK: f1:
+; CHECK: stg %r3, 0(%r2)
+; CHECK: br %r14
+  store i64 %val, i64 *%dst
+  ret void
+}
+
+; Check the high end of the aligned STG range.
+define void @f2(i64 *%dst, i64 %val) {
+; CHECK: f2:
+; CHECK: stg %r3, 524280(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%dst, i64 65535
+  store i64 %val, i64 *%ptr
+  ret void
+}
+
+; Check the next doubleword up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define void @f3(i64 *%dst, i64 %val) {
+; CHECK: f3:
+; CHECK: agfi %r2, 524288
+; CHECK: stg %r3, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%dst, i64 65536
+  store i64 %val, i64 *%ptr
+  ret void
+}
+
+; Check the high end of the negative aligned STG range.
+define void @f4(i64 *%dst, i64 %val) {
+; CHECK: f4:
+; CHECK: stg %r3, -8(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%dst, i64 -1
+  store i64 %val, i64 *%ptr
+  ret void
+}
+
+; Check the low end of the STG range.
+define void @f5(i64 *%dst, i64 %val) {
+; CHECK: f5:
+; CHECK: stg %r3, -524288(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%dst, i64 -65536
+  store i64 %val, i64 *%ptr
+  ret void
+}
+
+; Check the next doubleword down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define void @f6(i64 *%dst, i64 %val) {
+; CHECK: f6:
+; CHECK: agfi %r2, -524296
+; CHECK: stg %r3, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%dst, i64 -65537
+  store i64 %val, i64 *%ptr
+  ret void
+}
+
+; Check that STG allows an index.
+define void @f7(i64 %dst, i64 %index, i64 %val) {
+; CHECK: f7:
+; CHECK: stg %r4, 524287({{%r3,%r2|%r2,%r3}})
+; CHECK: br %r14
+  %add1 = add i64 %dst, %index
+  %add2 = add i64 %add1, 524287
+  %ptr = inttoptr i64 %add2 to i64 *
+  store i64 %val, i64 *%ptr
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/int-move-08.ll b/test/CodeGen/SystemZ/int-move-08.ll
new file mode 100644
index 0000000..5640fec
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-move-08.ll
@@ -0,0 +1,49 @@
+; Test 32-bit GPR accesses to a PC-relative location.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+@gsrc16 = global i16 1
+@gsrc32 = global i32 1
+@gdst16 = global i16 2
+@gdst32 = global i32 2
+
+; Check sign-extending loads from i16.
+define i32 @f1() {
+; CHECK: f1:
+; CHECK: lhrl %r2, gsrc16
+; CHECK: br %r14
+  %val = load i16 *@gsrc16
+  %ext = sext i16 %val to i32
+  ret i32 %ext
+}
+
+; Check zero-extending loads from i16.
+define i32 @f2() {
+; CHECK: f2:
+; CHECK: llhrl %r2, gsrc16
+; CHECK: br %r14
+  %val = load i16 *@gsrc16
+  %ext = zext i16 %val to i32
+  ret i32 %ext
+}
+
+; Check truncating 16-bit stores.
+define void @f3(i32 %val) {
+; CHECK: f3:
+; CHECK: sthrl %r2, gdst16
+; CHECK: br %r14
+  %half = trunc i32 %val to i16
+  store i16 %half, i16 *@gdst16
+  ret void
+}
+
+; Check plain loads and stores.
+define void @f4() {
+; CHECK: f4:
+; CHECK: lrl %r0, gsrc32
+; CHECK: strl %r0, gdst32
+; CHECK: br %r14
+  %val = load i32 *@gsrc32
+  store i32 %val, i32 *@gdst32
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/int-move-09.ll b/test/CodeGen/SystemZ/int-move-09.ll
new file mode 100644
index 0000000..a7a8c82
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-move-09.ll
@@ -0,0 +1,81 @@
+; Test 64-bit GPR accesses to a PC-relative location.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+@gsrc16 = global i16 1
+@gsrc32 = global i32 1
+@gsrc64 = global i64 1
+@gdst16 = global i16 2
+@gdst32 = global i32 2
+@gdst64 = global i64 2
+
+; Check sign-extending loads from i16.
+define i64 @f1() {
+; CHECK: f1:
+; CHECK: lghrl %r2, gsrc16
+; CHECK: br %r14
+  %val = load i16 *@gsrc16
+  %ext = sext i16 %val to i64
+  ret i64 %ext
+}
+
+; Check zero-extending loads from i16.
+define i64 @f2() {
+; CHECK: f2:
+; CHECK: llghrl %r2, gsrc16
+; CHECK: br %r14
+  %val = load i16 *@gsrc16
+  %ext = zext i16 %val to i64
+  ret i64 %ext
+}
+
+; Check sign-extending loads from i32.
+define i64 @f3() {
+; CHECK: f3:
+; CHECK: lgfrl %r2, gsrc32
+; CHECK: br %r14
+  %val = load i32 *@gsrc32
+  %ext = sext i32 %val to i64
+  ret i64 %ext
+}
+
+; Check zero-extending loads from i32.
+define i64 @f4() {
+; CHECK: f4:
+; CHECK: llgfrl %r2, gsrc32
+; CHECK: br %r14
+  %val = load i32 *@gsrc32
+  %ext = zext i32 %val to i64
+  ret i64 %ext
+}
+
+; Check truncating 16-bit stores.
+define void @f5(i64 %val) {
+; CHECK: f5:
+; CHECK: sthrl %r2, gdst16
+; CHECK: br %r14
+  %half = trunc i64 %val to i16
+  store i16 %half, i16 *@gdst16
+  ret void
+}
+
+; Check truncating 32-bit stores.
+define void @f6(i64 %val) {
+; CHECK: f6:
+; CHECK: strl %r2, gdst32
+; CHECK: br %r14
+  %word = trunc i64 %val to i32
+  store i32 %word, i32 *@gdst32
+  ret void
+}
+
+; Check plain loads and stores.
+define void @f7() {
+; CHECK: f7:
+; CHECK: lgrl %r0, gsrc64
+; CHECK: stgrl %r0, gdst64
+; CHECK: br %r14
+  %val = load i64 *@gsrc64
+  store i64 %val, i64 *@gdst64
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/int-mul-01.ll b/test/CodeGen/SystemZ/int-mul-01.ll
new file mode 100644
index 0000000..e1246e2
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-mul-01.ll
@@ -0,0 +1,131 @@
+; Test 32-bit multiplication in which the second operand is a sign-extended
+; i16 memory value.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check the low end of the MH range.
+define i32 @f1(i32 %lhs, i16 *%src) {
+; CHECK: f1:
+; CHECK: mh %r2, 0(%r3)
+; CHECK: br %r14
+  %half = load i16 *%src
+  %rhs = sext i16 %half to i32
+  %res = mul i32 %lhs, %rhs
+  ret i32 %res
+}
+
+; Check the high end of the aligned MH range.
+define i32 @f2(i32 %lhs, i16 *%src) {
+; CHECK: f2:
+; CHECK: mh %r2, 4094(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%src, i64 2047
+  %half = load i16 *%ptr
+  %rhs = sext i16 %half to i32
+  %res = mul i32 %lhs, %rhs
+  ret i32 %res
+}
+
+; Check the next halfword up, which should use MHY instead of MH.
+define i32 @f3(i32 %lhs, i16 *%src) {
+; CHECK: f3:
+; CHECK: mhy %r2, 4096(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%src, i64 2048
+  %half = load i16 *%ptr
+  %rhs = sext i16 %half to i32
+  %res = mul i32 %lhs, %rhs
+  ret i32 %res
+}
+
+; Check the high end of the aligned MHY range.
+define i32 @f4(i32 %lhs, i16 *%src) {
+; CHECK: f4:
+; CHECK: mhy %r2, 524286(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%src, i64 262143
+  %half = load i16 *%ptr
+  %rhs = sext i16 %half to i32
+  %res = mul i32 %lhs, %rhs
+  ret i32 %res
+}
+
+; Check the next halfword up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i32 @f5(i32 %lhs, i16 *%src) {
+; CHECK: f5:
+; CHECK: agfi %r3, 524288
+; CHECK: mh %r2, 0(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%src, i64 262144
+  %half = load i16 *%ptr
+  %rhs = sext i16 %half to i32
+  %res = mul i32 %lhs, %rhs
+  ret i32 %res
+}
+
+; Check the high end of the negative aligned MHY range.
+define i32 @f6(i32 %lhs, i16 *%src) {
+; CHECK: f6:
+; CHECK: mhy %r2, -2(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%src, i64 -1
+  %half = load i16 *%ptr
+  %rhs = sext i16 %half to i32
+  %res = mul i32 %lhs, %rhs
+  ret i32 %res
+}
+
+; Check the low end of the MHY range.
+define i32 @f7(i32 %lhs, i16 *%src) {
+; CHECK: f7:
+; CHECK: mhy %r2, -524288(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%src, i64 -262144
+  %half = load i16 *%ptr
+  %rhs = sext i16 %half to i32
+  %res = mul i32 %lhs, %rhs
+  ret i32 %res
+}
+
+; Check the next halfword down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i32 @f8(i32 %lhs, i16 *%src) {
+; CHECK: f8:
+; CHECK: agfi %r3, -524290
+; CHECK: mh %r2, 0(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i16 *%src, i64 -262145
+  %half = load i16 *%ptr
+  %rhs = sext i16 %half to i32
+  %res = mul i32 %lhs, %rhs
+  ret i32 %res
+}
+
+; Check that MH allows an index.
+define i32 @f9(i32 %lhs, i64 %src, i64 %index) {
+; CHECK: f9:
+; CHECK: mh %r2, 4094({{%r4,%r3|%r3,%r4}})
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 4094
+  %ptr = inttoptr i64 %add2 to i16 *
+  %half = load i16 *%ptr
+  %rhs = sext i16 %half to i32
+  %res = mul i32 %lhs, %rhs
+  ret i32 %res
+}
+
+; Check that MHY allows an index.
+define i32 @f10(i32 %lhs, i64 %src, i64 %index) {
+; CHECK: f10:
+; CHECK: mhy %r2, 4096({{%r4,%r3|%r3,%r4}})
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 4096
+  %ptr = inttoptr i64 %add2 to i16 *
+  %half = load i16 *%ptr
+  %rhs = sext i16 %half to i32
+  %res = mul i32 %lhs, %rhs
+  ret i32 %res
+}
diff --git a/test/CodeGen/SystemZ/int-mul-02.ll b/test/CodeGen/SystemZ/int-mul-02.ll
new file mode 100644
index 0000000..d39c4dd
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-mul-02.ll
@@ -0,0 +1,129 @@
+; Test 32-bit multiplication in which the second operand is variable.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check MSR.
+define i32 @f1(i32 %a, i32 %b) {
+; CHECK: f1:
+; CHECK: msr %r2, %r3
+; CHECK: br %r14
+  %mul = mul i32 %a, %b
+  ret i32 %mul
+}
+
+; Check the low end of the MS range.
+define i32 @f2(i32 %a, i32 *%src) {
+; CHECK: f2:
+; CHECK: ms %r2, 0(%r3)
+; CHECK: br %r14
+  %b = load i32 *%src
+  %mul = mul i32 %a, %b
+  ret i32 %mul
+}
+
+; Check the high end of the aligned MS range.
+define i32 @f3(i32 %a, i32 *%src) {
+; CHECK: f3:
+; CHECK: ms %r2, 4092(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 1023
+  %b = load i32 *%ptr
+  %mul = mul i32 %a, %b
+  ret i32 %mul
+}
+
+; Check the next word up, which should use MSY instead of MS.
+define i32 @f4(i32 %a, i32 *%src) {
+; CHECK: f4:
+; CHECK: msy %r2, 4096(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 1024
+  %b = load i32 *%ptr
+  %mul = mul i32 %a, %b
+  ret i32 %mul
+}
+
+; Check the high end of the aligned MSY range.
+define i32 @f5(i32 %a, i32 *%src) {
+; CHECK: f5:
+; CHECK: msy %r2, 524284(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 131071
+  %b = load i32 *%ptr
+  %mul = mul i32 %a, %b
+  ret i32 %mul
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i32 @f6(i32 %a, i32 *%src) {
+; CHECK: f6:
+; CHECK: agfi %r3, 524288
+; CHECK: ms %r2, 0(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 131072
+  %b = load i32 *%ptr
+  %mul = mul i32 %a, %b
+  ret i32 %mul
+}
+
+; Check the high end of the negative aligned MSY range.
+define i32 @f7(i32 %a, i32 *%src) {
+; CHECK: f7:
+; CHECK: msy %r2, -4(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 -1
+  %b = load i32 *%ptr
+  %mul = mul i32 %a, %b
+  ret i32 %mul
+}
+
+; Check the low end of the MSY range.
+define i32 @f8(i32 %a, i32 *%src) {
+; CHECK: f8:
+; CHECK: msy %r2, -524288(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 -131072
+  %b = load i32 *%ptr
+  %mul = mul i32 %a, %b
+  ret i32 %mul
+}
+
+; Check the next word down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i32 @f9(i32 %a, i32 *%src) {
+; CHECK: f9:
+; CHECK: agfi %r3, -524292
+; CHECK: ms %r2, 0(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 -131073
+  %b = load i32 *%ptr
+  %mul = mul i32 %a, %b
+  ret i32 %mul
+}
+
+; Check that MS allows an index.
+define i32 @f10(i32 %a, i64 %src, i64 %index) {
+; CHECK: f10:
+; CHECK: ms %r2, 4092({{%r4,%r3|%r3,%r4}})
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 4092
+  %ptr = inttoptr i64 %add2 to i32 *
+  %b = load i32 *%ptr
+  %mul = mul i32 %a, %b
+  ret i32 %mul
+}
+
+; Check that MSY allows an index.
+define i32 @f11(i32 %a, i64 %src, i64 %index) {
+; CHECK: f11:
+; CHECK: msy %r2, 4096({{%r4,%r3|%r3,%r4}})
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 4096
+  %ptr = inttoptr i64 %add2 to i32 *
+  %b = load i32 *%ptr
+  %mul = mul i32 %a, %b
+  ret i32 %mul
+}
diff --git a/test/CodeGen/SystemZ/int-mul-03.ll b/test/CodeGen/SystemZ/int-mul-03.ll
new file mode 100644
index 0000000..ab4ef9e
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-mul-03.ll
@@ -0,0 +1,102 @@
+; Test multiplications between an i64 and a sign-extended i32.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check MSGFR.
+define i64 @f1(i64 %a, i32 %b) {
+; CHECK: f1:
+; CHECK: msgfr %r2, %r3
+; CHECK: br %r14
+  %bext = sext i32 %b to i64
+  %mul = mul i64 %a, %bext
+  ret i64 %mul
+}
+
+; Check MSGF with no displacement.
+define i64 @f2(i64 %a, i32 *%src) {
+; CHECK: f2:
+; CHECK: msgf %r2, 0(%r3)
+; CHECK: br %r14
+  %b = load i32 *%src
+  %bext = sext i32 %b to i64
+  %mul = mul i64 %a, %bext
+  ret i64 %mul
+}
+
+; Check the high end of the aligned MSGF range.
+define i64 @f3(i64 %a, i32 *%src) {
+; CHECK: f3:
+; CHECK: msgf %r2, 524284(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 131071
+  %b = load i32 *%ptr
+  %bext = sext i32 %b to i64
+  %mul = mul i64 %a, %bext
+  ret i64 %mul
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f4(i64 %a, i32 *%src) {
+; CHECK: f4:
+; CHECK: agfi %r3, 524288
+; CHECK: msgf %r2, 0(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 131072
+  %b = load i32 *%ptr
+  %bext = sext i32 %b to i64
+  %mul = mul i64 %a, %bext
+  ret i64 %mul
+}
+
+; Check the high end of the negative aligned MSGF range.
+define i64 @f5(i64 %a, i32 *%src) {
+; CHECK: f5:
+; CHECK: msgf %r2, -4(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 -1
+  %b = load i32 *%ptr
+  %bext = sext i32 %b to i64
+  %mul = mul i64 %a, %bext
+  ret i64 %mul
+}
+
+; Check the low end of the MSGF range.
+define i64 @f6(i64 %a, i32 *%src) {
+; CHECK: f6:
+; CHECK: msgf %r2, -524288(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 -131072
+  %b = load i32 *%ptr
+  %bext = sext i32 %b to i64
+  %mul = mul i64 %a, %bext
+  ret i64 %mul
+}
+
+; Check the next word down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f7(i64 %a, i32 *%src) {
+; CHECK: f7:
+; CHECK: agfi %r3, -524292
+; CHECK: msgf %r2, 0(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 -131073
+  %b = load i32 *%ptr
+  %bext = sext i32 %b to i64
+  %mul = mul i64 %a, %bext
+  ret i64 %mul
+}
+
+; Check that MSGF allows an index.
+define i64 @f8(i64 %a, i64 %src, i64 %index) {
+; CHECK: f8:
+; CHECK: msgf %r2, 524284({{%r4,%r3|%r3,%r4}})
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 524284
+  %ptr = inttoptr i64 %add2 to i32 *
+  %b = load i32 *%ptr
+  %bext = sext i32 %b to i64
+  %mul = mul i64 %a, %bext
+  ret i64 %mul
+}
diff --git a/test/CodeGen/SystemZ/int-mul-04.ll b/test/CodeGen/SystemZ/int-mul-04.ll
new file mode 100644
index 0000000..94c2639
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-mul-04.ll
@@ -0,0 +1,94 @@
+; Test 64-bit multiplication in which the second operand is variable.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check MSGR.
+define i64 @f1(i64 %a, i64 %b) {
+; CHECK: f1:
+; CHECK: msgr %r2, %r3
+; CHECK: br %r14
+  %mul = mul i64 %a, %b
+  ret i64 %mul
+}
+
+; Check MSG with no displacement.
+define i64 @f2(i64 %a, i64 *%src) {
+; CHECK: f2:
+; CHECK: msg %r2, 0(%r3)
+; CHECK: br %r14
+  %b = load i64 *%src
+  %mul = mul i64 %a, %b
+  ret i64 %mul
+}
+
+; Check the high end of the aligned MSG range.
+define i64 @f3(i64 %a, i64 *%src) {
+; CHECK: f3:
+; CHECK: msg %r2, 524280(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 65535
+  %b = load i64 *%ptr
+  %mul = mul i64 %a, %b
+  ret i64 %mul
+}
+
+; Check the next doubleword up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f4(i64 %a, i64 *%src) {
+; CHECK: f4:
+; CHECK: agfi %r3, 524288
+; CHECK: msg %r2, 0(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 65536
+  %b = load i64 *%ptr
+  %mul = mul i64 %a, %b
+  ret i64 %mul
+}
+
+; Check the high end of the negative aligned MSG range.
+define i64 @f5(i64 %a, i64 *%src) {
+; CHECK: f5:
+; CHECK: msg %r2, -8(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 -1
+  %b = load i64 *%ptr
+  %mul = mul i64 %a, %b
+  ret i64 %mul
+}
+
+; Check the low end of the MSG range.
+define i64 @f6(i64 %a, i64 *%src) {
+; CHECK: f6:
+; CHECK: msg %r2, -524288(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 -65536
+  %b = load i64 *%ptr
+  %mul = mul i64 %a, %b
+  ret i64 %mul
+}
+
+; Check the next doubleword down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f7(i64 %a, i64 *%src) {
+; CHECK: f7:
+; CHECK: agfi %r3, -524296
+; CHECK: msg %r2, 0(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 -65537
+  %b = load i64 *%ptr
+  %mul = mul i64 %a, %b
+  ret i64 %mul
+}
+
+; Check that MSG allows an index.
+define i64 @f8(i64 %a, i64 %src, i64 %index) {
+; CHECK: f8:
+; CHECK: msg %r2, 524280({{%r4,%r3|%r3,%r4}})
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 524280
+  %ptr = inttoptr i64 %add2 to i64 *
+  %b = load i64 *%ptr
+  %mul = mul i64 %a, %b
+  ret i64 %mul
+}
diff --git a/test/CodeGen/SystemZ/int-mul-05.ll b/test/CodeGen/SystemZ/int-mul-05.ll
new file mode 100644
index 0000000..5e4031b
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-mul-05.ll
@@ -0,0 +1,159 @@
+; Test 32-bit multiplication in which the second operand is constant.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check multiplication by 2, which should use shifts.
+define i32 @f1(i32 %a, i32 *%dest) {
+; CHECK: f1:
+; CHECK: sll %r2, 1
+; CHECK: br %r14
+  %mul = mul i32 %a, 2
+  ret i32 %mul
+}
+
+; Check multiplication by 3.
+define i32 @f2(i32 %a, i32 *%dest) {
+; CHECK: f2:
+; CHECK: mhi %r2, 3
+; CHECK: br %r14
+  %mul = mul i32 %a, 3
+  ret i32 %mul
+}
+
+; Check the high end of the MHI range.
+define i32 @f3(i32 %a, i32 *%dest) {
+; CHECK: f3:
+; CHECK: mhi %r2, 32767
+; CHECK: br %r14
+  %mul = mul i32 %a, 32767
+  ret i32 %mul
+}
+
+; Check the next value up, which should use shifts.
+define i32 @f4(i32 %a, i32 *%dest) {
+; CHECK: f4:
+; CHECK: sll %r2, 15
+; CHECK: br %r14
+  %mul = mul i32 %a, 32768
+  ret i32 %mul
+}
+
+; Check the next value up again, which can use MSFI.
+define i32 @f5(i32 %a, i32 *%dest) {
+; CHECK: f5:
+; CHECK: msfi %r2, 32769
+; CHECK: br %r14
+  %mul = mul i32 %a, 32769
+  ret i32 %mul
+}
+
+; Check the high end of the MSFI range.
+define i32 @f6(i32 %a, i32 *%dest) {
+; CHECK: f6:
+; CHECK: msfi %r2, 2147483647
+; CHECK: br %r14
+  %mul = mul i32 %a, 2147483647
+  ret i32 %mul
+}
+
+; Check the next value up, which should use shifts.
+define i32 @f7(i32 %a, i32 *%dest) {
+; CHECK: f7:
+; CHECK: sll %r2, 31
+; CHECK: br %r14
+  %mul = mul i32 %a, 2147483648
+  ret i32 %mul
+}
+
+; Check the next value up again, which is treated as a negative value.
+define i32 @f8(i32 %a, i32 *%dest) {
+; CHECK: f8:
+; CHECK: msfi %r2, -2147483647
+; CHECK: br %r14
+  %mul = mul i32 %a, 2147483649
+  ret i32 %mul
+}
+
+; Check multiplication by -1, which is a negation.
+define i32 @f9(i32 %a, i32 *%dest) {
+; CHECK: f9:
+; CHECK: lcr %r2, %r2
+; CHECK: br %r14
+  %mul = mul i32 %a, -1
+  ret i32 %mul
+}
+
+; Check multiplication by -2, which should use shifts.
+define i32 @f10(i32 %a, i32 *%dest) {
+; CHECK: f10:
+; CHECK: sll %r2, 1
+; CHECK: lcr %r2, %r2
+; CHECK: br %r14
+  %mul = mul i32 %a, -2
+  ret i32 %mul
+}
+
+; Check multiplication by -3.
+define i32 @f11(i32 %a, i32 *%dest) {
+; CHECK: f11:
+; CHECK: mhi %r2, -3
+; CHECK: br %r14
+  %mul = mul i32 %a, -3
+  ret i32 %mul
+}
+
+; Check the lowest useful MHI value.
+define i32 @f12(i32 %a, i32 *%dest) {
+; CHECK: f12:
+; CHECK: mhi %r2, -32767
+; CHECK: br %r14
+  %mul = mul i32 %a, -32767
+  ret i32 %mul
+}
+
+; Check the next value down, which should use shifts.
+define i32 @f13(i32 %a, i32 *%dest) {
+; CHECK: f13:
+; CHECK: sll %r2, 15
+; CHECK: lcr %r2, %r2
+; CHECK: br %r14
+  %mul = mul i32 %a, -32768
+  ret i32 %mul
+}
+
+; Check the next value down again, which can use MSFI.
+define i32 @f14(i32 %a, i32 *%dest) {
+; CHECK: f14:
+; CHECK: msfi %r2, -32769
+; CHECK: br %r14
+  %mul = mul i32 %a, -32769
+  ret i32 %mul
+}
+
+; Check the lowest useful MSFI value.
+define i32 @f15(i32 %a, i32 *%dest) {
+; CHECK: f15:
+; CHECK: msfi %r2, -2147483647
+; CHECK: br %r14
+  %mul = mul i32 %a, -2147483647
+  ret i32 %mul
+}
+
+; Check the next value down, which should use shifts.
+define i32 @f16(i32 %a, i32 *%dest) {
+; CHECK: f16:
+; CHECK: sll %r2, 31
+; CHECK-NOT: lcr
+; CHECK: br %r14
+  %mul = mul i32 %a, -2147483648
+  ret i32 %mul
+}
+
+; Check the next value down again, which is treated as a positive value.
+define i32 @f17(i32 %a, i32 *%dest) {
+; CHECK: f17:
+; CHECK: msfi %r2, 2147483647
+; CHECK: br %r14
+  %mul = mul i32 %a, -2147483649
+  ret i32 %mul
+}
diff --git a/test/CodeGen/SystemZ/int-mul-06.ll b/test/CodeGen/SystemZ/int-mul-06.ll
new file mode 100644
index 0000000..a354605
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-mul-06.ll
@@ -0,0 +1,159 @@
+; Test 64-bit multiplication in which the second operand is constant.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check multiplication by 2, which should use shifts.
+define i64 @f1(i64 %a, i64 *%dest) {
+; CHECK: f1:
+; CHECK: sllg %r2, %r2, 1
+; CHECK: br %r14
+  %mul = mul i64 %a, 2
+  ret i64 %mul
+}
+
+; Check multiplication by 3.
+define i64 @f2(i64 %a, i64 *%dest) {
+; CHECK: f2:
+; CHECK: mghi %r2, 3
+; CHECK: br %r14
+  %mul = mul i64 %a, 3
+  ret i64 %mul
+}
+
+; Check the high end of the MGHI range.
+define i64 @f3(i64 %a, i64 *%dest) {
+; CHECK: f3:
+; CHECK: mghi %r2, 32767
+; CHECK: br %r14
+  %mul = mul i64 %a, 32767
+  ret i64 %mul
+}
+
+; Check the next value up, which should use shifts.
+define i64 @f4(i64 %a, i64 *%dest) {
+; CHECK: f4:
+; CHECK: sllg %r2, %r2, 15
+; CHECK: br %r14
+  %mul = mul i64 %a, 32768
+  ret i64 %mul
+}
+
+; Check the next value up again, which can use MSGFI.
+define i64 @f5(i64 %a, i64 *%dest) {
+; CHECK: f5:
+; CHECK: msgfi %r2, 32769
+; CHECK: br %r14
+  %mul = mul i64 %a, 32769
+  ret i64 %mul
+}
+
+; Check the high end of the MSGFI range.
+define i64 @f6(i64 %a, i64 *%dest) {
+; CHECK: f6:
+; CHECK: msgfi %r2, 2147483647
+; CHECK: br %r14
+  %mul = mul i64 %a, 2147483647
+  ret i64 %mul
+}
+
+; Check the next value up, which should use shifts.
+define i64 @f7(i64 %a, i64 *%dest) {
+; CHECK: f7:
+; CHECK: sllg %r2, %r2, 31
+; CHECK: br %r14
+  %mul = mul i64 %a, 2147483648
+  ret i64 %mul
+}
+
+; Check the next value up again, which cannot use a constant multiplication.
+define i64 @f8(i64 %a, i64 *%dest) {
+; CHECK: f8:
+; CHECK-NOT: msgfi
+; CHECK: br %r14
+  %mul = mul i64 %a, 2147483649
+  ret i64 %mul
+}
+
+; Check multiplication by -1, which is a negation.
+define i64 @f9(i64 %a, i64 *%dest) {
+; CHECK: f9:
+; CHECK: lcgr {{%r[0-5]}}, %r2
+; CHECK: br %r14
+  %mul = mul i64 %a, -1
+  ret i64 %mul
+}
+
+; Check multiplication by -2, which should use shifts.
+define i64 @f10(i64 %a, i64 *%dest) {
+; CHECK: f10:
+; CHECK: sllg [[SHIFTED:%r[0-5]]], %r2, 1
+; CHECK: lcgr %r2, [[SHIFTED]]
+; CHECK: br %r14
+  %mul = mul i64 %a, -2
+  ret i64 %mul
+}
+
+; Check multiplication by -3.
+define i64 @f11(i64 %a, i64 *%dest) {
+; CHECK: f11:
+; CHECK: mghi %r2, -3
+; CHECK: br %r14
+  %mul = mul i64 %a, -3
+  ret i64 %mul
+}
+
+; Check the lowest useful MGHI value.
+define i64 @f12(i64 %a, i64 *%dest) {
+; CHECK: f12:
+; CHECK: mghi %r2, -32767
+; CHECK: br %r14
+  %mul = mul i64 %a, -32767
+  ret i64 %mul
+}
+
+; Check the next value down, which should use shifts.
+define i64 @f13(i64 %a, i64 *%dest) {
+; CHECK: f13:
+; CHECK: sllg [[SHIFTED:%r[0-5]]], %r2, 15
+; CHECK: lcgr %r2, [[SHIFTED]]
+; CHECK: br %r14
+  %mul = mul i64 %a, -32768
+  ret i64 %mul
+}
+
+; Check the next value down again, which can use MSGFI.
+define i64 @f14(i64 %a, i64 *%dest) {
+; CHECK: f14:
+; CHECK: msgfi %r2, -32769
+; CHECK: br %r14
+  %mul = mul i64 %a, -32769
+  ret i64 %mul
+}
+
+; Check the lowest useful MSGFI value.
+define i64 @f15(i64 %a, i64 *%dest) {
+; CHECK: f15:
+; CHECK: msgfi %r2, -2147483647
+; CHECK: br %r14
+  %mul = mul i64 %a, -2147483647
+  ret i64 %mul
+}
+
+; Check the next value down, which should use shifts.
+define i64 @f16(i64 %a, i64 *%dest) {
+; CHECK: f16:
+; CHECK: sllg [[SHIFTED:%r[0-5]]], %r2, 31
+; CHECK: lcgr %r2, [[SHIFTED]]
+; CHECK: br %r14
+  %mul = mul i64 %a, -2147483648
+  ret i64 %mul
+}
+
+; Check the next value down again, which cannot use a constant multiplication.
+define i64 @f17(i64 %a, i64 *%dest) {
+; CHECK: f17:
+; CHECK-NOT: msgfi
+; CHECK: br %r14
+  %mul = mul i64 %a, -2147483649
+  ret i64 %mul
+}
diff --git a/test/CodeGen/SystemZ/int-mul-07.ll b/test/CodeGen/SystemZ/int-mul-07.ll
new file mode 100644
index 0000000..2459cc3
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-mul-07.ll
@@ -0,0 +1,64 @@
+; Test high-part i32->i64 multiplications.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; We don't provide *MUL_LOHI or MULH* for the patterns in this file,
+; but they should at least still work.
+
+; Check zero-extended multiplication in which only the high part is used.
+define i32 @f1(i32 %a, i32 %b) {
+; CHECK: f1:
+; CHECK: msgr
+; CHECK: br %r14
+  %ax = zext i32 %a to i64
+  %bx = zext i32 %b to i64
+  %mulx = mul i64 %ax, %bx
+  %highx = lshr i64 %mulx, 32
+  %high = trunc i64 %highx to i32
+  ret i32 %high
+}
+
+; Check sign-extended multiplication in which only the high part is used.
+define i32 @f2(i32 %a, i32 %b) {
+; CHECK: f2:
+; CHECK: msgfr
+; CHECK: br %r14
+  %ax = sext i32 %a to i64
+  %bx = sext i32 %b to i64
+  %mulx = mul i64 %ax, %bx
+  %highx = lshr i64 %mulx, 32
+  %high = trunc i64 %highx to i32
+  ret i32 %high
+}
+
+; Check zero-extended multiplication in which the result is split into
+; high and low halves.
+define i32 @f3(i32 %a, i32 %b) {
+; CHECK: f3:
+; CHECK: msgr
+; CHECK: br %r14
+  %ax = zext i32 %a to i64
+  %bx = zext i32 %b to i64
+  %mulx = mul i64 %ax, %bx
+  %highx = lshr i64 %mulx, 32
+  %high = trunc i64 %highx to i32
+  %low = trunc i64 %mulx to i32
+  %or = or i32 %high, %low
+  ret i32 %or
+}
+
+; Check sign-extended multiplication in which the result is split into
+; high and low halves.
+define i32 @f4(i32 %a, i32 %b) {
+; CHECK: f4:
+; CHECK: msgfr
+; CHECK: br %r14
+  %ax = sext i32 %a to i64
+  %bx = sext i32 %b to i64
+  %mulx = mul i64 %ax, %bx
+  %highx = lshr i64 %mulx, 32
+  %high = trunc i64 %highx to i32
+  %low = trunc i64 %mulx to i32
+  %or = or i32 %high, %low
+  ret i32 %or
+}
diff --git a/test/CodeGen/SystemZ/int-mul-08.ll b/test/CodeGen/SystemZ/int-mul-08.ll
new file mode 100644
index 0000000..09ebe7a
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-mul-08.ll
@@ -0,0 +1,188 @@
+; Test high-part i64->i128 multiplications.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check zero-extended multiplication in which only the high part is used.
+define i64 @f1(i64 %dummy, i64 %a, i64 %b) {
+; CHECK: f1:
+; CHECK-NOT: {{%r[234]}}
+; CHECK: mlgr %r2, %r4
+; CHECK: br %r14
+  %ax = zext i64 %a to i128
+  %bx = zext i64 %b to i128
+  %mulx = mul i128 %ax, %bx
+  %highx = lshr i128 %mulx, 64
+  %high = trunc i128 %highx to i64
+  ret i64 %high
+}
+
+; Check sign-extended multiplication in which only the high part is used.
+; This needs a rather convoluted sequence.
+define i64 @f2(i64 %dummy, i64 %a, i64 %b) {
+; CHECK: f2:
+; CHECK: mlgr
+; CHECK: agr
+; CHECK: agr
+; CHECK: br %r14
+  %ax = sext i64 %a to i128
+  %bx = sext i64 %b to i128
+  %mulx = mul i128 %ax, %bx
+  %highx = lshr i128 %mulx, 64
+  %high = trunc i128 %highx to i64
+  ret i64 %high
+}
+
+; Check zero-extended multiplication in which only part of the high half
+; is used.
+define i64 @f3(i64 %dummy, i64 %a, i64 %b) {
+; CHECK: f3:
+; CHECK-NOT: {{%r[234]}}
+; CHECK: mlgr %r2, %r4
+; CHECK: srlg %r2, %r2, 3
+; CHECK: br %r14
+  %ax = zext i64 %a to i128
+  %bx = zext i64 %b to i128
+  %mulx = mul i128 %ax, %bx
+  %highx = lshr i128 %mulx, 67
+  %high = trunc i128 %highx to i64
+  ret i64 %high
+}
+
+; Check zero-extended multiplication in which the result is split into
+; high and low halves.
+define i64 @f4(i64 %dummy, i64 %a, i64 %b) {
+; CHECK: f4:
+; CHECK-NOT: {{%r[234]}}
+; CHECK: mlgr %r2, %r4
+; CHECK: ogr %r2, %r3
+; CHECK: br %r14
+  %ax = zext i64 %a to i128
+  %bx = zext i64 %b to i128
+  %mulx = mul i128 %ax, %bx
+  %highx = lshr i128 %mulx, 64
+  %high = trunc i128 %highx to i64
+  %low = trunc i128 %mulx to i64
+  %or = or i64 %high, %low
+  ret i64 %or
+}
+
+; Check division by a constant, which should use multiplication instead.
+define i64 @f5(i64 %dummy, i64 %a) {
+; CHECK: f5:
+; CHECK: mlgr %r2,
+; CHECK: srlg %r2, %r2,
+; CHECK: br %r14
+  %res = udiv i64 %a, 1234
+  ret i64 %res
+}
+
+; Check MLG with no displacement.
+define i64 @f6(i64 %dummy, i64 %a, i64 *%src) {
+; CHECK: f6:
+; CHECK-NOT: {{%r[234]}}
+; CHECK: mlg %r2, 0(%r4)
+; CHECK: br %r14
+  %b = load i64 *%src
+  %ax = zext i64 %a to i128
+  %bx = zext i64 %b to i128
+  %mulx = mul i128 %ax, %bx
+  %highx = lshr i128 %mulx, 64
+  %high = trunc i128 %highx to i64
+  ret i64 %high
+}
+
+; Check the high end of the aligned MLG range.
+define i64 @f7(i64 %dummy, i64 %a, i64 *%src) {
+; CHECK: f7:
+; CHECK: mlg %r2, 524280(%r4)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 65535
+  %b = load i64 *%ptr
+  %ax = zext i64 %a to i128
+  %bx = zext i64 %b to i128
+  %mulx = mul i128 %ax, %bx
+  %highx = lshr i128 %mulx, 64
+  %high = trunc i128 %highx to i64
+  ret i64 %high
+}
+
+; Check the next doubleword up, which requires separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f8(i64 %dummy, i64 %a, i64 *%src) {
+; CHECK: f8:
+; CHECK: agfi %r4, 524288
+; CHECK: mlg %r2, 0(%r4)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 65536
+  %b = load i64 *%ptr
+  %ax = zext i64 %a to i128
+  %bx = zext i64 %b to i128
+  %mulx = mul i128 %ax, %bx
+  %highx = lshr i128 %mulx, 64
+  %high = trunc i128 %highx to i64
+  ret i64 %high
+}
+
+; Check the high end of the negative aligned MLG range.
+define i64 @f9(i64 %dummy, i64 %a, i64 *%src) {
+; CHECK: f9:
+; CHECK: mlg %r2, -8(%r4)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 -1
+  %b = load i64 *%ptr
+  %ax = zext i64 %a to i128
+  %bx = zext i64 %b to i128
+  %mulx = mul i128 %ax, %bx
+  %highx = lshr i128 %mulx, 64
+  %high = trunc i128 %highx to i64
+  ret i64 %high
+}
+
+; Check the low end of the MLG range.
+define i64 @f10(i64 %dummy, i64 %a, i64 *%src) {
+; CHECK: f10:
+; CHECK: mlg %r2, -524288(%r4)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 -65536
+  %b = load i64 *%ptr
+  %ax = zext i64 %a to i128
+  %bx = zext i64 %b to i128
+  %mulx = mul i128 %ax, %bx
+  %highx = lshr i128 %mulx, 64
+  %high = trunc i128 %highx to i64
+  ret i64 %high
+}
+
+; Check the next doubleword down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f11(i64 *%dest, i64 %a, i64 *%src) {
+; CHECK: f11:
+; CHECK: agfi %r4, -524296
+; CHECK: mlg %r2, 0(%r4)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 -65537
+  %b = load i64 *%ptr
+  %ax = zext i64 %a to i128
+  %bx = zext i64 %b to i128
+  %mulx = mul i128 %ax, %bx
+  %highx = lshr i128 %mulx, 64
+  %high = trunc i128 %highx to i64
+  ret i64 %high
+}
+
+; Check that MLG allows an index.
+define i64 @f12(i64 *%dest, i64 %a, i64 %src, i64 %index) {
+; CHECK: f12:
+; CHECK: mlg %r2, 524287(%r5,%r4)
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 524287
+  %ptr = inttoptr i64 %add2 to i64 *
+  %b = load i64 *%ptr
+  %ax = zext i64 %a to i128
+  %bx = zext i64 %b to i128
+  %mulx = mul i128 %ax, %bx
+  %highx = lshr i128 %mulx, 64
+  %high = trunc i128 %highx to i64
+  ret i64 %high
+}
diff --git a/test/CodeGen/SystemZ/int-neg-01.ll b/test/CodeGen/SystemZ/int-neg-01.ll
new file mode 100644
index 0000000..6114f4e
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-neg-01.ll
@@ -0,0 +1,42 @@
+; Test integer negation.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test i32->i32 negation.
+define i32 @f1(i32 %val) {
+; CHECK: f1:
+; CHECK: lcr %r2, %r2
+; CHECK: br %r14
+  %neg = sub i32 0, %val
+  ret i32 %neg
+}
+
+; Test i32->i64 negation.
+define i64 @f2(i32 %val) {
+; CHECK: f2:
+; CHECK: lcgfr %r2, %r2
+; CHECK: br %r14
+  %ext = sext i32 %val to i64
+  %neg = sub i64 0, %ext
+  ret i64 %neg
+}
+
+; Test i32->i64 negation that uses an "in-register" form of sign extension.
+define i64 @f3(i64 %val) {
+; CHECK: f3:
+; CHECK: lcgfr %r2, %r2
+; CHECK: br %r14
+  %trunc = trunc i64 %val to i32
+  %ext = sext i32 %trunc to i64
+  %neg = sub i64 0, %ext
+  ret i64 %neg
+}
+
+; Test i64 negation.
+define i64 @f4(i64 %val) {
+; CHECK: f4:
+; CHECK: lcgr %r2, %r2
+; CHECK: br %r14
+  %neg = sub i64 0, %val
+  ret i64 %neg
+}
diff --git a/test/CodeGen/SystemZ/int-sub-01.ll b/test/CodeGen/SystemZ/int-sub-01.ll
new file mode 100644
index 0000000..9a73814
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-sub-01.ll
@@ -0,0 +1,129 @@
+; Test 32-bit subtraction.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check SR.
+define i32 @f1(i32 %a, i32 %b) {
+; CHECK: f1:
+; CHECK: sr %r2, %r3
+; CHECK: br %r14
+  %sub = sub i32 %a, %b
+  ret i32 %sub
+}
+
+; Check the low end of the S range.
+define i32 @f2(i32 %a, i32 *%src) {
+; CHECK: f2:
+; CHECK: s %r2, 0(%r3)
+; CHECK: br %r14
+  %b = load i32 *%src
+  %sub = sub i32 %a, %b
+  ret i32 %sub
+}
+
+; Check the high end of the aligned S range.
+define i32 @f3(i32 %a, i32 *%src) {
+; CHECK: f3:
+; CHECK: s %r2, 4092(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 1023
+  %b = load i32 *%ptr
+  %sub = sub i32 %a, %b
+  ret i32 %sub
+}
+
+; Check the next word up, which should use SY instead of S.
+define i32 @f4(i32 %a, i32 *%src) {
+; CHECK: f4:
+; CHECK: sy %r2, 4096(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 1024
+  %b = load i32 *%ptr
+  %sub = sub i32 %a, %b
+  ret i32 %sub
+}
+
+; Check the high end of the aligned SY range.
+define i32 @f5(i32 %a, i32 *%src) {
+; CHECK: f5:
+; CHECK: sy %r2, 524284(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 131071
+  %b = load i32 *%ptr
+  %sub = sub i32 %a, %b
+  ret i32 %sub
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i32 @f6(i32 %a, i32 *%src) {
+; CHECK: f6:
+; CHECK: agfi %r3, 524288
+; CHECK: s %r2, 0(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 131072
+  %b = load i32 *%ptr
+  %sub = sub i32 %a, %b
+  ret i32 %sub
+}
+
+; Check the high end of the negative aligned SY range.
+define i32 @f7(i32 %a, i32 *%src) {
+; CHECK: f7:
+; CHECK: sy %r2, -4(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 -1
+  %b = load i32 *%ptr
+  %sub = sub i32 %a, %b
+  ret i32 %sub
+}
+
+; Check the low end of the SY range.
+define i32 @f8(i32 %a, i32 *%src) {
+; CHECK: f8:
+; CHECK: sy %r2, -524288(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 -131072
+  %b = load i32 *%ptr
+  %sub = sub i32 %a, %b
+  ret i32 %sub
+}
+
+; Check the next word down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i32 @f9(i32 %a, i32 *%src) {
+; CHECK: f9:
+; CHECK: agfi %r3, -524292
+; CHECK: s %r2, 0(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 -131073
+  %b = load i32 *%ptr
+  %sub = sub i32 %a, %b
+  ret i32 %sub
+}
+
+; Check that S allows an index.
+define i32 @f10(i32 %a, i64 %src, i64 %index) {
+; CHECK: f10:
+; CHECK: s %r2, 4092({{%r4,%r3|%r3,%r4}})
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 4092
+  %ptr = inttoptr i64 %add2 to i32 *
+  %b = load i32 *%ptr
+  %sub = sub i32 %a, %b
+  ret i32 %sub
+}
+
+; Check that SY allows an index.
+define i32 @f11(i32 %a, i64 %src, i64 %index) {
+; CHECK: f11:
+; CHECK: sy %r2, 4096({{%r4,%r3|%r3,%r4}})
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 4096
+  %ptr = inttoptr i64 %add2 to i32 *
+  %b = load i32 *%ptr
+  %sub = sub i32 %a, %b
+  ret i32 %sub
+}
diff --git a/test/CodeGen/SystemZ/int-sub-02.ll b/test/CodeGen/SystemZ/int-sub-02.ll
new file mode 100644
index 0000000..5150a96
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-sub-02.ll
@@ -0,0 +1,102 @@
+; Test subtractions of a sign-extended i32 from an i64.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check SGFR.
+define i64 @f1(i64 %a, i32 %b) {
+; CHECK: f1:
+; CHECK: sgfr %r2, %r3
+; CHECK: br %r14
+  %bext = sext i32 %b to i64
+  %sub = sub i64 %a, %bext
+  ret i64 %sub
+}
+
+; Check SGF with no displacement.
+define i64 @f2(i64 %a, i32 *%src) {
+; CHECK: f2:
+; CHECK: sgf %r2, 0(%r3)
+; CHECK: br %r14
+  %b = load i32 *%src
+  %bext = sext i32 %b to i64
+  %sub = sub i64 %a, %bext
+  ret i64 %sub
+}
+
+; Check the high end of the aligned SGF range.
+define i64 @f3(i64 %a, i32 *%src) {
+; CHECK: f3:
+; CHECK: sgf %r2, 524284(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 131071
+  %b = load i32 *%ptr
+  %bext = sext i32 %b to i64
+  %sub = sub i64 %a, %bext
+  ret i64 %sub
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f4(i64 %a, i32 *%src) {
+; CHECK: f4:
+; CHECK: agfi %r3, 524288
+; CHECK: sgf %r2, 0(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 131072
+  %b = load i32 *%ptr
+  %bext = sext i32 %b to i64
+  %sub = sub i64 %a, %bext
+  ret i64 %sub
+}
+
+; Check the high end of the negative aligned SGF range.
+define i64 @f5(i64 %a, i32 *%src) {
+; CHECK: f5:
+; CHECK: sgf %r2, -4(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 -1
+  %b = load i32 *%ptr
+  %bext = sext i32 %b to i64
+  %sub = sub i64 %a, %bext
+  ret i64 %sub
+}
+
+; Check the low end of the SGF range.
+define i64 @f6(i64 %a, i32 *%src) {
+; CHECK: f6:
+; CHECK: sgf %r2, -524288(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 -131072
+  %b = load i32 *%ptr
+  %bext = sext i32 %b to i64
+  %sub = sub i64 %a, %bext
+  ret i64 %sub
+}
+
+; Check the next word down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f7(i64 %a, i32 *%src) {
+; CHECK: f7:
+; CHECK: agfi %r3, -524292
+; CHECK: sgf %r2, 0(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 -131073
+  %b = load i32 *%ptr
+  %bext = sext i32 %b to i64
+  %sub = sub i64 %a, %bext
+  ret i64 %sub
+}
+
+; Check that SGF allows an index.
+define i64 @f8(i64 %a, i64 %src, i64 %index) {
+; CHECK: f8:
+; CHECK: sgf %r2, 524284({{%r4,%r3|%r3,%r4}})
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 524284
+  %ptr = inttoptr i64 %add2 to i32 *
+  %b = load i32 *%ptr
+  %bext = sext i32 %b to i64
+  %sub = sub i64 %a, %bext
+  ret i64 %sub
+}
diff --git a/test/CodeGen/SystemZ/int-sub-03.ll b/test/CodeGen/SystemZ/int-sub-03.ll
new file mode 100644
index 0000000..73571b3
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-sub-03.ll
@@ -0,0 +1,102 @@
+; Test subtractions of a zero-extended i32 from an i64.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check SLGFR.
+define i64 @f1(i64 %a, i32 %b) {
+; CHECK: f1:
+; CHECK: slgfr %r2, %r3
+; CHECK: br %r14
+  %bext = zext i32 %b to i64
+  %sub = sub i64 %a, %bext
+  ret i64 %sub
+}
+
+; Check SLGF with no displacement.
+define i64 @f2(i64 %a, i32 *%src) {
+; CHECK: f2:
+; CHECK: slgf %r2, 0(%r3)
+; CHECK: br %r14
+  %b = load i32 *%src
+  %bext = zext i32 %b to i64
+  %sub = sub i64 %a, %bext
+  ret i64 %sub
+}
+
+; Check the high end of the aligned SLGF range.
+define i64 @f3(i64 %a, i32 *%src) {
+; CHECK: f3:
+; CHECK: slgf %r2, 524284(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 131071
+  %b = load i32 *%ptr
+  %bext = zext i32 %b to i64
+  %sub = sub i64 %a, %bext
+  ret i64 %sub
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f4(i64 %a, i32 *%src) {
+; CHECK: f4:
+; CHECK: agfi %r3, 524288
+; CHECK: slgf %r2, 0(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 131072
+  %b = load i32 *%ptr
+  %bext = zext i32 %b to i64
+  %sub = sub i64 %a, %bext
+  ret i64 %sub
+}
+
+; Check the high end of the negative aligned SLGF range.
+define i64 @f5(i64 %a, i32 *%src) {
+; CHECK: f5:
+; CHECK: slgf %r2, -4(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 -1
+  %b = load i32 *%ptr
+  %bext = zext i32 %b to i64
+  %sub = sub i64 %a, %bext
+  ret i64 %sub
+}
+
+; Check the low end of the SLGF range.
+define i64 @f6(i64 %a, i32 *%src) {
+; CHECK: f6:
+; CHECK: slgf %r2, -524288(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 -131072
+  %b = load i32 *%ptr
+  %bext = zext i32 %b to i64
+  %sub = sub i64 %a, %bext
+  ret i64 %sub
+}
+
+; Check the next word down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f7(i64 %a, i32 *%src) {
+; CHECK: f7:
+; CHECK: agfi %r3, -524292
+; CHECK: slgf %r2, 0(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 -131073
+  %b = load i32 *%ptr
+  %bext = zext i32 %b to i64
+  %sub = sub i64 %a, %bext
+  ret i64 %sub
+}
+
+; Check that SLGF allows an index.
+define i64 @f8(i64 %a, i64 %src, i64 %index) {
+; CHECK: f8:
+; CHECK: slgf %r2, 524284({{%r4,%r3|%r3,%r4}})
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 524284
+  %ptr = inttoptr i64 %add2 to i32 *
+  %b = load i32 *%ptr
+  %bext = zext i32 %b to i64
+  %sub = sub i64 %a, %bext
+  ret i64 %sub
+}
diff --git a/test/CodeGen/SystemZ/int-sub-04.ll b/test/CodeGen/SystemZ/int-sub-04.ll
new file mode 100644
index 0000000..545d342
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-sub-04.ll
@@ -0,0 +1,94 @@
+; Test 64-bit subtraction in which the second operand is variable.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check SGR.
+define i64 @f1(i64 %a, i64 %b) {
+; CHECK: f1:
+; CHECK: sgr %r2, %r3
+; CHECK: br %r14
+  %sub = sub i64 %a, %b
+  ret i64 %sub
+}
+
+; Check SG with no displacement.
+define i64 @f2(i64 %a, i64 *%src) {
+; CHECK: f2:
+; CHECK: sg %r2, 0(%r3)
+; CHECK: br %r14
+  %b = load i64 *%src
+  %sub = sub i64 %a, %b
+  ret i64 %sub
+}
+
+; Check the high end of the aligned SG range.
+define i64 @f3(i64 %a, i64 *%src) {
+; CHECK: f3:
+; CHECK: sg %r2, 524280(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 65535
+  %b = load i64 *%ptr
+  %sub = sub i64 %a, %b
+  ret i64 %sub
+}
+
+; Check the next doubleword up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f4(i64 %a, i64 *%src) {
+; CHECK: f4:
+; CHECK: agfi %r3, 524288
+; CHECK: sg %r2, 0(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 65536
+  %b = load i64 *%ptr
+  %sub = sub i64 %a, %b
+  ret i64 %sub
+}
+
+; Check the high end of the negative aligned SG range.
+define i64 @f5(i64 %a, i64 *%src) {
+; CHECK: f5:
+; CHECK: sg %r2, -8(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 -1
+  %b = load i64 *%ptr
+  %sub = sub i64 %a, %b
+  ret i64 %sub
+}
+
+; Check the low end of the SG range.
+define i64 @f6(i64 %a, i64 *%src) {
+; CHECK: f6:
+; CHECK: sg %r2, -524288(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 -65536
+  %b = load i64 *%ptr
+  %sub = sub i64 %a, %b
+  ret i64 %sub
+}
+
+; Check the next doubleword down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f7(i64 %a, i64 *%src) {
+; CHECK: f7:
+; CHECK: agfi %r3, -524296
+; CHECK: sg %r2, 0(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 -65537
+  %b = load i64 *%ptr
+  %sub = sub i64 %a, %b
+  ret i64 %sub
+}
+
+; Check that SG allows an index.
+define i64 @f8(i64 %a, i64 %src, i64 %index) {
+; CHECK: f8:
+; CHECK: sg %r2, 524280({{%r4,%r3|%r3,%r4}})
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 524280
+  %ptr = inttoptr i64 %add2 to i64 *
+  %b = load i64 *%ptr
+  %sub = sub i64 %a, %b
+  ret i64 %sub
+}
diff --git a/test/CodeGen/SystemZ/int-sub-05.ll b/test/CodeGen/SystemZ/int-sub-05.ll
new file mode 100644
index 0000000..1475b24
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-sub-05.ll
@@ -0,0 +1,118 @@
+; Test 128-bit subtraction in which the second operand is variable.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test register subtraction.
+define void @f1(i128 *%ptr, i64 %high, i64 %low) {
+; CHECK: f1:
+; CHECK: slgr {{%r[0-5]}}, %r4
+; CHECK: slbgr {{%r[0-5]}}, %r3
+; CHECK: br %r14
+  %a = load i128 *%ptr
+  %highx = zext i64 %high to i128
+  %lowx = zext i64 %low to i128
+  %bhigh = shl i128 %highx, 64
+  %b = or i128 %bhigh, %lowx
+  %sub = sub i128 %a, %b
+  store i128 %sub, i128 *%ptr
+  ret void
+}
+
+; Test memory subtraction with no offset.
+define void @f2(i64 %addr) {
+; CHECK: f2:
+; CHECK: slg {{%r[0-5]}}, 8(%r2)
+; CHECK: slbg {{%r[0-5]}}, 0(%r2)
+; CHECK: br %r14
+  %bptr = inttoptr i64 %addr to i128 *
+  %aptr = getelementptr i128 *%bptr, i64 -8
+  %a = load i128 *%aptr
+  %b = load i128 *%bptr
+  %sub = sub i128 %a, %b
+  store i128 %sub, i128 *%aptr
+  ret void
+}
+
+; Test the highest aligned offset that is in range of both SLG and SLBG.
+define void @f3(i64 %base) {
+; CHECK: f3:
+; CHECK: slg {{%r[0-5]}}, 524280(%r2)
+; CHECK: slbg {{%r[0-5]}}, 524272(%r2)
+; CHECK: br %r14
+  %addr = add i64 %base, 524272
+  %bptr = inttoptr i64 %addr to i128 *
+  %aptr = getelementptr i128 *%bptr, i64 -8
+  %a = load i128 *%aptr
+  %b = load i128 *%bptr
+  %sub = sub i128 %a, %b
+  store i128 %sub, i128 *%aptr
+  ret void
+}
+
+; Test the next doubleword up, which requires separate address logic for SLG.
+define void @f4(i64 %base) {
+; CHECK: f4:
+; CHECK: lgr [[BASE:%r[1-5]]], %r2
+; CHECK: agfi [[BASE]], 524288
+; CHECK: slg {{%r[0-5]}}, 0([[BASE]])
+; CHECK: slbg {{%r[0-5]}}, 524280(%r2)
+; CHECK: br %r14
+  %addr = add i64 %base, 524280
+  %bptr = inttoptr i64 %addr to i128 *
+  %aptr = getelementptr i128 *%bptr, i64 -8
+  %a = load i128 *%aptr
+  %b = load i128 *%bptr
+  %sub = sub i128 %a, %b
+  store i128 %sub, i128 *%aptr
+  ret void
+}
+
+; Test the next doubleword after that, which requires separate logic for
+; both instructions.  It would be better to create an anchor at 524288
+; that both instructions can use, but that isn't implemented yet.
+define void @f5(i64 %base) {
+; CHECK: f5:
+; CHECK: slg {{%r[0-5]}}, 0({{%r[1-5]}})
+; CHECK: slbg {{%r[0-5]}}, 0({{%r[1-5]}})
+; CHECK: br %r14
+  %addr = add i64 %base, 524288
+  %bptr = inttoptr i64 %addr to i128 *
+  %aptr = getelementptr i128 *%bptr, i64 -8
+  %a = load i128 *%aptr
+  %b = load i128 *%bptr
+  %sub = sub i128 %a, %b
+  store i128 %sub, i128 *%aptr
+  ret void
+}
+
+; Test the lowest displacement that is in range of both SLG and SLBG.
+define void @f6(i64 %base) {
+; CHECK: f6:
+; CHECK: slg {{%r[0-5]}}, -524280(%r2)
+; CHECK: slbg {{%r[0-5]}}, -524288(%r2)
+; CHECK: br %r14
+  %addr = add i64 %base, -524288
+  %bptr = inttoptr i64 %addr to i128 *
+  %aptr = getelementptr i128 *%bptr, i64 -8
+  %a = load i128 *%aptr
+  %b = load i128 *%bptr
+  %sub = sub i128 %a, %b
+  store i128 %sub, i128 *%aptr
+  ret void
+}
+
+; Test the next doubleword down, which is out of range of the SLBG.
+define void @f7(i64 %base) {
+; CHECK: f7:
+; CHECK: slg {{%r[0-5]}}, -524288(%r2)
+; CHECK: slbg {{%r[0-5]}}, 0({{%r[1-5]}})
+; CHECK: br %r14
+  %addr = add i64 %base, -524296
+  %bptr = inttoptr i64 %addr to i128 *
+  %aptr = getelementptr i128 *%bptr, i64 -8
+  %a = load i128 *%aptr
+  %b = load i128 *%bptr
+  %sub = sub i128 %a, %b
+  store i128 %sub, i128 *%aptr
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/int-sub-06.ll b/test/CodeGen/SystemZ/int-sub-06.ll
new file mode 100644
index 0000000..0e04d51
--- /dev/null
+++ b/test/CodeGen/SystemZ/int-sub-06.ll
@@ -0,0 +1,165 @@
+; Test 128-bit subtraction in which the second operand is a zero-extended i32.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check register subtractions.  The XOR ensures that we don't instead
+; zero-extend %b into a register and use memory subtraction.
+define void @f1(i128 *%aptr, i32 %b) {
+; CHECK: f1:
+; CHECK: slgfr {{%r[0-5]}}, %r3
+; CHECK: slbgr
+; CHECK: br %r14
+  %a = load i128 *%aptr
+  %xor = xor i128 %a, 127
+  %bext = zext i32 %b to i128
+  %sub = sub i128 %xor, %bext
+  store i128 %sub, i128 *%aptr
+  ret void
+}
+
+; Like f1, but using an "in-register" extension.
+define void @f2(i128 *%aptr, i64 %b) {
+; CHECK: f2:
+; CHECK: slgfr {{%r[0-5]}}, %r3
+; CHECK: slbgr
+; CHECK: br %r14
+  %a = load i128 *%aptr
+  %xor = xor i128 %a, 127
+  %trunc = trunc i64 %b to i32
+  %bext = zext i32 %trunc to i128
+  %sub = sub i128 %xor, %bext
+  store i128 %sub, i128 *%aptr
+  ret void
+}
+
+; Test register subtraction in cases where the second operand is zero extended
+; from i64 rather than i32, but is later masked to i32 range.
+define void @f3(i128 *%aptr, i64 %b) {
+; CHECK: f3:
+; CHECK: slgfr {{%r[0-5]}}, %r3
+; CHECK: slbgr
+; CHECK: br %r14
+  %a = load i128 *%aptr
+  %xor = xor i128 %a, 127
+  %bext = zext i64 %b to i128
+  %and = and i128 %bext, 4294967295
+  %sub = sub i128 %xor, %and
+  store i128 %sub, i128 *%aptr
+  ret void
+}
+
+; Test SLGF with no offset.
+define void @f4(i128 *%aptr, i32 *%bsrc) {
+; CHECK: f4:
+; CHECK: slgf {{%r[0-5]}}, 0(%r3)
+; CHECK: slbgr
+; CHECK: br %r14
+  %a = load i128 *%aptr
+  %xor = xor i128 %a, 127
+  %b = load i32 *%bsrc
+  %bext = zext i32 %b to i128
+  %sub = sub i128 %xor, %bext
+  store i128 %sub, i128 *%aptr
+  ret void
+}
+
+; Check the high end of the SLGF range.
+define void @f5(i128 *%aptr, i32 *%bsrc) {
+; CHECK: f5:
+; CHECK: slgf {{%r[0-5]}}, 524284(%r3)
+; CHECK: slbgr
+; CHECK: br %r14
+  %a = load i128 *%aptr
+  %xor = xor i128 %a, 127
+  %ptr = getelementptr i32 *%bsrc, i64 131071
+  %b = load i32 *%ptr
+  %bext = zext i32 %b to i128
+  %sub = sub i128 %xor, %bext
+  store i128 %sub, i128 *%aptr
+  ret void
+}
+
+; Check the next word up, which must use separate address logic.
+; Other sequences besides this one would be OK.
+define void @f6(i128 *%aptr, i32 *%bsrc) {
+; CHECK: f6:
+; CHECK: agfi %r3, 524288
+; CHECK: slgf {{%r[0-5]}}, 0(%r3)
+; CHECK: slbgr
+; CHECK: br %r14
+  %a = load i128 *%aptr
+  %xor = xor i128 %a, 127
+  %ptr = getelementptr i32 *%bsrc, i64 131072
+  %b = load i32 *%ptr
+  %bext = zext i32 %b to i128
+  %sub = sub i128 %xor, %bext
+  store i128 %sub, i128 *%aptr
+  ret void
+}
+
+; Check the high end of the negative aligned SLGF range.
+define void @f7(i128 *%aptr, i32 *%bsrc) {
+; CHECK: f7:
+; CHECK: slgf {{%r[0-5]}}, -4(%r3)
+; CHECK: slbgr
+; CHECK: br %r14
+  %a = load i128 *%aptr
+  %xor = xor i128 %a, 127
+  %ptr = getelementptr i32 *%bsrc, i128 -1
+  %b = load i32 *%ptr
+  %bext = zext i32 %b to i128
+  %sub = sub i128 %xor, %bext
+  store i128 %sub, i128 *%aptr
+  ret void
+}
+
+; Check the low end of the SLGF range.
+define void @f8(i128 *%aptr, i32 *%bsrc) {
+; CHECK: f8:
+; CHECK: slgf {{%r[0-5]}}, -524288(%r3)
+; CHECK: slbgr
+; CHECK: br %r14
+  %a = load i128 *%aptr
+  %xor = xor i128 %a, 127
+  %ptr = getelementptr i32 *%bsrc, i128 -131072
+  %b = load i32 *%ptr
+  %bext = zext i32 %b to i128
+  %sub = sub i128 %xor, %bext
+  store i128 %sub, i128 *%aptr
+  ret void
+}
+
+; Check the next word down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define void @f9(i128 *%aptr, i32 *%bsrc) {
+; CHECK: f9:
+; CHECK: agfi %r3, -524292
+; CHECK: slgf {{%r[0-5]}}, 0(%r3)
+; CHECK: slbgr
+; CHECK: br %r14
+  %a = load i128 *%aptr
+  %xor = xor i128 %a, 127
+  %ptr = getelementptr i32 *%bsrc, i128 -131073
+  %b = load i32 *%ptr
+  %bext = zext i32 %b to i128
+  %sub = sub i128 %xor, %bext
+  store i128 %sub, i128 *%aptr
+  ret void
+}
+
+; Check that SLGF allows an index.
+define void @f10(i128 *%aptr, i64 %src, i64 %index) {
+; CHECK: f10:
+; CHECK: slgf {{%r[0-5]}}, 524284({{%r4,%r3|%r3,%r4}})
+; CHECK: br %r14
+  %a = load i128 *%aptr
+  %xor = xor i128 %a, 127
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 524284
+  %ptr = inttoptr i64 %add2 to i32 *
+  %b = load i32 *%ptr
+  %bext = zext i32 %b to i128
+  %sub = sub i128 %xor, %bext
+  store i128 %sub, i128 *%aptr
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/la-01.ll b/test/CodeGen/SystemZ/la-01.ll
new file mode 100644
index 0000000..b43e3f8
--- /dev/null
+++ b/test/CodeGen/SystemZ/la-01.ll
@@ -0,0 +1,80 @@
+; Test loads of symbolic addresses when generating small-model non-PIC.
+; All addresses can be treated as PC-relative.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+@e4 = external global i32
+@d4 = global i32 1
+@e2 = external global i32, align 2
+@d2 = global i32 1, align 2
+@e1 = external global i32, align 1
+@d1 = global i32 1, align 1
+
+declare void @ef()
+define void @df() {
+  ret void
+}
+
+; Test a load of a fully-aligned external variable.
+define i32 *@f1() {
+; CHECK: f1:
+; CHECK: larl %r2, e4
+; CHECK-NEXT: br %r14
+  ret i32 *@e4
+}
+
+; Test a load of a fully-aligned local variable.
+define i32 *@f2() {
+; CHECK: f2:
+; CHECK: larl %r2, d4
+; CHECK-NEXT: br %r14
+  ret i32 *@d4
+}
+
+; Test a load of a 2-byte-aligned external variable.
+define i32 *@f3() {
+; CHECK: f3:
+; CHECK: larl %r2, e2
+; CHECK-NEXT: br %r14
+  ret i32 *@e2
+}
+
+; Test a load of a 2-byte-aligned local variable.
+define i32 *@f4() {
+; CHECK: f4:
+; CHECK: larl %r2, d2
+; CHECK-NEXT: br %r14
+  ret i32 *@d2
+}
+
+; Test a load of an unaligned external variable, which must go via the GOT.
+define i32 *@f5() {
+; CHECK: f5:
+; CHECK: lgrl %r2, e1@GOT
+; CHECK-NEXT: br %r14
+  ret i32 *@e1
+}
+
+; Test a load of an unaligned local variable, which must go via the GOT.
+define i32 *@f6() {
+; CHECK: f6:
+; CHECK: lgrl %r2, d1@GOT
+; CHECK-NEXT: br %r14
+  ret i32 *@d1
+}
+
+; Test a load of an external function.
+define void() *@f7() {
+; CHECK: f7:
+; CHECK: larl %r2, ef
+; CHECK-NEXT: br %r14
+  ret void() *@ef
+}
+
+; Test a load of a local function.
+define void() *@f8() {
+; CHECK: f8:
+; CHECK: larl %r2, df
+; CHECK-NEXT: br %r14
+  ret void() *@df
+}
diff --git a/test/CodeGen/SystemZ/la-02.ll b/test/CodeGen/SystemZ/la-02.ll
new file mode 100644
index 0000000..4c5374a
--- /dev/null
+++ b/test/CodeGen/SystemZ/la-02.ll
@@ -0,0 +1,87 @@
+; Test loads of symbolic addresses when generating medium- and
+; large-model non-PIC.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -code-model=medium | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -code-model=large | FileCheck %s
+
+@ev = external global i32
+@dv = global i32 0
+@pv = protected global i32 0
+@hv = hidden global i32 0
+
+declare void @ef()
+define void @df() {
+  ret void
+}
+define protected void @pf() {
+  ret void
+}
+define hidden void @hf() {
+  ret void
+}
+
+; Test loads of external variables.  There is no guarantee that the
+; variable will be in range of LARL.
+define i32 *@f1() {
+; CHECK: f1:
+; CHECK: lgrl %r2, ev@GOT
+; CHECK: br %r14
+  ret i32 *@ev
+}
+
+; ...likewise locally-defined normal-visibility variables.
+define i32 *@f2() {
+; CHECK: f2:
+; CHECK: lgrl %r2, dv@GOT
+; CHECK: br %r14
+  ret i32 *@dv
+}
+
+; ...likewise protected variables.
+define i32 *@f3() {
+; CHECK: f3:
+; CHECK: lgrl %r2, pv@GOT
+; CHECK: br %r14
+  ret i32 *@pv
+}
+
+; ...likewise hidden variables.
+define i32 *@f4() {
+; CHECK: f4:
+; CHECK: lgrl %r2, hv@GOT
+; CHECK: br %r14
+  ret i32 *@hv
+}
+
+; Check loads of external functions.  This could use LARL, but we don't have
+; code to detect that yet.
+define void() *@f5() {
+; CHECK: f5:
+; CHECK: lgrl %r2, ef@GOT
+; CHECK: br %r14
+  ret void() *@ef
+}
+
+; ...likewise locally-defined normal-visibility functions.
+define void() *@f6() {
+; CHECK: f6:
+; CHECK: lgrl %r2, df@GOT
+; CHECK: br %r14
+  ret void() *@df
+}
+
+; ...likewise protected functions.
+define void() *@f7() {
+; CHECK: f7:
+; CHECK: lgrl %r2, pf@GOT
+; CHECK: br %r14
+  ret void() *@pf
+}
+
+; ...likewise hidden functions.
+define void() *@f8() {
+; CHECK: f8:
+; CHECK: lgrl %r2, hf@GOT
+; CHECK: br %r14
+  ret void() *@hf
+}
diff --git a/test/CodeGen/SystemZ/la-03.ll b/test/CodeGen/SystemZ/la-03.ll
new file mode 100644
index 0000000..9449b2b
--- /dev/null
+++ b/test/CodeGen/SystemZ/la-03.ll
@@ -0,0 +1,85 @@
+; Test loads of symbolic addresses in PIC code.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -relocation-model=pic | FileCheck %s
+
+@ev = external global i32
+@dv = global i32 0
+@pv = protected global i32 0
+@hv = hidden global i32 0
+
+declare void @ef()
+define void @df() {
+  ret void
+}
+define protected void @pf() {
+  ret void
+}
+define hidden void @hf() {
+  ret void
+}
+
+; Test loads of external variables, which must go via the GOT.
+define i32 *@f1() {
+; CHECK: f1:
+; CHECK: lgrl %r2, ev@GOT
+; CHECK: br %r14
+  ret i32 *@ev
+}
+
+; Check loads of locally-defined normal-visibility variables, which might
+; be overridden.  The load must go via the GOT.
+define i32 *@f2() {
+; CHECK: f2:
+; CHECK: lgrl %r2, dv@GOT
+; CHECK: br %r14
+  ret i32 *@dv
+}
+
+; Check loads of protected variables, which in the small code model
+; must be in range of LARL.
+define i32 *@f3() {
+; CHECK: f3:
+; CHECK: larl %r2, pv
+; CHECK: br %r14
+  ret i32 *@pv
+}
+
+; ...likewise hidden variables.
+define i32 *@f4() {
+; CHECK: f4:
+; CHECK: larl %r2, hv
+; CHECK: br %r14
+  ret i32 *@hv
+}
+
+; Like f1, but for functions.
+define void() *@f5() {
+; CHECK: f5:
+; CHECK: lgrl %r2, ef@GOT
+; CHECK: br %r14
+  ret void() *@ef
+}
+
+; Like f2, but for functions.
+define void() *@f6() {
+; CHECK: f6:
+; CHECK: lgrl %r2, df@GOT
+; CHECK: br %r14
+  ret void() *@df
+}
+
+; Like f3, but for functions.
+define void() *@f7() {
+; CHECK: f7:
+; CHECK: larl %r2, pf
+; CHECK: br %r14
+  ret void() *@pf
+}
+
+; Like f4, but for functions.
+define void() *@f8() {
+; CHECK: f8:
+; CHECK: larl %r2, hf
+; CHECK: br %r14
+  ret void() *@hf
+}
diff --git a/test/CodeGen/SystemZ/la-04.ll b/test/CodeGen/SystemZ/la-04.ll
new file mode 100644
index 0000000..4c36364
--- /dev/null
+++ b/test/CodeGen/SystemZ/la-04.ll
@@ -0,0 +1,18 @@
+; Test blockaddress.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Do some arbitrary work and return the address of the following label.
+define i8 *@f1(i8 *%addr) {
+; CHECK: f1:
+; CHECK: mvi 0(%r2), 1
+; CHECK: [[LABEL:\.L.*]]:
+; CHECK: larl %r2, [[LABEL]]
+; CHECK: br %r14
+entry:
+  store i8 1, i8 *%addr
+  br label %b.lab
+
+b.lab:
+  ret i8 *blockaddress(@f1, %b.lab)
+}
diff --git a/test/CodeGen/SystemZ/lit.local.cfg b/test/CodeGen/SystemZ/lit.local.cfg
new file mode 100644
index 0000000..79528d1
--- /dev/null
+++ b/test/CodeGen/SystemZ/lit.local.cfg
@@ -0,0 +1,8 @@
+# File suffixes that lit treats as tests in this directory.
+config.suffixes = ['.ll', '.c', '.cpp']
+
+# Skip every test here unless the SystemZ backend is among the built targets.
+built_targets = set(config.root.targets_to_build.split())
+if 'SystemZ' not in built_targets:
+    config.unsupported = True
+
diff --git a/test/CodeGen/SystemZ/or-01.ll b/test/CodeGen/SystemZ/or-01.ll
new file mode 100644
index 0000000..20c9312
--- /dev/null
+++ b/test/CodeGen/SystemZ/or-01.ll
@@ -0,0 +1,129 @@
+; Test 32-bit ORs in which the second operand is variable.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check OR.
+define i32 @f1(i32 %a, i32 %b) {
+; CHECK: f1:
+; CHECK: or %r2, %r3
+; CHECK: br %r14
+  %or = or i32 %a, %b
+  ret i32 %or
+}
+
+; Check the low end of the O range.
+define i32 @f2(i32 %a, i32 *%src) {
+; CHECK: f2:
+; CHECK: o %r2, 0(%r3)
+; CHECK: br %r14
+  %b = load i32 *%src
+  %or = or i32 %a, %b
+  ret i32 %or
+}
+
+; Check the high end of the aligned O range.
+define i32 @f3(i32 %a, i32 *%src) {
+; CHECK: f3:
+; CHECK: o %r2, 4092(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 1023
+  %b = load i32 *%ptr
+  %or = or i32 %a, %b
+  ret i32 %or
+}
+
+; Check the next word up, which should use OY instead of O.
+define i32 @f4(i32 %a, i32 *%src) {
+; CHECK: f4:
+; CHECK: oy %r2, 4096(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 1024
+  %b = load i32 *%ptr
+  %or = or i32 %a, %b
+  ret i32 %or
+}
+
+; Check the high end of the aligned OY range.
+define i32 @f5(i32 %a, i32 *%src) {
+; CHECK: f5:
+; CHECK: oy %r2, 524284(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 131071
+  %b = load i32 *%ptr
+  %or = or i32 %a, %b
+  ret i32 %or
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i32 @f6(i32 %a, i32 *%src) {
+; CHECK: f6:
+; CHECK: agfi %r3, 524288
+; CHECK: o %r2, 0(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 131072
+  %b = load i32 *%ptr
+  %or = or i32 %a, %b
+  ret i32 %or
+}
+
+; Check the high end of the negative aligned OY range.
+define i32 @f7(i32 %a, i32 *%src) {
+; CHECK: f7:
+; CHECK: oy %r2, -4(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 -1
+  %b = load i32 *%ptr
+  %or = or i32 %a, %b
+  ret i32 %or
+}
+
+; Check the low end of the OY range.
+define i32 @f8(i32 %a, i32 *%src) {
+; CHECK: f8:
+; CHECK: oy %r2, -524288(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 -131072
+  %b = load i32 *%ptr
+  %or = or i32 %a, %b
+  ret i32 %or
+}
+
+; Check the next word down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i32 @f9(i32 %a, i32 *%src) {
+; CHECK: f9:
+; CHECK: agfi %r3, -524292
+; CHECK: o %r2, 0(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i32 *%src, i64 -131073
+  %b = load i32 *%ptr
+  %or = or i32 %a, %b
+  ret i32 %or
+}
+
+; Check that O allows an index.
+define i32 @f10(i32 %a, i64 %src, i64 %index) {
+; CHECK: f10:
+; CHECK: o %r2, 4092({{%r4,%r3|%r3,%r4}})
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 4092
+  %ptr = inttoptr i64 %add2 to i32 *
+  %b = load i32 *%ptr
+  %or = or i32 %a, %b
+  ret i32 %or
+}
+
+; Check that OY allows an index.
+define i32 @f11(i32 %a, i64 %src, i64 %index) {
+; CHECK: f11:
+; CHECK: oy %r2, 4096({{%r4,%r3|%r3,%r4}})
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 4096
+  %ptr = inttoptr i64 %add2 to i32 *
+  %b = load i32 *%ptr
+  %or = or i32 %a, %b
+  ret i32 %or
+}
diff --git a/test/CodeGen/SystemZ/or-02.ll b/test/CodeGen/SystemZ/or-02.ll
new file mode 100644
index 0000000..377a3e6
--- /dev/null
+++ b/test/CodeGen/SystemZ/or-02.ll
@@ -0,0 +1,66 @@
+; Test 32-bit ORs in which the second operand is constant.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check the lowest useful OILL value.
+define i32 @f1(i32 %a) {
+; CHECK: f1:
+; CHECK: oill %r2, 1
+; CHECK: br %r14
+  %or = or i32 %a, 1
+  ret i32 %or
+}
+
+; Check the high end of the OILL range.
+define i32 @f2(i32 %a) {
+; CHECK: f2:
+; CHECK: oill %r2, 65535
+; CHECK: br %r14
+  %or = or i32 %a, 65535
+  ret i32 %or
+}
+
+; Check the lowest useful OILH value, which is the next value up.
+define i32 @f3(i32 %a) {
+; CHECK: f3:
+; CHECK: oilh %r2, 1
+; CHECK: br %r14
+  %or = or i32 %a, 65536
+  ret i32 %or
+}
+
+; Check the lowest useful OILF value, which is the next value up again.
+define i32 @f4(i32 %a) {
+; CHECK: f4:
+; CHECK: oilf %r2, 65537
+; CHECK: br %r14
+  %or = or i32 %a, 65537
+  ret i32 %or
+}
+
+; Check the high end of the OILH range.
+define i32 @f5(i32 %a) {
+; CHECK: f5:
+; CHECK: oilh %r2, 65535
+; CHECK: br %r14
+  %or = or i32 %a, -65536
+  ret i32 %or
+}
+
+; Check the next value up, which must use OILF instead.
+define i32 @f6(i32 %a) {
+; CHECK: f6:
+; CHECK: oilf %r2, 4294901761
+; CHECK: br %r14
+  %or = or i32 %a, -65535
+  ret i32 %or
+}
+
+; Check the highest useful OILF value.
+define i32 @f7(i32 %a) {
+; CHECK: f7:
+; CHECK: oilf %r2, 4294967294
+; CHECK: br %r14
+  %or = or i32 %a, -2
+  ret i32 %or
+}
diff --git a/test/CodeGen/SystemZ/or-03.ll b/test/CodeGen/SystemZ/or-03.ll
new file mode 100644
index 0000000..16f84f1
--- /dev/null
+++ b/test/CodeGen/SystemZ/or-03.ll
@@ -0,0 +1,94 @@
+; Test 64-bit ORs in which the second operand is variable.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check OGR.
+define i64 @f1(i64 %a, i64 %b) {
+; CHECK: f1:
+; CHECK: ogr %r2, %r3
+; CHECK: br %r14
+  %or = or i64 %a, %b
+  ret i64 %or
+}
+
+; Check OG with no displacement.
+define i64 @f2(i64 %a, i64 *%src) {
+; CHECK: f2:
+; CHECK: og %r2, 0(%r3)
+; CHECK: br %r14
+  %b = load i64 *%src
+  %or = or i64 %a, %b
+  ret i64 %or
+}
+
+; Check the high end of the aligned OG range.
+define i64 @f3(i64 %a, i64 *%src) {
+; CHECK: f3:
+; CHECK: og %r2, 524280(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 65535
+  %b = load i64 *%ptr
+  %or = or i64 %a, %b
+  ret i64 %or
+}
+
+; Check the next doubleword up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f4(i64 %a, i64 *%src) {
+; CHECK: f4:
+; CHECK: agfi %r3, 524288
+; CHECK: og %r2, 0(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 65536
+  %b = load i64 *%ptr
+  %or = or i64 %a, %b
+  ret i64 %or
+}
+
+; Check the high end of the negative aligned OG range.
+define i64 @f5(i64 %a, i64 *%src) {
+; CHECK: f5:
+; CHECK: og %r2, -8(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 -1
+  %b = load i64 *%ptr
+  %or = or i64 %a, %b
+  ret i64 %or
+}
+
+; Check the low end of the OG range.
+define i64 @f6(i64 %a, i64 *%src) {
+; CHECK: f6:
+; CHECK: og %r2, -524288(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 -65536
+  %b = load i64 *%ptr
+  %or = or i64 %a, %b
+  ret i64 %or
+}
+
+; Check the next doubleword down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f7(i64 %a, i64 *%src) {
+; CHECK: f7:
+; CHECK: agfi %r3, -524296
+; CHECK: og %r2, 0(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 -65537
+  %b = load i64 *%ptr
+  %or = or i64 %a, %b
+  ret i64 %or
+}
+
+; Check that OG allows an index.
+define i64 @f8(i64 %a, i64 %src, i64 %index) {
+; CHECK: f8:
+; CHECK: og %r2, 524280({{%r4,%r3|%r3,%r4}})
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 524280
+  %ptr = inttoptr i64 %add2 to i64 *
+  %b = load i64 *%ptr
+  %or = or i64 %a, %b
+  ret i64 %or
+}
diff --git a/test/CodeGen/SystemZ/or-04.ll b/test/CodeGen/SystemZ/or-04.ll
new file mode 100644
index 0000000..a827842
--- /dev/null
+++ b/test/CodeGen/SystemZ/or-04.ll
@@ -0,0 +1,182 @@
+; Test 64-bit ORs in which the second operand is constant.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check the lowest useful OILL value.
+define i64 @f1(i64 %a) {
+; CHECK: f1:
+; CHECK: oill %r2, 1
+; CHECK: br %r14
+  %or = or i64 %a, 1
+  ret i64 %or
+}
+
+; Check the high end of the OILL range.
+define i64 @f2(i64 %a) {
+; CHECK: f2:
+; CHECK: oill %r2, 65535
+; CHECK: br %r14
+  %or = or i64 %a, 65535
+  ret i64 %or
+}
+
+; Check the lowest useful OILH value, which is the next value up.
+define i64 @f3(i64 %a) {
+; CHECK: f3:
+; CHECK: oilh %r2, 1
+; CHECK: br %r14
+  %or = or i64 %a, 65536
+  ret i64 %or
+}
+
+; Check the OILF value just below the high end of the OILH range.
+define i64 @f4(i64 %a) {
+; CHECK: f4:
+; CHECK: oilf %r2, 4294901759
+; CHECK: br %r14
+  %or = or i64 %a, 4294901759
+  ret i64 %or
+}
+
+; Check the high end of the OILH range.
+define i64 @f5(i64 %a) {
+; CHECK: f5:
+; CHECK: oilh %r2, 65535
+; CHECK: br %r14
+  %or = or i64 %a, 4294901760
+  ret i64 %or
+}
+
+; Check the high end of the OILF range.
+define i64 @f6(i64 %a) {
+; CHECK: f6:
+; CHECK: oilf %r2, 4294967295
+; CHECK: br %r14
+  %or = or i64 %a, 4294967295
+  ret i64 %or
+}
+
+; Check the lowest useful OIHL value, which is the next value up.
+define i64 @f7(i64 %a) {
+; CHECK: f7:
+; CHECK: oihl %r2, 1
+; CHECK: br %r14
+  %or = or i64 %a, 4294967296
+  ret i64 %or
+}
+
+; Check the next value up again, which must use two ORs.
+define i64 @f8(i64 %a) {
+; CHECK: f8:
+; CHECK: oihl %r2, 1
+; CHECK: oill %r2, 1
+; CHECK: br %r14
+  %or = or i64 %a, 4294967297
+  ret i64 %or
+}
+
+; Check the high end of the OILL range.
+define i64 @f9(i64 %a) {
+; CHECK: f9:
+; CHECK: oihl %r2, 1
+; CHECK: oill %r2, 65535
+; CHECK: br %r14
+  %or = or i64 %a, 4295032831
+  ret i64 %or
+}
+
+; Check the next value up, which must use OILH
+define i64 @f10(i64 %a) {
+; CHECK: f10:
+; CHECK: oihl %r2, 1
+; CHECK: oilh %r2, 1
+; CHECK: br %r14
+  %or = or i64 %a, 4295032832
+  ret i64 %or
+}
+
+; Check the next value up again, which must use OILF
+define i64 @f11(i64 %a) {
+; CHECK: f11:
+; CHECK: oihl %r2, 1
+; CHECK: oilf %r2, 65537
+; CHECK: br %r14
+  %or = or i64 %a, 4295032833
+  ret i64 %or
+}
+
+; Check the high end of the OIHL range.
+define i64 @f12(i64 %a) {
+; CHECK: f12:
+; CHECK: oihl %r2, 65535
+; CHECK: br %r14
+  %or = or i64 %a, 281470681743360
+  ret i64 %or
+}
+
+; Check a combination of the high end of the OIHL range and the high end
+; of the OILF range.
+define i64 @f13(i64 %a) {
+; CHECK: f13:
+; CHECK: oihl %r2, 65535
+; CHECK: oilf %r2, 4294967295
+; CHECK: br %r14
+  %or = or i64 %a, 281474976710655
+  ret i64 %or
+}
+
+; Check the lowest useful OIHH value.
+define i64 @f14(i64 %a) {
+; CHECK: f14:
+; CHECK: oihh %r2, 1
+; CHECK: br %r14
+  %or = or i64 %a, 281474976710656
+  ret i64 %or
+}
+
+; Check the next value up, which needs two ORs.
+define i64 @f15(i64 %a) {
+; CHECK: f15:
+; CHECK: oihh %r2, 1
+; CHECK: oill %r2, 1
+; CHECK: br %r14
+  %or = or i64 %a, 281474976710657
+  ret i64 %or
+}
+
+; Check the lowest useful OIHF value.
+define i64 @f16(i64 %a) {
+; CHECK: f16:
+; CHECK: oihf %r2, 65537
+; CHECK: br %r14
+  %or = or i64 %a, 281479271677952
+  ret i64 %or
+}
+
+; Check the high end of the OIHH range.
+define i64 @f17(i64 %a) {
+; CHECK: f17:
+; CHECK: oihh %r2, 65535
+; CHECK: br %r14
+  %or = or i64 %a, 18446462598732840960
+  ret i64 %or
+}
+
+; Check the high end of the OIHF range.
+define i64 @f18(i64 %a) {
+; CHECK: f18:
+; CHECK: oihf %r2, 4294967295
+; CHECK: br %r14
+  %or = or i64 %a, -4294967296
+  ret i64 %or
+}
+
+; Check the highest useful OR value.
+define i64 @f19(i64 %a) {
+; CHECK: f19:
+; CHECK: oihf %r2, 4294967295
+; CHECK: oilf %r2, 4294967294
+; CHECK: br %r14
+  %or = or i64 %a, -2
+  ret i64 %or
+}
diff --git a/test/CodeGen/SystemZ/or-05.ll b/test/CodeGen/SystemZ/or-05.ll
new file mode 100644
index 0000000..9b6c10d
--- /dev/null
+++ b/test/CodeGen/SystemZ/or-05.ll
@@ -0,0 +1,165 @@
+; Test ORs of a constant into a byte of memory.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check the lowest useful constant, expressed as a signed integer.
+define void @f1(i8 *%ptr) {
+; CHECK: f1:
+; CHECK: oi 0(%r2), 1
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %or = or i8 %val, -255
+  store i8 %or, i8 *%ptr
+  ret void
+}
+
+; Check the highest useful constant, expressed as a signed integer.
+define void @f2(i8 *%ptr) {
+; CHECK: f2:
+; CHECK: oi 0(%r2), 254
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %or = or i8 %val, -2
+  store i8 %or, i8 *%ptr
+  ret void
+}
+
+; Check the lowest useful constant, expressed as an unsigned integer.
+define void @f3(i8 *%ptr) {
+; CHECK: f3:
+; CHECK: oi 0(%r2), 1
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %or = or i8 %val, 1
+  store i8 %or, i8 *%ptr
+  ret void
+}
+
+; Check the highest useful constant, expressed as an unsigned integer.
+define void @f4(i8 *%ptr) {
+; CHECK: f4:
+; CHECK: oi 0(%r2), 254
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %or = or i8 %val, 254
+  store i8 %or, i8 *%ptr
+  ret void
+}
+
+; Check the high end of the OI range.
+define void @f5(i8 *%src) {
+; CHECK: f5:
+; CHECK: oi 4095(%r2), 127
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 4095
+  %val = load i8 *%ptr
+  %or = or i8 %val, 127
+  store i8 %or, i8 *%ptr
+  ret void
+}
+
+; Check the next byte up, which should use OIY instead of OI.
+define void @f6(i8 *%src) {
+; CHECK: f6:
+; CHECK: oiy 4096(%r2), 127
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 4096
+  %val = load i8 *%ptr
+  %or = or i8 %val, 127
+  store i8 %or, i8 *%ptr
+  ret void
+}
+
+; Check the high end of the OIY range.
+define void @f7(i8 *%src) {
+; CHECK: f7:
+; CHECK: oiy 524287(%r2), 127
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 524287
+  %val = load i8 *%ptr
+  %or = or i8 %val, 127
+  store i8 %or, i8 *%ptr
+  ret void
+}
+
+; Check the next byte up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define void @f8(i8 *%src) {
+; CHECK: f8:
+; CHECK: agfi %r2, 524288
+; CHECK: oi 0(%r2), 127
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 524288
+  %val = load i8 *%ptr
+  %or = or i8 %val, 127
+  store i8 %or, i8 *%ptr
+  ret void
+}
+
+; Check the high end of the negative OIY range.
+define void @f9(i8 *%src) {
+; CHECK: f9:
+; CHECK: oiy -1(%r2), 127
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 -1
+  %val = load i8 *%ptr
+  %or = or i8 %val, 127
+  store i8 %or, i8 *%ptr
+  ret void
+}
+
+; Check the low end of the OIY range.
+define void @f10(i8 *%src) {
+; CHECK: f10:
+; CHECK: oiy -524288(%r2), 127
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 -524288
+  %val = load i8 *%ptr
+  %or = or i8 %val, 127
+  store i8 %or, i8 *%ptr
+  ret void
+}
+
+; Check the next byte down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define void @f11(i8 *%src) {
+; CHECK: f11:
+; CHECK: agfi %r2, -524289
+; CHECK: oi 0(%r2), 127
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 -524289
+  %val = load i8 *%ptr
+  %or = or i8 %val, 127
+  store i8 %or, i8 *%ptr
+  ret void
+}
+
+; Check that OI does not allow an index
+define void @f12(i64 %src, i64 %index) {
+; CHECK: f12:
+; CHECK: agr %r2, %r3
+; CHECK: oi 4095(%r2), 127
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 4095
+  %ptr = inttoptr i64 %add2 to i8 *
+  %val = load i8 *%ptr
+  %or = or i8 %val, 127
+  store i8 %or, i8 *%ptr
+  ret void
+}
+
+; Check that OIY does not allow an index
+define void @f13(i64 %src, i64 %index) {
+; CHECK: f13:
+; CHECK: agr %r2, %r3
+; CHECK: oiy 4096(%r2), 127
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 4096
+  %ptr = inttoptr i64 %add2 to i8 *
+  %val = load i8 *%ptr
+  %or = or i8 %val, 127
+  store i8 %or, i8 *%ptr
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/or-06.ll b/test/CodeGen/SystemZ/or-06.ll
new file mode 100644
index 0000000..a24a18a
--- /dev/null
+++ b/test/CodeGen/SystemZ/or-06.ll
@@ -0,0 +1,108 @@
+; Test that we can use OI for byte operations that are expressed as i32
+; or i64 operations.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Zero extension to 32 bits, negative constant.
+define void @f1(i8 *%ptr) {
+; CHECK: f1:
+; CHECK: oi 0(%r2), 254
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = zext i8 %val to i32
+  %or = or i32 %ext, -2
+  %trunc = trunc i32 %or to i8
+  store i8 %trunc, i8 *%ptr
+  ret void
+}
+
+; Zero extension to 64 bits, negative constant.
+define void @f2(i8 *%ptr) {
+; CHECK: f2:
+; CHECK: oi 0(%r2), 254
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = zext i8 %val to i64
+  %or = or i64 %ext, -2
+  %trunc = trunc i64 %or to i8
+  store i8 %trunc, i8 *%ptr
+  ret void
+}
+
+; Zero extension to 32 bits, positive constant.
+define void @f3(i8 *%ptr) {
+; CHECK: f3:
+; CHECK: oi 0(%r2), 254
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = zext i8 %val to i32
+  %or = or i32 %ext, 254
+  %trunc = trunc i32 %or to i8
+  store i8 %trunc, i8 *%ptr
+  ret void
+}
+
+; Zero extension to 64 bits, positive constant.
+define void @f4(i8 *%ptr) {
+; CHECK: f4:
+; CHECK: oi 0(%r2), 254
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = zext i8 %val to i64
+  %or = or i64 %ext, 254
+  %trunc = trunc i64 %or to i8
+  store i8 %trunc, i8 *%ptr
+  ret void
+}
+
+; Sign extension to 32 bits, negative constant.
+define void @f5(i8 *%ptr) {
+; CHECK: f5:
+; CHECK: oi 0(%r2), 254
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = sext i8 %val to i32
+  %or = or i32 %ext, -2
+  %trunc = trunc i32 %or to i8
+  store i8 %trunc, i8 *%ptr
+  ret void
+}
+
+; Sign extension to 64 bits, negative constant.
+define void @f6(i8 *%ptr) {
+; CHECK: f6:
+; CHECK: oi 0(%r2), 254
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = sext i8 %val to i64
+  %or = or i64 %ext, -2
+  %trunc = trunc i64 %or to i8
+  store i8 %trunc, i8 *%ptr
+  ret void
+}
+
+; Sign extension to 32 bits, positive constant.
+define void @f7(i8 *%ptr) {
+; CHECK: f7:
+; CHECK: oi 0(%r2), 254
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = sext i8 %val to i32
+  %or = or i32 %ext, 254
+  %trunc = trunc i32 %or to i8
+  store i8 %trunc, i8 *%ptr
+  ret void
+}
+
+; Sign extension to 64 bits, positive constant.
+define void @f8(i8 *%ptr) {
+; CHECK: f8:
+; CHECK: oi 0(%r2), 254
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = sext i8 %val to i64
+  %or = or i64 %ext, 254
+  %trunc = trunc i64 %or to i8
+  store i8 %trunc, i8 *%ptr
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/shift-01.ll b/test/CodeGen/SystemZ/shift-01.ll
new file mode 100644
index 0000000..e5a459a
--- /dev/null
+++ b/test/CodeGen/SystemZ/shift-01.ll
@@ -0,0 +1,114 @@
+; Test 32-bit shifts left.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check the low end of the SLL range.
+define i32 @f1(i32 %a) {
+; CHECK: f1:
+; CHECK: sll %r2, 1
+; CHECK: br %r14
+  %shift = shl i32 %a, 1
+  ret i32 %shift
+}
+
+; Check the high end of the defined SLL range.
+define i32 @f2(i32 %a) {
+; CHECK: f2:
+; CHECK: sll %r2, 31
+; CHECK: br %r14
+  %shift = shl i32 %a, 31
+  ret i32 %shift
+}
+
+; We don't generate shifts by out-of-range values.
+define i32 @f3(i32 %a) {
+; CHECK: f3:
+; CHECK-NOT: sll %r2, 32
+; CHECK: br %r14
+  %shift = shl i32 %a, 32
+  ret i32 %shift
+}
+
+; Make sure that we don't generate negative shift amounts.
+define i32 @f4(i32 %a, i32 %amt) {
+; CHECK: f4:
+; CHECK-NOT: sll %r2, -1{{.*}}
+; CHECK: br %r14
+  %sub = sub i32 %amt, 1
+  %shift = shl i32 %a, %sub
+  ret i32 %shift
+}
+
+; Check variable shifts.
+define i32 @f5(i32 %a, i32 %amt) {
+; CHECK: f5:
+; CHECK: sll %r2, 0(%r3)
+; CHECK: br %r14
+  %shift = shl i32 %a, %amt
+  ret i32 %shift
+}
+
+; Check shift amounts that have a constant term.
+define i32 @f6(i32 %a, i32 %amt) {
+; CHECK: f6:
+; CHECK: sll %r2, 10(%r3)
+; CHECK: br %r14
+  %add = add i32 %amt, 10
+  %shift = shl i32 %a, %add
+  ret i32 %shift
+}
+
+; ...and again with a truncated 64-bit shift amount.
+define i32 @f7(i32 %a, i64 %amt) {
+; CHECK: f7:
+; CHECK: sll %r2, 10(%r3)
+; CHECK: br %r14
+  %add = add i64 %amt, 10
+  %trunc = trunc i64 %add to i32
+  %shift = shl i32 %a, %trunc
+  ret i32 %shift
+}
+
+; Check shift amounts that have the largest in-range constant term.  We could
+; mask the amount instead.
+define i32 @f8(i32 %a, i32 %amt) {
+; CHECK: f8:
+; CHECK: sll %r2, 4095(%r3)
+; CHECK: br %r14
+  %add = add i32 %amt, 4095
+  %shift = shl i32 %a, %add
+  ret i32 %shift
+}
+
+; Check the next value up.  Again, we could mask the amount instead.
+define i32 @f9(i32 %a, i32 %amt) {
+; CHECK: f9:
+; CHECK: ahi %r3, 4096
+; CHECK: sll %r2, 0(%r3)
+; CHECK: br %r14
+  %add = add i32 %amt, 4096
+  %shift = shl i32 %a, %add
+  ret i32 %shift
+}
+
+; Check that we don't try to generate "indexed" shifts.
+define i32 @f10(i32 %a, i32 %b, i32 %c) {
+; CHECK: f10:
+; CHECK: ar {{%r3, %r4|%r4, %r3}}
+; CHECK: sll %r2, 0({{%r[34]}})
+; CHECK: br %r14
+  %add = add i32 %b, %c
+  %shift = shl i32 %a, %add
+  ret i32 %shift
+}
+
+; Check that the shift amount uses an address register.  It cannot be in %r0.
+define i32 @f11(i32 %a, i32 *%ptr) {
+; CHECK: f11:
+; CHECK: l %r1, 0(%r3)
+; CHECK: sll %r2, 0(%r1)
+; CHECK: br %r14
+  %amt = load i32 *%ptr
+  %shift = shl i32 %a, %amt
+  ret i32 %shift
+}
diff --git a/test/CodeGen/SystemZ/shift-02.ll b/test/CodeGen/SystemZ/shift-02.ll
new file mode 100644
index 0000000..38093a8
--- /dev/null
+++ b/test/CodeGen/SystemZ/shift-02.ll
@@ -0,0 +1,114 @@
+; Test 32-bit logical shifts right.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Smallest useful immediate amount for SRL.
+define i32 @f1(i32 %a) {
+; CHECK: f1:
+; CHECK: srl %r2, 1
+; CHECK: br %r14
+  %res = lshr i32 %a, 1
+  ret i32 %res
+}
+
+; Largest immediate amount that is still defined for a 32-bit SRL.
+define i32 @f2(i32 %a) {
+; CHECK: f2:
+; CHECK: srl %r2, 31
+; CHECK: br %r14
+  %res = lshr i32 %a, 31
+  ret i32 %res
+}
+
+; An amount of 32 is out of range and must not appear as an SRL immediate.
+define i32 @f3(i32 %a) {
+; CHECK: f3:
+; CHECK-NOT: srl %r2, 32
+; CHECK: br %r14
+  %res = lshr i32 %a, 32
+  ret i32 %res
+}
+
+; Subtracting 1 from the amount must not be emitted as a -1 displacement.
+define i32 @f4(i32 %a, i32 %amt) {
+; CHECK: f4:
+; CHECK-NOT: srl %r2, -1{{.*}}
+; CHECK: br %r14
+  %dec = sub i32 %amt, 1
+  %res = lshr i32 %a, %dec
+  ret i32 %res
+}
+
+; A plain register amount becomes the base of a zero-displacement operand.
+define i32 @f5(i32 %a, i32 %amt) {
+; CHECK: f5:
+; CHECK: srl %r2, 0(%r3)
+; CHECK: br %r14
+  %res = lshr i32 %a, %amt
+  ret i32 %res
+}
+
+; A small constant added to the amount is folded into the displacement.
+define i32 @f6(i32 %a, i32 %amt) {
+; CHECK: f6:
+; CHECK: srl %r2, 10(%r3)
+; CHECK: br %r14
+  %sum = add i32 %amt, 10
+  %res = lshr i32 %a, %sum
+  ret i32 %res
+}
+
+; The same folding applies when a 64-bit sum is truncated to 32 bits.
+define i32 @f7(i32 %a, i64 %amt) {
+; CHECK: f7:
+; CHECK: srl %r2, 10(%r3)
+; CHECK: br %r14
+  %sum = add i64 %amt, 10
+  %narrow = trunc i64 %sum to i32
+  %res = lshr i32 %a, %narrow
+  ret i32 %res
+}
+
+; 4095 is the largest constant term that still fits in the displacement.
+; (Masking the amount instead would also be acceptable.)
+define i32 @f8(i32 %a, i32 %amt) {
+; CHECK: f8:
+; CHECK: srl %r2, 4095(%r3)
+; CHECK: br %r14
+  %sum = add i32 %amt, 4095
+  %res = lshr i32 %a, %sum
+  ret i32 %res
+}
+
+; 4096 does not fit, so a separate AHI is expected (masking would also work).
+define i32 @f9(i32 %a, i32 %amt) {
+; CHECK: f9:
+; CHECK: ahi %r3, 4096
+; CHECK: srl %r2, 0(%r3)
+; CHECK: br %r14
+  %sum = add i32 %amt, 4096
+  %res = lshr i32 %a, %sum
+  ret i32 %res
+}
+
+; A sum of two registers is computed with AR rather than used as base+index.
+define i32 @f10(i32 %a, i32 %b, i32 %c) {
+; CHECK: f10:
+; CHECK: ar {{%r3, %r4|%r4, %r3}}
+; CHECK: srl %r2, 0({{%r[34]}})
+; CHECK: br %r14
+  %sum = add i32 %b, %c
+  %res = lshr i32 %a, %sum
+  ret i32 %res
+}
+
+; The loaded amount has to be in an address register, so never in %r0.
+define i32 @f11(i32 %a, i32 *%ptr) {
+; CHECK: f11:
+; CHECK: l %r1, 0(%r3)
+; CHECK: srl %r2, 0(%r1)
+; CHECK: br %r14
+  %count = load i32 *%ptr
+  %res = lshr i32 %a, %count
+  ret i32 %res
+}
diff --git a/test/CodeGen/SystemZ/shift-03.ll b/test/CodeGen/SystemZ/shift-03.ll
new file mode 100644
index 0000000..ca510f3
--- /dev/null
+++ b/test/CodeGen/SystemZ/shift-03.ll
@@ -0,0 +1,114 @@
+; Test 32-bit arithmetic shifts right.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Smallest useful immediate amount for SRA.
+define i32 @f1(i32 %a) {
+; CHECK: f1:
+; CHECK: sra %r2, 1
+; CHECK: br %r14
+  %res = ashr i32 %a, 1
+  ret i32 %res
+}
+
+; Largest immediate amount that is still defined for a 32-bit SRA.
+define i32 @f2(i32 %a) {
+; CHECK: f2:
+; CHECK: sra %r2, 31
+; CHECK: br %r14
+  %res = ashr i32 %a, 31
+  ret i32 %res
+}
+
+; An amount of 32 is out of range and must not appear as an SRA immediate.
+define i32 @f3(i32 %a) {
+; CHECK: f3:
+; CHECK-NOT: sra %r2, 32
+; CHECK: br %r14
+  %res = ashr i32 %a, 32
+  ret i32 %res
+}
+
+; Subtracting 1 from the amount must not be emitted as a -1 displacement.
+define i32 @f4(i32 %a, i32 %amt) {
+; CHECK: f4:
+; CHECK-NOT: sra %r2, -1{{.*}}
+; CHECK: br %r14
+  %dec = sub i32 %amt, 1
+  %res = ashr i32 %a, %dec
+  ret i32 %res
+}
+
+; A plain register amount becomes the base of a zero-displacement operand.
+define i32 @f5(i32 %a, i32 %amt) {
+; CHECK: f5:
+; CHECK: sra %r2, 0(%r3)
+; CHECK: br %r14
+  %res = ashr i32 %a, %amt
+  ret i32 %res
+}
+
+; A small constant added to the amount is folded into the displacement.
+define i32 @f6(i32 %a, i32 %amt) {
+; CHECK: f6:
+; CHECK: sra %r2, 10(%r3)
+; CHECK: br %r14
+  %sum = add i32 %amt, 10
+  %res = ashr i32 %a, %sum
+  ret i32 %res
+}
+
+; The same folding applies when a 64-bit sum is truncated to 32 bits.
+define i32 @f7(i32 %a, i64 %amt) {
+; CHECK: f7:
+; CHECK: sra %r2, 10(%r3)
+; CHECK: br %r14
+  %sum = add i64 %amt, 10
+  %narrow = trunc i64 %sum to i32
+  %res = ashr i32 %a, %narrow
+  ret i32 %res
+}
+
+; 4095 is the largest constant term that still fits in the displacement.
+; (Masking the amount instead would also be acceptable.)
+define i32 @f8(i32 %a, i32 %amt) {
+; CHECK: f8:
+; CHECK: sra %r2, 4095(%r3)
+; CHECK: br %r14
+  %sum = add i32 %amt, 4095
+  %res = ashr i32 %a, %sum
+  ret i32 %res
+}
+
+; 4096 does not fit, so a separate AHI is expected (masking would also work).
+define i32 @f9(i32 %a, i32 %amt) {
+; CHECK: f9:
+; CHECK: ahi %r3, 4096
+; CHECK: sra %r2, 0(%r3)
+; CHECK: br %r14
+  %sum = add i32 %amt, 4096
+  %res = ashr i32 %a, %sum
+  ret i32 %res
+}
+
+; A sum of two registers is computed with AR rather than used as base+index.
+define i32 @f10(i32 %a, i32 %b, i32 %c) {
+; CHECK: f10:
+; CHECK: ar {{%r3, %r4|%r4, %r3}}
+; CHECK: sra %r2, 0({{%r[34]}})
+; CHECK: br %r14
+  %sum = add i32 %b, %c
+  %res = ashr i32 %a, %sum
+  ret i32 %res
+}
+
+; The loaded amount has to be in an address register, so never in %r0.
+define i32 @f11(i32 %a, i32 *%ptr) {
+; CHECK: f11:
+; CHECK: l %r1, 0(%r3)
+; CHECK: sra %r2, 0(%r1)
+; CHECK: br %r14
+  %count = load i32 *%ptr
+  %res = ashr i32 %a, %count
+  ret i32 %res
+}
diff --git a/test/CodeGen/SystemZ/shift-04.ll b/test/CodeGen/SystemZ/shift-04.ll
new file mode 100644
index 0000000..0146a86
--- /dev/null
+++ b/test/CodeGen/SystemZ/shift-04.ll
@@ -0,0 +1,189 @@
+; Test 32-bit rotates left.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Smallest useful immediate amount for RLL.
+define i32 @f1(i32 %a) {
+; CHECK: f1:
+; CHECK: rll %r2, %r2, 1
+; CHECK: br %r14
+  %hi = shl i32 %a, 1
+  %lo = lshr i32 %a, 31
+  %rot = or i32 %hi, %lo
+  ret i32 %rot
+}
+
+; Largest immediate amount that is still defined for a 32-bit RLL.
+define i32 @f2(i32 %a) {
+; CHECK: f2:
+; CHECK: rll %r2, %r2, 31
+; CHECK: br %r14
+  %hi = shl i32 %a, 31
+  %lo = lshr i32 %a, 1
+  %rot = or i32 %hi, %lo
+  ret i32 %rot
+}
+
+; An amount of 32 is out of range, so no RLL may be emitted.
+define i32 @f3(i32 %a) {
+; CHECK: f3:
+; CHECK-NOT: rll
+; CHECK: br %r14
+  %hi = shl i32 %a, 32
+  %lo = lshr i32 %a, 0
+  %rot = or i32 %hi, %lo
+  ret i32 %rot
+}
+
+; A plain register amount becomes the base of a zero-displacement operand.
+define i32 @f4(i32 %a, i32 %amt) {
+; CHECK: f4:
+; CHECK: rll %r2, %r2, 0(%r3)
+; CHECK: br %r14
+  %inv = sub i32 32, %amt
+  %hi = shl i32 %a, %amt
+  %lo = lshr i32 %a, %inv
+  %rot = or i32 %hi, %lo
+  ret i32 %rot
+}
+
+; A small constant added to the amount is folded into the displacement.
+define i32 @f5(i32 %a, i32 %amt) {
+; CHECK: f5:
+; CHECK: rll %r2, %r2, 10(%r3)
+; CHECK: br %r14
+  %sum = add i32 %amt, 10
+  %inv = sub i32 32, %sum
+  %hi = shl i32 %a, %sum
+  %lo = lshr i32 %a, %inv
+  %rot = or i32 %hi, %lo
+  ret i32 %rot
+}
+
+; The same folding applies when a 64-bit sum is truncated to 32 bits.
+define i32 @f6(i32 %a, i64 %amt) {
+; CHECK: f6:
+; CHECK: rll %r2, %r2, 10(%r3)
+; CHECK: br %r14
+  %sum = add i64 %amt, 10
+  %narrow = trunc i64 %sum to i32
+  %inv = sub i32 32, %narrow
+  %hi = shl i32 %a, %narrow
+  %lo = lshr i32 %a, %inv
+  %rot = or i32 %hi, %lo
+  ret i32 %rot
+}
+
+; Same again, but with the subtraction done in 64 bits before truncating.
+define i32 @f7(i32 %a, i64 %amt) {
+; CHECK: f7:
+; CHECK: rll %r2, %r2, 10(%r3)
+; CHECK: br %r14
+  %sum = add i64 %amt, 10
+  %inv = sub i64 32, %sum
+  %narrow = trunc i64 %sum to i32
+  %invnarrow = trunc i64 %inv to i32
+  %hi = shl i32 %a, %narrow
+  %lo = lshr i32 %a, %invnarrow
+  %rot = or i32 %hi, %lo
+  ret i32 %rot
+}
+
+; 524287 is the largest constant term that still fits in the displacement.
+; (Masking the amount instead would also be acceptable.)
+define i32 @f8(i32 %a, i32 %amt) {
+; CHECK: f8:
+; CHECK: rll %r2, %r2, 524287(%r3)
+; CHECK: br %r14
+  %sum = add i32 %amt, 524287
+  %inv = sub i32 32, %sum
+  %hi = shl i32 %a, %sum
+  %lo = lshr i32 %a, %inv
+  %rot = or i32 %hi, %lo
+  ret i32 %rot
+}
+
+; 524288 does not fit; unless the amount is masked, a separate addition
+; has to be emitted.
+define i32 @f9(i32 %a, i32 %amt) {
+; CHECK: f9:
+; CHECK: afi %r3, 524288
+; CHECK: rll %r2, %r2, 0(%r3)
+; CHECK: br %r14
+  %sum = add i32 %amt, 524288
+  %inv = sub i32 32, %sum
+  %hi = shl i32 %a, %sum
+  %lo = lshr i32 %a, %inv
+  %rot = or i32 %hi, %lo
+  ret i32 %rot
+}
+
+; Subtracting 1 from the amount folds into a displacement of -1.
+define i32 @f10(i32 %a, i32 %amt) {
+; CHECK: f10:
+; CHECK: rll %r2, %r2, -1(%r3)
+; CHECK: br %r14
+  %dec = sub i32 %amt, 1
+  %inv = sub i32 32, %dec
+  %hi = shl i32 %a, %dec
+  %lo = lshr i32 %a, %inv
+  %rot = or i32 %hi, %lo
+  ret i32 %rot
+}
+
+; -524288 is the lowest constant term that still fits in the displacement.
+; (Masking the amount instead would also be acceptable.)
+define i32 @f11(i32 %a, i32 %amt) {
+; CHECK: f11:
+; CHECK: rll %r2, %r2, -524288(%r3)
+; CHECK: br %r14
+  %dec = sub i32 %amt, 524288
+  %inv = sub i32 32, %dec
+  %hi = shl i32 %a, %dec
+  %lo = lshr i32 %a, %inv
+  %rot = or i32 %hi, %lo
+  ret i32 %rot
+}
+
+; -524289 does not fit; unless the amount is masked, a separate addition
+; has to be emitted.
+define i32 @f12(i32 %a, i32 %amt) {
+; CHECK: f12:
+; CHECK: afi %r3, -524289
+; CHECK: rll %r2, %r2, 0(%r3)
+; CHECK: br %r14
+  %dec = sub i32 %amt, 524289
+  %inv = sub i32 32, %dec
+  %hi = shl i32 %a, %dec
+  %lo = lshr i32 %a, %inv
+  %rot = or i32 %hi, %lo
+  ret i32 %rot
+}
+
+; A sum of two registers is computed with AR rather than used as base+index.
+define i32 @f13(i32 %a, i32 %b, i32 %c) {
+; CHECK: f13:
+; CHECK: ar {{%r3, %r4|%r4, %r3}}
+; CHECK: rll %r2, %r2, 0({{%r[34]}})
+; CHECK: br %r14
+  %sum = add i32 %b, %c
+  %inv = sub i32 32, %sum
+  %hi = shl i32 %a, %sum
+  %lo = lshr i32 %a, %inv
+  %rot = or i32 %hi, %lo
+  ret i32 %rot
+}
+
+; The loaded amount has to be in an address register, so never in %r0.
+define i32 @f14(i32 %a, i32 *%ptr) {
+; CHECK: f14:
+; CHECK: l %r1, 0(%r3)
+; CHECK: rll %r2, %r2, 0(%r1)
+; CHECK: br %r14
+  %count = load i32 *%ptr
+  %inv = sub i32 32, %count
+  %hi = shl i32 %a, %count
+  %lo = lshr i32 %a, %inv
+  %rot = or i32 %hi, %lo
+  ret i32 %rot
+}
diff --git a/test/CodeGen/SystemZ/shift-05.ll b/test/CodeGen/SystemZ/shift-05.ll
new file mode 100644
index 0000000..8c0ca93
--- /dev/null
+++ b/test/CodeGen/SystemZ/shift-05.ll
@@ -0,0 +1,149 @@
+; Test 64-bit shifts left.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Smallest useful immediate amount for SLLG.
+define i64 @f1(i64 %a) {
+; CHECK: f1:
+; CHECK: sllg %r2, %r2, 1
+; CHECK: br %r14
+  %res = shl i64 %a, 1
+  ret i64 %res
+}
+
+; Largest immediate amount that is still defined for SLLG.
+define i64 @f2(i64 %a) {
+; CHECK: f2:
+; CHECK: sllg %r2, %r2, 63
+; CHECK: br %r14
+  %res = shl i64 %a, 63
+  ret i64 %res
+}
+
+; An amount of 64 is out of range, so no SLLG may be emitted.
+define i64 @f3(i64 %a) {
+; CHECK: f3:
+; CHECK-NOT: sllg
+; CHECK: br %r14
+  %res = shl i64 %a, 64
+  ret i64 %res
+}
+
+; A plain register amount becomes the base of a zero-displacement operand.
+define i64 @f4(i64 %a, i64 %amt) {
+; CHECK: f4:
+; CHECK: sllg %r2, %r2, 0(%r3)
+; CHECK: br %r14
+  %res = shl i64 %a, %amt
+  ret i64 %res
+}
+
+; A small constant added to the amount is folded into the displacement.
+define i64 @f5(i64 %a, i64 %amt) {
+; CHECK: f5:
+; CHECK: sllg %r2, %r2, 10(%r3)
+; CHECK: br %r14
+  %sum = add i64 %amt, 10
+  %res = shl i64 %a, %sum
+  ret i64 %res
+}
+
+; The same folding applies to a sign-extended 32-bit sum.
+define i64 @f6(i64 %a, i32 %amt) {
+; CHECK: f6:
+; CHECK: sllg %r2, %r2, 10(%r3)
+; CHECK: br %r14
+  %sum = add i32 %amt, 10
+  %wide = sext i32 %sum to i64
+  %res = shl i64 %a, %wide
+  ret i64 %res
+}
+
+; The same folding applies to a zero-extended 32-bit sum.
+define i64 @f7(i64 %a, i32 %amt) {
+; CHECK: f7:
+; CHECK: sllg %r2, %r2, 10(%r3)
+; CHECK: br %r14
+  %sum = add i32 %amt, 10
+  %wide = zext i32 %sum to i64
+  %res = shl i64 %a, %wide
+  ret i64 %res
+}
+
+; 524287 is the largest constant term that still fits in the displacement.
+; (Masking the amount instead would also be acceptable.)
+define i64 @f8(i64 %a, i64 %amt) {
+; CHECK: f8:
+; CHECK: sllg %r2, %r2, 524287(%r3)
+; CHECK: br %r14
+  %sum = add i64 %amt, 524287
+  %res = shl i64 %a, %sum
+  ret i64 %res
+}
+
+; 524288 does not fit; unless the amount is masked, a separate addition
+; has to be emitted.
+define i64 @f9(i64 %a, i64 %amt) {
+; CHECK: f9:
+; CHECK: a{{g?}}fi %r3, 524288
+; CHECK: sllg %r2, %r2, 0(%r3)
+; CHECK: br %r14
+  %sum = add i64 %amt, 524288
+  %res = shl i64 %a, %sum
+  ret i64 %res
+}
+
+; Subtracting 1 from the amount folds into a displacement of -1.
+define i64 @f10(i64 %a, i64 %amt) {
+; CHECK: f10:
+; CHECK: sllg %r2, %r2, -1(%r3)
+; CHECK: br %r14
+  %dec = sub i64 %amt, 1
+  %res = shl i64 %a, %dec
+  ret i64 %res
+}
+
+; -524288 is the lowest constant term that still fits in the displacement.
+; (Masking the amount instead would also be acceptable.)
+define i64 @f11(i64 %a, i64 %amt) {
+; CHECK: f11:
+; CHECK: sllg %r2, %r2, -524288(%r3)
+; CHECK: br %r14
+  %dec = sub i64 %amt, 524288
+  %res = shl i64 %a, %dec
+  ret i64 %res
+}
+
+; -524289 does not fit; unless the amount is masked, a separate addition
+; has to be emitted.
+define i64 @f12(i64 %a, i64 %amt) {
+; CHECK: f12:
+; CHECK: a{{g?}}fi %r3, -524289
+; CHECK: sllg %r2, %r2, 0(%r3)
+; CHECK: br %r14
+  %dec = sub i64 %amt, 524289
+  %res = shl i64 %a, %dec
+  ret i64 %res
+}
+
+; A sum of two registers is computed separately, not used as base+index.
+define i64 @f13(i64 %a, i64 %b, i64 %c) {
+; CHECK: f13:
+; CHECK: a{{g?}}r {{%r3, %r4|%r4, %r3}}
+; CHECK: sllg %r2, %r2, 0({{%r[34]}})
+; CHECK: br %r14
+  %sum = add i64 %b, %c
+  %res = shl i64 %a, %sum
+  ret i64 %res
+}
+
+; The loaded amount has to be in an address register, so never in %r0.
+define i64 @f14(i64 %a, i64 *%ptr) {
+; CHECK: f14:
+; CHECK: l %r1, 4(%r3)
+; CHECK: sllg %r2, %r2, 0(%r1)
+; CHECK: br %r14
+  %count = load i64 *%ptr
+  %res = shl i64 %a, %count
+  ret i64 %res
+}
diff --git a/test/CodeGen/SystemZ/shift-06.ll b/test/CodeGen/SystemZ/shift-06.ll
new file mode 100644
index 0000000..5f600b4
--- /dev/null
+++ b/test/CodeGen/SystemZ/shift-06.ll
@@ -0,0 +1,149 @@
+; Test 64-bit logical shifts right.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Smallest useful immediate amount for SRLG.
+define i64 @f1(i64 %a) {
+; CHECK: f1:
+; CHECK: srlg %r2, %r2, 1
+; CHECK: br %r14
+  %res = lshr i64 %a, 1
+  ret i64 %res
+}
+
+; Largest immediate amount that is still defined for SRLG.
+define i64 @f2(i64 %a) {
+; CHECK: f2:
+; CHECK: srlg %r2, %r2, 63
+; CHECK: br %r14
+  %res = lshr i64 %a, 63
+  ret i64 %res
+}
+
+; An amount of 64 is out of range, so no SRLG may be emitted.
+define i64 @f3(i64 %a) {
+; CHECK: f3:
+; CHECK-NOT: srlg
+; CHECK: br %r14
+  %res = lshr i64 %a, 64
+  ret i64 %res
+}
+
+; A plain register amount becomes the base of a zero-displacement operand.
+define i64 @f4(i64 %a, i64 %amt) {
+; CHECK: f4:
+; CHECK: srlg %r2, %r2, 0(%r3)
+; CHECK: br %r14
+  %res = lshr i64 %a, %amt
+  ret i64 %res
+}
+
+; A small constant added to the amount is folded into the displacement.
+define i64 @f5(i64 %a, i64 %amt) {
+; CHECK: f5:
+; CHECK: srlg %r2, %r2, 10(%r3)
+; CHECK: br %r14
+  %sum = add i64 %amt, 10
+  %res = lshr i64 %a, %sum
+  ret i64 %res
+}
+
+; The same folding applies to a sign-extended 32-bit sum.
+define i64 @f6(i64 %a, i32 %amt) {
+; CHECK: f6:
+; CHECK: srlg %r2, %r2, 10(%r3)
+; CHECK: br %r14
+  %sum = add i32 %amt, 10
+  %wide = sext i32 %sum to i64
+  %res = lshr i64 %a, %wide
+  ret i64 %res
+}
+
+; The same folding applies to a zero-extended 32-bit sum.
+define i64 @f7(i64 %a, i32 %amt) {
+; CHECK: f7:
+; CHECK: srlg %r2, %r2, 10(%r3)
+; CHECK: br %r14
+  %sum = add i32 %amt, 10
+  %wide = zext i32 %sum to i64
+  %res = lshr i64 %a, %wide
+  ret i64 %res
+}
+
+; 524287 is the largest constant term that still fits in the displacement.
+; (Masking the amount instead would also be acceptable.)
+define i64 @f8(i64 %a, i64 %amt) {
+; CHECK: f8:
+; CHECK: srlg %r2, %r2, 524287(%r3)
+; CHECK: br %r14
+  %sum = add i64 %amt, 524287
+  %res = lshr i64 %a, %sum
+  ret i64 %res
+}
+
+; 524288 does not fit; unless the amount is masked, a separate addition
+; has to be emitted.
+define i64 @f9(i64 %a, i64 %amt) {
+; CHECK: f9:
+; CHECK: a{{g?}}fi %r3, 524288
+; CHECK: srlg %r2, %r2, 0(%r3)
+; CHECK: br %r14
+  %sum = add i64 %amt, 524288
+  %res = lshr i64 %a, %sum
+  ret i64 %res
+}
+
+; Subtracting 1 from the amount folds into a displacement of -1.
+define i64 @f10(i64 %a, i64 %amt) {
+; CHECK: f10:
+; CHECK: srlg %r2, %r2, -1(%r3)
+; CHECK: br %r14
+  %dec = sub i64 %amt, 1
+  %res = lshr i64 %a, %dec
+  ret i64 %res
+}
+
+; -524288 is the lowest constant term that still fits in the displacement.
+; (Masking the amount instead would also be acceptable.)
+define i64 @f11(i64 %a, i64 %amt) {
+; CHECK: f11:
+; CHECK: srlg %r2, %r2, -524288(%r3)
+; CHECK: br %r14
+  %dec = sub i64 %amt, 524288
+  %res = lshr i64 %a, %dec
+  ret i64 %res
+}
+
+; -524289 does not fit; unless the amount is masked, a separate addition
+; has to be emitted.
+define i64 @f12(i64 %a, i64 %amt) {
+; CHECK: f12:
+; CHECK: a{{g?}}fi %r3, -524289
+; CHECK: srlg %r2, %r2, 0(%r3)
+; CHECK: br %r14
+  %dec = sub i64 %amt, 524289
+  %res = lshr i64 %a, %dec
+  ret i64 %res
+}
+
+; A sum of two registers is computed separately, not used as base+index.
+define i64 @f13(i64 %a, i64 %b, i64 %c) {
+; CHECK: f13:
+; CHECK: a{{g?}}r {{%r3, %r4|%r4, %r3}}
+; CHECK: srlg %r2, %r2, 0({{%r[34]}})
+; CHECK: br %r14
+  %sum = add i64 %b, %c
+  %res = lshr i64 %a, %sum
+  ret i64 %res
+}
+
+; The loaded amount has to be in an address register, so never in %r0.
+define i64 @f14(i64 %a, i64 *%ptr) {
+; CHECK: f14:
+; CHECK: l %r1, 4(%r3)
+; CHECK: srlg %r2, %r2, 0(%r1)
+; CHECK: br %r14
+  %count = load i64 *%ptr
+  %res = lshr i64 %a, %count
+  ret i64 %res
+}
diff --git a/test/CodeGen/SystemZ/shift-07.ll b/test/CodeGen/SystemZ/shift-07.ll
new file mode 100644
index 0000000..ef583e8
--- /dev/null
+++ b/test/CodeGen/SystemZ/shift-07.ll
@@ -0,0 +1,149 @@
+; Test 64-bit arithmetic shifts right.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Smallest useful immediate amount for SRAG.
+define i64 @f1(i64 %a) {
+; CHECK: f1:
+; CHECK: srag %r2, %r2, 1
+; CHECK: br %r14
+  %res = ashr i64 %a, 1
+  ret i64 %res
+}
+
+; Largest immediate amount that is still defined for SRAG.
+define i64 @f2(i64 %a) {
+; CHECK: f2:
+; CHECK: srag %r2, %r2, 63
+; CHECK: br %r14
+  %res = ashr i64 %a, 63
+  ret i64 %res
+}
+
+; An amount of 64 is out of range, so no SRAG may be emitted.
+define i64 @f3(i64 %a) {
+; CHECK: f3:
+; CHECK-NOT: srag
+; CHECK: br %r14
+  %res = ashr i64 %a, 64
+  ret i64 %res
+}
+
+; A plain register amount becomes the base of a zero-displacement operand.
+define i64 @f4(i64 %a, i64 %amt) {
+; CHECK: f4:
+; CHECK: srag %r2, %r2, 0(%r3)
+; CHECK: br %r14
+  %res = ashr i64 %a, %amt
+  ret i64 %res
+}
+
+; A small constant added to the amount is folded into the displacement.
+define i64 @f5(i64 %a, i64 %amt) {
+; CHECK: f5:
+; CHECK: srag %r2, %r2, 10(%r3)
+; CHECK: br %r14
+  %sum = add i64 %amt, 10
+  %res = ashr i64 %a, %sum
+  ret i64 %res
+}
+
+; The same folding applies to a sign-extended 32-bit sum.
+define i64 @f6(i64 %a, i32 %amt) {
+; CHECK: f6:
+; CHECK: srag %r2, %r2, 10(%r3)
+; CHECK: br %r14
+  %sum = add i32 %amt, 10
+  %wide = sext i32 %sum to i64
+  %res = ashr i64 %a, %wide
+  ret i64 %res
+}
+
+; The same folding applies to a zero-extended 32-bit sum.
+define i64 @f7(i64 %a, i32 %amt) {
+; CHECK: f7:
+; CHECK: srag %r2, %r2, 10(%r3)
+; CHECK: br %r14
+  %sum = add i32 %amt, 10
+  %wide = zext i32 %sum to i64
+  %res = ashr i64 %a, %wide
+  ret i64 %res
+}
+
+; 524287 is the largest constant term that still fits in the displacement.
+; (Masking the amount instead would also be acceptable.)
+define i64 @f8(i64 %a, i64 %amt) {
+; CHECK: f8:
+; CHECK: srag %r2, %r2, 524287(%r3)
+; CHECK: br %r14
+  %sum = add i64 %amt, 524287
+  %res = ashr i64 %a, %sum
+  ret i64 %res
+}
+
+; 524288 does not fit; unless the amount is masked, a separate addition
+; has to be emitted.
+define i64 @f9(i64 %a, i64 %amt) {
+; CHECK: f9:
+; CHECK: a{{g?}}fi %r3, 524288
+; CHECK: srag %r2, %r2, 0(%r3)
+; CHECK: br %r14
+  %sum = add i64 %amt, 524288
+  %res = ashr i64 %a, %sum
+  ret i64 %res
+}
+
+; Subtracting 1 from the amount folds into a displacement of -1.
+define i64 @f10(i64 %a, i64 %amt) {
+; CHECK: f10:
+; CHECK: srag %r2, %r2, -1(%r3)
+; CHECK: br %r14
+  %dec = sub i64 %amt, 1
+  %res = ashr i64 %a, %dec
+  ret i64 %res
+}
+
+; -524288 is the lowest constant term that still fits in the displacement.
+; (Masking the amount instead would also be acceptable.)
+define i64 @f11(i64 %a, i64 %amt) {
+; CHECK: f11:
+; CHECK: srag %r2, %r2, -524288(%r3)
+; CHECK: br %r14
+  %dec = sub i64 %amt, 524288
+  %res = ashr i64 %a, %dec
+  ret i64 %res
+}
+
+; -524289 does not fit; unless the amount is masked, a separate addition
+; has to be emitted.
+define i64 @f12(i64 %a, i64 %amt) {
+; CHECK: f12:
+; CHECK: a{{g?}}fi %r3, -524289
+; CHECK: srag %r2, %r2, 0(%r3)
+; CHECK: br %r14
+  %dec = sub i64 %amt, 524289
+  %res = ashr i64 %a, %dec
+  ret i64 %res
+}
+
+; A sum of two registers is computed separately, not used as base+index.
+define i64 @f13(i64 %a, i64 %b, i64 %c) {
+; CHECK: f13:
+; CHECK: a{{g?}}r {{%r3, %r4|%r4, %r3}}
+; CHECK: srag %r2, %r2, 0({{%r[34]}})
+; CHECK: br %r14
+  %sum = add i64 %b, %c
+  %res = ashr i64 %a, %sum
+  ret i64 %res
+}
+
+; The loaded amount has to be in an address register, so never in %r0.
+define i64 @f14(i64 %a, i64 *%ptr) {
+; CHECK: f14:
+; CHECK: l %r1, 4(%r3)
+; CHECK: srag %r2, %r2, 0(%r1)
+; CHECK: br %r14
+  %count = load i64 *%ptr
+  %res = ashr i64 %a, %count
+  ret i64 %res
+}
diff --git a/test/CodeGen/SystemZ/shift-08.ll b/test/CodeGen/SystemZ/shift-08.ll
new file mode 100644
index 0000000..0688a06
--- /dev/null
+++ b/test/CodeGen/SystemZ/shift-08.ll
@@ -0,0 +1,190 @@
+; Test 64-bit rotates left.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Smallest useful immediate amount for RLLG.
+define i64 @f1(i64 %a) {
+; CHECK: f1:
+; CHECK: rllg %r2, %r2, 1
+; CHECK: br %r14
+  %hi = shl i64 %a, 1
+  %lo = lshr i64 %a, 63
+  %rot = or i64 %hi, %lo
+  ret i64 %rot
+}
+
+; Largest immediate amount that is still defined for RLLG.
+define i64 @f2(i64 %a) {
+; CHECK: f2:
+; CHECK: rllg %r2, %r2, 63
+; CHECK: br %r14
+  %hi = shl i64 %a, 63
+  %lo = lshr i64 %a, 1
+  %rot = or i64 %hi, %lo
+  ret i64 %rot
+}
+
+; An amount of 64 is out of range, so no RLLG may be emitted.
+define i64 @f3(i64 %a) {
+; CHECK: f3:
+; CHECK-NOT: rllg
+; CHECK: br %r14
+  %hi = shl i64 %a, 64
+  %lo = lshr i64 %a, 0
+  %rot = or i64 %hi, %lo
+  ret i64 %rot
+}
+
+; A plain register amount becomes the base of a zero-displacement operand.
+define i64 @f4(i64 %a, i64 %amt) {
+; CHECK: f4:
+; CHECK: rllg %r2, %r2, 0(%r3)
+; CHECK: br %r14
+  %inv = sub i64 64, %amt
+  %hi = shl i64 %a, %amt
+  %lo = lshr i64 %a, %inv
+  %rot = or i64 %hi, %lo
+  ret i64 %rot
+}
+
+; A small constant added to the amount is folded into the displacement.
+define i64 @f5(i64 %a, i64 %amt) {
+; CHECK: f5:
+; CHECK: rllg %r2, %r2, 10(%r3)
+; CHECK: br %r14
+  %sum = add i64 %amt, 10
+  %inv = sub i64 64, %sum
+  %hi = shl i64 %a, %sum
+  %lo = lshr i64 %a, %inv
+  %rot = or i64 %hi, %lo
+  ret i64 %rot
+}
+
+; The same folding applies to sign-extended 32-bit amounts.
+define i64 @f6(i64 %a, i32 %amt) {
+; CHECK: f6:
+; CHECK: rllg %r2, %r2, 10(%r3)
+; CHECK: br %r14
+  %sum = add i32 %amt, 10
+  %inv = sub i32 64, %sum
+  %wide = sext i32 %sum to i64
+  %invwide = sext i32 %inv to i64
+  %hi = shl i64 %a, %wide
+  %lo = lshr i64 %a, %invwide
+  %rot = or i64 %hi, %lo
+  ret i64 %rot
+}
+
+; The same folding applies to zero-extended 32-bit amounts.
+define i64 @f7(i64 %a, i32 %amt) {
+; CHECK: f7:
+; CHECK: rllg %r2, %r2, 10(%r3)
+; CHECK: br %r14
+  %sum = add i32 %amt, 10
+  %inv = sub i32 64, %sum
+  %wide = zext i32 %sum to i64
+  %invwide = zext i32 %inv to i64
+  %hi = shl i64 %a, %wide
+  %lo = lshr i64 %a, %invwide
+  %rot = or i64 %hi, %lo
+  ret i64 %rot
+}
+
+; 524287 is the largest constant term that still fits in the displacement.
+; (Masking the amount instead would also be acceptable.)
+define i64 @f8(i64 %a, i64 %amt) {
+; CHECK: f8:
+; CHECK: rllg %r2, %r2, 524287(%r3)
+; CHECK: br %r14
+  %sum = add i64 %amt, 524287
+  %inv = sub i64 64, %sum
+  %hi = shl i64 %a, %sum
+  %lo = lshr i64 %a, %inv
+  %rot = or i64 %hi, %lo
+  ret i64 %rot
+}
+
+; 524288 does not fit; unless the amount is masked, a separate addition
+; has to be emitted.
+define i64 @f9(i64 %a, i64 %amt) {
+; CHECK: f9:
+; CHECK: a{{g?}}fi %r3, 524288
+; CHECK: rllg %r2, %r2, 0(%r3)
+; CHECK: br %r14
+  %sum = add i64 %amt, 524288
+  %inv = sub i64 64, %sum
+  %hi = shl i64 %a, %sum
+  %lo = lshr i64 %a, %inv
+  %rot = or i64 %hi, %lo
+  ret i64 %rot
+}
+
+; Subtracting 1 from the amount folds into a displacement of -1.
+define i64 @f10(i64 %a, i64 %amt) {
+; CHECK: f10:
+; CHECK: rllg %r2, %r2, -1(%r3)
+; CHECK: br %r14
+  %dec = sub i64 %amt, 1
+  %inv = sub i64 64, %dec
+  %hi = shl i64 %a, %dec
+  %lo = lshr i64 %a, %inv
+  %rot = or i64 %hi, %lo
+  ret i64 %rot
+}
+
+; -524288 is the lowest constant term that still fits in the displacement.
+; (Masking the amount instead would also be acceptable.)
+define i64 @f11(i64 %a, i64 %amt) {
+; CHECK: f11:
+; CHECK: rllg %r2, %r2, -524288(%r3)
+; CHECK: br %r14
+  %dec = sub i64 %amt, 524288
+  %inv = sub i64 64, %dec
+  %hi = shl i64 %a, %dec
+  %lo = lshr i64 %a, %inv
+  %rot = or i64 %hi, %lo
+  ret i64 %rot
+}
+
+; -524289 does not fit; unless the amount is masked, a separate addition
+; has to be emitted.
+define i64 @f12(i64 %a, i64 %amt) {
+; CHECK: f12:
+; CHECK: a{{g?}}fi %r3, -524289
+; CHECK: rllg %r2, %r2, 0(%r3)
+; CHECK: br %r14
+  %dec = sub i64 %amt, 524289
+  %inv = sub i64 64, %dec
+  %hi = shl i64 %a, %dec
+  %lo = lshr i64 %a, %inv
+  %rot = or i64 %hi, %lo
+  ret i64 %rot
+}
+
+; A sum of two registers is computed separately, not used as base+index.
+define i64 @f13(i64 %a, i64 %b, i64 %c) {
+; CHECK: f13:
+; CHECK: a{{g?}}r {{%r3, %r4|%r4, %r3}}
+; CHECK: rllg %r2, %r2, 0({{%r[34]}})
+; CHECK: br %r14
+  %sum = add i64 %b, %c
+  %inv = sub i64 64, %sum
+  %hi = shl i64 %a, %sum
+  %lo = lshr i64 %a, %inv
+  %rot = or i64 %hi, %lo
+  ret i64 %rot
+}
+
+; The loaded amount has to be in an address register, so never in %r0.
+define i64 @f14(i64 %a, i64 *%ptr) {
+; CHECK: f14:
+; CHECK: l %r1, 4(%r3)
+; CHECK: rllg %r2, %r2, 0(%r1)
+; CHECK: br %r14
+  %count = load i64 *%ptr
+  %inv = sub i64 64, %count
+  %hi = shl i64 %a, %count
+  %lo = lshr i64 %a, %inv
+  %rot = or i64 %hi, %lo
+  ret i64 %rot
+}
diff --git a/test/CodeGen/SystemZ/tls-01.ll b/test/CodeGen/SystemZ/tls-01.ll
new file mode 100644
index 0000000..49037ad5
--- /dev/null
+++ b/test/CodeGen/SystemZ/tls-01.ll
@@ -0,0 +1,22 @@
+; Test initial-exec TLS accesses.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-MAIN
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-CP
+
+@x = thread_local global i32 0
+
+; The x@NTPOFF offset must come from the constant pool; it is added to the
+; value built from %a0/%a1.  Either LARL/AG or LGRL/AGR is fine for that.
+define i32 *@foo() {
+; CHECK-CP: .LCP{{.*}}:
+; CHECK-CP: .quad x@NTPOFF
+;
+; CHECK-MAIN: foo:
+; CHECK-MAIN: ear [[HIGH:%r[0-5]]], %a0
+; CHECK-MAIN: sllg %r2, [[HIGH]], 32
+; CHECK-MAIN: ear %r2, %a1
+; CHECK-MAIN: larl %r1, .LCP{{.*}}
+; CHECK-MAIN: ag %r2, 0(%r1)
+; CHECK-MAIN: br %r14
+  ret i32 *@x
+}
diff --git a/test/CodeGen/SystemZ/xor-01.ll b/test/CodeGen/SystemZ/xor-01.ll
new file mode 100644
index 0000000..30bdbe7
--- /dev/null
+++ b/test/CodeGen/SystemZ/xor-01.ll
@@ -0,0 +1,129 @@
+; Test 32-bit XORs in which the second operand is variable.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Register-register form: XR.
+define i32 @f1(i32 %a, i32 %b) {
+; CHECK: f1:
+; CHECK: xr %r2, %r3
+; CHECK: br %r14
+  %res = xor i32 %a, %b
+  ret i32 %res
+}
+
+; Memory operand with a zero displacement, the bottom of the X range.
+define i32 @f2(i32 %a, i32 *%src) {
+; CHECK: f2:
+; CHECK: x %r2, 0(%r3)
+; CHECK: br %r14
+  %val = load i32 *%src
+  %res = xor i32 %a, %val
+  ret i32 %res
+}
+
+; 4092 is the highest word-aligned displacement in the X range.
+define i32 @f3(i32 %a, i32 *%src) {
+; CHECK: f3:
+; CHECK: x %r2, 4092(%r3)
+; CHECK: br %r14
+  %addr = getelementptr i32 *%src, i64 1023
+  %val = load i32 *%addr
+  %res = xor i32 %a, %val
+  ret i32 %res
+}
+
+; One word further on, XY should be chosen in place of X.
+define i32 @f4(i32 %a, i32 *%src) {
+; CHECK: f4:
+; CHECK: xy %r2, 4096(%r3)
+; CHECK: br %r14
+  %addr = getelementptr i32 *%src, i64 1024
+  %val = load i32 *%addr
+  %res = xor i32 %a, %val
+  ret i32 %res
+}
+
+; 524284 is the highest word-aligned displacement in the XY range.
+define i32 @f5(i32 %a, i32 *%src) {
+; CHECK: f5:
+; CHECK: xy %r2, 524284(%r3)
+; CHECK: br %r14
+  %addr = getelementptr i32 *%src, i64 131071
+  %val = load i32 *%addr
+  %res = xor i32 %a, %val
+  ret i32 %res
+}
+
+; One word further on, the address must be formed separately.
+; Sequences other than this one would be acceptable too.
+define i32 @f6(i32 %a, i32 *%src) {
+; CHECK: f6:
+; CHECK: agfi %r3, 524288
+; CHECK: x %r2, 0(%r3)
+; CHECK: br %r14
+  %addr = getelementptr i32 *%src, i64 131072
+  %val = load i32 *%addr
+  %res = xor i32 %a, %val
+  ret i32 %res
+}
+
+; -4 is the highest negative word-aligned displacement; XY is expected.
+define i32 @f7(i32 %a, i32 *%src) {
+; CHECK: f7:
+; CHECK: xy %r2, -4(%r3)
+; CHECK: br %r14
+  %addr = getelementptr i32 *%src, i64 -1
+  %val = load i32 *%addr
+  %res = xor i32 %a, %val
+  ret i32 %res
+}
+
+; -524288 is the bottom of the XY displacement range.
+define i32 @f8(i32 %a, i32 *%src) {
+; CHECK: f8:
+; CHECK: xy %r2, -524288(%r3)
+; CHECK: br %r14
+  %addr = getelementptr i32 *%src, i64 -131072
+  %val = load i32 *%addr
+  %res = xor i32 %a, %val
+  ret i32 %res
+}
+
+; One word further down, the address must be formed separately.
+; Sequences other than this one would be acceptable too.
+define i32 @f9(i32 %a, i32 *%src) {
+; CHECK: f9:
+; CHECK: agfi %r3, -524292
+; CHECK: x %r2, 0(%r3)
+; CHECK: br %r14
+  %addr = getelementptr i32 *%src, i64 -131073
+  %val = load i32 *%addr
+  %res = xor i32 %a, %val
+  ret i32 %res
+}
+
+; X accepts a base register together with an index register.
+define i32 @f10(i32 %a, i64 %src, i64 %index) {
+; CHECK: f10:
+; CHECK: x %r2, 4092({{%r4,%r3|%r3,%r4}})
+; CHECK: br %r14
+  %base = add i64 %src, %index
+  %full = add i64 %base, 4092
+  %addr = inttoptr i64 %full to i32 *
+  %val = load i32 *%addr
+  %res = xor i32 %a, %val
+  ret i32 %res
+}
+
+; XY accepts a base register together with an index register.
+define i32 @f11(i32 %a, i64 %src, i64 %index) {
+; CHECK: f11:
+; CHECK: xy %r2, 4096({{%r4,%r3|%r3,%r4}})
+; CHECK: br %r14
+  %base = add i64 %src, %index
+  %full = add i64 %base, 4096
+  %addr = inttoptr i64 %full to i32 *
+  %val = load i32 *%addr
+  %res = xor i32 %a, %val
+  ret i32 %res
+}
diff --git a/test/CodeGen/SystemZ/xor-02.ll b/test/CodeGen/SystemZ/xor-02.ll
new file mode 100644
index 0000000..c2b52b9
--- /dev/null
+++ b/test/CodeGen/SystemZ/xor-02.ll
@@ -0,0 +1,40 @@
+; Test 32-bit XORs in which the second operand is constant.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; 1 is the lowest useful XILF immediate.
+define i32 @f1(i32 %a) {
+; CHECK: f1:
+; CHECK: xilf %r2, 1
+; CHECK: br %r14
+  %res = xor i32 %a, 1
+  ret i32 %res
+}
+
+; 2147483647 is the top of the signed 32-bit range.
+define i32 @f2(i32 %a) {
+; CHECK: f2:
+; CHECK: xilf %r2, 2147483647
+; CHECK: br %r14
+  %res = xor i32 %a, 2147483647
+  ret i32 %res
+}
+
+; The bottom of the signed 32-bit range should be printed as the
+; positive value 2147483648.
+define i32 @f3(i32 %a) {
+; CHECK: f3:
+; CHECK: xilf %r2, 2147483648
+; CHECK: br %r14
+  %res = xor i32 %a, -2147483648
+  ret i32 %res
+}
+
+; 4294967295 is the top of the XILF immediate range.
+define i32 @f4(i32 %a) {
+; CHECK: f4:
+; CHECK: xilf %r2, 4294967295
+; CHECK: br %r14
+  %res = xor i32 %a, 4294967295
+  ret i32 %res
+}
diff --git a/test/CodeGen/SystemZ/xor-03.ll b/test/CodeGen/SystemZ/xor-03.ll
new file mode 100644
index 0000000..a4851b3
--- /dev/null
+++ b/test/CodeGen/SystemZ/xor-03.ll
@@ -0,0 +1,94 @@
+; Test 64-bit XORs in which the second operand is variable.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check XGR.
+define i64 @f1(i64 %a, i64 %b) {
+; CHECK: f1:
+; CHECK: xgr %r2, %r3
+; CHECK: br %r14
+  %xor = xor i64 %a, %b
+  ret i64 %xor
+}
+
+; Check XG with no displacement.
+define i64 @f2(i64 %a, i64 *%src) {
+; CHECK: f2:
+; CHECK: xg %r2, 0(%r3)
+; CHECK: br %r14
+  %b = load i64 *%src
+  %xor = xor i64 %a, %b
+  ret i64 %xor
+}
+
+; Check the high end of the aligned XG range.
+define i64 @f3(i64 %a, i64 *%src) {
+; CHECK: f3:
+; CHECK: xg %r2, 524280(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 65535
+  %b = load i64 *%ptr
+  %xor = xor i64 %a, %b
+  ret i64 %xor
+}
+
+; Check the next doubleword up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f4(i64 %a, i64 *%src) {
+; CHECK: f4:
+; CHECK: agfi %r3, 524288
+; CHECK: xg %r2, 0(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 65536
+  %b = load i64 *%ptr
+  %xor = xor i64 %a, %b
+  ret i64 %xor
+}
+
+; Check the high end of the negative aligned XG range.
+define i64 @f5(i64 %a, i64 *%src) {
+; CHECK: f5:
+; CHECK: xg %r2, -8(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 -1
+  %b = load i64 *%ptr
+  %xor = xor i64 %a, %b
+  ret i64 %xor
+}
+
+; Check the low end of the XG range.
+define i64 @f6(i64 %a, i64 *%src) {
+; CHECK: f6:
+; CHECK: xg %r2, -524288(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 -65536
+  %b = load i64 *%ptr
+  %xor = xor i64 %a, %b
+  ret i64 %xor
+}
+
+; Check the next doubleword down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f7(i64 %a, i64 *%src) {
+; CHECK: f7:
+; CHECK: agfi %r3, -524296
+; CHECK: xg %r2, 0(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr i64 *%src, i64 -65537
+  %b = load i64 *%ptr
+  %xor = xor i64 %a, %b
+  ret i64 %xor
+}
+
+; Check that XG allows an index.
+define i64 @f8(i64 %a, i64 %src, i64 %index) {
+; CHECK: f8:
+; CHECK: xg %r2, 524280({{%r4,%r3|%r3,%r4}})
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 524280
+  %ptr = inttoptr i64 %add2 to i64 *
+  %b = load i64 *%ptr
+  %xor = xor i64 %a, %b
+  ret i64 %xor
+}
diff --git a/test/CodeGen/SystemZ/xor-04.ll b/test/CodeGen/SystemZ/xor-04.ll
new file mode 100644
index 0000000..cc141d3
--- /dev/null
+++ b/test/CodeGen/SystemZ/xor-04.ll
@@ -0,0 +1,69 @@
+; Test 64-bit XORs in which the second operand is constant.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check the lowest useful XILF value.
+define i64 @f1(i64 %a) {
+; CHECK: f1:
+; CHECK: xilf %r2, 1
+; CHECK: br %r14
+  %xor = xor i64 %a, 1
+  ret i64 %xor
+}
+
+; Check the high end of the XILF range.
+define i64 @f2(i64 %a) {
+; CHECK: f2:
+; CHECK: xilf %r2, 4294967295
+; CHECK: br %r14
+  %xor = xor i64 %a, 4294967295
+  ret i64 %xor
+}
+
+; Check the lowest useful XIHF value, which is one up from the above.
+define i64 @f3(i64 %a) {
+; CHECK: f3:
+; CHECK: xihf %r2, 1
+; CHECK: br %r14
+  %xor = xor i64 %a, 4294967296
+  ret i64 %xor
+}
+
+; Check the next value up again, which needs a combination of XIHF and XILF.
+define i64 @f4(i64 %a) {
+; CHECK: f4:
+; CHECK: xihf %r2, 1
+; CHECK: xilf %r2, 4294967295
+; CHECK: br %r14
+  %xor = xor i64 %a, 8589934591
+  ret i64 %xor
+}
+
+; Check the high end of the XIHF range.
+define i64 @f5(i64 %a) {
+; CHECK: f5:
+; CHECK: xihf %r2, 4294967295
+; CHECK: br %r14
+  %xor = xor i64 %a, -4294967296
+  ret i64 %xor
+}
+
+; Check the next value up, which again must use XIHF and XILF.
+define i64 @f6(i64 %a) {
+; CHECK: f6:
+; CHECK: xihf %r2, 4294967295
+; CHECK: xilf %r2, 1
+; CHECK: br %r14
+  %xor = xor i64 %a, -4294967295
+  ret i64 %xor
+}
+
+; Check full bitwise negation.
+define i64 @f7(i64 %a) {
+; CHECK: f7:
+; CHECK: xihf %r2, 4294967295
+; CHECK: xilf %r2, 4294967295
+; CHECK: br %r14
+  %xor = xor i64 %a, -1
+  ret i64 %xor
+}
diff --git a/test/CodeGen/SystemZ/xor-05.ll b/test/CodeGen/SystemZ/xor-05.ll
new file mode 100644
index 0000000..9ef0d20
--- /dev/null
+++ b/test/CodeGen/SystemZ/xor-05.ll
@@ -0,0 +1,165 @@
+; Test XORs of a constant into a byte of memory.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Check the lowest useful constant, expressed as a signed integer (-255 is 1 modulo 256).
+define void @f1(i8 *%ptr) {
+; CHECK: f1:
+; CHECK: xi 0(%r2), 1
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %xor = xor i8 %val, -255
+  store i8 %xor, i8 *%ptr
+  ret void
+}
+
+; Check the highest useful constant, expressed as a signed integer.
+define void @f2(i8 *%ptr) {
+; CHECK: f2:
+; CHECK: xi 0(%r2), 254
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %xor = xor i8 %val, -2
+  store i8 %xor, i8 *%ptr
+  ret void
+}
+
+; Check the lowest useful constant, expressed as an unsigned integer.
+define void @f3(i8 *%ptr) {
+; CHECK: f3:
+; CHECK: xi 0(%r2), 1
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %xor = xor i8 %val, 1
+  store i8 %xor, i8 *%ptr
+  ret void
+}
+
+; Check the highest useful constant, expressed as an unsigned integer.
+define void @f4(i8 *%ptr) {
+; CHECK: f4:
+; CHECK: xi 0(%r2), 254
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %xor = xor i8 %val, 254
+  store i8 %xor, i8 *%ptr
+  ret void
+}
+
+; Check the high end of the XI range.
+define void @f5(i8 *%src) {
+; CHECK: f5:
+; CHECK: xi 4095(%r2), 127
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 4095
+  %val = load i8 *%ptr
+  %xor = xor i8 %val, 127
+  store i8 %xor, i8 *%ptr
+  ret void
+}
+
+; Check the next byte up, which should use XIY instead of XI.
+define void @f6(i8 *%src) {
+; CHECK: f6:
+; CHECK: xiy 4096(%r2), 127
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 4096
+  %val = load i8 *%ptr
+  %xor = xor i8 %val, 127
+  store i8 %xor, i8 *%ptr
+  ret void
+}
+
+; Check the high end of the XIY range.
+define void @f7(i8 *%src) {
+; CHECK: f7:
+; CHECK: xiy 524287(%r2), 127
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 524287
+  %val = load i8 *%ptr
+  %xor = xor i8 %val, 127
+  store i8 %xor, i8 *%ptr
+  ret void
+}
+
+; Check the next byte up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define void @f8(i8 *%src) {
+; CHECK: f8:
+; CHECK: agfi %r2, 524288
+; CHECK: xi 0(%r2), 127
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 524288
+  %val = load i8 *%ptr
+  %xor = xor i8 %val, 127
+  store i8 %xor, i8 *%ptr
+  ret void
+}
+
+; Check the high end of the negative XIY range.
+define void @f9(i8 *%src) {
+; CHECK: f9:
+; CHECK: xiy -1(%r2), 127
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 -1
+  %val = load i8 *%ptr
+  %xor = xor i8 %val, 127
+  store i8 %xor, i8 *%ptr
+  ret void
+}
+
+; Check the low end of the XIY range.
+define void @f10(i8 *%src) {
+; CHECK: f10:
+; CHECK: xiy -524288(%r2), 127
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 -524288
+  %val = load i8 *%ptr
+  %xor = xor i8 %val, 127
+  store i8 %xor, i8 *%ptr
+  ret void
+}
+
+; Check the next byte down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define void @f11(i8 *%src) {
+; CHECK: f11:
+; CHECK: agfi %r2, -524289
+; CHECK: xi 0(%r2), 127
+; CHECK: br %r14
+  %ptr = getelementptr i8 *%src, i64 -524289
+  %val = load i8 *%ptr
+  %xor = xor i8 %val, 127
+  store i8 %xor, i8 *%ptr
+  ret void
+}
+
+; Check that XI does not allow an index.
+define void @f12(i64 %src, i64 %index) {
+; CHECK: f12:
+; CHECK: agr %r2, %r3
+; CHECK: xi 4095(%r2), 127
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 4095
+  %ptr = inttoptr i64 %add2 to i8 *
+  %val = load i8 *%ptr
+  %xor = xor i8 %val, 127
+  store i8 %xor, i8 *%ptr
+  ret void
+}
+
+; Check that XIY does not allow an index.
+define void @f13(i64 %src, i64 %index) {
+; CHECK: f13:
+; CHECK: agr %r2, %r3
+; CHECK: xiy 4096(%r2), 127
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 4096
+  %ptr = inttoptr i64 %add2 to i8 *
+  %val = load i8 *%ptr
+  %xor = xor i8 %val, 127
+  store i8 %xor, i8 *%ptr
+  ret void
+}
diff --git a/test/CodeGen/SystemZ/xor-06.ll b/test/CodeGen/SystemZ/xor-06.ll
new file mode 100644
index 0000000..0ffff47
--- /dev/null
+++ b/test/CodeGen/SystemZ/xor-06.ll
@@ -0,0 +1,108 @@
+; Test that we can use XI for byte operations that are expressed as i32
+; or i64 operations.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Zero extension to 32 bits, negative constant.
+define void @f1(i8 *%ptr) {
+; CHECK: f1:
+; CHECK: xi 0(%r2), 254
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = zext i8 %val to i32
+  %xor = xor i32 %ext, -2
+  %trunc = trunc i32 %xor to i8
+  store i8 %trunc, i8 *%ptr
+  ret void
+}
+
+; Zero extension to 64 bits, negative constant.
+define void @f2(i8 *%ptr) {
+; CHECK: f2:
+; CHECK: xi 0(%r2), 254
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = zext i8 %val to i64
+  %xor = xor i64 %ext, -2
+  %trunc = trunc i64 %xor to i8
+  store i8 %trunc, i8 *%ptr
+  ret void
+}
+
+; Zero extension to 32 bits, positive constant.
+define void @f3(i8 *%ptr) {
+; CHECK: f3:
+; CHECK: xi 0(%r2), 254
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = zext i8 %val to i32
+  %xor = xor i32 %ext, 254
+  %trunc = trunc i32 %xor to i8
+  store i8 %trunc, i8 *%ptr
+  ret void
+}
+
+; Zero extension to 64 bits, positive constant.
+define void @f4(i8 *%ptr) {
+; CHECK: f4:
+; CHECK: xi 0(%r2), 254
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = zext i8 %val to i64
+  %xor = xor i64 %ext, 254
+  %trunc = trunc i64 %xor to i8
+  store i8 %trunc, i8 *%ptr
+  ret void
+}
+
+; Sign extension to 32 bits, negative constant.
+define void @f5(i8 *%ptr) {
+; CHECK: f5:
+; CHECK: xi 0(%r2), 254
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = sext i8 %val to i32
+  %xor = xor i32 %ext, -2
+  %trunc = trunc i32 %xor to i8
+  store i8 %trunc, i8 *%ptr
+  ret void
+}
+
+; Sign extension to 64 bits, negative constant.
+define void @f6(i8 *%ptr) {
+; CHECK: f6:
+; CHECK: xi 0(%r2), 254
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = sext i8 %val to i64
+  %xor = xor i64 %ext, -2
+  %trunc = trunc i64 %xor to i8
+  store i8 %trunc, i8 *%ptr
+  ret void
+}
+
+; Sign extension to 32 bits, positive constant.
+define void @f7(i8 *%ptr) {
+; CHECK: f7:
+; CHECK: xi 0(%r2), 254
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = sext i8 %val to i32
+  %xor = xor i32 %ext, 254
+  %trunc = trunc i32 %xor to i8
+  store i8 %trunc, i8 *%ptr
+  ret void
+}
+
+; Sign extension to 64 bits, positive constant.
+define void @f8(i8 *%ptr) {
+; CHECK: f8:
+; CHECK: xi 0(%r2), 254
+; CHECK: br %r14
+  %val = load i8 *%ptr
+  %ext = sext i8 %val to i64
+  %xor = xor i64 %ext, 254
+  %trunc = trunc i64 %xor to i8
+  store i8 %trunc, i8 *%ptr
+  ret void
+}
diff --git a/test/CodeGen/Thumb/large-stack.ll b/test/CodeGen/Thumb/large-stack.ll
index f8c438c..680976e 100644
--- a/test/CodeGen/Thumb/large-stack.ll
+++ b/test/CodeGen/Thumb/large-stack.ll
@@ -20,8 +20,8 @@ define void @test2() {
 
 define i32 @test3() {
 ; CHECK: test3:
-; CHECK: ldr.n r2, LCPI
-; CHECK: add sp, r2
+; CHECK: ldr.n r1, LCPI
+; CHECK: add sp, r1
 ; CHECK: ldr.n r1, LCPI
 ; CHECK: add r1, sp
 ; CHECK: subs r4, r7, #4
diff --git a/test/CodeGen/Thumb2/2013-02-19-tail-call-register-hint.ll b/test/CodeGen/Thumb2/2013-02-19-tail-call-register-hint.ll
index 502b138..e905cb9 100644
--- a/test/CodeGen/Thumb2/2013-02-19-tail-call-register-hint.ll
+++ b/test/CodeGen/Thumb2/2013-02-19-tail-call-register-hint.ll
@@ -18,13 +18,13 @@ define hidden void @func(i8* %Data) nounwind ssp {
   tail call void @def(%"myclass"* %2) nounwind
   %3 = getelementptr inbounds i8* %Data, i32 8
   %4 = bitcast i8* %3 to i8**
-  %5 = load i8** %4, align 4, !tbaa !0
+  %5 = load i8** %4, align 4
   tail call void @ghi(i8* %5) nounwind
   %6 = bitcast i8* %Data to void (i8*)**
-  %7 = load void (i8*)** %6, align 4, !tbaa !0
+  %7 = load void (i8*)** %6, align 4
   %8 = getelementptr inbounds i8* %Data, i32 4
   %9 = bitcast i8* %8 to i8**
-  %10 = load i8** %9, align 4, !tbaa !0
+  %10 = load i8** %9, align 4
   %11 = icmp eq i8* %Data, null
   br i1 %11, label %14, label %12
 
@@ -47,7 +47,3 @@ declare void @abc(%"myclass"*)
 declare void @ghi(i8*)
 
 declare %"myclass"* @jkl(%"myclass"*) nounwind
-
-!0 = metadata !{metadata !"any pointer", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/X86/2003-08-03-CallArgLiveRanges.ll b/test/CodeGen/X86/2003-08-03-CallArgLiveRanges.ll
index 2e4cb1f..cb90bf6 100644
--- a/test/CodeGen/X86/2003-08-03-CallArgLiveRanges.ll
+++ b/test/CodeGen/X86/2003-08-03-CallArgLiveRanges.ll
@@ -4,7 +4,9 @@
 ; it makes a ton of annoying overlapping live ranges.  This code should not
 ; cause spills!
 ;
-; RUN: llc < %s -march=x86 -stats 2>&1 | not grep spilled
+; RUN: llc < %s -march=x86 -stats 2>&1 | FileCheck %s
+
+; CHECK-NOT: spilled
 
 target datalayout = "e-p:32:32"
 
diff --git a/test/CodeGen/X86/2006-07-31-SingleRegClass.ll b/test/CodeGen/X86/2006-07-31-SingleRegClass.ll
index c5c74d1..c4b08a3 100644
--- a/test/CodeGen/X86/2006-07-31-SingleRegClass.ll
+++ b/test/CodeGen/X86/2006-07-31-SingleRegClass.ll
@@ -1,7 +1,8 @@
 ; PR850
-; RUN: llc < %s -march=x86 -x86-asm-syntax=att > %t
-; RUN: grep "movl 4(%eax),%ebp" %t
-; RUN: grep "movl 0(%eax), %ebx" %t
+; RUN: llc < %s -march=x86 -x86-asm-syntax=att | FileCheck %s
+
+; CHECK: {{movl 4[(]%eax[)],%ebp}}
+; CHECK: {{movl 0[(]%eax[)], %ebx}}
 
 define i32 @foo(i32 %__s.i.i, i32 %tmp5.i.i, i32 %tmp6.i.i, i32 %tmp7.i.i, i32 %tmp8.i.i) {
 	%tmp9.i.i = call i32 asm sideeffect "push %ebp\0Apush %ebx\0Amovl 4($2),%ebp\0Amovl 0($2), %ebx\0Amovl $1,%eax\0Aint  $$0x80\0Apop  %ebx\0Apop %ebp", "={ax},i,0,{cx},{dx},{si},{di}"( i32 192, i32 %__s.i.i, i32 %tmp5.i.i, i32 %tmp6.i.i, i32 %tmp7.i.i, i32 %tmp8.i.i )		; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/2006-11-27-SelectLegalize.ll b/test/CodeGen/X86/2006-11-27-SelectLegalize.ll
index ea2e6db..ba83a8d 100644
--- a/test/CodeGen/X86/2006-11-27-SelectLegalize.ll
+++ b/test/CodeGen/X86/2006-11-27-SelectLegalize.ll
@@ -1,6 +1,8 @@
-; RUN: llc < %s -march=x86 | grep test.*1
+; RUN: llc < %s -march=x86 | FileCheck %s
 ; PR1016
 
+; CHECK: {{test.*1}}
+
 define i32 @test(i32 %A, i32 %B, i32 %C) {
         %a = trunc i32 %A to i1         ; <i1> [#uses=1]
         %D = select i1 %a, i32 %B, i32 %C               ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/2007-03-24-InlineAsmVectorOp.ll b/test/CodeGen/X86/2007-03-24-InlineAsmVectorOp.ll
index 18b06dc..366f583 100644
--- a/test/CodeGen/X86/2007-03-24-InlineAsmVectorOp.ll
+++ b/test/CodeGen/X86/2007-03-24-InlineAsmVectorOp.ll
@@ -1,8 +1,9 @@
-; RUN: llc < %s -mcpu=yonah -march=x86 | \
-; RUN:   grep "cmpltsd %xmm0, %xmm0"
+; RUN: llc < %s -mcpu=yonah -march=x86 | FileCheck %s
+
 target datalayout = "e-p:32:32"
 target triple = "i686-apple-darwin9"
 
+; CHECK: {{cmpltsd %xmm0, %xmm0}}
 
 define void @acoshf() {
 	%tmp19 = tail call <2 x double> asm sideeffect "pcmpeqd $0, $0 \0A\09 cmpltsd $0, $0", "=x,0,~{dirflag},~{fpsr},~{flags}"( <2 x double> zeroinitializer )		; <<2 x double>> [#uses=0]
diff --git a/test/CodeGen/X86/2007-04-24-Huge-Stack.ll b/test/CodeGen/X86/2007-04-24-Huge-Stack.ll
index 7528129..648718c 100644
--- a/test/CodeGen/X86/2007-04-24-Huge-Stack.ll
+++ b/test/CodeGen/X86/2007-04-24-Huge-Stack.ll
@@ -1,6 +1,8 @@
-; RUN: llc < %s -march=x86-64 | not grep 4294967112
+; RUN: llc < %s -march=x86-64 | FileCheck %s
 ; PR1348
 
+; CHECK-NOT: 4294967112
+
 	%struct.md5_ctx = type { i32, i32, i32, i32, [2 x i32], i32, [128 x i8], [4294967288 x i8] }
 
 define i8* @md5_buffer(i8* %buffer, i64 %len, i8* %resblock) {
diff --git a/test/CodeGen/X86/2007-05-17-ShuffleISelBug.ll b/test/CodeGen/X86/2007-05-17-ShuffleISelBug.ll
index b27ef83..38fc5e1 100644
--- a/test/CodeGen/X86/2007-05-17-ShuffleISelBug.ll
+++ b/test/CodeGen/X86/2007-05-17-ShuffleISelBug.ll
@@ -1,5 +1,6 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2
-; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep punpckhwd
+; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
+
+; CHECK-NOT: punpckhwd
 
 declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>)
 
diff --git a/test/CodeGen/X86/2007-06-15-IntToMMX.ll b/test/CodeGen/X86/2007-06-15-IntToMMX.ll
index 660d4fe..5612d9e 100644
--- a/test/CodeGen/X86/2007-06-15-IntToMMX.ll
+++ b/test/CodeGen/X86/2007-06-15-IntToMMX.ll
@@ -1,4 +1,7 @@
-; RUN: llc < %s -march=x86-64 -mattr=+mmx | grep paddusw
+; RUN: llc < %s -march=x86-64 -mattr=+mmx | FileCheck %s
+
+; CHECK: paddusw
+
 @R = external global x86_mmx          ; <x86_mmx*> [#uses=1]
 
 define void @foo(<1 x i64> %A, <1 x i64> %B) {
diff --git a/test/CodeGen/X86/2007-08-01-LiveVariablesBug.ll b/test/CodeGen/X86/2007-08-01-LiveVariablesBug.ll
index 62624a7..4f7ae32 100644
--- a/test/CodeGen/X86/2007-08-01-LiveVariablesBug.ll
+++ b/test/CodeGen/X86/2007-08-01-LiveVariablesBug.ll
@@ -1,4 +1,6 @@
-; RUN: llc < %s -march=x86 | not grep movl
+; RUN: llc < %s -march=x86 | FileCheck %s
+
+; CHECK-NOT: movl
 
 define zeroext i8 @t(i8 zeroext  %x, i8 zeroext  %y)   {
 	%tmp2 = add i8 %x, 2
diff --git a/test/CodeGen/X86/2007-10-19-SpillerUnfold.ll b/test/CodeGen/X86/2007-10-19-SpillerUnfold.ll
index d3120f3..82052b1 100644
--- a/test/CodeGen/X86/2007-10-19-SpillerUnfold.ll
+++ b/test/CodeGen/X86/2007-10-19-SpillerUnfold.ll
@@ -1,4 +1,8 @@
-; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | grep inc | not grep PTR
+; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | FileCheck %s
+
+; CHECK: inc
+; CHECK-NOT: PTR
+; CHECK: {{$}}
 
 define signext   i16 @t(i32* %bitptr, i32* %source, i8** %byteptr, i32 %scale, i32 %round) {
 entry:
diff --git a/test/CodeGen/X86/2007-11-04-rip-immediate-constant.ll b/test/CodeGen/X86/2007-11-04-rip-immediate-constant.ll
index 56a109a..c467024 100644
--- a/test/CodeGen/X86/2007-11-04-rip-immediate-constant.ll
+++ b/test/CodeGen/X86/2007-11-04-rip-immediate-constant.ll
@@ -1,9 +1,11 @@
-; RUN: llc < %s -relocation-model=static | grep "foo str$"
+; RUN: llc < %s -relocation-model=static | FileCheck %s
 ; PR1761
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-pc-linux"
 @str = internal constant [12 x i8] c"init/main.c\00"		; <[12 x i8]*> [#uses=1]
 
+; CHECK: {{foo str$}}
+
 define i32 @unknown_bootoption() {
 entry:
 	tail call void asm sideeffect "foo ${0:c}\0A", "i,~{dirflag},~{fpsr},~{flags}"( i8* getelementptr ([12 x i8]* @str, i32 0, i64 0) )
diff --git a/test/CodeGen/X86/2008-01-09-LongDoubleSin.ll b/test/CodeGen/X86/2008-01-09-LongDoubleSin.ll
index 6997d53..e8c957b 100644
--- a/test/CodeGen/X86/2008-01-09-LongDoubleSin.ll
+++ b/test/CodeGen/X86/2008-01-09-LongDoubleSin.ll
@@ -1,4 +1,6 @@
-; RUN: llc < %s -o - | grep sinl
+; RUN: llc < %s -o - | FileCheck %s
+
+; CHECK: sinl
 
 target triple = "i686-pc-linux-gnu"
 
diff --git a/test/CodeGen/X86/2008-02-20-InlineAsmClobber.ll b/test/CodeGen/X86/2008-02-20-InlineAsmClobber.ll
index a52b365..b06b249 100644
--- a/test/CodeGen/X86/2008-02-20-InlineAsmClobber.ll
+++ b/test/CodeGen/X86/2008-02-20-InlineAsmClobber.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s | grep "a:" | not grep ax
-; RUN: llc < %s | grep "b:" | not grep ax
+; RUN: llc < %s | FileCheck %s
 ; PR2078
 ; The clobber list says that "ax" is clobbered.  Make sure that eax isn't 
 ; allocated to the input/output register.
@@ -15,6 +14,10 @@ entry:
 	ret void
 }
 
+; CHECK: a:
+; CHECK-NOT: ax
+; CHECK: {{$}}
+
 define void @test2(i16* %block, i8* %pixels, i32 %line_size) nounwind  {
 entry:
 	%tmp1 = getelementptr i16* %block, i32 64		; <i16*> [#uses=1]
@@ -22,3 +25,6 @@ entry:
 	ret void
 }
 
+; CHECK: b:
+; CHECK-NOT: ax
+; CHECK: {{$}}
diff --git a/test/CodeGen/X86/2008-11-06-testb.ll b/test/CodeGen/X86/2008-11-06-testb.ll
index f8f317c..e7caa7a 100644
--- a/test/CodeGen/X86/2008-11-06-testb.ll
+++ b/test/CodeGen/X86/2008-11-06-testb.ll
@@ -1,4 +1,6 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin | grep testb
+; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s
+
+; CHECK: testb
 
 ; ModuleID = '<stdin>'
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
diff --git a/test/CodeGen/X86/2009-02-25-CommuteBug.ll b/test/CodeGen/X86/2009-02-25-CommuteBug.ll
index 9ea34e2..5bec179 100644
--- a/test/CodeGen/X86/2009-02-25-CommuteBug.ll
+++ b/test/CodeGen/X86/2009-02-25-CommuteBug.ll
@@ -1,7 +1,9 @@
 ; REQUIRES: asserts
-; RUN: llc < %s -march=x86 -mattr=+sse2 -stats 2>&1 | not grep commuted
+; RUN: llc < %s -march=x86 -mattr=+sse2 -stats 2>&1 | FileCheck %s
 ; rdar://6608609
 
+; CHECK-NOT: commuted
+
 define <2 x double> @t(<2 x double> %A, <2 x double> %B, <2 x double> %C) nounwind readnone {
 entry:
 	%tmp.i2 = bitcast <2 x double> %B to <2 x i64>		; <<2 x i64>> [#uses=1]
diff --git a/test/CodeGen/X86/2009-03-25-TestBug.ll b/test/CodeGen/X86/2009-03-25-TestBug.ll
index f40fddc..cc1d73d 100644
--- a/test/CodeGen/X86/2009-03-25-TestBug.ll
+++ b/test/CodeGen/X86/2009-03-25-TestBug.ll
@@ -1,8 +1,9 @@
-; RUN: llc < %s -march=x86 -o %t
-; RUN: not grep and %t
-; RUN: not grep shr %t
+; RUN: llc < %s -march=x86 | FileCheck %s
 ; rdar://6661955
 
+; CHECK-NOT: and
+; CHECK-NOT: shr
+
 @hello = internal constant [7 x i8] c"hello\0A\00"
 @world = internal constant [7 x i8] c"world\0A\00"
 
diff --git a/test/CodeGen/X86/2009-04-16-SpillerUnfold.ll b/test/CodeGen/X86/2009-04-16-SpillerUnfold.ll
index 0607eda..679a65d 100644
--- a/test/CodeGen/X86/2009-04-16-SpillerUnfold.ll
+++ b/test/CodeGen/X86/2009-04-16-SpillerUnfold.ll
@@ -1,8 +1,10 @@
 ; REQUIRES: asserts
-; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim -stats 2>&1 | grep "Number of modref unfolded"
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim -stats 2>&1 | FileCheck %s
 ; XFAIL: *
 ; 69408 removed the opportunity for this optimization to work
 
+; CHECK: {{Number of modref unfolded}}
+
 	%struct.SHA512_CTX = type { [8 x i64], i64, i64, %struct.anon, i32, i32 }
 	%struct.anon = type { [16 x i64] }
 @K512 = external constant [80 x i64], align 32		; <[80 x i64]*> [#uses=2]
diff --git a/test/CodeGen/X86/2009-04-24.ll b/test/CodeGen/X86/2009-04-24.ll
index 08bf9e3..d104c87 100644
--- a/test/CodeGen/X86/2009-04-24.ll
+++ b/test/CodeGen/X86/2009-04-24.ll
@@ -1,8 +1,9 @@
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu -regalloc=fast -optimize-regalloc=0 -relocation-model=pic > %t2
-; RUN: grep "leaq.*TLSGD" %t2
-; RUN: grep "__tls_get_addr" %t2
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu -regalloc=fast -optimize-regalloc=0 -relocation-model=pic | FileCheck %s
 ; PR4004
 
+; CHECK: {{leaq.*TLSGD}}
+; CHECK: {{__tls_get_addr}}
+
 @i = thread_local global i32 15
 
 define i32 @f() {
diff --git a/test/CodeGen/X86/2009-05-08-InlineAsmIOffset.ll b/test/CodeGen/X86/2009-05-08-InlineAsmIOffset.ll
index 738b5fb..7468acb 100644
--- a/test/CodeGen/X86/2009-05-08-InlineAsmIOffset.ll
+++ b/test/CodeGen/X86/2009-05-08-InlineAsmIOffset.ll
@@ -1,8 +1,9 @@
-; RUN: llc < %s -relocation-model=static > %t
-; RUN: grep "1: ._pv_cpu_ops+8" %t
-; RUN: grep "2: ._G" %t
+; RUN: llc < %s -relocation-model=static | FileCheck %s
 ; PR4152
 
+; CHECK: {{1: ._pv_cpu_ops[+]8}}
+; CHECK: {{2: ._G}}
+
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin9.6"
 	%struct.pv_cpu_ops = type { i32, [2 x i32] }
diff --git a/test/CodeGen/X86/2009-05-23-available_externally.ll b/test/CodeGen/X86/2009-05-23-available_externally.ll
index 94773d9..c990108 100644
--- a/test/CodeGen/X86/2009-05-23-available_externally.ll
+++ b/test/CodeGen/X86/2009-05-23-available_externally.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -relocation-model=pic | grep atoi | grep PLT
+; RUN: llc < %s -relocation-model=pic | FileCheck %s
 ; PR4253
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "x86_64-unknown-linux-gnu"
@@ -9,6 +9,9 @@ entry:
 	ret i32 %call
 }
 
+; CHECK: foo
+; CHECK: {{atoi.+PLT}}
+
 define available_externally fastcc i32 @atoi(i8* %__nptr) nounwind readonly {
 entry:
 	%call = tail call i64 @strtol(i8* nocapture %__nptr, i8** null, i32 10) nounwind readonly		; <i64> [#uses=1]
diff --git a/test/CodeGen/X86/2009-06-05-ScalarToVectorByteMMX.ll b/test/CodeGen/X86/2009-06-05-ScalarToVectorByteMMX.ll
index 3076322..3061dc2 100644
--- a/test/CodeGen/X86/2009-06-05-ScalarToVectorByteMMX.ll
+++ b/test/CodeGen/X86/2009-06-05-ScalarToVectorByteMMX.ll
@@ -1,4 +1,6 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -mattr=+mmx,+sse2 | not grep movl
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -mattr=+mmx,+sse2 | FileCheck %s
+
+; CHECK-NOT: movl
 
 define <8 x i8> @a(i8 zeroext %x) nounwind {
   %r = insertelement <8 x i8> undef, i8 %x, i32 0
diff --git a/test/CodeGen/X86/2009-08-08-CastError.ll b/test/CodeGen/X86/2009-08-08-CastError.ll
index 2dc812d..748c5a8 100644
--- a/test/CodeGen/X86/2009-08-08-CastError.ll
+++ b/test/CodeGen/X86/2009-08-08-CastError.ll
@@ -1,4 +1,6 @@
-; RUN: llc < %s -mtriple=x86_64-pc-mingw64 | grep movabsq
+; RUN: llc < %s -mtriple=x86_64-pc-mingw64 | FileCheck %s
+
+; CHECK: movabsq
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 
diff --git a/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll b/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll
index 8ab93fc..7650a5c 100644
--- a/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll
+++ b/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll
@@ -203,7 +203,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !0 = metadata !{i32 786689, metadata !1, metadata !"a", metadata !2, i32 1921, metadata !9, i32 0, null} ; [ DW_TAG_arg_variable ]
 !1 = metadata !{i32 786478, metadata !2, metadata !"__divsc3", metadata !"__divsc3", metadata !"__divsc3", metadata !2, i32 1922, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, %0 (float, float, float, float)* @__divsc3, null, null, metadata !43, i32 1922} ; [ DW_TAG_subprogram ]
 !2 = metadata !{i32 786473, metadata !45} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786449, i32 1, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, null, null, metadata !44, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786449, i32 1, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, null, null, metadata !44, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
 !4 = metadata !{i32 786453, metadata !45, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !5 = metadata !{metadata !6, metadata !9, metadata !9, metadata !9, metadata !9}
 !6 = metadata !{i32 786454, metadata !46, metadata !7, metadata !"SCtype", i32 170, i64 0, i64 0, i64 0, i32 0, metadata !8} ; [ DW_TAG_typedef ]
diff --git a/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll b/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll
index 6519ca0..6510ff1 100644
--- a/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll
+++ b/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll
@@ -25,7 +25,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !0 = metadata !{i32 786484, i32 0, metadata !1, metadata !"ret", metadata !"ret", metadata !"", metadata !1, i32 7, metadata !3, i1 false, i1 true, null} ; [ DW_TAG_variable ]
 !1 = metadata !{i32 786473, metadata !36} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, metadata !36, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, null, null, metadata !32, metadata !31, metadata !""} ; [ DW_TAG_compile_unit ]
+!2 = metadata !{i32 786449, metadata !36, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, null, null, metadata !32, metadata !31,  metadata !31, metadata !""} ; [ DW_TAG_compile_unit ]
 !3 = metadata !{i32 786468, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !4 = metadata !{i32 786689, metadata !5, metadata !"x", metadata !1, i32 12, metadata !3, i32 0, null} ; [ DW_TAG_arg_variable ]
 !5 = metadata !{i32 786478, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 13, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, void (i32)* @foo, null, null, metadata !33, i32 13} ; [ DW_TAG_subprogram ]
diff --git a/test/CodeGen/X86/2010-05-28-Crash.ll b/test/CodeGen/X86/2010-05-28-Crash.ll
index 4ea3bf0..ee00dba 100644
--- a/test/CodeGen/X86/2010-05-28-Crash.ll
+++ b/test/CodeGen/X86/2010-05-28-Crash.ll
@@ -27,7 +27,7 @@ entry:
 !0 = metadata !{i32 786689, metadata !1, metadata !"y", metadata !2, i32 2, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
 !1 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 2, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 (i32)* @foo, null, null, metadata !15, i32 2} ; [ DW_TAG_subprogram ]
 !2 = metadata !{i32 786473, metadata !18} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786449, i32 1, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, null, null, metadata !17, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786449, i32 1, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, null, null, metadata !17, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
 !4 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !5 = metadata !{metadata !6, metadata !6}
 !6 = metadata !{i32 786468, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
diff --git a/test/CodeGen/X86/2010-06-14-fast-isel-fs-load.ll b/test/CodeGen/X86/2010-06-14-fast-isel-fs-load.ll
index b22a391..b5679e6 100644
--- a/test/CodeGen/X86/2010-06-14-fast-isel-fs-load.ll
+++ b/test/CodeGen/X86/2010-06-14-fast-isel-fs-load.ll
@@ -1,4 +1,5 @@
-; RUN: llc -fast-isel -march=x86 < %s | grep %fs:
+; RUN: llc -fast-isel -march=x86 < %s | FileCheck %s
+; CHECK: %fs:
 
 define i32 @test1(i32 addrspace(257)* %arg) nounwind {
        %tmp = load i32 addrspace(257)* %arg
diff --git a/test/CodeGen/X86/2010-08-04-StackVariable.ll b/test/CodeGen/X86/2010-08-04-StackVariable.ll
index aaa562a..91711bb 100644
--- a/test/CodeGen/X86/2010-08-04-StackVariable.ll
+++ b/test/CodeGen/X86/2010-08-04-StackVariable.ll
@@ -80,7 +80,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !0 = metadata !{i32 786478, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"", metadata !2, i32 11, metadata !14, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 11} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786451, metadata !2, metadata !"SVal", metadata !2, i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_structure_type ]
 !2 = metadata !{i32 786473, metadata !"small.cc", metadata !"/Users/manav/R8248330", metadata !3} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786449, i32 4, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, null, null, metadata !46, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786449, i32 4, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, null, null, metadata !46, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
 !4 = metadata !{metadata !5, metadata !7, metadata !0, metadata !9}
 !5 = metadata !{i32 786445, metadata !1, metadata !"Data", metadata !2, i32 7, i64 64, i64 64, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ]
 !6 = metadata !{i32 786447, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
diff --git a/test/CodeGen/X86/2010-11-02-DbgParameter.ll b/test/CodeGen/X86/2010-11-02-DbgParameter.ll
index 31a6822..8719f73 100644
--- a/test/CodeGen/X86/2010-11-02-DbgParameter.ll
+++ b/test/CodeGen/X86/2010-11-02-DbgParameter.ll
@@ -19,7 +19,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !0 = metadata !{i32 786478, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (%struct.bar*)* @foo, null, null, metadata !16, i32 3} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786473, metadata !17} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, i32 12, metadata !1, metadata !"clang version 2.9 (trunk 117922)", i1 true, metadata !"", i32 0, null, null, metadata !15, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!2 = metadata !{i32 786449, i32 12, metadata !1, metadata !"clang version 2.9 (trunk 117922)", i1 true, metadata !"", i32 0, null, null, metadata !15, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
 !3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !4 = metadata !{metadata !5}
 !5 = metadata !{i32 786468, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
diff --git a/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll b/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll
index 2355528..14fb3e4 100644
--- a/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll
+++ b/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll
@@ -73,7 +73,7 @@ declare i32 @puts(i8* nocapture) nounwind
 
 !0 = metadata !{i32 786478, metadata !1, metadata !"gcd", metadata !"gcd", metadata !"", metadata !1, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i64 (i64, i64)* @gcd, null, null, metadata !29, i32 0} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786473, metadata !31} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, metadata !31, i32 12, metadata !"clang version 2.9 (trunk 124117)", i1 true, metadata !"", i32 0, null, null, metadata !28, null, null} ; [ DW_TAG_compile_unit ]
+!2 = metadata !{i32 786449, metadata !31, i32 12, metadata !"clang version 2.9 (trunk 124117)", i1 true, metadata !"", i32 0, null, null, metadata !28, null,  null, null} ; [ DW_TAG_compile_unit ]
 !3 = metadata !{i32 786453, metadata !1, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !4 = metadata !{metadata !5}
 !5 = metadata !{i32 786468, null, metadata !2, metadata !"long int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
diff --git a/test/CodeGen/X86/2011-09-14-valcoalesce.ll b/test/CodeGen/X86/2011-09-14-valcoalesce.ll
index 54d2b40..6d91109 100644
--- a/test/CodeGen/X86/2011-09-14-valcoalesce.ll
+++ b/test/CodeGen/X86/2011-09-14-valcoalesce.ll
@@ -96,7 +96,7 @@ while.body.i188:                                  ; preds = %for.end173.i, %if.e
 while.body85.i:                                   ; preds = %while.body85.i, %while.body.i188
   %aFreq.0518.i = phi i32 [ %add93.i, %while.body85.i ], [ 0, %while.body.i188 ]
   %inc87.i = add nsw i32 0, 1
-  %tmp91.i = load i32* undef, align 4, !tbaa !0
+  %tmp91.i = load i32* undef, align 4
   %add93.i = add nsw i32 %tmp91.i, %aFreq.0518.i
   %or.cond514.i = and i1 undef, false
   br i1 %or.cond514.i, label %while.body85.i, label %while.end.i
@@ -168,7 +168,3 @@ if.end85:                                         ; preds = %entry
 }
 
 declare void @fprintf(...) nounwind
-
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll b/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll
index 832a8eb..501a810 100644
--- a/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll
+++ b/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll
@@ -109,7 +109,7 @@ bb49:                                             ; preds = %bb49, %bb48
   %tmp51 = add i32 %tmp50, undef
   %tmp52 = add i32 %tmp50, undef
   %tmp53 = getelementptr i32* %tmp13, i32 %tmp52
-  %tmp54 = load i32* %tmp53, align 4, !tbaa !0
+  %tmp54 = load i32* %tmp53, align 4
   %tmp55 = add i32 %tmp50, 1
   %tmp56 = icmp eq i32 %tmp55, %tmp8
   br i1 %tmp56, label %bb57, label %bb49
@@ -127,7 +127,7 @@ bb61:                                             ; preds = %bb61, %bb59
   %tmp62 = phi i32 [ %tmp65, %bb61 ], [ 0, %bb59 ]
   %tmp63 = add i32 %tmp62, %tmp14
   %tmp64 = getelementptr i32* %tmp13, i32 %tmp63
-  store i32 0, i32* %tmp64, align 4, !tbaa !0
+  store i32 0, i32* %tmp64, align 4
   %tmp65 = add i32 %tmp62, 1
   %tmp66 = icmp eq i32 %tmp65, %tmp8
   br i1 %tmp66, label %bb67, label %bb61
@@ -149,7 +149,3 @@ declare void @Pjii(i32*, i32, i32) optsize
 declare i32 @llvm.eh.typeid.for(i8*) nounwind readnone
 
 declare void @OnOverFlow() noreturn optsize ssp align 2
-
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/X86/2012-11-30-handlemove-dbg.ll b/test/CodeGen/X86/2012-11-30-handlemove-dbg.ll
index 9525653..9164eb9 100644
--- a/test/CodeGen/X86/2012-11-30-handlemove-dbg.ll
+++ b/test/CodeGen/X86/2012-11-30-handlemove-dbg.ll
@@ -18,7 +18,7 @@ define signext i16 @subdivp(%struct.node.0.27* nocapture %p, double %dsq, double
 entry:
   call void @llvm.dbg.declare(metadata !{%struct.hgstruct.2.29* %hg}, metadata !4)
   %type = getelementptr inbounds %struct.node.0.27* %p, i64 0, i32 0
-  %0 = load i16* %type, align 2, !tbaa !8
+  %0 = load i16* %type, align 2
   %cmp = icmp eq i16 %0, 1
   br i1 %cmp, label %return, label %for.cond.preheader
 
@@ -45,7 +45,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !5 = metadata !{i32 786473, metadata !11} ; [ DW_TAG_file_type ]
 !6 = metadata !{i32 786454, metadata !11, null, metadata !"hgstruct", i32 492, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_typedef ] [hgstruct] [line 492, size 0, align 0, offset 0] [from ]
 !7 = metadata !{i32 786451, metadata !11, null, metadata !"", i32 487, i64 512, i64 64, i32 0, i32 0, null, null, i32 0, i32 0, i32 0} ; [ DW_TAG_structure_type ] [line 487, size 512, align 64, offset 0] [from ]
-!8 = metadata !{metadata !"short", metadata !9}
-!9 = metadata !{metadata !"omnipotent char", metadata !10}
-!10 = metadata !{metadata !"Simple C/C++ TBAA"}
 !11 = metadata !{metadata !"MultiSource/Benchmarks/Olden/bh/newbh.c", metadata !"MultiSource/Benchmarks/Olden/bh"}
diff --git a/test/CodeGen/X86/2013-03-13-VEX-DestReg.ll b/test/CodeGen/X86/2013-03-13-VEX-DestReg.ll
index 03b6bde..f0c7781 100644
--- a/test/CodeGen/X86/2013-03-13-VEX-DestReg.ll
+++ b/test/CodeGen/X86/2013-03-13-VEX-DestReg.ll
@@ -9,7 +9,7 @@ target triple = "x86_64-apple-macosx10.8.0"
 
 define void @main() #0 {
 entry:
-  %0 = load <8 x float>* bitcast ([8 x float]* @b to <8 x float>*), align 32, !tbaa !0
+  %0 = load <8 x float>* bitcast ([8 x float]* @b to <8 x float>*), align 32
   %bitcast.i = extractelement <8 x float> %0, i32 0
   %vecinit.i.i = insertelement <4 x float> undef, float %bitcast.i, i32 0
   %vecinit2.i.i = insertelement <4 x float> %vecinit.i.i, float 0.000000e+00, i32 1
@@ -17,7 +17,7 @@ entry:
   %vecinit4.i.i = insertelement <4 x float> %vecinit3.i.i, float 0.000000e+00, i32 3
   %1 = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %vecinit4.i.i) #2
   %vecext.i.i = extractelement <4 x float> %1, i32 0
-  store float %vecext.i.i, float* getelementptr inbounds ([8 x float]* @e, i64 0, i64 0), align 16, !tbaa !0
+  store float %vecext.i.i, float* getelementptr inbounds ([8 x float]* @e, i64 0, i64 0), align 16
   unreachable
 }
 
@@ -26,6 +26,3 @@ declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) #1
 attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
 attributes #2 = { nounwind }
-
-!0 = metadata !{metadata !"omnipotent char", metadata !1}
-!1 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/X86/2013-05-06-ConactVectorCrash.ll b/test/CodeGen/X86/2013-05-06-ConactVectorCrash.ll
new file mode 100644
index 0000000..9203417
--- /dev/null
+++ b/test/CodeGen/X86/2013-05-06-ConactVectorCrash.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=x86
+
+; Make sure this doesn't crash
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-win32"
+
+define void @foo() {
+  %1 = shufflevector <3 x i8> undef, <3 x i8> undef, <2 x i32> <i32 0, i32 1>
+  %2 = shufflevector <2 x i8> %1, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  %3 = shufflevector <4 x i8> undef, <4 x i8> %2, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+  store <4 x i8> %3, <4 x i8>* undef
+  ret void
+}
diff --git a/test/CodeGen/X86/MachineSink-DbgValue.ll b/test/CodeGen/X86/MachineSink-DbgValue.ll
index 227ef34..13a6444 100644
--- a/test/CodeGen/X86/MachineSink-DbgValue.ll
+++ b/test/CodeGen/X86/MachineSink-DbgValue.ll
@@ -27,7 +27,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, metadata !20, i32 12, metadata !"Apple clang version 3.0 (tags/Apple/clang-211.10.1) (based on LLVM 3.0svn)", i1 true, metadata !"", i32 0, null, null, metadata !18, null, null} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, metadata !20, i32 12, metadata !"Apple clang version 3.0 (tags/Apple/clang-211.10.1) (based on LLVM 3.0svn)", i1 true, metadata !"", i32 0, null, null, metadata !18, null,  null, null} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"", metadata !2, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i32*)* @foo, null, null, metadata !19, i32 0} ; [ DW_TAG_subprogram ]
 !2 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ]
 !3 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
diff --git a/test/CodeGen/X86/add.ll b/test/CodeGen/X86/add.ll
index 03d2e47..5fe08ed 100644
--- a/test/CodeGen/X86/add.ll
+++ b/test/CodeGen/X86/add.ll
@@ -119,8 +119,8 @@ entry:
 
 ; X64: test8:
 ; X64: addq
-; X64-NEXT: sbbq
-; X64-NEXT: testb
+; X64-NEXT: setb
+; X64: ret
 
 define i32 @test9(i32 %x, i32 %y) nounwind readnone {
   %cmp = icmp eq i32 %x, 10
diff --git a/test/CodeGen/X86/asm-invalid-register-class-crasher.ll b/test/CodeGen/X86/asm-invalid-register-class-crasher.ll
new file mode 100644
index 0000000..24e2284
--- /dev/null
+++ b/test/CodeGen/X86/asm-invalid-register-class-crasher.ll
@@ -0,0 +1,9 @@
+; RUN: not llc < %s -mtriple=i386-apple-darwin > %t 2>&1
+
+; Previously, this would assert in an assert build, but crash in a release build.
+; No FileCheck, just make sure we handle this gracefully.
+define i64 @t1(i64* %p, i64 %val) #0 {
+entry:
+  %0 = tail call i64 asm sideeffect "xaddq $0, $1", "=q,*m,0,~{memory},~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %p, i64 %val)
+  ret i64 %0
+}
diff --git a/test/CodeGen/X86/atom-call-reg-indirect-foldedreload32.ll b/test/CodeGen/X86/atom-call-reg-indirect-foldedreload32.ll
index 2a34e02..6237b66 100644
--- a/test/CodeGen/X86/atom-call-reg-indirect-foldedreload32.ll
+++ b/test/CodeGen/X86/atom-call-reg-indirect-foldedreload32.ll
@@ -1,7 +1,9 @@
-; RUN: llc < %s -mtriple=i386-linux-gnu -mcpu=atom 2>&1 | \
-; RUN:     grep "calll" | not grep "("
-; RUN: llc < %s -mtriple=i386-linux-gnu -mcpu=core2 2>&1 | \
-; RUN:     grep "calll" | grep "*funcp"
+; RUN: llc < %s -mtriple=i386-linux-gnu -mcpu=atom  | \
+; RUN:     FileCheck --check-prefix=ATOM %s
+; RUN: llc < %s -mtriple=i386-linux-gnu -mcpu=core2 | \
+; RUN:     FileCheck --check-prefix=CORE2 %s
+; ATOM: calll *{{%[a-z]+}}
+; CORE2: calll *funcp
 ;
 ; original source code built with clang -S -emit-llvm -M32 test32.c:
 ;
@@ -18,10 +20,6 @@
 ;     }
 ;   }
 ;
-; ModuleID = 'test32.c'
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
-target triple = "i386-unknown-linux-gnu"
-
 @sum = external global i32
 @a = common global i32 0, align 4
 @i = common global i32 0, align 4
@@ -74,4 +72,3 @@ for.end:                                          ; preds = %for.cond
   ret void
 }
 
-attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/X86/atom-call-reg-indirect-foldedreload64.ll b/test/CodeGen/X86/atom-call-reg-indirect-foldedreload64.ll
index bcfbd61..a196d81 100644
--- a/test/CodeGen/X86/atom-call-reg-indirect-foldedreload64.ll
+++ b/test/CodeGen/X86/atom-call-reg-indirect-foldedreload64.ll
@@ -1,7 +1,9 @@
-; RUN: llc < %s -mtriple=x86_64-linux-gnu -mcpu=atom 2>&1 | \
-; RUN:     grep "callq" | not grep "("
-; RUN: llc < %s -mtriple=x86_64-linux-gnu -mcpu=core2 2>&1 | \
-; RUN:     grep "callq" | grep "*funcp"
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -mcpu=atom  | \
+; RUN:    FileCheck --check-prefix=ATOM %s
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -mcpu=core2 | \
+; RUN:    FileCheck --check-prefix=CORE2 %s
+; ATOM: callq *{{%[a-z]+[0-9]*}}
+; CORE2: callq *funcp
 ;
 ; Original source code built with clang -S -emit-llvm -m64 test64.c:
 ;   int a, b, c, d, e, f, g, h, i, j, k, l, m, n;
@@ -19,9 +21,6 @@
 ;     }
 ;   }
 ;   
-; ModuleID = 'test64.c'
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
 
 @sum = external global i32
 @a = common global i32 0, align 4
@@ -88,4 +87,3 @@ for.end:                                          ; preds = %for.cond
   ret void
 }
 
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/X86/atom-fixup-lea1.ll b/test/CodeGen/X86/atom-fixup-lea1.ll
new file mode 100644
index 0000000..4651bf2
--- /dev/null
+++ b/test/CodeGen/X86/atom-fixup-lea1.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s -mcpu=atom -mtriple=i686-linux | FileCheck %s
+; CHECK: addl
+; CHECK-NEXT:leal
+; CHECK-NEXT:decl
+; CHECK-NEXT:jne
+
+; Test for the FixupLEAs pre-emit pass. An LEA should be substituted for the ADD
+; that increments the array pointer because it is within 5 instructions of the
+; corresponding load. The ADD precedes the load by following the loop back edge.
+
+; Original C code
+;int test(int n, int * array)
+;{
+;  int sum = 0;
+;  for(int i = 0; i < n; i++)
+;    sum += array[i];
+;  return sum;
+;}
+
+define i32 @test(i32 %n, i32* nocapture %array) {
+entry:
+  %cmp4 = icmp sgt i32 %n, 0
+  br i1 %cmp4, label %for.body, label %for.end
+
+for.body:
+  %i.06 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  %sum.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32* %array, i32 %i.06
+  %0 = load i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %sum.05
+  %inc = add nsw i32 %i.06, 1
+  %exitcond = icmp eq i32 %inc, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  ret i32 %sum.0.lcssa
+}
diff --git a/test/CodeGen/X86/atom-fixup-lea2.ll b/test/CodeGen/X86/atom-fixup-lea2.ll
new file mode 100644
index 0000000..1855ea1
--- /dev/null
+++ b/test/CodeGen/X86/atom-fixup-lea2.ll
@@ -0,0 +1,84 @@
+; RUN: llc < %s -mcpu=atom -mtriple=i686-linux | FileCheck %s
+; CHECK:BB#5
+; CHECK-NEXT:leal
+; CHECK-NEXT:leal
+; CHECK-NEXT:leal
+; CHECK-NEXT:movl
+
+
+; Test for fixup lea pre-emit pass. LEA instructions should be substituted for
+; ADD instructions which compute the address and index of the load because they
+; precede the load within 5 instructions. An LEA should also be substituted for
+; an ADD which computes part of the index because it precedes the index LEA
+; within 5 instructions; this substitution is referred to as backwards chaining.
+
+; Original C Code
+;struct node_t
+;{
+;  int k, m, n, p;
+;  int * array;
+;};
+
+;extern struct node_t getnode();
+
+;int test()
+;{
+;  int sum = 0;
+;  struct node_t n = getnode();
+;  if(n.array != 0 && n.p > 0 && n.k > 0 && n.n > 0 && n.m > 0) {
+;    sum = ((int*)((int)n.array + n.p) )[ n.k + n.m + n.n ];
+;  }
+;  return sum;
+;}
+
+%struct.node_t = type { i32, i32, i32, i32, i32* }
+
+define i32 @test() {
+entry:
+  %n = alloca %struct.node_t, align 4
+  call void bitcast (void (%struct.node_t*, ...)* @getnode to void (%struct.node_t*)*)(%struct.node_t* sret %n)
+  %array = getelementptr inbounds %struct.node_t* %n, i32 0, i32 4
+  %0 = load i32** %array, align 4
+  %cmp = icmp eq i32* %0, null
+  br i1 %cmp, label %if.end, label %land.lhs.true
+
+land.lhs.true:
+  %p = getelementptr inbounds %struct.node_t* %n, i32 0, i32 3
+  %1 = load i32* %p, align 4
+  %cmp1 = icmp sgt i32 %1, 0
+  br i1 %cmp1, label %land.lhs.true2, label %if.end
+
+land.lhs.true2:
+  %k = getelementptr inbounds %struct.node_t* %n, i32 0, i32 0
+  %2 = load i32* %k, align 4
+  %cmp3 = icmp sgt i32 %2, 0
+  br i1 %cmp3, label %land.lhs.true4, label %if.end
+
+land.lhs.true4:
+  %n5 = getelementptr inbounds %struct.node_t* %n, i32 0, i32 2
+  %3 = load i32* %n5, align 4
+  %cmp6 = icmp sgt i32 %3, 0
+  br i1 %cmp6, label %land.lhs.true7, label %if.end
+
+land.lhs.true7:
+  %m = getelementptr inbounds %struct.node_t* %n, i32 0, i32 1
+  %4 = load i32* %m, align 4
+  %cmp8 = icmp sgt i32 %4, 0
+  br i1 %cmp8, label %if.then, label %if.end
+
+if.then:
+  %add = add i32 %3, %2
+  %add12 = add i32 %add, %4
+  %5 = ptrtoint i32* %0 to i32
+  %add15 = add nsw i32 %1, %5
+  %6 = inttoptr i32 %add15 to i32*
+  %arrayidx = getelementptr inbounds i32* %6, i32 %add12
+  %7 = load i32* %arrayidx, align 4
+  br label %if.end
+
+if.end:
+  %sum.0 = phi i32 [ %7, %if.then ], [ 0, %land.lhs.true7 ], [ 0, %land.lhs.true4 ], [ 0, %land.lhs.true2 ], [ 0, %land.lhs.true ], [ 0, %entry ]
+  ret i32 %sum.0
+}
+
+declare void @getnode(%struct.node_t* sret, ...)
diff --git a/test/CodeGen/X86/atom-fixup-lea3.ll b/test/CodeGen/X86/atom-fixup-lea3.ll
new file mode 100644
index 0000000..311b0b3
--- /dev/null
+++ b/test/CodeGen/X86/atom-fixup-lea3.ll
@@ -0,0 +1,51 @@
+; RUN: llc < %s -mcpu=atom -mtriple=i686-linux | FileCheck %s
+; CHECK: addl ([[reg:%[a-z]+]])
+; CHECK-NEXT: addl $4, [[reg]]
+
+; Test for the FixupLEAs pre-emit pass.
+; An LEA should NOT be substituted for the ADD instruction
+; that increments the array pointer if it is more than 5 instructions
+; away from the memory reference that uses it.
+
+; Original C code: clang -m32 -S -O2
+;int test(int n, int * array, int * m, int * array2)
+;{
+;  int i, j = 0;
+;  int sum = 0;
+;  for (i = 0, j = 0; i < n;) {
+;    ++i;
+;    *m += array2[j++];
+;    sum += array[i];
+;  }
+;  return sum;
+;}
+
+define i32 @test(i32 %n, i32* nocapture %array, i32* nocapture %m, i32* nocapture %array2) #0 {
+entry:
+  %cmp7 = icmp sgt i32 %n, 0
+  br i1 %cmp7, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  %.pre = load i32* %m, align 4
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %for.body.lr.ph
+  %0 = phi i32 [ %.pre, %for.body.lr.ph ], [ %add, %for.body ]
+  %sum.010 = phi i32 [ 0, %for.body.lr.ph ], [ %add3, %for.body ]
+  %j.09 = phi i32 [ 0, %for.body.lr.ph ], [ %inc1, %for.body ]
+  %inc1 = add nsw i32 %j.09, 1
+  %arrayidx = getelementptr inbounds i32* %array2, i32 %j.09
+  %1 = load i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %1
+  store i32 %add, i32* %m, align 4
+  %arrayidx2 = getelementptr inbounds i32* %array, i32 %inc1
+  %2 = load i32* %arrayidx2, align 4
+  %add3 = add nsw i32 %2, %sum.010
+  %exitcond = icmp eq i32 %inc1, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add3, %for.body ]
+  ret i32 %sum.0.lcssa
+}
+
diff --git a/test/CodeGen/X86/atomic-dagsched.ll b/test/CodeGen/X86/atomic-dagsched.ll
index 0e7cf8c..05e630b 100644
--- a/test/CodeGen/X86/atomic-dagsched.ll
+++ b/test/CodeGen/X86/atomic-dagsched.ll
@@ -18,8 +18,8 @@ loop.cond:                                        ; preds = %test.exit, %entry
   br i1 %3, label %return, label %loop
 
 loop:                                             ; preds = %loop.cond
-  %4 = load i64* addrspace(256)* inttoptr (i64 264 to i64* addrspace(256)*), align 8, !tbaa !0
-  %5 = load i64* %4, align 8, !tbaa !3
+  %4 = load i64* addrspace(256)* inttoptr (i64 264 to i64* addrspace(256)*), align 8
+  %5 = load i64* %4, align 8
   %vector.size.i = ashr i64 %5, 3
   %num.vector.wi.i = shl i64 %vector.size.i, 3
   %6 = icmp eq i64 %vector.size.i, 0
@@ -65,8 +65,8 @@ scalarIf.i:                                       ; preds = %vector_kernel_entry
   br i1 %18, label %test.exit, label %dim_0_pre_head.i
 
 dim_0_pre_head.i:                                 ; preds = %scalarIf.i
-  %19 = load i64* addrspace(256)* inttoptr (i64 264 to i64* addrspace(256)*), align 8, !tbaa !0
-  %20 = load i64* %19, align 8, !tbaa !3
+  %19 = load i64* addrspace(256)* inttoptr (i64 264 to i64* addrspace(256)*), align 8
+  %20 = load i64* %19, align 8
   %21 = trunc i64 %20 to i32
   %22 = mul i64 %vector.size.i, 8
   br label %scalar_kernel_entry.i
@@ -76,10 +76,10 @@ scalar_kernel_entry.i:                            ; preds = %scalar_kernel_entry
   %23 = bitcast i8* %asr.iv6 to i32 addrspace(1)*
   %24 = bitcast i8* %ptrtoarg4 to i32 addrspace(1)*
   %scevgep16 = getelementptr i32 addrspace(1)* %23, i64 %asr.iv12
-  %25 = load i32 addrspace(1)* %scevgep16, align 4, !tbaa !4
+  %25 = load i32 addrspace(1)* %scevgep16, align 4
   %26 = atomicrmw min i32 addrspace(1)* %24, i32 %25 seq_cst
   %scevgep15 = getelementptr i32 addrspace(1)* %23, i64 %asr.iv12
-  store i32 %21, i32 addrspace(1)* %scevgep15, align 4, !tbaa !4
+  store i32 %21, i32 addrspace(1)* %scevgep15, align 4
   %asr.iv.next13 = add i64 %asr.iv12, 1
   %dim_0_cmp.to.max.i = icmp eq i64 %5, %asr.iv.next13
   br i1 %dim_0_cmp.to.max.i, label %test.exit, label %scalar_kernel_entry.i
@@ -97,12 +97,6 @@ return:                                           ; preds = %loop.cond
   ret void
 }
 
-!0 = metadata !{metadata !"any pointer", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
-!3 = metadata !{metadata !"long", metadata !1}
-!4 = metadata !{metadata !"int", metadata !1}
-
 ; CHECK: test
 ; CHECK: decq
 ; CHECK-NOT: cmpxchgl
diff --git a/test/CodeGen/X86/avx-basic.ll b/test/CodeGen/X86/avx-basic.ll
index 95854c7..64c4627 100644
--- a/test/CodeGen/X86/avx-basic.ll
+++ b/test/CodeGen/X86/avx-basic.ll
@@ -121,3 +121,13 @@ define <16 x i16> @build_vec_16x16(i16 %a) nounwind readonly {
   %res = insertelement <16 x i16> <i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, i16 %a, i32 0
   ret <16 x i16> %res
 }
+
+;;; Check that VMOVPQIto64rr generates the assembly string "vmovd".  Previously
+;;; an incorrect mnemonic of "movd" was printed for this instruction.
+; CHECK: VMOVPQIto64rr
+; CHECK: vmovd
+define i64 @VMOVPQIto64rr(<2 x i64> %a) {
+entry:
+  %vecext.i = extractelement <2 x i64> %a, i32 0
+  ret i64 %vecext.i
+}
diff --git a/test/CodeGen/X86/avx-brcond.ll b/test/CodeGen/X86/avx-brcond.ll
new file mode 100644
index 0000000..d52ae52
--- /dev/null
+++ b/test/CodeGen/X86/avx-brcond.ll
@@ -0,0 +1,150 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+declare i32 @llvm.x86.avx.ptestz.256(<4 x i64> %p1, <4 x i64> %p2) nounwind
+declare i32 @llvm.x86.avx.ptestc.256(<4 x i64> %p1, <4 x i64> %p2) nounwind
+
+define <4 x float> @test1(<4 x i64> %a, <4 x float> %b) nounwind {
+entry:
+; CHECK: test1:
+; CHECK: vptest
+; CHECK-NEXT:	jne
+; CHECK: ret
+
+  %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a, <4 x i64> %a) nounwind 
+  %one = icmp ne i32 %res, 0 
+  br i1 %one, label %bb1, label %bb2
+
+bb1:
+  %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+  br label %return
+
+bb2:
+	%d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+	br label %return
+
+return:
+  %e = phi <4 x float> [%c, %bb1], [%d, %bb2]
+  ret <4 x float> %e
+}
+
+define <4 x float> @test3(<4 x i64> %a, <4 x float> %b) nounwind {
+entry:
+; CHECK: test3:
+; CHECK: vptest
+; CHECK-NEXT:	jne
+; CHECK: ret
+
+  %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a, <4 x i64> %a) nounwind 
+  %one = trunc i32 %res to i1 
+  br i1 %one, label %bb1, label %bb2
+
+bb1:
+  %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+  br label %return
+
+bb2:
+	%d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+	br label %return
+
+return:
+  %e = phi <4 x float> [%c, %bb1], [%d, %bb2]
+  ret <4 x float> %e
+}
+
+define <4 x float> @test4(<4 x i64> %a, <4 x float> %b) nounwind {
+entry:
+; CHECK: test4:
+; CHECK: vptest
+; CHECK-NEXT:	jae
+; CHECK: ret
+
+  %res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a, <4 x i64> %a) nounwind 
+  %one = icmp ne i32 %res, 0 
+  br i1 %one, label %bb1, label %bb2
+
+bb1:
+  %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+  br label %return
+
+bb2:
+	%d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+	br label %return
+
+return:
+  %e = phi <4 x float> [%c, %bb1], [%d, %bb2]
+  ret <4 x float> %e
+}
+
+define <4 x float> @test6(<4 x i64> %a, <4 x float> %b) nounwind {
+entry:
+; CHECK: test6:
+; CHECK: vptest
+; CHECK-NEXT:	jae
+; CHECK: ret
+
+  %res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a, <4 x i64> %a) nounwind 
+  %one = trunc i32 %res to i1 
+  br i1 %one, label %bb1, label %bb2
+
+bb1:
+  %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+  br label %return
+
+bb2:
+	%d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+	br label %return
+
+return:
+  %e = phi <4 x float> [%c, %bb1], [%d, %bb2]
+  ret <4 x float> %e
+}
+
+define <4 x float> @test7(<4 x i64> %a, <4 x float> %b) nounwind {
+entry:
+; CHECK: test7:
+; CHECK: vptest
+; CHECK-NEXT:	jne
+; CHECK: ret
+
+  %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a, <4 x i64> %a) nounwind 
+  %one = icmp eq i32 %res, 1 
+  br i1 %one, label %bb1, label %bb2
+
+bb1:
+  %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+  br label %return
+
+bb2:
+	%d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+	br label %return
+
+return:
+  %e = phi <4 x float> [%c, %bb1], [%d, %bb2]
+  ret <4 x float> %e
+}
+
+define <4 x float> @test8(<4 x i64> %a, <4 x float> %b) nounwind {
+entry:
+; CHECK: test8:
+; CHECK: vptest
+; CHECK-NEXT:	je
+; CHECK: ret
+
+  %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a, <4 x i64> %a) nounwind 
+  %one = icmp ne i32 %res, 1 
+  br i1 %one, label %bb1, label %bb2
+
+bb1:
+  %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+  br label %return
+
+bb2:
+	%d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+	br label %return
+
+return:
+  %e = phi <4 x float> [%c, %bb1], [%d, %bb2]
+  ret <4 x float> %e
+}
+
+
diff --git a/test/CodeGen/X86/block-placement.ll b/test/CodeGen/X86/block-placement.ll
index 5534712..271fb42 100644
--- a/test/CodeGen/X86/block-placement.ll
+++ b/test/CodeGen/X86/block-placement.ll
@@ -524,7 +524,7 @@ entry:
   br i1 %cond, label %entry.if.then_crit_edge, label %lor.lhs.false, !prof !1
 
 entry.if.then_crit_edge:
-  %.pre14 = load i8* undef, align 1, !tbaa !0
+  %.pre14 = load i8* undef, align 1
   br label %if.then
 
 lor.lhs.false:
@@ -537,7 +537,7 @@ exit:
 if.then:
   %0 = phi i8 [ %.pre14, %entry.if.then_crit_edge ], [ undef, %exit ]
   %1 = and i8 %0, 1
-  store i8 %1, i8* undef, align 4, !tbaa !0
+  store i8 %1, i8* undef, align 4
   br label %if.end
 
 if.end:
diff --git a/test/CodeGen/X86/brcond.ll b/test/CodeGen/X86/brcond.ll
index 44670c8..bc4032b 100644
--- a/test/CodeGen/X86/brcond.ll
+++ b/test/CodeGen/X86/brcond.ll
@@ -108,3 +108,150 @@ bb2:                                              ; preds = %entry, %bb1
   ret float %.0
 }
 
+declare i32 @llvm.x86.sse41.ptestz(<4 x float> %p1, <4 x float> %p2) nounwind
+declare i32 @llvm.x86.sse41.ptestc(<4 x float> %p1, <4 x float> %p2) nounwind
+
+define <4 x float> @test5(<4 x float> %a, <4 x float> %b) nounwind {
+entry: ; branch on "ptestz(%a, %a) != 0"
+; CHECK: test5:
+; CHECK: ptest
+; CHECK-NEXT:	jne
+; CHECK: ret
+
+  %res = call i32 @llvm.x86.sse41.ptestz(<4 x float> %a, <4 x float> %a) nounwind 
+  %one = icmp ne i32 %res, 0 
+  br i1 %one, label %bb1, label %bb2 ; expected to lower to the ptest + jne pair matched above
+
+bb1: ; taken when %one is true: %b + constant vector
+  %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+  br label %return
+
+bb2: ; taken when %one is false: %b / the same constant vector
+	%d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+	br label %return
+
+return: ; phi merges %c from %bb1 and %d from %bb2
+  %e = phi <4 x float> [%c, %bb1], [%d, %bb2]
+  ret <4 x float> %e
+}
+
+define <4 x float> @test7(<4 x float> %a, <4 x float> %b) nounwind {
+entry: ; branch on the low bit of ptestz(%a, %a)
+; CHECK: test7:
+; CHECK: ptest
+; CHECK-NEXT:	jne
+; CHECK: ret
+
+  %res = call i32 @llvm.x86.sse41.ptestz(<4 x float> %a, <4 x float> %a) nounwind 
+  %one = trunc i32 %res to i1 
+  br i1 %one, label %bb1, label %bb2 ; expected to lower to the ptest + jne pair matched above
+
+bb1: ; taken when %one is true: %b + constant vector
+  %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+  br label %return
+
+bb2: ; taken when %one is false: %b / the same constant vector
+	%d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+	br label %return
+
+return: ; phi merges %c from %bb1 and %d from %bb2
+  %e = phi <4 x float> [%c, %bb1], [%d, %bb2]
+  ret <4 x float> %e
+}
+
+define <4 x float> @test8(<4 x float> %a, <4 x float> %b) nounwind {
+entry: ; branch on "ptestc(%a, %a) != 0"
+; CHECK: test8:
+; CHECK: ptest
+; CHECK-NEXT:	jae
+; CHECK: ret
+
+  %res = call i32 @llvm.x86.sse41.ptestc(<4 x float> %a, <4 x float> %a) nounwind 
+  %one = icmp ne i32 %res, 0 
+  br i1 %one, label %bb1, label %bb2 ; expected to lower to the ptest + jae pair matched above
+
+bb1: ; taken when %one is true: %b + constant vector
+  %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+  br label %return
+
+bb2: ; taken when %one is false: %b / the same constant vector
+	%d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+	br label %return
+
+return: ; phi merges %c from %bb1 and %d from %bb2
+  %e = phi <4 x float> [%c, %bb1], [%d, %bb2]
+  ret <4 x float> %e
+}
+
+define <4 x float> @test10(<4 x float> %a, <4 x float> %b) nounwind {
+entry: ; branch on the low bit of ptestc(%a, %a)
+; CHECK: test10:
+; CHECK: ptest
+; CHECK-NEXT:	jae
+; CHECK: ret
+
+  %res = call i32 @llvm.x86.sse41.ptestc(<4 x float> %a, <4 x float> %a) nounwind 
+  %one = trunc i32 %res to i1 
+  br i1 %one, label %bb1, label %bb2 ; expected to lower to the ptest + jae pair matched above
+
+bb1: ; taken when %one is true: %b + constant vector
+  %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+  br label %return
+
+bb2: ; taken when %one is false: %b / the same constant vector
+	%d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+	br label %return
+
+return: ; phi merges %c from %bb1 and %d from %bb2
+  %e = phi <4 x float> [%c, %bb1], [%d, %bb2]
+  ret <4 x float> %e
+}
+
+define <4 x float> @test11(<4 x float> %a, <4 x float> %b) nounwind {
+entry: ; branch on "ptestz(%a, %a) == 1"
+; CHECK: test11:
+; CHECK: ptest
+; CHECK-NEXT:	jne
+; CHECK: ret
+
+  %res = call i32 @llvm.x86.sse41.ptestz(<4 x float> %a, <4 x float> %a) nounwind 
+  %one = icmp eq i32 %res, 1 
+  br i1 %one, label %bb1, label %bb2 ; expected to lower to the ptest + jne pair matched above
+
+bb1: ; taken when %one is true: %b + constant vector
+  %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+  br label %return
+
+bb2: ; taken when %one is false: %b / the same constant vector
+	%d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+	br label %return
+
+return: ; phi merges %c from %bb1 and %d from %bb2
+  %e = phi <4 x float> [%c, %bb1], [%d, %bb2]
+  ret <4 x float> %e
+}
+
+define <4 x float> @test12(<4 x float> %a, <4 x float> %b) nounwind {
+entry: ; branch on "ptestz(%a, %a) != 1"
+; CHECK: test12:
+; CHECK: ptest
+; CHECK-NEXT:	je
+; CHECK: ret
+
+  %res = call i32 @llvm.x86.sse41.ptestz(<4 x float> %a, <4 x float> %a) nounwind 
+  %one = icmp ne i32 %res, 1 
+  br i1 %one, label %bb1, label %bb2 ; expected to lower to the ptest + je pair matched above
+
+bb1: ; taken when %one is true: %b + constant vector
+  %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+  br label %return
+
+bb2: ; taken when %one is false: %b / the same constant vector
+	%d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+	br label %return
+
+return: ; phi merges %c from %bb1 and %d from %bb2
+  %e = phi <4 x float> [%c, %bb1], [%d, %bb2]
+  ret <4 x float> %e
+}
+
diff --git a/test/CodeGen/X86/bswap-inline-asm.ll b/test/CodeGen/X86/bswap-inline-asm.ll
index 3bb9124..d69bfa6 100644
--- a/test/CodeGen/X86/bswap-inline-asm.ll
+++ b/test/CodeGen/X86/bswap-inline-asm.ll
@@ -1,6 +1,7 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin > %t
-; RUN: not grep InlineAsm %t
-; RUN: FileCheck %s < %t
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck -check-prefix CHK %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+
+; CHK-NOT: InlineAsm
 
 ; CHECK: foo:
 ; CHECK: bswapq
diff --git a/test/CodeGen/X86/bt.ll b/test/CodeGen/X86/bt.ll
index 39a784de..e28923b 100644
--- a/test/CodeGen/X86/bt.ll
+++ b/test/CodeGen/X86/bt.ll
@@ -522,11 +522,8 @@ UnifiedReturnBlock:		; preds = %entry
 
 declare void @foo()
 
-; rdar://12755626
 define zeroext i1 @invert(i32 %flags, i32 %flag) nounwind {
-; CHECK: invert
-; CHECK: btl %eax, %ecx
-; CHECK: setae
+; CHECK: btl
 entry:
   %neg = xor i32 %flags, -1
   %shl = shl i32 1, %flag
diff --git a/test/CodeGen/X86/call-imm.ll b/test/CodeGen/X86/call-imm.ll
index 38cda4d..8753594 100644
--- a/test/CodeGen/X86/call-imm.ll
+++ b/test/CodeGen/X86/call-imm.ll
@@ -1,11 +1,11 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=static | grep "call.*12345678"
-; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=pic | not grep "call.*12345678"
-; RUN: llc < %s -mtriple=i386-pc-linux -relocation-model=dynamic-no-pic | grep "call.*12345678"
+; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=static | FileCheck -check-prefix X86STA %s
+; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=pic | FileCheck -check-prefix X86PIC %s
+; RUN: llc < %s -mtriple=i386-pc-linux -relocation-model=dynamic-no-pic | FileCheck -check-prefix X86DYN %s
 
 ; Call to immediate is not safe on x86-64 unless we *know* that the
 ; call will be within 32-bits pcrel from the dest immediate.
 
-; RUN: llc < %s -march=x86-64 | grep "call.*\*%rax"
+; RUN: llc < %s -march=x86-64 | FileCheck -check-prefix X64 %s
 
 ; PR3666
 ; PR3773
@@ -16,3 +16,8 @@ entry:
 	%0 = call i32 inttoptr (i32 12345678 to i32 (i32)*)(i32 0) nounwind		; <i32> [#uses=1]
 	ret i32 %0
 }
+
+; X86STA: {{call.*12345678}}
+; X86PIC-NOT: {{call.*12345678}}
+; X86DYN: {{call.*12345678}}
+; X64: {{call.*[*]%rax}}
diff --git a/test/CodeGen/X86/coalescer-identity.ll b/test/CodeGen/X86/coalescer-identity.ll
index 9c72ee6..1aac095 100644
--- a/test/CodeGen/X86/coalescer-identity.ll
+++ b/test/CodeGen/X86/coalescer-identity.ll
@@ -12,10 +12,10 @@ target triple = "x86_64-apple-macosx10.8.0"
 
 define void @func() nounwind uwtable ssp {
 for.body.lr.ph:
-  %0 = load i32* @g2, align 4, !tbaa !0
+  %0 = load i32* @g2, align 4
   %tobool6 = icmp eq i32 %0, 0
   %s.promoted = load i16* @s, align 2
-  %.pre = load i32* @g1, align 4, !tbaa !0
+  %.pre = load i32* @g1, align 4
   br i1 %tobool6, label %for.body.us, label %for.body
 
 for.body.us:                                      ; preds = %for.body.lr.ph, %for.inc.us
@@ -43,11 +43,11 @@ for.inc.us:                                       ; preds = %cond.end.us, %for.b
 cond.end.us:                                      ; preds = %if.then7.us, %cond.false.us
   %4 = phi i32 [ 0, %cond.false.us ], [ %1, %if.then7.us ]
   %cond.us = phi i32 [ 0, %cond.false.us ], [ %v.010.us, %if.then7.us ]
-  store i32 %cond.us, i32* @g0, align 4, !tbaa !0
+  store i32 %cond.us, i32* @g0, align 4
   br label %for.inc.us
 
 cond.false.us:                                    ; preds = %if.then7.us
-  store i32 0, i32* @g1, align 4, !tbaa !0
+  store i32 0, i32* @g1, align 4
   br label %cond.end.us
 
 if.then7.us:                                      ; preds = %for.body.us
@@ -76,7 +76,3 @@ for.end:                                          ; preds = %for.inc.us, %for.bo
   store i16 %dec12.lcssa, i16* @s, align 2
   ret void
 }
-
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/X86/code_placement_align_all.ll b/test/CodeGen/X86/code_placement_align_all.ll
new file mode 100644
index 0000000..1e5e8f7
--- /dev/null
+++ b/test/CodeGen/X86/code_placement_align_all.ll
@@ -0,0 +1,22 @@
+; RUN: llc  -mcpu=corei7 -mtriple=x86_64-linux -align-all-blocks=16 < %s | FileCheck %s
+
+;CHECK: foo
+;CHECK: .align  65536, 0x90
+;CHECK: .align  65536, 0x90
+;CHECK: .align  65536, 0x90
+;CHECK: ret
+define i32 @foo(i32 %t, i32 %l) nounwind readnone ssp uwtable { ; three basic blocks: %0 (entry), %2 and %4
+  %1 = icmp eq i32 %t, 0
+  br i1 %1, label %4, label %2 ; %t == 0 goes to block %4, otherwise block %2
+
+; <label>:2                                       ; preds = %0
+  %3 = add nsw i32 %t, 2
+  ret i32 %3 ; returns %t + 2
+
+; <label>:4                                       ; preds = %0
+  %5 = icmp eq i32 %l, 0
+  %. = select i1 %5, i32 0, i32 5 ; 0 when %l == 0, otherwise 5
+  ret i32 %.
+}
+
+
diff --git a/test/CodeGen/X86/codegen-prepare.ll b/test/CodeGen/X86/codegen-prepare.ll
new file mode 100644
index 0000000..e8ee070
--- /dev/null
+++ b/test/CodeGen/X86/codegen-prepare.ll
@@ -0,0 +1,44 @@
+; RUN: llc < %s -mtriple=x86_64-pc-linux | FileCheck %s
+
+; Check that the CodeGenPrepare Pass
+; does not wrongly rewrite the address computed by Instruction %4
+; as [12 + Base:%this].
+
+; This test makes sure that:
+; - both the store and the first load instructions
+;   within basic block labeled 'if.then' are not removed. 
+; - the store instruction stores a value at address [60 + %this]
+; - the first load instruction loads a value at address [12 + %this]
+
+%class.A = type { %struct.B }
+%struct.B = type { %class.C, %class.D, %class.C, %class.D }
+%class.C = type { float, float, float }
+%class.D = type { [3 x %class.C] }
+
+define linkonce_odr void @foo(%class.A* nocapture %this, i32 %BoolValue) nounwind uwtable {
+entry:
+  %cmp = icmp eq i32 %BoolValue, 0
+  %address1 = getelementptr inbounds %class.A* %this, i64 0, i32 0, i32 3 ; field 3 of %struct.B (a %class.D)
+  %address2 = getelementptr inbounds %class.A* %this, i64 0, i32 0, i32 1 ; field 1 of %struct.B (a %class.D)
+  br i1 %cmp, label %if.else, label %if.then ; %BoolValue == 0 skips the load/store block
+
+if.then:                                         ; preds = %entry
+  %0 = getelementptr inbounds %class.D* %address2, i64 0, i32 0, i64 0, i32 0
+  %1 = load float* %0, align 4 ; first float of field 1
+  %2 = getelementptr inbounds float* %0, i64 3
+  %3 = load float* %2, align 4 ; three floats past %0; the loaded value is not used
+  %4 = getelementptr inbounds %class.D* %address1, i64 0, i32 0, i64 0, i32 0
+  store float %1, float* %4, align 4 ; writes %1 to the first float of field 3
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  ret void
+}
+
+; CHECK: foo:
+; CHECK: movss 12([[THIS:%[a-zA-Z0-9]+]]), [[REGISTER:%[a-zA-Z0-9]+]]
+; CHECK-NEXT: movss [[REGISTER]], 60([[THIS]])
+
diff --git a/test/CodeGen/X86/commute-intrinsic.ll b/test/CodeGen/X86/commute-intrinsic.ll
index d810cb1..7d5ca47 100644
--- a/test/CodeGen/X86/commute-intrinsic.ll
+++ b/test/CodeGen/X86/commute-intrinsic.ll
@@ -1,4 +1,6 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -relocation-model=static | not grep movaps
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -relocation-model=static | FileCheck %s
+
+; CHECK-NOT: movaps
 
 @a = external global <2 x i64>		; <<2 x i64>*> [#uses=1]
 
diff --git a/test/CodeGen/X86/compact-unwind.ll b/test/CodeGen/X86/compact-unwind.ll
new file mode 100644
index 0000000..8c4fa27
--- /dev/null
+++ b/test/CodeGen/X86/compact-unwind.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -disable-cfi -disable-fp-elim -mtriple x86_64-apple-darwin11 | FileCheck %s
+
+%ty = type { i8* }
+
+@gv = external global i32
+
+; This is aligning the stack with a push of a random register.
+; CHECK: pushq %rax
+
+; Even though we can't encode %rax into the compact unwind, we still want to be
+; able to generate a compact unwind encoding in this particular case.
+;
+; CHECK: __LD,__compact_unwind
+; CHECK: _foo ## Range Start
+; CHECK: 16842753 ## Compact Unwind Encoding: 0x1010001
+
+define i8* @foo(i64 %size) {
+  %addr = alloca i64, align 8
+  %tmp20 = load i32* @gv, align 4 ; loaded value is not used
+  %tmp21 = call i32 @bar() ; return value is not used
+  %tmp25 = load i64* %addr, align 8 ; %addr is never stored to in this function
+  %tmp26 = inttoptr i64 %tmp25 to %ty* ; reinterpret the loaded integer as a %ty*
+  %tmp29 = getelementptr inbounds %ty* %tmp26, i64 0, i32 0
+  %tmp34 = load i8** %tmp29, align 8
+  %tmp35 = getelementptr inbounds i8* %tmp34, i64 %size
+  store i8* %tmp35, i8** %tmp29, align 8 ; write back the i8* field advanced by %size bytes
+  ret i8* null
+}
+
+declare i32 @bar()
diff --git a/test/CodeGen/X86/compiler_used.ll b/test/CodeGen/X86/compiler_used.ll
index be8de5e..d38ce91 100644
--- a/test/CodeGen/X86/compiler_used.ll
+++ b/test/CodeGen/X86/compiler_used.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin9 | grep no_dead_strip | count 1
-; We should have a .no_dead_strip directive for Z but not for X/Y.
+; RUN: llc < %s -mtriple=i386-apple-darwin9 | FileCheck %s
 
 @X = internal global i8 4
 @Y = internal global i32 123
@@ -7,3 +6,7 @@
 
 @llvm.used = appending global [1 x i8*] [ i8* @Z ], section "llvm.metadata"
 @llvm.compiler_used = appending global [2 x i8*] [ i8* @X, i8* bitcast (i32* @Y to i8*)], section "llvm.metadata"
+
+; CHECK-NOT: .no_dead_strip
+; CHECK: .no_dead_strip	_Z
+; CHECK-NOT: .no_dead_strip
diff --git a/test/CodeGen/X86/crash.ll b/test/CodeGen/X86/crash.ll
index 6d21962..852b642 100644
--- a/test/CodeGen/X86/crash.ll
+++ b/test/CodeGen/X86/crash.ll
@@ -238,7 +238,7 @@ declare i64 @llvm.objectsize.i64(i8*, i1) nounwind readnone
 
 define void @_ZNK4llvm17MipsFrameLowering12emitPrologueERNS_15MachineFunctionE() ssp align 2 {
 bb:
-  %tmp = load %t9** undef, align 4, !tbaa !0
+  %tmp = load %t9** undef, align 4
   %tmp2 = getelementptr inbounds %t9* %tmp, i32 0, i32 0
   %tmp3 = getelementptr inbounds %t9* %tmp, i32 0, i32 0, i32 0, i32 0, i32 1
   br label %bb4
diff --git a/test/CodeGen/X86/dbg-byval-parameter.ll b/test/CodeGen/X86/dbg-byval-parameter.ll
index aca06a2..719a526 100644
--- a/test/CodeGen/X86/dbg-byval-parameter.ll
+++ b/test/CodeGen/X86/dbg-byval-parameter.ll
@@ -30,7 +30,7 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 !0 = metadata !{i32 786689, metadata !1, metadata !"my_r0", metadata !2, i32 11, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ]
 !1 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 11, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, double (%struct.Rect*)* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
 !2 = metadata !{i32 786473, metadata !19} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786449, i32 1, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, null, null, metadata !18, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786449, i32 1, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, null, null, metadata !18, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
 !4 = metadata !{i32 786453, metadata !19, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !5 = metadata !{metadata !6, metadata !7}
 !6 = metadata !{i32 786468, metadata !19, metadata !2, metadata !"double", i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
diff --git a/test/CodeGen/X86/dbg-const-int.ll b/test/CodeGen/X86/dbg-const-int.ll
index aabc206..f72729c 100644
--- a/test/CodeGen/X86/dbg-const-int.ll
+++ b/test/CodeGen/X86/dbg-const-int.ll
@@ -14,7 +14,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 12, metadata !2, metadata !"clang version 3.0 (trunk 132191)", i1 true, metadata !"", i32 0, null, null, metadata !11, null, null} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, i32 12, metadata !2, metadata !"clang version 3.0 (trunk 132191)", i1 true, metadata !"", i32 0, null, null, metadata !11, null,  null, null} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"", metadata !2, i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 true, i32 ()* @foo, null, null, metadata !12, i32 0} ; [ DW_TAG_subprogram ]
 !2 = metadata !{i32 786473, metadata !13} ; [ DW_TAG_file_type ]
 !3 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
diff --git a/test/CodeGen/X86/dbg-const.ll b/test/CodeGen/X86/dbg-const.ll
index a9b8f1f..5c2e62b 100644
--- a/test/CodeGen/X86/dbg-const.ll
+++ b/test/CodeGen/X86/dbg-const.ll
@@ -20,7 +20,7 @@ declare i32 @bar() nounwind readnone
 
 !0 = metadata !{i32 786478, metadata !1, metadata !"foobar", metadata !"foobar", metadata !"foobar", metadata !1, i32 12, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 ()* @foobar, null, null, metadata !14, i32 0} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786473, metadata !15} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, i32 12, metadata !1, metadata !"clang version 2.9 (trunk 114183)", i1 true, metadata !"", i32 0, null, null, metadata !13, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!2 = metadata !{i32 786449, i32 12, metadata !1, metadata !"clang version 2.9 (trunk 114183)", i1 true, metadata !"", i32 0, null, null, metadata !13, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
 !3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null}
 !4 = metadata !{metadata !5}
 !5 = metadata !{i32 786468, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5}
diff --git a/test/CodeGen/X86/dbg-i128-const.ll b/test/CodeGen/X86/dbg-i128-const.ll
index 17d6457..cc612b2 100644
--- a/test/CodeGen/X86/dbg-i128-const.ll
+++ b/test/CodeGen/X86/dbg-i128-const.ll
@@ -19,7 +19,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !2 = metadata !{i32 786443, metadata !4, metadata !3, i32 26, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
 !3 = metadata !{i32 786478, metadata !4, metadata !"__foo", metadata !"__foo", metadata !"__foo", metadata !4, i32 26, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i1 false, i128 (i128, i128)* @__foo, null, null, null, i32 26} ; [ DW_TAG_subprogram ]
 !4 = metadata !{i32 786473, metadata !13} ; [ DW_TAG_file_type ]
-!5 = metadata !{i32 786449, i32 1, metadata !4, metadata !"clang", i1 true, metadata !"", i32 0, null, null, metadata !12, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!5 = metadata !{i32 786449, i32 1, metadata !4, metadata !"clang", i1 true, metadata !"", i32 0, null, null, metadata !12, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
 !6 = metadata !{i32 786453, metadata !13, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !7 = metadata !{metadata !8, metadata !8, metadata !8}
 !8 = metadata !{i32 786454, metadata !14, metadata !4, metadata !"ti_int", i32 78, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_typedef ]
diff --git a/test/CodeGen/X86/dbg-large-unsigned-const.ll b/test/CodeGen/X86/dbg-large-unsigned-const.ll
index ff16318..c381cd7 100644
--- a/test/CodeGen/X86/dbg-large-unsigned-const.ll
+++ b/test/CodeGen/X86/dbg-large-unsigned-const.ll
@@ -7,8 +7,8 @@ define zeroext i1 @_Z3iseRKxS0_(i64* nocapture %LHS, i64* nocapture %RHS) nounwi
 entry:
   tail call void @llvm.dbg.value(metadata !{i64* %LHS}, i64 0, metadata !7), !dbg !13
   tail call void @llvm.dbg.value(metadata !{i64* %RHS}, i64 0, metadata !11), !dbg !14
-  %tmp1 = load i64* %LHS, align 4, !dbg !15, !tbaa !17
-  %tmp3 = load i64* %RHS, align 4, !dbg !15, !tbaa !17
+  %tmp1 = load i64* %LHS, align 4, !dbg !15
+  %tmp3 = load i64* %RHS, align 4, !dbg !15
   %cmp = icmp eq i64 %tmp1, %tmp3, !dbg !15
   ret i1 %cmp, !dbg !15
 }
@@ -47,9 +47,6 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !14 = metadata !{i32 2, i32 49, metadata !1, null}
 !15 = metadata !{i32 3, i32 3, metadata !16, null}
 !16 = metadata !{i32 786443, metadata !2, metadata !1, i32 2, i32 54, i32 0} ; [ DW_TAG_lexical_block ]
-!17 = metadata !{metadata !"long long", metadata !18}
-!18 = metadata !{metadata !"omnipotent char", metadata !19}
-!19 = metadata !{metadata !"Simple C/C++ TBAA", null}
 !20 = metadata !{i32 6, i32 19, metadata !6, null}
 !21 = metadata !{i32 786689, metadata !1, metadata !"LHS", metadata !2, i32 16777218, metadata !8, i32 0, metadata !22} ; [ DW_TAG_arg_variable ]
 !22 = metadata !{i32 7, i32 10, metadata !23, null}
diff --git a/test/CodeGen/X86/dbg-merge-loc-entry.ll b/test/CodeGen/X86/dbg-merge-loc-entry.ll
index baad6c0..30d0305 100644
--- a/test/CodeGen/X86/dbg-merge-loc-entry.ll
+++ b/test/CodeGen/X86/dbg-merge-loc-entry.ll
@@ -47,7 +47,7 @@ declare %0 @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone
 
 !0 = metadata !{i32 786478, metadata !1, metadata !"__udivmodti4", metadata !"__udivmodti4", metadata !"", metadata !1, i32 879, metadata !3, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, null, i32 879} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786473, metadata !29} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, i32 1, metadata !1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, null, null, metadata !28, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!2 = metadata !{i32 786449, i32 1, metadata !1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, null, null, metadata !28, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
 !3 = metadata !{i32 786453, metadata !29, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !4 = metadata !{metadata !5, metadata !5, metadata !5, metadata !8}
 !5 = metadata !{i32 786454, metadata !30, metadata !6, metadata !"UTItype", i32 166, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_typedef ]
diff --git a/test/CodeGen/X86/dbg-prolog-end.ll b/test/CodeGen/X86/dbg-prolog-end.ll
index 26bac2e..d1774cc 100644
--- a/test/CodeGen/X86/dbg-prolog-end.ll
+++ b/test/CodeGen/X86/dbg-prolog-end.ll
@@ -35,7 +35,7 @@ entry:
 !llvm.dbg.cu = !{!0}
 !18 = metadata !{metadata !1, metadata !6}
 
-!0 = metadata !{i32 786449, i32 12, metadata !2, metadata !"clang version 3.0 (trunk 131100)", i1 false, metadata !"", i32 0, null, null, metadata !18, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, i32 12, metadata !2, metadata !"clang version 3.0 (trunk 131100)", i1 false, metadata !"", i32 0, null, null, metadata !18, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"", metadata !2, i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 (i32)* @foo, null, null, null, i32 1} ; [ DW_TAG_subprogram ]
 !2 = metadata !{i32 786473, metadata !"/tmp/a.c", metadata !"/private/tmp", metadata !0} ; [ DW_TAG_file_type ]
 !3 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
diff --git a/test/CodeGen/X86/dbg-subrange.ll b/test/CodeGen/X86/dbg-subrange.ll
index 6090185..b08d68a 100644
--- a/test/CodeGen/X86/dbg-subrange.ll
+++ b/test/CodeGen/X86/dbg-subrange.ll
@@ -14,7 +14,7 @@ entry:
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 12, metadata !6, metadata !"clang version 3.1 (trunk 144833)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !11, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, i32 12, metadata !6, metadata !"clang version 3.1 (trunk 144833)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !11,  metadata !11, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
 !5 = metadata !{i32 720942, metadata !6, metadata !"bar", metadata !"bar", metadata !"", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void ()* @bar, null, null, metadata !9} ; [ DW_TAG_subprogram ]
diff --git a/test/CodeGen/X86/dbg-value-dag-combine.ll b/test/CodeGen/X86/dbg-value-dag-combine.ll
index fcbf64f..c63235e 100644
--- a/test/CodeGen/X86/dbg-value-dag-combine.ll
+++ b/test/CodeGen/X86/dbg-value-dag-combine.ll
@@ -27,7 +27,7 @@ entry:
 
 !0 = metadata !{i32 786478, metadata !1, metadata !"__OpenCL_test_kernel", metadata !"__OpenCL_test_kernel", metadata !"__OpenCL_test_kernel", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, null} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786473, metadata !19} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, i32 1, metadata !1, metadata !"clc", i1 false, metadata !"", i32 0, null, null, metadata !18, null, null} ; [ DW_TAG_compile_unit ]
+!2 = metadata !{i32 786449, i32 1, metadata !1, metadata !"clc", i1 false, metadata !"", i32 0, null, null, metadata !18, null,  null, null} ; [ DW_TAG_compile_unit ]
 !3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !4 = metadata !{null, metadata !5}
 !5 = metadata !{i32 786447, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !6} ; [ DW_TAG_pointer_type ]
diff --git a/test/CodeGen/X86/dbg-value-isel.ll b/test/CodeGen/X86/dbg-value-isel.ll
index 55be3b1..acc360e 100644
--- a/test/CodeGen/X86/dbg-value-isel.ll
+++ b/test/CodeGen/X86/dbg-value-isel.ll
@@ -82,7 +82,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !0 = metadata !{i32 786478, metadata !1, metadata !"__OpenCL_nbt02_kernel", metadata !"__OpenCL_nbt02_kernel", metadata !"__OpenCL_nbt02_kernel", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, null} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, metadata !20, i32 1, metadata !"clc", i1 false, metadata !"", i32 0, null, null, metadata !19, null, null} ; [ DW_TAG_compile_unit ]
+!2 = metadata !{i32 786449, metadata !20, i32 1, metadata !"clc", i1 false, metadata !"", i32 0, null, null, metadata !19, null,  null, null} ; [ DW_TAG_compile_unit ]
 !3 = metadata !{i32 786453, metadata !20, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !4 = metadata !{null, metadata !5}
 !5 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !6} ; [ DW_TAG_pointer_type ]
diff --git a/test/CodeGen/X86/dbg-value-location.ll b/test/CodeGen/X86/dbg-value-location.ll
index 2a1916f..a6c3e13 100644
--- a/test/CodeGen/X86/dbg-value-location.ll
+++ b/test/CodeGen/X86/dbg-value-location.ll
@@ -49,7 +49,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !0 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 19510, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i64, i8*, i32)* @foo, null, null, null, i32 19510} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786473, metadata !26} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, metadata !27, i32 12, metadata !"clang version 2.9 (trunk 124753)", i1 true, metadata !"", i32 0, null, null, metadata !24, null, null} ; [ DW_TAG_compile_unit ]
+!2 = metadata !{i32 786449, metadata !27, i32 12, metadata !"clang version 2.9 (trunk 124753)", i1 true, metadata !"", i32 0, null, null, metadata !24, null,  null, null} ; [ DW_TAG_compile_unit ]
 !3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !4 = metadata !{metadata !5}
 !5 = metadata !{i32 786468, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
diff --git a/test/CodeGen/X86/dbg-value-range.ll b/test/CodeGen/X86/dbg-value-range.ll
index 6766dbe..b068bbb 100644
--- a/test/CodeGen/X86/dbg-value-range.ll
+++ b/test/CodeGen/X86/dbg-value-range.ll
@@ -6,7 +6,7 @@ define i32 @bar(%struct.a* nocapture %b) nounwind ssp {
 entry:
   tail call void @llvm.dbg.value(metadata !{%struct.a* %b}, i64 0, metadata !6), !dbg !13
   %tmp1 = getelementptr inbounds %struct.a* %b, i64 0, i32 0, !dbg !14
-  %tmp2 = load i32* %tmp1, align 4, !dbg !14, !tbaa !15
+  %tmp2 = load i32* %tmp1, align 4, !dbg !14
   tail call void @llvm.dbg.value(metadata !{i32 %tmp2}, i64 0, metadata !11), !dbg !14
   %call = tail call i32 (...)* @foo(i32 %tmp2) nounwind , !dbg !18
   %add = add nsw i32 %tmp2, 1, !dbg !19
@@ -21,7 +21,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !0 = metadata !{i32 786478, metadata !1, metadata !"bar", metadata !"bar", metadata !"", metadata !1, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (%struct.a*)* @bar, null, null, metadata !21, i32 0} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786473, metadata !22} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, metadata !22, i32 12, metadata !"clang version 2.9 (trunk 122997)", i1 true, metadata !"", i32 0, null, null, metadata !20, null, null} ; [ DW_TAG_compile_unit ]
+!2 = metadata !{i32 786449, metadata !22, i32 12, metadata !"clang version 2.9 (trunk 122997)", i1 true, metadata !"", i32 0, null, null, metadata !20, null,  null, null} ; [ DW_TAG_compile_unit ]
 !3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !4 = metadata !{metadata !5}
 !5 = metadata !{i32 786468, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
@@ -34,9 +34,6 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !12 = metadata !{i32 786443, metadata !22, metadata !0, i32 5, i32 22, i32 0} ; [ DW_TAG_lexical_block ]
 !13 = metadata !{i32 5, i32 19, metadata !0, null}
 !14 = metadata !{i32 6, i32 14, metadata !12, null}
-!15 = metadata !{metadata !"int", metadata !16}
-!16 = metadata !{metadata !"omnipotent char", metadata !17}
-!17 = metadata !{metadata !"Simple C/C++ TBAA", null}
 !18 = metadata !{i32 7, i32 2, metadata !12, null}
 !19 = metadata !{i32 8, i32 2, metadata !12, null}
 !20 = metadata !{metadata !0}
diff --git a/test/CodeGen/X86/fast-cc-merge-stack-adj.ll b/test/CodeGen/X86/fast-cc-merge-stack-adj.ll
index d591f94..5121ed1 100644
--- a/test/CodeGen/X86/fast-cc-merge-stack-adj.ll
+++ b/test/CodeGen/X86/fast-cc-merge-stack-adj.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mcpu=generic -march=x86 -x86-asm-syntax=intel | \
-; RUN:   grep "add	ESP, 8"
+; RUN: llc < %s -mcpu=generic -march=x86 -x86-asm-syntax=intel | FileCheck %s
+; CHECK: add ESP, 8
 
 target triple = "i686-pc-linux-gnu"
 
diff --git a/test/CodeGen/X86/fast-isel-avoid-unnecessary-pic-base.ll b/test/CodeGen/X86/fast-isel-avoid-unnecessary-pic-base.ll
index 9233d3f..21fae4a 100644
--- a/test/CodeGen/X86/fast-isel-avoid-unnecessary-pic-base.ll
+++ b/test/CodeGen/X86/fast-isel-avoid-unnecessary-pic-base.ll
@@ -1,4 +1,5 @@
-; RUN: llc -O0 -relocation-model=pic < %s | not grep call
+; RUN: llc -O0 -relocation-model=pic < %s | FileCheck %s
+; CHECK-NOT: call
 ; rdar://8396318
 
 ; Don't emit a PIC base register if no addresses are needed.
diff --git a/test/CodeGen/X86/fast-isel-constpool.ll b/test/CodeGen/X86/fast-isel-constpool.ll
index b3adb80..bbbaeb2 100644
--- a/test/CodeGen/X86/fast-isel-constpool.ll
+++ b/test/CodeGen/X86/fast-isel-constpool.ll
@@ -1,4 +1,6 @@
-; RUN: llc < %s -fast-isel | grep "LCPI0_0(%rip)"
+; RUN: llc < %s -fast-isel | FileCheck %s
+; CHECK: LCPI0_0(%rip)
+
 ; Make sure fast isel uses rip-relative addressing when required.
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-apple-darwin9.0"
diff --git a/test/CodeGen/X86/fast-isel-divrem-x86-64.ll b/test/CodeGen/X86/fast-isel-divrem-x86-64.ll
new file mode 100644
index 0000000..45494f1
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-divrem-x86-64.ll
@@ -0,0 +1,41 @@
+; RUN: llc -mtriple=x86_64-none-linux -fast-isel -fast-isel-abort < %s | FileCheck %s
+
+define i64 @test_sdiv64(i64 %dividend, i64 %divisor) nounwind {
+entry:
+  %result = sdiv i64 %dividend, %divisor
+  ret i64 %result
+}
+
+; CHECK: test_sdiv64:
+; CHECK: cqto
+; CHECK: idivq
+
+define i64 @test_srem64(i64 %dividend, i64 %divisor) nounwind {
+entry:
+  %result = srem i64 %dividend, %divisor
+  ret i64 %result
+}
+
+; CHECK: test_srem64:
+; CHECK: cqto
+; CHECK: idivq
+
+define i64 @test_udiv64(i64 %dividend, i64 %divisor) nounwind {
+entry:
+  %result = udiv i64 %dividend, %divisor
+  ret i64 %result
+}
+
+; CHECK: test_udiv64:
+; CHECK: xorl
+; CHECK: divq
+
+define i64 @test_urem64(i64 %dividend, i64 %divisor) nounwind {
+entry:
+  %result = urem i64 %dividend, %divisor
+  ret i64 %result
+}
+
+; CHECK: test_urem64:
+; CHECK: xorl
+; CHECK: divq
diff --git a/test/CodeGen/X86/fast-isel-divrem.ll b/test/CodeGen/X86/fast-isel-divrem.ll
new file mode 100644
index 0000000..7aba7f7
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-divrem.ll
@@ -0,0 +1,122 @@
+; RUN: llc -mtriple=x86_64-none-linux -fast-isel -fast-isel-abort < %s | FileCheck %s
+; RUN: llc -mtriple=i686-none-linux -fast-isel -fast-isel-abort < %s | FileCheck %s
+
+define i8 @test_sdiv8(i8 %dividend, i8 %divisor) nounwind {
+entry:
+  %result = sdiv i8 %dividend, %divisor
+  ret i8 %result
+}
+
+; CHECK: test_sdiv8:
+; CHECK: movsbw
+; CHECK: idivb
+
+define i8 @test_srem8(i8 %dividend, i8 %divisor) nounwind {
+entry:
+  %result = srem i8 %dividend, %divisor
+  ret i8 %result
+}
+
+; CHECK: test_srem8:
+; CHECK: movsbw
+; CHECK: idivb
+
+define i8 @test_udiv8(i8 %dividend, i8 %divisor) nounwind {
+entry:
+  %result = udiv i8 %dividend, %divisor
+  ret i8 %result
+}
+
+; CHECK: test_udiv8:
+; CHECK: movzbw
+; CHECK: divb
+
+define i8 @test_urem8(i8 %dividend, i8 %divisor) nounwind {
+entry:
+  %result = urem i8 %dividend, %divisor
+  ret i8 %result
+}
+
+; CHECK: test_urem8:
+; CHECK: movzbw
+; CHECK: divb
+
+define i16 @test_sdiv16(i16 %dividend, i16 %divisor) nounwind {
+entry:
+  %result = sdiv i16 %dividend, %divisor
+  ret i16 %result
+}
+
+; CHECK: test_sdiv16:
+; CHECK: cwtd
+; CHECK: idivw
+
+define i16 @test_srem16(i16 %dividend, i16 %divisor) nounwind {
+entry:
+  %result = srem i16 %dividend, %divisor
+  ret i16 %result
+}
+
+; CHECK: test_srem16:
+; CHECK: cwtd
+; CHECK: idivw
+
+define i16 @test_udiv16(i16 %dividend, i16 %divisor) nounwind {
+entry:
+  %result = udiv i16 %dividend, %divisor
+  ret i16 %result
+}
+
+; CHECK: test_udiv16:
+; CHECK: xorl
+; CHECK: divw
+
+define i16 @test_urem16(i16 %dividend, i16 %divisor) nounwind {
+entry:
+  %result = urem i16 %dividend, %divisor
+  ret i16 %result
+}
+
+; CHECK: test_urem16:
+; CHECK: xorl
+; CHECK: divw
+
+define i32 @test_sdiv32(i32 %dividend, i32 %divisor) nounwind {
+entry:
+  %result = sdiv i32 %dividend, %divisor
+  ret i32 %result
+}
+
+; CHECK: test_sdiv32:
+; CHECK: cltd
+; CHECK: idivl
+
+define i32 @test_srem32(i32 %dividend, i32 %divisor) nounwind {
+entry:
+  %result = srem i32 %dividend, %divisor
+  ret i32 %result
+}
+
+; CHECK: test_srem32:
+; CHECK: cltd
+; CHECK: idivl
+
+define i32 @test_udiv32(i32 %dividend, i32 %divisor) nounwind {
+entry:
+  %result = udiv i32 %dividend, %divisor
+  ret i32 %result
+}
+
+; CHECK: test_udiv32:
+; CHECK: xorl
+; CHECK: divl
+
+define i32 @test_urem32(i32 %dividend, i32 %divisor) nounwind {
+entry:
+  %result = urem i32 %dividend, %divisor
+  ret i32 %result
+}
+
+; CHECK: test_urem32:
+; CHECK: xorl
+; CHECK: divl
diff --git a/test/CodeGen/X86/fast-isel-fneg.ll b/test/CodeGen/X86/fast-isel-fneg.ll
index f42a4a2..67fdad2 100644
--- a/test/CodeGen/X86/fast-isel-fneg.ll
+++ b/test/CodeGen/X86/fast-isel-fneg.ll
@@ -1,5 +1,9 @@
 ; RUN: llc < %s -fast-isel -fast-isel-abort -mtriple=x86_64-apple-darwin10 | FileCheck %s
-; RUN: llc < %s -fast-isel -march=x86 -mattr=+sse2 | grep xor | count 2
+; RUN: llc < %s -fast-isel -march=x86 -mattr=+sse2 | FileCheck --check-prefix=SSE2 %s
+
+; SSE2: xor
+; SSE2: xor
+; SSE2-NOT: xor
 
 ; CHECK: doo:
 ; CHECK: xor
diff --git a/test/CodeGen/X86/fast-isel-gv.ll b/test/CodeGen/X86/fast-isel-gv.ll
index cb2464e..de75095 100644
--- a/test/CodeGen/X86/fast-isel-gv.ll
+++ b/test/CodeGen/X86/fast-isel-gv.ll
@@ -1,4 +1,6 @@
-; RUN: llc < %s -fast-isel | grep "_kill@GOTPCREL(%rip)"
+; RUN: llc < %s -fast-isel | FileCheck %s
+; CHECK: _kill@GOTPCREL(%rip)
+
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-apple-darwin10.0"
 @f = global i8 (...)* @kill		; <i8 (...)**> [#uses=1]
diff --git a/test/CodeGen/X86/fast-isel-tailcall.ll b/test/CodeGen/X86/fast-isel-tailcall.ll
index c3e527c..79ff79d4 100644
--- a/test/CodeGen/X86/fast-isel-tailcall.ll
+++ b/test/CodeGen/X86/fast-isel-tailcall.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -fast-isel -tailcallopt -march=x86 | not grep add
+; RUN: llc < %s -fast-isel -tailcallopt -march=x86 | FileCheck %s
+; CHECK-NOT: add
 ; PR4154
 
 ; On x86, -tailcallopt changes the ABI so the caller shouldn't readjust
diff --git a/test/CodeGen/X86/fast-isel-unaligned-store.ll b/test/CodeGen/X86/fast-isel-unaligned-store.ll
new file mode 100644
index 0000000..7ce7f67
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-unaligned-store.ll
@@ -0,0 +1,18 @@
+; RUN: llc -mtriple=x86_64-none-linux -fast-isel -fast-isel-abort < %s | FileCheck %s
+; RUN: llc -mtriple=i686-none-linux -fast-isel -fast-isel-abort < %s | FileCheck %s
+
+define i32 @test_store_32(i32* nocapture %addr, i32 %value) {
+entry:
+  store i32 %value, i32* %addr, align 1
+  ret i32 %value
+}
+
+; CHECK: ret
+
+define i16 @test_store_16(i16* nocapture %addr, i16 %value) {
+entry:
+  store i16 %value, i16* %addr, align 1
+  ret i16 %value
+}
+
+; CHECK: ret
diff --git a/test/CodeGen/X86/fastcall-correct-mangling.ll b/test/CodeGen/X86/fastcall-correct-mangling.ll
index 33b18bb..3569d36 100644
--- a/test/CodeGen/X86/fastcall-correct-mangling.ll
+++ b/test/CodeGen/X86/fastcall-correct-mangling.ll
@@ -7,3 +7,8 @@ define x86_fastcallcc void @func(i64 %X, i8 %Y, i8 %G, i16 %Z) {
         ret void
 }
 
+define x86_fastcallcc i32 @"\01DoNotMangle"(i32 %a) {
+; CHECK: DoNotMangle:
+entry:
+  ret i32 %a
+}
diff --git a/test/CodeGen/X86/fastcc-2.ll b/test/CodeGen/X86/fastcc-2.ll
index d044a2a..e11cdd1 100644
--- a/test/CodeGen/X86/fastcc-2.ll
+++ b/test/CodeGen/X86/fastcc-2.ll
@@ -1,5 +1,6 @@
-; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | grep movsd
-; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | grep mov | count 1
+; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | FileCheck %s
+; CHECK: movsd
+; CHECK-NOT: mov
 
 define i32 @foo() nounwind {
 entry:
diff --git a/test/CodeGen/X86/fastcc-byval.ll b/test/CodeGen/X86/fastcc-byval.ll
index f1204d6..e6828e4 100644
--- a/test/CodeGen/X86/fastcc-byval.ll
+++ b/test/CodeGen/X86/fastcc-byval.ll
@@ -1,4 +1,8 @@
-; RUN: llc < %s -tailcallopt=false | grep "movl[[:space:]]*8(%esp), %eax" | count 2
+; RUN: llc < %s -tailcallopt=false | FileCheck %s
+; CHECK: movl 8(%esp), %eax 
+; CHECK: movl 8(%esp), %eax 
+; CHECK-NOT: movl 8(%esp), %eax 
+
 ; PR3122
 ; rdar://6400815
 
diff --git a/test/CodeGen/X86/fastcc-sret.ll b/test/CodeGen/X86/fastcc-sret.ll
index d457418..97814db 100644
--- a/test/CodeGen/X86/fastcc-sret.ll
+++ b/test/CodeGen/X86/fastcc-sret.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -tailcallopt=false | grep ret | not grep 4
+; RUN: llc < %s -march=x86 -tailcallopt=false | FileCheck %s
 
 	%struct.foo = type { [4 x i32] }
 
@@ -9,6 +9,8 @@ entry:
 	store i32 1, i32* %tmp3, align 8
         ret void
 }
+; CHECK: bar
+; CHECK: ret{{[^4]*$}}
 
 @dst = external global i32
 
@@ -21,3 +23,5 @@ define void @foo() nounwind {
         store i32 %tmp6, i32* @dst
         ret void
 }
+; CHECK: foo
+; CHECK: ret{{[^4]*$}}
diff --git a/test/CodeGen/X86/fastcc3struct.ll b/test/CodeGen/X86/fastcc3struct.ll
index 84f8ef6..98dc2f5 100644
--- a/test/CodeGen/X86/fastcc3struct.ll
+++ b/test/CodeGen/X86/fastcc3struct.ll
@@ -1,7 +1,8 @@
-; RUN: llc < %s -march=x86 -o %t
-; RUN: grep "movl	.48, %ecx" %t
-; RUN: grep "movl	.24, %edx" %t
-; RUN: grep "movl	.12, %eax" %t
+; RUN: llc < %s -march=x86 | FileCheck %s
+
+; CHECK: movl {{.}}12, %eax
+; CHECK: movl {{.}}24, %edx
+; CHECK: movl {{.}}48, %ecx
 
 %0 = type { i32, i32, i32 }
 
diff --git a/test/CodeGen/X86/fold-imm.ll b/test/CodeGen/X86/fold-imm.ll
index f1fcbcf..16e4786 100644
--- a/test/CodeGen/X86/fold-imm.ll
+++ b/test/CodeGen/X86/fold-imm.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -march=x86 | grep inc
-; RUN: llc < %s -march=x86 | grep add | grep 4
+; RUN: llc < %s -march=x86 | FileCheck %s
 
 define i32 @test(i32 %X) nounwind {
 entry:
@@ -7,8 +6,16 @@ entry:
 	ret i32 %0
 }
 
+; CHECK: test
+; CHECK: inc
+; CHECK: ret
+
 define i32 @test2(i32 %X) nounwind {
 entry:
 	%0 = add i32 %X, 4
 	ret i32 %0
 }
+
+; CHECK: test2
+; CHECK: {{add.*4.*$}}
+; CHECK: ret
diff --git a/test/CodeGen/X86/fp-elim-and-no-fp-elim.ll b/test/CodeGen/X86/fp-elim-and-no-fp-elim.ll
new file mode 100644
index 0000000..3468a45
--- /dev/null
+++ b/test/CodeGen/X86/fp-elim-and-no-fp-elim.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s -mtriple x86_64-apple-darwin | FileCheck %s
+
+define void @bar(i32 %argc) #0 {
+; CHECK: bar:
+; CHECK: pushq %rbp
+entry:
+  %conv = sitofp i32 %argc to double
+  %mul = fmul double %conv, 3.792700e+01
+  %conv1 = fptrunc double %mul to float
+  %div = fdiv double 9.273700e+02, %conv
+  %conv3 = fptrunc double %div to float
+  tail call void @foo(float %conv1, float %conv3)
+  ret void
+}
+
+define void @qux(i32 %argc) #1 {
+; CHECK: qux:
+; CHECK-NOT: pushq %rbp
+entry:
+  %conv = sitofp i32 %argc to double
+  %mul = fmul double %conv, 3.792700e+01
+  %conv1 = fptrunc double %mul to float
+  %div = fdiv double 9.273700e+02, %conv
+  %conv3 = fptrunc double %div to float
+  tail call void @foo(float %conv1, float %conv3)
+  ret void
+}
+
+declare void @foo(float, float)
+
+attributes #0 = { "no-frame-pointer-elim"="true" }
+attributes #1 = { "no-frame-pointer-elim"="false" }
diff --git a/test/CodeGen/X86/fp-immediate-shorten.ll b/test/CodeGen/X86/fp-immediate-shorten.ll
index 62d8100..dc59c5a 100644
--- a/test/CodeGen/X86/fp-immediate-shorten.ll
+++ b/test/CodeGen/X86/fp-immediate-shorten.ll
@@ -1,7 +1,8 @@
 ;; Test that this FP immediate is stored in the constant pool as a float.
 
-; RUN: llc < %s -march=x86 -mattr=-sse2,-sse3 | \
-; RUN:   grep ".long.1123418112"
+; RUN: llc < %s -march=x86 -mattr=-sse2,-sse3 | FileCheck %s
+
+; CHECK: {{.long.1123418112}}
 
 define double @D() {
         ret double 1.230000e+02
diff --git a/test/CodeGen/X86/fp_load_cast_fold.ll b/test/CodeGen/X86/fp_load_cast_fold.ll
index a160ac6..72ea12f 100644
--- a/test/CodeGen/X86/fp_load_cast_fold.ll
+++ b/test/CodeGen/X86/fp_load_cast_fold.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | grep fild | not grep ESP
+; RUN: llc < %s -march=x86 | FileCheck %s
 
 define double @short(i16* %P) {
         %V = load i16* %P               ; <i16> [#uses=1]
@@ -18,3 +18,9 @@ define double @long(i64* %P) {
         ret double %V2
 }
 
+; CHECK: long
+; CHECK: fild
+; CHECK-NOT: ESP
+; CHECK-NOT: esp
+; CHECK: {{$}}
+; CHECK: ret
diff --git a/test/CodeGen/X86/long-setcc.ll b/test/CodeGen/X86/long-setcc.ll
index e0165fb..13046d8 100644
--- a/test/CodeGen/X86/long-setcc.ll
+++ b/test/CodeGen/X86/long-setcc.ll
@@ -1,18 +1,31 @@
-; RUN: llc < %s -march=x86 | grep cmp | count 1
-; RUN: llc < %s -march=x86 | grep shr | count 1
-; RUN: llc < %s -march=x86 | grep xor | count 1
+; RUN: llc < %s -march=x86 | FileCheck %s
 
 define i1 @t1(i64 %x) nounwind {
 	%B = icmp slt i64 %x, 0
 	ret i1 %B
 }
 
+; CHECK: t1
+; CHECK: shrl
+; CHECK-NOT: shrl
+; CHECK: ret
+
 define i1 @t2(i64 %x) nounwind {
 	%tmp = icmp ult i64 %x, 4294967296
 	ret i1 %tmp
 }
 
+; CHECK: t2
+; CHECK: cmp
+; CHECK-NOT: cmp
+; CHECK: ret
+
 define i1 @t3(i32 %x) nounwind {
 	%tmp = icmp ugt i32 %x, -1
 	ret i1 %tmp
 }
+
+; CHECK: t3
+; CHECK: xor
+; CHECK-NOT: xor
+; CHECK: ret
diff --git a/test/CodeGen/X86/lsr-normalization.ll b/test/CodeGen/X86/lsr-normalization.ll
index 932141d..bbf8f01 100644
--- a/test/CodeGen/X86/lsr-normalization.ll
+++ b/test/CodeGen/X86/lsr-normalization.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 | grep div | count 1
+; RUN: llc < %s -march=x86-64 | FileCheck %s
 ; rdar://8168938
 
 ; This testcase involves SCEV normalization with the exit value from
@@ -6,6 +6,9 @@
 ; loop. The expression should be properly normalized and simplified,
 ; and require only a single division.
 
+; CHECK: div
+; CHECK-NOT: div
+
 %0 = type { %0*, %0* }
 
 @0 = private constant [13 x i8] c"Result: %lu\0A\00" ; <[13 x i8]*> [#uses=1]
diff --git a/test/CodeGen/X86/lsr-static-addr.ll b/test/CodeGen/X86/lsr-static-addr.ll
index 6566f56..b2aea90 100644
--- a/test/CodeGen/X86/lsr-static-addr.ll
+++ b/test/CodeGen/X86/lsr-static-addr.ll
@@ -17,7 +17,7 @@
 ; ATOM-NEXT: movsd A(,%rax,8)
 ; ATOM-NEXT: mulsd
 ; ATOM-NEXT: movsd
-; ATOM-NEXT: incq %rax
+; ATOM-NEXT: leaq 1(%rax), %rax
 
 @A = external global [0 x double]
 
diff --git a/test/CodeGen/X86/misched-copy.ll b/test/CodeGen/X86/misched-copy.ll
new file mode 100644
index 0000000..0450cfb
--- /dev/null
+++ b/test/CodeGen/X86/misched-copy.ll
@@ -0,0 +1,49 @@
+; REQUIRES: asserts
+; RUN: llc < %s -march=x86 -mcpu=core2 -pre-RA-sched=source -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > /dev/null | FileCheck %s
+;
+; Test scheduling of copy instructions.
+;
+; Argument copies should be hoisted to the top of the block.
+; Return copies should be sunk to the end.
+; MUL_HiLo PhysReg use copies should be just above the mul.
+; MUL_HiLo PhysReg def copies should be just below the mul.
+;
+; CHECK:      *** Final schedule for BB#1 ***
+; CHECK-NEXT: %EAX<def> = COPY
+; CHECK:      MUL32r %vreg{{[0-9]+}}, %EAX<imp-def>, %EDX<imp-def>, %EFLAGS<imp-def,dead>, %EAX<imp-use>;
+; CHECK-NEXT: COPY %E{{[AD]}}X;
+; CHECK-NEXT: COPY %E{{[AD]}}X;
+; CHECK:      DIVSSrm
+define i64 @mulhoist(i32 %a, i32 %b) #0 {
+entry:
+  br label %body
+
+body:
+  %convb = sitofp i32 %b to float
+  ; Generates an iMUL64r to legalize types.
+  %aa = zext i32 %a to i64
+  %mul = mul i64 %aa, 74383
+  ; Do some dependent long latency stuff.
+  %trunc = trunc i64 %mul to i32
+  %convm = sitofp i32 %trunc to float
+  %divm = fdiv float %convm, 0.75
+  ;%addmb = fadd float %divm, %convb
+  ;%divmb = fdiv float %addmb, 0.125
+  ; Do some independent long latency stuff.
+  %conva = sitofp i32 %a to float
+  %diva = fdiv float %conva, 0.75
+  %addab = fadd float %diva, %convb
+  %divab = fdiv float %addab, 0.125
+  br label %end
+
+end:
+  %val = fptosi float %divab to i64
+  %add = add i64 %mul, %val
+  ret i64 %add
+}
+
+attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!0 = metadata !{metadata !"float", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/X86/misched-matmul.ll b/test/CodeGen/X86/misched-matmul.ll
index 0f6e442..15e8a0a 100644
--- a/test/CodeGen/X86/misched-matmul.ll
+++ b/test/CodeGen/X86/misched-matmul.ll
@@ -12,86 +12,86 @@
 define void @wrap_mul4(double* nocapture %Out, [4 x double]* nocapture %A, [4 x double]* nocapture %B) #0 {
 entry:
   %arrayidx1.i = getelementptr inbounds [4 x double]* %A, i64 0, i64 0
-  %0 = load double* %arrayidx1.i, align 8, !tbaa !0
+  %0 = load double* %arrayidx1.i, align 8
   %arrayidx3.i = getelementptr inbounds [4 x double]* %B, i64 0, i64 0
-  %1 = load double* %arrayidx3.i, align 8, !tbaa !0
+  %1 = load double* %arrayidx3.i, align 8
   %mul.i = fmul double %0, %1
   %arrayidx5.i = getelementptr inbounds [4 x double]* %A, i64 0, i64 1
-  %2 = load double* %arrayidx5.i, align 8, !tbaa !0
+  %2 = load double* %arrayidx5.i, align 8
   %arrayidx7.i = getelementptr inbounds [4 x double]* %B, i64 1, i64 0
-  %3 = load double* %arrayidx7.i, align 8, !tbaa !0
+  %3 = load double* %arrayidx7.i, align 8
   %mul8.i = fmul double %2, %3
   %add.i = fadd double %mul.i, %mul8.i
   %arrayidx10.i = getelementptr inbounds [4 x double]* %A, i64 0, i64 2
-  %4 = load double* %arrayidx10.i, align 8, !tbaa !0
+  %4 = load double* %arrayidx10.i, align 8
   %arrayidx12.i = getelementptr inbounds [4 x double]* %B, i64 2, i64 0
-  %5 = load double* %arrayidx12.i, align 8, !tbaa !0
+  %5 = load double* %arrayidx12.i, align 8
   %mul13.i = fmul double %4, %5
   %add14.i = fadd double %add.i, %mul13.i
   %arrayidx16.i = getelementptr inbounds [4 x double]* %A, i64 0, i64 3
-  %6 = load double* %arrayidx16.i, align 8, !tbaa !0
+  %6 = load double* %arrayidx16.i, align 8
   %arrayidx18.i = getelementptr inbounds [4 x double]* %B, i64 3, i64 0
-  %7 = load double* %arrayidx18.i, align 8, !tbaa !0
+  %7 = load double* %arrayidx18.i, align 8
   %mul19.i = fmul double %6, %7
   %add20.i = fadd double %add14.i, %mul19.i
   %arrayidx25.i = getelementptr inbounds [4 x double]* %B, i64 0, i64 1
-  %8 = load double* %arrayidx25.i, align 8, !tbaa !0
+  %8 = load double* %arrayidx25.i, align 8
   %mul26.i = fmul double %0, %8
   %arrayidx30.i = getelementptr inbounds [4 x double]* %B, i64 1, i64 1
-  %9 = load double* %arrayidx30.i, align 8, !tbaa !0
+  %9 = load double* %arrayidx30.i, align 8
   %mul31.i = fmul double %2, %9
   %add32.i = fadd double %mul26.i, %mul31.i
   %arrayidx36.i = getelementptr inbounds [4 x double]* %B, i64 2, i64 1
-  %10 = load double* %arrayidx36.i, align 8, !tbaa !0
+  %10 = load double* %arrayidx36.i, align 8
   %mul37.i = fmul double %4, %10
   %add38.i = fadd double %add32.i, %mul37.i
   %arrayidx42.i = getelementptr inbounds [4 x double]* %B, i64 3, i64 1
-  %11 = load double* %arrayidx42.i, align 8, !tbaa !0
+  %11 = load double* %arrayidx42.i, align 8
   %mul43.i = fmul double %6, %11
   %add44.i = fadd double %add38.i, %mul43.i
   %arrayidx49.i = getelementptr inbounds [4 x double]* %B, i64 0, i64 2
-  %12 = load double* %arrayidx49.i, align 8, !tbaa !0
+  %12 = load double* %arrayidx49.i, align 8
   %mul50.i = fmul double %0, %12
   %arrayidx54.i = getelementptr inbounds [4 x double]* %B, i64 1, i64 2
-  %13 = load double* %arrayidx54.i, align 8, !tbaa !0
+  %13 = load double* %arrayidx54.i, align 8
   %mul55.i = fmul double %2, %13
   %add56.i = fadd double %mul50.i, %mul55.i
   %arrayidx60.i = getelementptr inbounds [4 x double]* %B, i64 2, i64 2
-  %14 = load double* %arrayidx60.i, align 8, !tbaa !0
+  %14 = load double* %arrayidx60.i, align 8
   %mul61.i = fmul double %4, %14
   %add62.i = fadd double %add56.i, %mul61.i
   %arrayidx66.i = getelementptr inbounds [4 x double]* %B, i64 3, i64 2
-  %15 = load double* %arrayidx66.i, align 8, !tbaa !0
+  %15 = load double* %arrayidx66.i, align 8
   %mul67.i = fmul double %6, %15
   %add68.i = fadd double %add62.i, %mul67.i
   %arrayidx73.i = getelementptr inbounds [4 x double]* %B, i64 0, i64 3
-  %16 = load double* %arrayidx73.i, align 8, !tbaa !0
+  %16 = load double* %arrayidx73.i, align 8
   %mul74.i = fmul double %0, %16
   %arrayidx78.i = getelementptr inbounds [4 x double]* %B, i64 1, i64 3
-  %17 = load double* %arrayidx78.i, align 8, !tbaa !0
+  %17 = load double* %arrayidx78.i, align 8
   %mul79.i = fmul double %2, %17
   %add80.i = fadd double %mul74.i, %mul79.i
   %arrayidx84.i = getelementptr inbounds [4 x double]* %B, i64 2, i64 3
-  %18 = load double* %arrayidx84.i, align 8, !tbaa !0
+  %18 = load double* %arrayidx84.i, align 8
   %mul85.i = fmul double %4, %18
   %add86.i = fadd double %add80.i, %mul85.i
   %arrayidx90.i = getelementptr inbounds [4 x double]* %B, i64 3, i64 3
-  %19 = load double* %arrayidx90.i, align 8, !tbaa !0
+  %19 = load double* %arrayidx90.i, align 8
   %mul91.i = fmul double %6, %19
   %add92.i = fadd double %add86.i, %mul91.i
   %arrayidx95.i = getelementptr inbounds [4 x double]* %A, i64 1, i64 0
-  %20 = load double* %arrayidx95.i, align 8, !tbaa !0
+  %20 = load double* %arrayidx95.i, align 8
   %mul98.i = fmul double %1, %20
   %arrayidx100.i = getelementptr inbounds [4 x double]* %A, i64 1, i64 1
-  %21 = load double* %arrayidx100.i, align 8, !tbaa !0
+  %21 = load double* %arrayidx100.i, align 8
   %mul103.i = fmul double %3, %21
   %add104.i = fadd double %mul98.i, %mul103.i
   %arrayidx106.i = getelementptr inbounds [4 x double]* %A, i64 1, i64 2
-  %22 = load double* %arrayidx106.i, align 8, !tbaa !0
+  %22 = load double* %arrayidx106.i, align 8
   %mul109.i = fmul double %5, %22
   %add110.i = fadd double %add104.i, %mul109.i
   %arrayidx112.i = getelementptr inbounds [4 x double]* %A, i64 1, i64 3
-  %23 = load double* %arrayidx112.i, align 8, !tbaa !0
+  %23 = load double* %arrayidx112.i, align 8
   %mul115.i = fmul double %7, %23
   %add116.i = fadd double %add110.i, %mul115.i
   %mul122.i = fmul double %8, %20
@@ -116,18 +116,18 @@ entry:
   %mul187.i = fmul double %19, %23
   %add188.i = fadd double %add182.i, %mul187.i
   %arrayidx191.i = getelementptr inbounds [4 x double]* %A, i64 2, i64 0
-  %24 = load double* %arrayidx191.i, align 8, !tbaa !0
+  %24 = load double* %arrayidx191.i, align 8
   %mul194.i = fmul double %1, %24
   %arrayidx196.i = getelementptr inbounds [4 x double]* %A, i64 2, i64 1
-  %25 = load double* %arrayidx196.i, align 8, !tbaa !0
+  %25 = load double* %arrayidx196.i, align 8
   %mul199.i = fmul double %3, %25
   %add200.i = fadd double %mul194.i, %mul199.i
   %arrayidx202.i = getelementptr inbounds [4 x double]* %A, i64 2, i64 2
-  %26 = load double* %arrayidx202.i, align 8, !tbaa !0
+  %26 = load double* %arrayidx202.i, align 8
   %mul205.i = fmul double %5, %26
   %add206.i = fadd double %add200.i, %mul205.i
   %arrayidx208.i = getelementptr inbounds [4 x double]* %A, i64 2, i64 3
-  %27 = load double* %arrayidx208.i, align 8, !tbaa !0
+  %27 = load double* %arrayidx208.i, align 8
   %mul211.i = fmul double %7, %27
   %add212.i = fadd double %add206.i, %mul211.i
   %mul218.i = fmul double %8, %24
@@ -152,18 +152,18 @@ entry:
   %mul283.i = fmul double %19, %27
   %add284.i = fadd double %add278.i, %mul283.i
   %arrayidx287.i = getelementptr inbounds [4 x double]* %A, i64 3, i64 0
-  %28 = load double* %arrayidx287.i, align 8, !tbaa !0
+  %28 = load double* %arrayidx287.i, align 8
   %mul290.i = fmul double %1, %28
   %arrayidx292.i = getelementptr inbounds [4 x double]* %A, i64 3, i64 1
-  %29 = load double* %arrayidx292.i, align 8, !tbaa !0
+  %29 = load double* %arrayidx292.i, align 8
   %mul295.i = fmul double %3, %29
   %add296.i = fadd double %mul290.i, %mul295.i
   %arrayidx298.i = getelementptr inbounds [4 x double]* %A, i64 3, i64 2
-  %30 = load double* %arrayidx298.i, align 8, !tbaa !0
+  %30 = load double* %arrayidx298.i, align 8
   %mul301.i = fmul double %5, %30
   %add302.i = fadd double %add296.i, %mul301.i
   %arrayidx304.i = getelementptr inbounds [4 x double]* %A, i64 3, i64 3
-  %31 = load double* %arrayidx304.i, align 8, !tbaa !0
+  %31 = load double* %arrayidx304.i, align 8
   %mul307.i = fmul double %7, %31
   %add308.i = fadd double %add302.i, %mul307.i
   %mul314.i = fmul double %8, %28
@@ -222,7 +222,3 @@ entry:
 }
 
 attributes #0 = { noinline nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
-
-!0 = metadata !{metadata !"double", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/X86/misched-matrix.ll b/test/CodeGen/X86/misched-matrix.ll
index f5566e5..4dc95c5 100644
--- a/test/CodeGen/X86/misched-matrix.ll
+++ b/test/CodeGen/X86/misched-matrix.ll
@@ -94,57 +94,57 @@ entry:
 for.body:                              ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %arrayidx8 = getelementptr inbounds [4 x i32]* %m1, i64 %indvars.iv, i64 0
-  %tmp = load i32* %arrayidx8, align 4, !tbaa !0
+  %tmp = load i32* %arrayidx8, align 4
   %arrayidx12 = getelementptr inbounds [4 x i32]* %m2, i64 0, i64 0
-  %tmp1 = load i32* %arrayidx12, align 4, !tbaa !0
+  %tmp1 = load i32* %arrayidx12, align 4
   %arrayidx8.1 = getelementptr inbounds [4 x i32]* %m1, i64 %indvars.iv, i64 1
-  %tmp2 = load i32* %arrayidx8.1, align 4, !tbaa !0
+  %tmp2 = load i32* %arrayidx8.1, align 4
   %arrayidx12.1 = getelementptr inbounds [4 x i32]* %m2, i64 1, i64 0
-  %tmp3 = load i32* %arrayidx12.1, align 4, !tbaa !0
+  %tmp3 = load i32* %arrayidx12.1, align 4
   %arrayidx8.2 = getelementptr inbounds [4 x i32]* %m1, i64 %indvars.iv, i64 2
-  %tmp4 = load i32* %arrayidx8.2, align 4, !tbaa !0
+  %tmp4 = load i32* %arrayidx8.2, align 4
   %arrayidx12.2 = getelementptr inbounds [4 x i32]* %m2, i64 2, i64 0
-  %tmp5 = load i32* %arrayidx12.2, align 4, !tbaa !0
+  %tmp5 = load i32* %arrayidx12.2, align 4
   %arrayidx8.3 = getelementptr inbounds [4 x i32]* %m1, i64 %indvars.iv, i64 3
-  %tmp6 = load i32* %arrayidx8.3, align 4, !tbaa !0
+  %tmp6 = load i32* %arrayidx8.3, align 4
   %arrayidx12.3 = getelementptr inbounds [4 x i32]* %m2, i64 3, i64 0
-  %tmp8 = load i32* %arrayidx8, align 4, !tbaa !0
+  %tmp8 = load i32* %arrayidx8, align 4
   %arrayidx12.137 = getelementptr inbounds [4 x i32]* %m2, i64 0, i64 1
-  %tmp9 = load i32* %arrayidx12.137, align 4, !tbaa !0
-  %tmp10 = load i32* %arrayidx8.1, align 4, !tbaa !0
+  %tmp9 = load i32* %arrayidx12.137, align 4
+  %tmp10 = load i32* %arrayidx8.1, align 4
   %arrayidx12.1.1 = getelementptr inbounds [4 x i32]* %m2, i64 1, i64 1
-  %tmp11 = load i32* %arrayidx12.1.1, align 4, !tbaa !0
-  %tmp12 = load i32* %arrayidx8.2, align 4, !tbaa !0
+  %tmp11 = load i32* %arrayidx12.1.1, align 4
+  %tmp12 = load i32* %arrayidx8.2, align 4
   %arrayidx12.2.1 = getelementptr inbounds [4 x i32]* %m2, i64 2, i64 1
-  %tmp13 = load i32* %arrayidx12.2.1, align 4, !tbaa !0
-  %tmp14 = load i32* %arrayidx8.3, align 4, !tbaa !0
+  %tmp13 = load i32* %arrayidx12.2.1, align 4
+  %tmp14 = load i32* %arrayidx8.3, align 4
   %arrayidx12.3.1 = getelementptr inbounds [4 x i32]* %m2, i64 3, i64 1
-  %tmp15 = load i32* %arrayidx12.3.1, align 4, !tbaa !0
-  %tmp16 = load i32* %arrayidx8, align 4, !tbaa !0
+  %tmp15 = load i32* %arrayidx12.3.1, align 4
+  %tmp16 = load i32* %arrayidx8, align 4
   %arrayidx12.239 = getelementptr inbounds [4 x i32]* %m2, i64 0, i64 2
-  %tmp17 = load i32* %arrayidx12.239, align 4, !tbaa !0
-  %tmp18 = load i32* %arrayidx8.1, align 4, !tbaa !0
+  %tmp17 = load i32* %arrayidx12.239, align 4
+  %tmp18 = load i32* %arrayidx8.1, align 4
   %arrayidx12.1.2 = getelementptr inbounds [4 x i32]* %m2, i64 1, i64 2
-  %tmp19 = load i32* %arrayidx12.1.2, align 4, !tbaa !0
-  %tmp20 = load i32* %arrayidx8.2, align 4, !tbaa !0
+  %tmp19 = load i32* %arrayidx12.1.2, align 4
+  %tmp20 = load i32* %arrayidx8.2, align 4
   %arrayidx12.2.2 = getelementptr inbounds [4 x i32]* %m2, i64 2, i64 2
-  %tmp21 = load i32* %arrayidx12.2.2, align 4, !tbaa !0
-  %tmp22 = load i32* %arrayidx8.3, align 4, !tbaa !0
+  %tmp21 = load i32* %arrayidx12.2.2, align 4
+  %tmp22 = load i32* %arrayidx8.3, align 4
   %arrayidx12.3.2 = getelementptr inbounds [4 x i32]* %m2, i64 3, i64 2
-  %tmp23 = load i32* %arrayidx12.3.2, align 4, !tbaa !0
-  %tmp24 = load i32* %arrayidx8, align 4, !tbaa !0
+  %tmp23 = load i32* %arrayidx12.3.2, align 4
+  %tmp24 = load i32* %arrayidx8, align 4
   %arrayidx12.341 = getelementptr inbounds [4 x i32]* %m2, i64 0, i64 3
-  %tmp25 = load i32* %arrayidx12.341, align 4, !tbaa !0
-  %tmp26 = load i32* %arrayidx8.1, align 4, !tbaa !0
+  %tmp25 = load i32* %arrayidx12.341, align 4
+  %tmp26 = load i32* %arrayidx8.1, align 4
   %arrayidx12.1.3 = getelementptr inbounds [4 x i32]* %m2, i64 1, i64 3
-  %tmp27 = load i32* %arrayidx12.1.3, align 4, !tbaa !0
-  %tmp28 = load i32* %arrayidx8.2, align 4, !tbaa !0
+  %tmp27 = load i32* %arrayidx12.1.3, align 4
+  %tmp28 = load i32* %arrayidx8.2, align 4
   %arrayidx12.2.3 = getelementptr inbounds [4 x i32]* %m2, i64 2, i64 3
-  %tmp29 = load i32* %arrayidx12.2.3, align 4, !tbaa !0
-  %tmp30 = load i32* %arrayidx8.3, align 4, !tbaa !0
+  %tmp29 = load i32* %arrayidx12.2.3, align 4
+  %tmp30 = load i32* %arrayidx8.3, align 4
   %arrayidx12.3.3 = getelementptr inbounds [4 x i32]* %m2, i64 3, i64 3
-  %tmp31 = load i32* %arrayidx12.3.3, align 4, !tbaa !0
-  %tmp7 = load i32* %arrayidx12.3, align 4, !tbaa !0
+  %tmp31 = load i32* %arrayidx12.3.3, align 4
+  %tmp7 = load i32* %arrayidx12.3, align 4
   %mul = mul nsw i32 %tmp1, %tmp
   %mul.1 = mul nsw i32 %tmp3, %tmp2
   %mul.2 = mul nsw i32 %tmp5, %tmp4
@@ -174,13 +174,13 @@ for.body:                              ; preds = %for.body, %entry
   %add.2.3 = add nsw i32 %mul.2.3, %add.1.3
   %add.3.3 = add nsw i32 %mul.3.3, %add.2.3
   %arrayidx16 = getelementptr inbounds [4 x i32]* %m3, i64 %indvars.iv, i64 0
-  store i32 %add.3, i32* %arrayidx16, align 4, !tbaa !0
+  store i32 %add.3, i32* %arrayidx16, align 4
   %arrayidx16.1 = getelementptr inbounds [4 x i32]* %m3, i64 %indvars.iv, i64 1
-  store i32 %add.3.1, i32* %arrayidx16.1, align 4, !tbaa !0
+  store i32 %add.3.1, i32* %arrayidx16.1, align 4
   %arrayidx16.2 = getelementptr inbounds [4 x i32]* %m3, i64 %indvars.iv, i64 2
-  store i32 %add.3.2, i32* %arrayidx16.2, align 4, !tbaa !0
+  store i32 %add.3.2, i32* %arrayidx16.2, align 4
   %arrayidx16.3 = getelementptr inbounds [4 x i32]* %m3, i64 %indvars.iv, i64 3
-  store i32 %add.3.3, i32* %arrayidx16.3, align 4, !tbaa !0
+  store i32 %add.3.3, i32* %arrayidx16.3, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, 4
@@ -189,7 +189,3 @@ for.body:                              ; preds = %for.body, %entry
 for.end:                                        ; preds = %for.body
   ret void
 }
-
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/X86/mmx-pinsrw.ll b/test/CodeGen/X86/mmx-pinsrw.ll
index d9c7c67..33dd2eb 100644
--- a/test/CodeGen/X86/mmx-pinsrw.ll
+++ b/test/CodeGen/X86/mmx-pinsrw.ll
@@ -1,6 +1,8 @@
-; RUN: llc < %s  -mtriple=x86_64-linux -mcpu=corei7 | grep pinsr
+; RUN: llc < %s  -mtriple=x86_64-linux -mcpu=corei7 | FileCheck %s
 ; PR2562
 
+; CHECK: pinsr
+
 external global i16		; <i16*>:0 [#uses=1]
 external global <4 x i16>		; <<4 x i16>*>:1 [#uses=2]
 
diff --git a/test/CodeGen/X86/mul-legalize.ll b/test/CodeGen/X86/mul-legalize.ll
index 069737d..339de31 100644
--- a/test/CodeGen/X86/mul-legalize.ll
+++ b/test/CodeGen/X86/mul-legalize.ll
@@ -1,6 +1,8 @@
-; RUN: llc < %s -march=x86 | grep 24576
+; RUN: llc < %s -march=x86 | FileCheck %s
 ; PR2135
 
+; CHECK: 24576
+
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
 target triple = "i386-pc-linux-gnu"
 @.str = constant [13 x i8] c"c45531m.adb\00\00"		
diff --git a/test/CodeGen/X86/negative_zero.ll b/test/CodeGen/X86/negative_zero.ll
index 29474c2..c8c2cd7 100644
--- a/test/CodeGen/X86/negative_zero.ll
+++ b/test/CodeGen/X86/negative_zero.ll
@@ -1,4 +1,6 @@
-; RUN: llc < %s -march=x86 -mattr=-sse2,-sse3 | grep fchs
+; RUN: llc < %s -march=x86 -mattr=-sse2,-sse3 | FileCheck %s
+
+; CHECK: fchs
 
 
 define double @T() {
diff --git a/test/CodeGen/X86/no-compact-unwind.ll b/test/CodeGen/X86/no-compact-unwind.ll
new file mode 100644
index 0000000..627f7da
--- /dev/null
+++ b/test/CodeGen/X86/no-compact-unwind.ll
@@ -0,0 +1,56 @@
+; RUN: llc < %s -mtriple x86_64-apple-macosx10.8.0 -disable-cfi | FileCheck %s
+
+%"struct.dyld::MappedRanges" = type { [400 x %struct.anon], %"struct.dyld::MappedRanges"* }
+%struct.anon = type { %class.ImageLoader*, i64, i64 }
+%class.ImageLoader = type { i32 (...)**, i8*, i8*, i32, i64, i64, i32, i32, %"struct.ImageLoader::recursive_lock"*, i16, i16, [4 x i8] }
+%"struct.ImageLoader::recursive_lock" = type { i32, i32 }
+
+@G1 = external hidden global %"struct.dyld::MappedRanges", align 8
+
+declare void @OSMemoryBarrier() optsize
+
+; This compact unwind encoding indicates that we could not generate correct
+; compact unwind encodings for this function. This then defaults to using the
+; DWARF EH frame.
+;
+; CHECK: .section __LD,__compact_unwind,regular,debug
+; CHECK: .quad _func
+; CHECK: .long 67108864                ## Compact Unwind Encoding: 0x4000000
+; CHECK: .quad 0                       ## Personality Function
+; CHECK: .quad 0                       ## LSDA
+;
+define void @func(%class.ImageLoader* %image) optsize ssp uwtable {
+entry:
+  br label %for.cond1.preheader
+
+for.cond1.preheader:                              ; preds = %for.inc10, %entry
+  %p.019 = phi %"struct.dyld::MappedRanges"* [ @G1, %entry ], [ %1, %for.inc10 ]
+  br label %for.body3
+
+for.body3:                                        ; preds = %for.inc, %for.cond1.preheader
+  %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.inc ]
+  %image4 = getelementptr inbounds %"struct.dyld::MappedRanges"* %p.019, i64 0, i32 0, i64 %indvars.iv, i32 0
+  %0 = load %class.ImageLoader** %image4, align 8
+  %cmp5 = icmp eq %class.ImageLoader* %0, %image
+  br i1 %cmp5, label %if.then, label %for.inc
+
+if.then:                                          ; preds = %for.body3
+  tail call void @OSMemoryBarrier() optsize
+  store %class.ImageLoader* null, %class.ImageLoader** %image4, align 8
+  br label %for.inc
+
+for.inc:                                          ; preds = %if.then, %for.body3
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 400
+  br i1 %exitcond, label %for.inc10, label %for.body3
+
+for.inc10:                                        ; preds = %for.inc
+  %next = getelementptr inbounds %"struct.dyld::MappedRanges"* %p.019, i64 0, i32 1
+  %1 = load %"struct.dyld::MappedRanges"** %next, align 8
+  %cmp = icmp eq %"struct.dyld::MappedRanges"* %1, null
+  br i1 %cmp, label %for.end11, label %for.cond1.preheader
+
+for.end11:                                        ; preds = %for.inc10
+  ret void
+}
diff --git a/test/CodeGen/X86/nosse-error1.ll b/test/CodeGen/X86/nosse-error1.ll
index 16cbb73..cddff3f 100644
--- a/test/CodeGen/X86/nosse-error1.ll
+++ b/test/CodeGen/X86/nosse-error1.ll
@@ -1,7 +1,10 @@
-; RUN: llvm-as < %s > %t1
-; RUN: not llc -march=x86-64 -mattr=-sse < %t1 2> %t2
-; RUN: grep "SSE register return with SSE disabled" %t2
-; RUN: llc -march=x86-64 < %t1 | grep xmm
+; RUN: llc < %s -march=x86-64 -mattr=-sse 2>&1 | FileCheck --check-prefix NOSSE %s
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+
+; NOSSE: {{SSE register return with SSE disabled}}
+
+; CHECK: xmm
+
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-unknown-linux-gnu"
 @f = external global float		; <float*> [#uses=4]
diff --git a/test/CodeGen/X86/nosse-error2.ll b/test/CodeGen/X86/nosse-error2.ll
index 45a5eaf..fc9ba01 100644
--- a/test/CodeGen/X86/nosse-error2.ll
+++ b/test/CodeGen/X86/nosse-error2.ll
@@ -1,7 +1,10 @@
-; RUN: llvm-as < %s > %t1
-; RUN: not llc -march=x86 -mcpu=i686 -mattr=-sse < %t1 2> %t2
-; RUN: grep "SSE register return with SSE disabled" %t2
-; RUN: llc -march=x86 -mcpu=i686 -mattr=+sse < %t1 | grep xmm
+; RUN: llc < %s -march=x86 -mcpu=i686 -mattr=-sse 2>&1 | FileCheck --check-prefix NOSSE %s
+; RUN: llc < %s -march=x86 -mcpu=i686 -mattr=+sse | FileCheck %s
+
+; NOSSE: {{SSE register return with SSE disabled}}
+
+; CHECK: xmm
+
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
 target triple = "i386-unknown-linux-gnu"
 @f = external global float		; <float*> [#uses=4]
diff --git a/test/CodeGen/X86/optimize-max-2.ll b/test/CodeGen/X86/optimize-max-2.ll
index 8851c5b1a..10ab831 100644
--- a/test/CodeGen/X86/optimize-max-2.ll
+++ b/test/CodeGen/X86/optimize-max-2.ll
@@ -1,6 +1,8 @@
-; RUN: llc < %s -march=x86-64 > %t
-; RUN: grep cmov %t | count 2
-; RUN: grep jne %t | count 1
+; RUN: llc < %s -march=x86-64 | grep cmov | count 2
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+
+; CHECK: jne
+; CHECK-NOT: jne
 
 ; LSR's OptimizeMax function shouldn't try to eliminate this max, because
 ; it has three operands.
diff --git a/test/CodeGen/X86/peep-test-2.ll b/test/CodeGen/X86/peep-test-2.ll
index 2745172..e4bafbb 100644
--- a/test/CodeGen/X86/peep-test-2.ll
+++ b/test/CodeGen/X86/peep-test-2.ll
@@ -1,4 +1,6 @@
-; RUN: llc < %s -march=x86 | grep testl
+; RUN: llc < %s -march=x86 | FileCheck %s
+
+; CHECK: testl
 
 ; It's tempting to eliminate the testl instruction here and just use the
 ; EFLAGS value from the incl, however it can't be known whether the add
diff --git a/test/CodeGen/X86/phys_subreg_coalesce.ll b/test/CodeGen/X86/phys_subreg_coalesce.ll
index 2c855ce..8b2f61e 100644
--- a/test/CodeGen/X86/phys_subreg_coalesce.ll
+++ b/test/CodeGen/X86/phys_subreg_coalesce.ll
@@ -1,4 +1,6 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mattr=+sse2 | not grep movl
+; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mattr=+sse2 | FileCheck %s
+
+; CHECK-NOT: movl
 
 	%struct.dpoint = type { double, double }
 
diff --git a/test/CodeGen/X86/pr12889.ll b/test/CodeGen/X86/pr12889.ll
index 331d8f9..428e9b7 100644
--- a/test/CodeGen/X86/pr12889.ll
+++ b/test/CodeGen/X86/pr12889.ll
@@ -6,13 +6,10 @@ target triple = "x86_64-unknown-linux-gnu"
 
 define void @func() nounwind uwtable {
 entry:
-  %0 = load i8* @c0, align 1, !tbaa !0
+  %0 = load i8* @c0, align 1
   %tobool = icmp ne i8 %0, 0
   %conv = zext i1 %tobool to i8
   %storemerge = shl nuw nsw i8 %conv, %conv
   store i8 %storemerge, i8* @c0, align 1
   ret void
 }
-
-!0 = metadata !{metadata !"omnipotent char", metadata !1}
-!1 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/X86/pr2656.ll b/test/CodeGen/X86/pr2656.ll
index f0e31f7..1122d2d 100644
--- a/test/CodeGen/X86/pr2656.ll
+++ b/test/CodeGen/X86/pr2656.ll
@@ -1,6 +1,9 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep "xorps.*sp" | count 1
+; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
 ; PR2656
 
+; CHECK:     {{xorps.*sp}}
+; CHECK-NOT: {{xorps.*sp}}
+
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i686-apple-darwin9.4.0"
 	%struct.anon = type <{ float, float }>
diff --git a/test/CodeGen/X86/private-2.ll b/test/CodeGen/X86/private-2.ll
index 8aa744e..4413cee 100644
--- a/test/CodeGen/X86/private-2.ll
+++ b/test/CodeGen/X86/private-2.ll
@@ -1,7 +1,9 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | grep L__ZZ20
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s
 ; Quote should be outside of private prefix.
 ; rdar://6855766x
 
+; CHECK: L__ZZ20
+
 	%struct.A = type { i32*, i32 }
 @"_ZZ20-[Example1 whatever]E4C.91" = private constant %struct.A { i32* null, i32 1 }		; <%struct.A*> [#uses=1]
 
diff --git a/test/CodeGen/X86/rd-mod-wr-eflags.ll b/test/CodeGen/X86/rd-mod-wr-eflags.ll
index 8ef9b5d..0bf601b 100644
--- a/test/CodeGen/X86/rd-mod-wr-eflags.ll
+++ b/test/CodeGen/X86/rd-mod-wr-eflags.ll
@@ -8,9 +8,9 @@ entry:
 ; CHECK: decq	(%{{rdi|rcx}})
 ; CHECK-NEXT: je
   %refcnt = getelementptr inbounds %struct.obj* %o, i64 0, i32 0
-  %0 = load i64* %refcnt, align 8, !tbaa !0
+  %0 = load i64* %refcnt, align 8
   %dec = add i64 %0, -1
-  store i64 %dec, i64* %refcnt, align 8, !tbaa !0
+  store i64 %dec, i64* %refcnt, align 8
   %tobool = icmp eq i64 %dec, 0
   br i1 %tobool, label %if.end, label %return
 
@@ -33,12 +33,12 @@ define i32 @test() nounwind uwtable ssp {
 entry:
 ; CHECK: decq
 ; CHECK-NOT: decq
-%0 = load i64* @c, align 8, !tbaa !0
+%0 = load i64* @c, align 8
 %dec.i = add nsw i64 %0, -1
-store i64 %dec.i, i64* @c, align 8, !tbaa !0
+store i64 %dec.i, i64* @c, align 8
 %tobool.i = icmp ne i64 %dec.i, 0
 %lor.ext.i = zext i1 %tobool.i to i32
-store i32 %lor.ext.i, i32* @a, align 4, !tbaa !3
+store i32 %lor.ext.i, i32* @a, align 4
 %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i64 0, i64 0), i64 %dec.i) nounwind
 ret i32 0
 }
@@ -47,12 +47,12 @@ ret i32 0
 define i32 @test2() nounwind uwtable ssp {
 entry:
 ; CHECK-NOT: decq ({{.*}})
-%0 = load i64* @c, align 8, !tbaa !0
+%0 = load i64* @c, align 8
 %dec.i = add nsw i64 %0, -1
-store i64 %dec.i, i64* @c, align 8, !tbaa !0
+store i64 %dec.i, i64* @c, align 8
 %tobool.i = icmp ne i64 %0, 0
 %lor.ext.i = zext i1 %tobool.i to i32
-store i32 %lor.ext.i, i32* @a, align 4, !tbaa !3
+store i32 %lor.ext.i, i32* @a, align 4
 %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i64 0, i64 0), i64 %dec.i) nounwind
 ret i32 0
 }
@@ -61,11 +61,6 @@ declare i32 @printf(i8* nocapture, ...) nounwind
 
 declare void @free(i8* nocapture) nounwind
 
-!0 = metadata !{metadata !"long", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
-!3 = metadata !{metadata !"int", metadata !1}
-
 %struct.obj2 = type { i64, i32, i16, i8 }
 
 declare void @other(%struct.obj2* ) nounwind;
diff --git a/test/CodeGen/X86/select-with-and-or.ll b/test/CodeGen/X86/select-with-and-or.ll
new file mode 100644
index 0000000..1ccf30b
--- /dev/null
+++ b/test/CodeGen/X86/select-with-and-or.ll
@@ -0,0 +1,72 @@
+; RUN: opt < %s -O3 | \
+; RUN:	llc -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+define <4 x i32> @test1(<4 x float> %a, <4 x float> %b, <4 x i32> %c) {
+  %f = fcmp ult <4 x float> %a, %b
+  %r = select <4 x i1> %f, <4 x i32> %c, <4 x i32> zeroinitializer
+  ret <4 x i32> %r
+; CHECK: test1
+; CHECK: cmpnle
+; CHECK-NEXT: andps
+; CHECK: ret
+}
+
+define <4 x i32> @test2(<4 x float> %a, <4 x float> %b, <4 x i32> %c) {
+  %f = fcmp ult <4 x float> %a, %b
+  %r = select <4 x i1> %f, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %c
+  ret <4 x i32> %r
+; CHECK: test2
+; CHECK: cmpnle
+; CHECK-NEXT: orps
+; CHECK: ret
+}
+
+define <4 x i32> @test3(<4 x float> %a, <4 x float> %b, <4 x i32> %c) {
+  %f = fcmp ult <4 x float> %a, %b
+  %r = select <4 x i1> %f, <4 x i32> zeroinitializer, <4 x i32> %c
+  ret <4 x i32> %r
+; CHECK: test3
+; CHECK: cmple
+; CHECK-NEXT: andps
+; CHECK: ret
+}
+
+define <4 x i32> @test4(<4 x float> %a, <4 x float> %b, <4 x i32> %c) {
+  %f = fcmp ult <4 x float> %a, %b
+  %r = select <4 x i1> %f, <4 x i32> %c, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
+  ret <4 x i32> %r
+; CHECK: test4
+; CHECK: cmple
+; CHECK-NEXT: orps
+; CHECK: ret
+}
+
+define <4 x i32> @test5(<4 x float> %a, <4 x float> %b, <4 x i32> %c) {
+  %f = fcmp ult <4 x float> %a, %b
+  %r = select <4 x i1> %f, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> zeroinitializer
+  ret <4 x i32> %r
+; CHECK: test5
+; CHECK: cmpnle
+; CHECK-NEXT: ret
+}
+
+define <4 x i32> @test6(<4 x float> %a, <4 x float> %b, <4 x i32> %c) {
+  %f = fcmp ult <4 x float> %a, %b
+  %r = select <4 x i1> %f, <4 x i32> zeroinitializer, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
+  ret <4 x i32> %r
+; CHECK: test6
+; CHECK: cmple
+; CHECK-NEXT: ret
+}
+
+define <4 x i32> @test7(<4 x float> %a, <4 x float> %b, <4 x i32>* %p) {
+  %f = fcmp ult <4 x float> %a, %b
+  %s = sext <4 x i1> %f to <4 x i32>
+  %l = load <4 x i32>* %p
+  %r = and <4 x i32> %l, %s
+  ret <4 x i32> %r
+; CHECK: test7
+; CHECK: cmpnle
+; CHECK-NEXT: andps
+; CHECK: ret
+}
diff --git a/test/CodeGen/X86/sincos-opt.ll b/test/CodeGen/X86/sincos-opt.ll
index f364d1f..333c466 100644
--- a/test/CodeGen/X86/sincos-opt.ll
+++ b/test/CodeGen/X86/sincos-opt.ll
@@ -4,6 +4,7 @@
 
 ; Combine sin / cos into a single call.
 ; rdar://13087969
+; rdar://13599493
 
 define float @test1(float %x) nounwind {
 entry:
@@ -14,7 +15,8 @@ entry:
 
 ; OSX_SINCOS: test1:
 ; OSX_SINCOS: callq ___sincosf_stret
-; OSX_SINCOS: addss %xmm1, %xmm0
+; OSX_SINCOS: pshufd $1, %xmm0, %xmm1
+; OSX_SINCOS: addss %xmm0, %xmm1
 
 ; OSX_NOOPT: test1
 ; OSX_NOOPT: callq _cosf
diff --git a/test/CodeGen/X86/stdcall.ll b/test/CodeGen/X86/stdcall.ll
index a7c2517..73826ed 100644
--- a/test/CodeGen/X86/stdcall.ll
+++ b/test/CodeGen/X86/stdcall.ll
@@ -1,16 +1,24 @@
-; RUN: llc < %s | FileCheck %s
+; RUN: llc -mtriple="i386-pc-mingw32" < %s | FileCheck %s
 ; PR5851
 
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
-target triple = "i386-pc-mingw32"
-
 %0 = type { void (...)* }
 
-@B = global %0 { void (...)* bitcast (void ()* @MyFunc to void (...)*) }, align 4
-; CHECK: _B:
-; CHECK: .long _MyFunc@0
-
 define internal x86_stdcallcc void @MyFunc() nounwind {
 entry:
+; CHECK: MyFunc@0:
+; CHECK: ret
   ret void
 }
+
+; PR14410
+define x86_stdcallcc i32 @"\01DoNotMangle"(i32 %a) {
+; CHECK: DoNotMangle:
+; CHECK: ret $4
+entry:
+  ret i32 %a
+}
+
+@B = global %0 { void (...)* bitcast (void ()* @MyFunc to void (...)*) }, align 4
+; CHECK: _B:
+; CHECK: .long _MyFunc@0
+
diff --git a/test/CodeGen/X86/store-fp-constant.ll b/test/CodeGen/X86/store-fp-constant.ll
index 206886b..71df8d3 100644
--- a/test/CodeGen/X86/store-fp-constant.ll
+++ b/test/CodeGen/X86/store-fp-constant.ll
@@ -1,5 +1,8 @@
-; RUN: llc < %s -march=x86 | not grep rodata
-; RUN: llc < %s -march=x86 | not grep literal
+; RUN: llc < %s -march=x86 | FileCheck %s
+
+; CHECK-NOT: rodata
+; CHECK-NOT: literal
+
 ;
 ; Check that no FP constants in this testcase ends up in the 
 ; constant pool.
diff --git a/test/CodeGen/X86/subreg-to-reg-1.ll b/test/CodeGen/X86/subreg-to-reg-1.ll
index 4f31ab5..2931bab 100644
--- a/test/CodeGen/X86/subreg-to-reg-1.ll
+++ b/test/CodeGen/X86/subreg-to-reg-1.ll
@@ -1,4 +1,7 @@
-; RUN: llc < %s -march=x86-64 | grep "leal	.*), %e.*" | count 1
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+
+; CHECK:     {{leal	.*[)], %e.*}}
+; CHECK-NOT: {{leal	.*[)], %e.*}}
 
 ; Don't eliminate or coalesce away the explicit zero-extension!
 ; This is currently using an leal because of a 3-addressification detail,
diff --git a/test/CodeGen/X86/subreg-to-reg-3.ll b/test/CodeGen/X86/subreg-to-reg-3.ll
index 931ae75..80ab1a2 100644
--- a/test/CodeGen/X86/subreg-to-reg-3.ll
+++ b/test/CodeGen/X86/subreg-to-reg-3.ll
@@ -1,4 +1,6 @@
-; RUN: llc < %s -march=x86-64 | grep imull
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+
+; CHECK: imull
 
 ; Don't eliminate or coalesce away the explicit zero-extension!
 
diff --git a/test/CodeGen/X86/subtarget-feature-change.ll b/test/CodeGen/X86/subtarget-feature-change.ll
index cd67729..04d4a71 100644
--- a/test/CodeGen/X86/subtarget-feature-change.ll
+++ b/test/CodeGen/X86/subtarget-feature-change.ll
@@ -14,12 +14,12 @@ entry:
 for.body:
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds float* %b, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %0 = load float* %arrayidx, align 4
   %arrayidx2 = getelementptr inbounds float* %c, i64 %indvars.iv
-  %1 = load float* %arrayidx2, align 4, !tbaa !0
+  %1 = load float* %arrayidx2, align 4
   %mul = fmul float %0, %1
   %arrayidx4 = getelementptr inbounds float* %a, i64 %indvars.iv
-  store float %mul, float* %arrayidx4, align 4, !tbaa !0
+  store float %mul, float* %arrayidx4, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -43,12 +43,12 @@ entry:
 for.body:
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds float* %b, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %0 = load float* %arrayidx, align 4
   %arrayidx2 = getelementptr inbounds float* %c, i64 %indvars.iv
-  %1 = load float* %arrayidx2, align 4, !tbaa !0
+  %1 = load float* %arrayidx2, align 4
   %mul = fmul float %0, %1
   %arrayidx4 = getelementptr inbounds float* %a, i64 %indvars.iv
-  store float %mul, float* %arrayidx4, align 4, !tbaa !0
+  store float %mul, float* %arrayidx4, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -60,7 +60,3 @@ for.end:
 
 attributes #0 = { nounwind optsize ssp uwtable "target-cpu"="core2" "target-features"="-sse4a,-avx2,-xop,-fma4,-bmi2,-3dnow,-3dnowa,-pclmul,-sse,-avx,-sse41,-ssse3,+mmx,-rtm,-sse42,-lzcnt,-f16c,-popcnt,-bmi,-aes,-fma,-rdrand,-sse2,-sse3" }
 attributes #1 = { nounwind optsize ssp uwtable "target-cpu"="core2" "target-features"="-sse4a,-avx2,-xop,-fma4,-bmi2,-3dnow,-3dnowa,-pclmul,+sse,-avx,-sse41,+ssse3,+mmx,-rtm,-sse42,-lzcnt,-f16c,-popcnt,-bmi,-aes,-fma,-rdrand,+sse2,+sse3" }
-
-!0 = metadata !{metadata !"float", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/X86/switch-crit-edge-constant.ll b/test/CodeGen/X86/switch-crit-edge-constant.ll
index 1f2ab0d..18f987e 100644
--- a/test/CodeGen/X86/switch-crit-edge-constant.ll
+++ b/test/CodeGen/X86/switch-crit-edge-constant.ll
@@ -1,6 +1,8 @@
 ; PR925
-; RUN: llc < %s -march=x86 | \
-; RUN:   grep mov.*str1 | count 1
+; RUN: llc < %s -march=x86 | FileCheck %s
+
+; CHECK:      {{mov.*str1}}
+; CHECK-NOT:  {{mov.*str1}}
 
 target datalayout = "e-p:32:32"
 target triple = "i686-apple-darwin8.7.2"
diff --git a/test/CodeGen/X86/tailcall-64.ll b/test/CodeGen/X86/tailcall-64.ll
index ecc253b..60fe776 100644
--- a/test/CodeGen/X86/tailcall-64.ll
+++ b/test/CodeGen/X86/tailcall-64.ll
@@ -50,9 +50,18 @@ define {i64, i64} @test_pair_trivial() {
 ; CHECK: test_pair_trivial:
 ; CHECK: jmp	_testp                  ## TAILCALL
 
+define {i64, i64} @test_pair_notail() {
+  %A = tail call i64 @testi()
+
+  %b = insertvalue {i64, i64} undef, i64 %A, 0
+  %c = insertvalue {i64, i64} %b, i64 %A, 1
 
+  ret { i64, i64} %c
+}
+; CHECK: test_pair_notail:
+; CHECK-NOT: jmp	_testi
 
-define {i64, i64} @test_pair_trivial_extract() {
+define {i64, i64} @test_pair_extract_trivial() {
   %A = tail call { i64, i64} @testp()
   %x = extractvalue { i64, i64} %A, 0
   %y = extractvalue { i64, i64} %A, 1
@@ -63,10 +72,24 @@ define {i64, i64} @test_pair_trivial_extract() {
   ret { i64, i64} %c
 }
 
-; CHECK: test_pair_trivial_extract:
+; CHECK: test_pair_extract_trivial:
 ; CHECK: jmp	_testp                  ## TAILCALL
 
-define {i8*, i64} @test_pair_conv_extract() {
+define {i64, i64} @test_pair_extract_notail() {
+  %A = tail call { i64, i64} @testp()
+  %x = extractvalue { i64, i64} %A, 0
+  %y = extractvalue { i64, i64} %A, 1
+  
+  %b = insertvalue {i64, i64} undef, i64 %y, 0
+  %c = insertvalue {i64, i64} %b, i64 %x, 1
+  
+  ret { i64, i64} %c
+}
+
+; CHECK: test_pair_extract_notail:
+; CHECK-NOT: jmp	_testp
+
+define {i8*, i64} @test_pair_extract_conv() {
   %A = tail call { i64, i64} @testp()
   %x = extractvalue { i64, i64} %A, 0
   %y = extractvalue { i64, i64} %A, 1
@@ -79,10 +102,75 @@ define {i8*, i64} @test_pair_conv_extract() {
   ret { i8*, i64} %c
 }
 
-; CHECK: test_pair_conv_extract:
+; CHECK: test_pair_extract_conv:
+; CHECK: jmp	_testp                  ## TAILCALL
+
+define {i64, i64} @test_pair_extract_multiple() {
+  %A = tail call { i64, i64} @testp()
+  %x = extractvalue { i64, i64} %A, 0
+  %y = extractvalue { i64, i64} %A, 1
+  
+  %b = insertvalue {i64, i64} undef, i64 %x, 0
+  %c = insertvalue {i64, i64} %b, i64 %y, 1
+
+  %x1 = extractvalue { i64, i64} %b, 0
+  %y1 = extractvalue { i64, i64} %c, 1
+
+  %d = insertvalue {i64, i64} undef, i64 %x1, 0
+  %e = insertvalue {i64, i64} %b, i64 %y1, 1
+  
+  ret { i64, i64} %e
+}
+
+; CHECK: test_pair_extract_multiple:
+; CHECK: jmp	_testp                  ## TAILCALL
+
+define {i64, i64} @test_pair_extract_undef() {
+  %A = tail call { i64, i64} @testp()
+  %x = extractvalue { i64, i64} %A, 0
+  
+  %b = insertvalue {i64, i64} undef, i64 %x, 0
+  
+  ret { i64, i64} %b
+}
+
+; CHECK: test_pair_extract_undef:
 ; CHECK: jmp	_testp                  ## TAILCALL
 
+declare { i64, { i32, i32 } } @testn()
+
+define {i64, {i32, i32}} @test_nest() {
+  %A = tail call { i64, { i32, i32 } } @testn()
+  %x = extractvalue { i64, { i32, i32}} %A, 0
+  %y = extractvalue { i64, { i32, i32}} %A, 1
+  %y1 = extractvalue { i32, i32} %y, 0
+  %y2 = extractvalue { i32, i32} %y, 1
+  
+  %b = insertvalue {i64, {i32, i32}} undef, i64 %x, 0
+  %c1 = insertvalue {i32, i32} undef, i32 %y1, 0
+  %c2 = insertvalue {i32, i32} %c1, i32 %y2, 1
+  %c = insertvalue {i64, {i32, i32}} %b, {i32, i32} %c2, 1
+ 
+  ret { i64, { i32, i32}} %c
+}
+
+; CHECK: test_nest:
+; CHECK: jmp	_testn                  ## TAILCALL
+
+%struct.A = type { i32 }
+%struct.B = type { %struct.A, i32 }
+
+declare %struct.B* @testu()
+
+define %struct.A* @test_upcast() {
+entry:
+  %A = tail call %struct.B* @testu()
+  %x = getelementptr inbounds %struct.B* %A, i32 0, i32 0
+  ret %struct.A* %x
+}
 
+; CHECK: test_upcast:
+; CHECK: jmp	_testu                  ## TAILCALL
 
 ; PR13006
 define { i64, i64 } @crash(i8* %this) {
diff --git a/test/CodeGen/X86/this-return-64.ll b/test/CodeGen/X86/this-return-64.ll
new file mode 100644
index 0000000..2b26a89
--- /dev/null
+++ b/test/CodeGen/X86/this-return-64.ll
@@ -0,0 +1,89 @@
+; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s
+
+%struct.A = type { i8 }
+%struct.B = type { i32 }
+%struct.C = type { %struct.B }
+%struct.D = type { %struct.B }
+%struct.E = type { %struct.B }
+
+declare %struct.A* @A_ctor(%struct.A* returned)
+declare %struct.B* @B_ctor(%struct.B* returned, i32)
+
+declare %struct.A* @A_ctor_nothisret(%struct.A*)
+declare %struct.B* @B_ctor_nothisret(%struct.B*, i32)
+
+define %struct.C* @C_ctor(%struct.C* %this, i32 %y) {
+entry:
+; CHECK: C_ctor:
+; CHECK: jmp     B_ctor                  # TAILCALL
+  %0 = getelementptr inbounds %struct.C* %this, i64 0, i32 0
+  %call = tail call %struct.B* @B_ctor(%struct.B* %0, i32 %y)
+  ret %struct.C* %this
+}
+
+define %struct.C* @C_ctor_nothisret(%struct.C* %this, i32 %y) {
+entry:
+; CHECK: C_ctor_nothisret:
+; CHECK-NOT: jmp     B_ctor_nothisret
+  %0 = getelementptr inbounds %struct.C* %this, i64 0, i32 0
+  %call = tail call %struct.B* @B_ctor_nothisret(%struct.B* %0, i32 %y)
+  ret %struct.C* %this
+}
+
+define %struct.D* @D_ctor(%struct.D* %this, i32 %y) {
+entry:
+; CHECK: D_ctor:
+; CHECK: movq    %rcx, [[SAVETHIS:%r[0-9a-z]+]]
+; CHECK: callq   A_ctor
+; CHECK: movq    [[SAVETHIS]], %rcx
+; CHECK: jmp     B_ctor                  # TAILCALL
+  %0 = bitcast %struct.D* %this to %struct.A*
+  %call = tail call %struct.A* @A_ctor(%struct.A* %0)
+  %1 = getelementptr inbounds %struct.D* %this, i64 0, i32 0
+  %call2 = tail call %struct.B* @B_ctor(%struct.B* %1, i32 %y)
+; (this next line would never be generated by Clang, actually)
+  %2 = bitcast %struct.A* %call to %struct.D*
+  ret %struct.D* %2
+}
+
+define %struct.D* @D_ctor_nothisret(%struct.D* %this, i32 %y) {
+entry:
+; CHECK: D_ctor_nothisret:
+; CHECK: movq    %rcx, [[SAVETHIS:%r[0-9a-z]+]]
+; CHECK: callq   A_ctor_nothisret
+; CHECK: movq    [[SAVETHIS]], %rcx
+; CHECK-NOT: jmp     B_ctor_nothisret
+  %0 = bitcast %struct.D* %this to %struct.A*
+  %call = tail call %struct.A* @A_ctor_nothisret(%struct.A* %0)
+  %1 = getelementptr inbounds %struct.D* %this, i64 0, i32 0
+  %call2 = tail call %struct.B* @B_ctor_nothisret(%struct.B* %1, i32 %y)
+; (this next line would never be generated by Clang, actually)
+  %2 = bitcast %struct.A* %call to %struct.D*
+  ret %struct.D* %2
+}
+
+define %struct.E* @E_ctor(%struct.E* %this, i32 %x) {
+entry:
+; CHECK: E_ctor:
+; CHECK: movq    %rcx, [[SAVETHIS:%r[0-9a-z]+]]
+; CHECK: callq   B_ctor
+; CHECK: movq    [[SAVETHIS]], %rcx
+; CHECK: jmp     B_ctor                  # TAILCALL
+  %b = getelementptr inbounds %struct.E* %this, i64 0, i32 0
+  %call = tail call %struct.B* @B_ctor(%struct.B* %b, i32 %x)
+  %call4 = tail call %struct.B* @B_ctor(%struct.B* %b, i32 %x)
+  ret %struct.E* %this
+}
+
+define %struct.E* @E_ctor_nothisret(%struct.E* %this, i32 %x) {
+entry:
+; CHECK: E_ctor_nothisret:
+; CHECK: movq    %rcx, [[SAVETHIS:%r[0-9a-z]+]]
+; CHECK: callq   B_ctor_nothisret
+; CHECK: movq    [[SAVETHIS]], %rcx
+; CHECK-NOT: jmp     B_ctor_nothisret
+  %b = getelementptr inbounds %struct.E* %this, i64 0, i32 0
+  %call = tail call %struct.B* @B_ctor_nothisret(%struct.B* %b, i32 %x)
+  %call4 = tail call %struct.B* @B_ctor_nothisret(%struct.B* %b, i32 %x)
+  ret %struct.E* %this
+}
diff --git a/test/CodeGen/X86/unwindraise.ll b/test/CodeGen/X86/unwindraise.ll
index a438723..9bbe980 100644
--- a/test/CodeGen/X86/unwindraise.ll
+++ b/test/CodeGen/X86/unwindraise.ll
@@ -50,12 +50,12 @@ while.body:                                       ; preds = %uw_update_context.e
   ]
 
 if.end3:                                          ; preds = %while.body
-  %4 = load i32 (i32, i32, i64, %struct._Unwind_Exception*, %struct._Unwind_Context*)** %personality, align 8, !tbaa !0
+  %4 = load i32 (i32, i32, i64, %struct._Unwind_Exception*, %struct._Unwind_Context*)** %personality, align 8
   %tobool = icmp eq i32 (i32, i32, i64, %struct._Unwind_Exception*, %struct._Unwind_Context*)* %4, null
   br i1 %tobool, label %if.end13, label %if.then4
 
 if.then4:                                         ; preds = %if.end3
-  %5 = load i64* %exception_class, align 8, !tbaa !3
+  %5 = load i64* %exception_class, align 8
   %call6 = call i32 %4(i32 1, i32 1, i64 %5, %struct._Unwind_Exception* %exc, %struct._Unwind_Context* %cur_context)
   switch i32 %call6, label %do.end21.loopexit46 [
     i32 6, label %while.end
@@ -64,7 +64,7 @@ if.then4:                                         ; preds = %if.end3
 
 if.end13:                                         ; preds = %if.then4, %if.end3
   call fastcc void @uw_update_context_1(%struct._Unwind_Context* %cur_context, %struct._Unwind_FrameState* %fs)
-  %6 = load i64* %retaddr_column.i, align 8, !tbaa !3
+  %6 = load i64* %retaddr_column.i, align 8
   %conv.i = trunc i64 %6 to i32
   %cmp.i.i.i = icmp slt i32 %conv.i, 18
   br i1 %cmp.i.i.i, label %cond.end.i.i.i, label %cond.true.i.i.i
@@ -77,17 +77,17 @@ cond.end.i.i.i:                                   ; preds = %if.end13
   %sext.i = shl i64 %6, 32
   %idxprom.i.i.i = ashr exact i64 %sext.i, 32
   %arrayidx.i.i.i = getelementptr inbounds [18 x i8]* @dwarf_reg_size_table, i64 0, i64 %idxprom.i.i.i
-  %7 = load i8* %arrayidx.i.i.i, align 1, !tbaa !1
+  %7 = load i8* %arrayidx.i.i.i, align 1
   %arrayidx2.i.i.i = getelementptr inbounds %struct._Unwind_Context* %cur_context, i64 0, i32 0, i64 %idxprom.i.i.i
-  %8 = load i8** %arrayidx2.i.i.i, align 8, !tbaa !0
-  %9 = load i64* %flags.i.i.i.i, align 8, !tbaa !3
+  %8 = load i8** %arrayidx2.i.i.i, align 8
+  %9 = load i64* %flags.i.i.i.i, align 8
   %and.i.i.i.i = and i64 %9, 4611686018427387904
   %tobool.i.i.i = icmp eq i64 %and.i.i.i.i, 0
   br i1 %tobool.i.i.i, label %if.end.i.i.i, label %land.lhs.true.i.i.i
 
 land.lhs.true.i.i.i:                              ; preds = %cond.end.i.i.i
   %arrayidx4.i.i.i = getelementptr inbounds %struct._Unwind_Context* %cur_context, i64 0, i32 8, i64 %idxprom.i.i.i
-  %10 = load i8* %arrayidx4.i.i.i, align 1, !tbaa !1
+  %10 = load i8* %arrayidx4.i.i.i, align 1
   %tobool6.i.i.i = icmp eq i8 %10, 0
   br i1 %tobool6.i.i.i, label %if.end.i.i.i, label %if.then.i.i.i
 
@@ -101,7 +101,7 @@ if.end.i.i.i:                                     ; preds = %land.lhs.true.i.i.i
 
 if.then10.i.i.i:                                  ; preds = %if.end.i.i.i
   %12 = bitcast i8* %8 to i64*
-  %13 = load i64* %12, align 8, !tbaa !3
+  %13 = load i64* %12, align 8
   br label %uw_update_context.exit
 
 cond.true14.i.i.i:                                ; preds = %if.end.i.i.i
@@ -111,16 +111,16 @@ cond.true14.i.i.i:                                ; preds = %if.end.i.i.i
 uw_update_context.exit:                           ; preds = %if.then10.i.i.i, %if.then.i.i.i
   %retval.0.i.i.i = phi i64 [ %11, %if.then.i.i.i ], [ %13, %if.then10.i.i.i ]
   %14 = inttoptr i64 %retval.0.i.i.i to i8*
-  store i8* %14, i8** %ra.i, align 8, !tbaa !0
+  store i8* %14, i8** %ra.i, align 8
   br label %while.body
 
 while.end:                                        ; preds = %if.then4
   %private_1 = getelementptr inbounds %struct._Unwind_Exception* %exc, i64 0, i32 2
-  store i64 0, i64* %private_1, align 8, !tbaa !3
-  %15 = load i8** %ra.i, align 8, !tbaa !0
+  store i64 0, i64* %private_1, align 8
+  %15 = load i8** %ra.i, align 8
   %16 = ptrtoint i8* %15 to i64
   %private_2 = getelementptr inbounds %struct._Unwind_Exception* %exc, i64 0, i32 3
-  store i64 %16, i64* %private_2, align 8, !tbaa !3
+  store i64 %16, i64* %private_2, align 8
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 240, i32 8, i1 false)
   %17 = bitcast %struct._Unwind_FrameState* %fs.i to i8*
   call void @llvm.lifetime.start(i64 -1, i8* %17)
@@ -130,21 +130,21 @@ while.end:                                        ; preds = %if.then4
 
 while.body.i:                                     ; preds = %uw_update_context.exit44, %while.end
   %call.i = call fastcc i32 @uw_frame_state_for(%struct._Unwind_Context* %cur_context, %struct._Unwind_FrameState* %fs.i)
-  %18 = load i8** %ra.i, align 8, !tbaa !0
+  %18 = load i8** %ra.i, align 8
   %19 = ptrtoint i8* %18 to i64
-  %20 = load i64* %private_2, align 8, !tbaa !3
+  %20 = load i64* %private_2, align 8
   %cmp.i = icmp eq i64 %19, %20
   %cmp2.i = icmp eq i32 %call.i, 0
   br i1 %cmp2.i, label %if.end.i, label %do.end21
 
 if.end.i:                                         ; preds = %while.body.i
-  %21 = load i32 (i32, i32, i64, %struct._Unwind_Exception*, %struct._Unwind_Context*)** %personality.i, align 8, !tbaa !0
+  %21 = load i32 (i32, i32, i64, %struct._Unwind_Exception*, %struct._Unwind_Context*)** %personality.i, align 8
   %tobool.i = icmp eq i32 (i32, i32, i64, %struct._Unwind_Exception*, %struct._Unwind_Context*)* %21, null
   br i1 %tobool.i, label %if.end12.i, label %if.then3.i
 
 if.then3.i:                                       ; preds = %if.end.i
   %or.i = select i1 %cmp.i, i32 6, i32 2
-  %22 = load i64* %exception_class, align 8, !tbaa !3
+  %22 = load i64* %exception_class, align 8
   %call5.i = call i32 %21(i32 1, i32 %or.i, i64 %22, %struct._Unwind_Exception* %exc, %struct._Unwind_Context* %cur_context)
   switch i32 %call5.i, label %do.end21 [
     i32 7, label %do.body19
@@ -160,7 +160,7 @@ cond.true.i:                                      ; preds = %if.end12.i
 
 cond.end.i:                                       ; preds = %if.end12.i
   call fastcc void @uw_update_context_1(%struct._Unwind_Context* %cur_context, %struct._Unwind_FrameState* %fs.i)
-  %23 = load i64* %retaddr_column.i22, align 8, !tbaa !3
+  %23 = load i64* %retaddr_column.i22, align 8
   %conv.i23 = trunc i64 %23 to i32
   %cmp.i.i.i24 = icmp slt i32 %conv.i23, 18
   br i1 %cmp.i.i.i24, label %cond.end.i.i.i33, label %cond.true.i.i.i25
@@ -173,17 +173,17 @@ cond.end.i.i.i33:                                 ; preds = %cond.end.i
   %sext.i26 = shl i64 %23, 32
   %idxprom.i.i.i27 = ashr exact i64 %sext.i26, 32
   %arrayidx.i.i.i28 = getelementptr inbounds [18 x i8]* @dwarf_reg_size_table, i64 0, i64 %idxprom.i.i.i27
-  %24 = load i8* %arrayidx.i.i.i28, align 1, !tbaa !1
+  %24 = load i8* %arrayidx.i.i.i28, align 1
   %arrayidx2.i.i.i29 = getelementptr inbounds %struct._Unwind_Context* %cur_context, i64 0, i32 0, i64 %idxprom.i.i.i27
-  %25 = load i8** %arrayidx2.i.i.i29, align 8, !tbaa !0
-  %26 = load i64* %flags.i.i.i.i, align 8, !tbaa !3
+  %25 = load i8** %arrayidx2.i.i.i29, align 8
+  %26 = load i64* %flags.i.i.i.i, align 8
   %and.i.i.i.i31 = and i64 %26, 4611686018427387904
   %tobool.i.i.i32 = icmp eq i64 %and.i.i.i.i31, 0
   br i1 %tobool.i.i.i32, label %if.end.i.i.i39, label %land.lhs.true.i.i.i36
 
 land.lhs.true.i.i.i36:                            ; preds = %cond.end.i.i.i33
   %arrayidx4.i.i.i34 = getelementptr inbounds %struct._Unwind_Context* %cur_context, i64 0, i32 8, i64 %idxprom.i.i.i27
-  %27 = load i8* %arrayidx4.i.i.i34, align 1, !tbaa !1
+  %27 = load i8* %arrayidx4.i.i.i34, align 1
   %tobool6.i.i.i35 = icmp eq i8 %27, 0
   br i1 %tobool6.i.i.i35, label %if.end.i.i.i39, label %if.then.i.i.i37
 
@@ -197,7 +197,7 @@ if.end.i.i.i39:                                   ; preds = %land.lhs.true.i.i.i
 
 if.then10.i.i.i40:                                ; preds = %if.end.i.i.i39
   %29 = bitcast i8* %25 to i64*
-  %30 = load i64* %29, align 8, !tbaa !3
+  %30 = load i64* %29, align 8
   br label %uw_update_context.exit44
 
 cond.true14.i.i.i41:                              ; preds = %if.end.i.i.i39
@@ -207,13 +207,13 @@ cond.true14.i.i.i41:                              ; preds = %if.end.i.i.i39
 uw_update_context.exit44:                         ; preds = %if.then10.i.i.i40, %if.then.i.i.i37
   %retval.0.i.i.i42 = phi i64 [ %28, %if.then.i.i.i37 ], [ %30, %if.then10.i.i.i40 ]
   %31 = inttoptr i64 %retval.0.i.i.i42 to i8*
-  store i8* %31, i8** %ra.i, align 8, !tbaa !0
+  store i8* %31, i8** %ra.i, align 8
   br label %while.body.i
 
 do.body19:                                        ; preds = %if.then3.i
   call void @llvm.lifetime.end(i64 -1, i8* %17)
   %call20 = call fastcc i64 @uw_install_context_1(%struct._Unwind_Context* %this_context, %struct._Unwind_Context* %cur_context)
-  %32 = load i8** %ra.i, align 8, !tbaa !0
+  %32 = load i8** %ra.i, align 8
   call void @llvm.eh.return.i64(i64 %call20, i8* %32)
   unreachable
 
@@ -245,8 +245,3 @@ declare fastcc void @uw_update_context_1(%struct._Unwind_Context*, %struct._Unwi
 declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind
 
 declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind
-
-!0 = metadata !{metadata !"any pointer", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
-!3 = metadata !{metadata !"long", metadata !1}
diff --git a/test/CodeGen/X86/v4f32-immediate.ll b/test/CodeGen/X86/v4f32-immediate.ll
index b5ebaa7..68d20a0 100644
--- a/test/CodeGen/X86/v4f32-immediate.ll
+++ b/test/CodeGen/X86/v4f32-immediate.ll
@@ -1,4 +1,6 @@
-; RUN: llc < %s -march=x86 -mattr=+sse | grep movaps
+; RUN: llc < %s -march=x86 -mattr=+sse | FileCheck %s
+
+; CHECK: movaps
 
 define <4 x float> @foo() {
   ret <4 x float> <float 0x4009C9D0A0000000, float 0x4002666660000000, float 0x3FF3333340000000, float 0x3FB99999A0000000>
diff --git a/test/CodeGen/X86/vararg_tailcall.ll b/test/CodeGen/X86/vararg_tailcall.ll
index 73d80eb..eeda5e1 100644
--- a/test/CodeGen/X86/vararg_tailcall.ll
+++ b/test/CodeGen/X86/vararg_tailcall.ll
@@ -39,7 +39,7 @@ declare void @bar2(i8*, i64) optsize noredzone
 ; WIN64: callq
 define i8* @foo2(i8* %arg) nounwind optsize ssp noredzone {
 entry:
-  %tmp1 = load i8** @sel, align 8, !tbaa !0
+  %tmp1 = load i8** @sel, align 8
   %call = tail call i8* (i8*, i8*, ...)* @x2(i8* %arg, i8* %tmp1) nounwind optsize noredzone
   ret i8* %call
 }
@@ -52,10 +52,10 @@ declare i8* @x2(i8*, i8*, ...) optsize noredzone
 ; WIN64: callq
 define i8* @foo6(i8* %arg1, i8* %arg2) nounwind optsize ssp noredzone {
 entry:
-  %tmp2 = load i8** @sel3, align 8, !tbaa !0
-  %tmp3 = load i8** @sel4, align 8, !tbaa !0
-  %tmp4 = load i8** @sel5, align 8, !tbaa !0
-  %tmp5 = load i8** @sel6, align 8, !tbaa !0
+  %tmp2 = load i8** @sel3, align 8
+  %tmp3 = load i8** @sel4, align 8
+  %tmp4 = load i8** @sel5, align 8
+  %tmp5 = load i8** @sel6, align 8
   %call = tail call i8* (i8*, i8*, i8*, ...)* @x3(i8* %arg1, i8* %arg2, i8* %tmp2, i8* %tmp3, i8* %tmp4, i8* %tmp5) nounwind optsize noredzone
   ret i8* %call
 }
@@ -68,11 +68,11 @@ declare i8* @x3(i8*, i8*, i8*, ...) optsize noredzone
 ; WIN64: callq
 define i8* @foo7(i8* %arg1, i8* %arg2) nounwind optsize ssp noredzone {
 entry:
-  %tmp2 = load i8** @sel3, align 8, !tbaa !0
-  %tmp3 = load i8** @sel4, align 8, !tbaa !0
-  %tmp4 = load i8** @sel5, align 8, !tbaa !0
-  %tmp5 = load i8** @sel6, align 8, !tbaa !0
-  %tmp6 = load i8** @sel7, align 8, !tbaa !0
+  %tmp2 = load i8** @sel3, align 8
+  %tmp3 = load i8** @sel4, align 8
+  %tmp4 = load i8** @sel5, align 8
+  %tmp5 = load i8** @sel6, align 8
+  %tmp6 = load i8** @sel7, align 8
   %call = tail call i8* (i8*, i8*, i8*, i8*, i8*, i8*, i8*, ...)* @x7(i8* %arg1, i8* %arg2, i8* %tmp2, i8* %tmp3, i8* %tmp4, i8* %tmp5, i8* %tmp6) nounwind optsize noredzone
   ret i8* %call
 }
@@ -85,14 +85,10 @@ declare i8* @x7(i8*, i8*, i8*, i8*, i8*, i8*, i8*, ...) optsize noredzone
 ; WIN64: callq
 define i8* @foo8(i8* %arg1, i8* %arg2) nounwind optsize ssp noredzone {
 entry:
-  %tmp2 = load i8** @sel3, align 8, !tbaa !0
-  %tmp3 = load i8** @sel4, align 8, !tbaa !0
-  %tmp4 = load i8** @sel5, align 8, !tbaa !0
-  %tmp5 = load i8** @sel6, align 8, !tbaa !0
+  %tmp2 = load i8** @sel3, align 8
+  %tmp3 = load i8** @sel4, align 8
+  %tmp4 = load i8** @sel5, align 8
+  %tmp5 = load i8** @sel6, align 8
   %call = tail call i8* (i8*, i8*, i8*, ...)* @x3(i8* %arg1, i8* %arg2, i8* %tmp2, i8* %tmp3, i8* %tmp4, i8* %tmp5, i32 48879, i32 48879) nounwind optsize noredzone
   ret i8* %call
 }
-
-!0 = metadata !{metadata !"any pointer", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/X86/vec_compare.ll b/test/CodeGen/X86/vec_compare.ll
index b6d91a3..fd5c234 100644
--- a/test/CodeGen/X86/vec_compare.ll
+++ b/test/CodeGen/X86/vec_compare.ll
@@ -65,3 +65,159 @@ define <2 x i64> @test6(<2 x i64> %A, <2 x i64> %B) nounwind {
 	%D = sext <2 x i1> %C to <2 x i64>
 	ret <2 x i64> %D
 }
+
+define <2 x i64> @test7(<2 x i64> %A, <2 x i64> %B) nounwind {
+; CHECK: [[CONSTSEG:[A-Z0-9_]*]]:
+; CHECK:      .long	2147483648
+; CHECK-NEXT: .long	0
+; CHECK-NEXT: .long	2147483648
+; CHECK-NEXT: .long	0
+; CHECK: test7:
+; CHECK: movdqa [[CONSTSEG]], [[CONSTREG:%xmm[0-9]*]]
+; CHECK: pxor [[CONSTREG]]
+; CHECK: pxor [[CONSTREG]]
+; CHECK: pcmpgtd %xmm1
+; CHECK: pshufd $-96
+; CHECK: pcmpeqd
+; CHECK: pshufd $-11
+; CHECK: pand
+; CHECK: pshufd $-11
+; CHECK: por
+; CHECK: ret
+	%C = icmp sgt <2 x i64> %A, %B
+	%D = sext <2 x i1> %C to <2 x i64>
+	ret <2 x i64> %D
+}
+
+define <2 x i64> @test8(<2 x i64> %A, <2 x i64> %B) nounwind {
+; CHECK: test8:
+; CHECK: pxor
+; CHECK: pxor
+; CHECK: pcmpgtd %xmm0
+; CHECK: pshufd $-96
+; CHECK: pcmpeqd
+; CHECK: pshufd $-11
+; CHECK: pand
+; CHECK: pshufd $-11
+; CHECK: por
+; CHECK: ret
+	%C = icmp slt <2 x i64> %A, %B
+	%D = sext <2 x i1> %C to <2 x i64>
+	ret <2 x i64> %D
+}
+
+define <2 x i64> @test9(<2 x i64> %A, <2 x i64> %B) nounwind {
+; CHECK: test9:
+; CHECK: pxor
+; CHECK: pxor
+; CHECK: pcmpgtd %xmm0
+; CHECK: pshufd $-96
+; CHECK: pcmpeqd
+; CHECK: pshufd $-11
+; CHECK: pand
+; CHECK: pshufd $-11
+; CHECK: por
+; CHECK: pcmpeqd
+; CHECK: pxor
+; CHECK: ret
+	%C = icmp sge <2 x i64> %A, %B
+	%D = sext <2 x i1> %C to <2 x i64>
+	ret <2 x i64> %D
+}
+
+define <2 x i64> @test10(<2 x i64> %A, <2 x i64> %B) nounwind {
+; CHECK: test10:
+; CHECK: pxor
+; CHECK: pxor
+; CHECK: pcmpgtd %xmm1
+; CHECK: pshufd $-96
+; CHECK: pcmpeqd
+; CHECK: pshufd $-11
+; CHECK: pand
+; CHECK: pshufd $-11
+; CHECK: por
+; CHECK: pcmpeqd
+; CHECK: pxor
+; CHECK: ret
+	%C = icmp sle <2 x i64> %A, %B
+	%D = sext <2 x i1> %C to <2 x i64>
+	ret <2 x i64> %D
+}
+
+define <2 x i64> @test11(<2 x i64> %A, <2 x i64> %B) nounwind {
+; CHECK: [[CONSTSEG:[A-Z0-9_]*]]:
+; CHECK:      .long	2147483648
+; CHECK-NEXT: .long	2147483648
+; CHECK-NEXT: .long	2147483648
+; CHECK-NEXT: .long	2147483648
+; CHECK: test11:
+; CHECK: movdqa [[CONSTSEG]], [[CONSTREG:%xmm[0-9]*]]
+; CHECK: pxor [[CONSTREG]]
+; CHECK: pxor [[CONSTREG]]
+; CHECK: pcmpgtd %xmm1
+; CHECK: pshufd $-96
+; CHECK: pcmpeqd
+; CHECK: pshufd $-11
+; CHECK: pand
+; CHECK: pshufd $-11
+; CHECK: por
+; CHECK: ret
+	%C = icmp ugt <2 x i64> %A, %B
+	%D = sext <2 x i1> %C to <2 x i64>
+	ret <2 x i64> %D
+}
+
+define <2 x i64> @test12(<2 x i64> %A, <2 x i64> %B) nounwind {
+; CHECK: test12:
+; CHECK: pxor
+; CHECK: pxor
+; CHECK: pcmpgtd %xmm0
+; CHECK: pshufd $-96
+; CHECK: pcmpeqd
+; CHECK: pshufd $-11
+; CHECK: pand
+; CHECK: pshufd $-11
+; CHECK: por
+; CHECK: ret
+	%C = icmp ult <2 x i64> %A, %B
+	%D = sext <2 x i1> %C to <2 x i64>
+	ret <2 x i64> %D
+}
+
+define <2 x i64> @test13(<2 x i64> %A, <2 x i64> %B) nounwind {
+; CHECK: test13:
+; CHECK: pxor
+; CHECK: pxor
+; CHECK: pcmpgtd %xmm0
+; CHECK: pshufd $-96
+; CHECK: pcmpeqd
+; CHECK: pshufd $-11
+; CHECK: pand
+; CHECK: pshufd $-11
+; CHECK: por
+; CHECK: pcmpeqd
+; CHECK: pxor
+; CHECK: ret
+	%C = icmp uge <2 x i64> %A, %B
+	%D = sext <2 x i1> %C to <2 x i64>
+	ret <2 x i64> %D
+}
+
+define <2 x i64> @test14(<2 x i64> %A, <2 x i64> %B) nounwind {
+; CHECK: test14:
+; CHECK: pxor
+; CHECK: pxor
+; CHECK: pcmpgtd %xmm1
+; CHECK: pshufd $-96
+; CHECK: pcmpeqd
+; CHECK: pshufd $-11
+; CHECK: pand
+; CHECK: pshufd $-11
+; CHECK: por
+; CHECK: pcmpeqd
+; CHECK: pxor
+; CHECK: ret
+	%C = icmp ule <2 x i64> %A, %B
+	%D = sext <2 x i1> %C to <2 x i64>
+	ret <2 x i64> %D
+}
diff --git a/test/CodeGen/X86/vec_set-9.ll b/test/CodeGen/X86/vec_set-9.ll
index b8ec0cf..6979f6b 100644
--- a/test/CodeGen/X86/vec_set-9.ll
+++ b/test/CodeGen/X86/vec_set-9.ll
@@ -1,5 +1,10 @@
-; RUN: llc < %s -march=x86-64 | grep movd | count 1
-; RUN: llc < %s -march=x86-64 | grep "movlhps.*%xmm0, %xmm0"
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+
+; CHECK: test3:
+; CHECK: movd
+; CHECK-NOT: movd
+; CHECK: {{movlhps.*%xmm0, %xmm0}}
+; CHECK-NEXT: ret
 
 define <2 x i64> @test3(i64 %A) nounwind {
 entry:
diff --git a/test/CodeGen/X86/vec_set-B.ll b/test/CodeGen/X86/vec_set-B.ll
index f5b3e8b..5578eca 100644
--- a/test/CodeGen/X86/vec_set-B.ll
+++ b/test/CodeGen/X86/vec_set-B.ll
@@ -1,6 +1,8 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep movaps
+; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
 ; RUN: llc < %s -march=x86 -mattr=+sse2 | grep esp | count 2
 
+; CHECK-NOT: movaps
+
 ; These should both generate something like this:
 ;_test3:
 ;	movl	$1234567, %eax
diff --git a/test/CodeGen/X86/vec_set-D.ll b/test/CodeGen/X86/vec_set-D.ll
index 3d6369e..9c1e1ac 100644
--- a/test/CodeGen/X86/vec_set-D.ll
+++ b/test/CodeGen/X86/vec_set-D.ll
@@ -1,4 +1,6 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movq
+; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
+
+; CHECK: movq
 
 define <4 x i32> @t(i32 %x, i32 %y) nounwind  {
 	%tmp1 = insertelement <4 x i32> zeroinitializer, i32 %x, i32 0
diff --git a/test/CodeGen/X86/vec_set-I.ll b/test/CodeGen/X86/vec_set-I.ll
index 64f36f9..c5d6ab8 100644
--- a/test/CodeGen/X86/vec_set-I.ll
+++ b/test/CodeGen/X86/vec_set-I.ll
@@ -1,5 +1,8 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movd
-; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep xorp
+; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
+
+; CHECK-NOT: xorp
+; CHECK: movd
+; CHECK-NOT: xorp
 
 define void @t1() nounwind  {
 	%tmp298.i.i = load <4 x float>* null, align 16
diff --git a/test/CodeGen/X86/vec_shuffle-28.ll b/test/CodeGen/X86/vec_shuffle-28.ll
index 343685b..ebf5577 100644
--- a/test/CodeGen/X86/vec_shuffle-28.ll
+++ b/test/CodeGen/X86/vec_shuffle-28.ll
@@ -1,5 +1,7 @@
-; RUN: llc < %s -march=x86 -mcpu=core2 -o %t
-; RUN: grep pshufb %t | count 1
+; RUN: llc < %s -march=x86 -mcpu=core2 | FileCheck %s
+
+; CHECK:     pshufb
+; CHECK-NOT: pshufb
 
 ; FIXME: this test has a superfluous punpcklqdq pre-pshufb currently.
 ;        Don't XFAIL it because it's still better than the previous code.
diff --git a/test/CodeGen/X86/vec_zero_cse.ll b/test/CodeGen/X86/vec_zero_cse.ll
index 41ea024..bda3fef 100644
--- a/test/CodeGen/X86/vec_zero_cse.ll
+++ b/test/CodeGen/X86/vec_zero_cse.ll
@@ -1,7 +1,13 @@
-; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | grep xorps | count 1
-; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | grep pcmpeqd | count 1
+; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | FileCheck %s
+; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | FileCheck -check-prefix CHECK2 %s
 ; 64-bit stores here do not use MMX.
 
+; CHECK: xorps
+; CHECK-NOT: xorps
+
+; CHECK2: pcmpeqd
+; CHECK2-NOT: pcmpeqd
+
 @M1 = external global <1 x i64>
 @M2 = external global <2 x i32>
 
diff --git a/test/CodeGen/X86/vector.ll b/test/CodeGen/X86/vector.ll
index 46b0e18..82d20a2 100644
--- a/test/CodeGen/X86/vector.ll
+++ b/test/CodeGen/X86/vector.ll
@@ -1,6 +1,6 @@
 ; Test that vectors are scalarized/lowered correctly.
-; RUN: llc < %s -march=x86 -mcpu=i386 > %t
-; RUN: llc < %s -march=x86 -mcpu=yonah >> %t
+; RUN: llc < %s -march=x86 -mcpu=i386
+; RUN: llc < %s -march=x86 -mcpu=yonah
 
 %d8 = type <8 x double>
 %f1 = type <1 x float>
diff --git a/test/CodeGen/X86/viabs.ll b/test/CodeGen/X86/viabs.ll
new file mode 100644
index 0000000..f748a14
--- /dev/null
+++ b/test/CodeGen/X86/viabs.ll
@@ -0,0 +1,183 @@
+; RUN: llc < %s -march=x86-64 -mcpu=x86-64 | FileCheck %s -check-prefix=SSE2
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s -check-prefix=SSSE3
+; RUN: llc < %s -march=x86-64 -mcpu=core-avx2 | FileCheck %s -check-prefix=AVX2
+
+define <4 x i32> @test1(<4 x i32> %a) nounwind {
+; SSE2: test1:
+; SSE2: movdqa
+; SSE2: psrad $31
+; SSE2-NEXT: padd
+; SSE2-NEXT: pxor
+; SSE2-NEXT: ret
+
+; SSSE3: test1:
+; SSSE3: pabsd
+; SSSE3-NEXT: ret
+
+; AVX2: test1:
+; AVX2: vpabsd
+; AVX2-NEXT: ret
+        %tmp1neg = sub <4 x i32> zeroinitializer, %a
+        %b = icmp sgt <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
+        %abs = select <4 x i1> %b, <4 x i32> %a, <4 x i32> %tmp1neg
+        ret <4 x i32> %abs
+}
+
+define <4 x i32> @test2(<4 x i32> %a) nounwind {
+; SSE2: test2:
+; SSE2: movdqa
+; SSE2: psrad $31
+; SSE2-NEXT: padd
+; SSE2-NEXT: pxor
+; SSE2-NEXT: ret
+
+; SSSE3: test2:
+; SSSE3: pabsd
+; SSSE3-NEXT: ret
+
+; AVX2: test2:
+; AVX2: vpabsd
+; AVX2-NEXT: ret
+        %tmp1neg = sub <4 x i32> zeroinitializer, %a
+        %b = icmp sge <4 x i32> %a, zeroinitializer
+        %abs = select <4 x i1> %b, <4 x i32> %a, <4 x i32> %tmp1neg
+        ret <4 x i32> %abs
+}
+
+define <8 x i16> @test3(<8 x i16> %a) nounwind {
+; SSE2: test3:
+; SSE2: movdqa
+; SSE2: psraw $15
+; SSE2-NEXT: padd
+; SSE2-NEXT: pxor
+; SSE2-NEXT: ret
+
+; SSSE3: test3:
+; SSSE3: pabsw
+; SSSE3-NEXT: ret
+
+; AVX2: test3:
+; AVX2: vpabsw
+; AVX2-NEXT: ret
+        %tmp1neg = sub <8 x i16> zeroinitializer, %a
+        %b = icmp sgt <8 x i16> %a, zeroinitializer
+        %abs = select <8 x i1> %b, <8 x i16> %a, <8 x i16> %tmp1neg
+        ret <8 x i16> %abs
+}
+
+define <16 x i8> @test4(<16 x i8> %a) nounwind {
+; SSE2: test4:
+; SSE2: pxor
+; SSE2: pcmpgtb
+; SSE2-NEXT: padd
+; SSE2-NEXT: pxor
+; SSE2-NEXT: ret
+
+; SSSE3: test4:
+; SSSE3: pabsb
+; SSSE3-NEXT: ret
+
+; AVX2: test4:
+; AVX2: vpabsb
+; AVX2-NEXT: ret
+        %tmp1neg = sub <16 x i8> zeroinitializer, %a
+        %b = icmp slt <16 x i8> %a, zeroinitializer
+        %abs = select <16 x i1> %b, <16 x i8> %tmp1neg, <16 x i8> %a
+        ret <16 x i8> %abs
+}
+
+define <4 x i32> @test5(<4 x i32> %a) nounwind {
+; SSE2: test5:
+; SSE2: movdqa
+; SSE2: psrad $31
+; SSE2-NEXT: padd
+; SSE2-NEXT: pxor
+; SSE2-NEXT: ret
+
+; SSSE3: test5:
+; SSSE3: pabsd
+; SSSE3-NEXT: ret
+
+; AVX2: test5:
+; AVX2: vpabsd
+; AVX2-NEXT: ret
+        %tmp1neg = sub <4 x i32> zeroinitializer, %a
+        %b = icmp sle <4 x i32> %a, zeroinitializer
+        %abs = select <4 x i1> %b, <4 x i32> %tmp1neg, <4 x i32> %a
+        ret <4 x i32> %abs
+}
+
+define <8 x i32> @test6(<8 x i32> %a) nounwind {
+; SSSE3: test6:
+; SSSE3: pabsd
+; SSSE3: pabsd
+; SSSE3-NEXT: ret
+
+; AVX2: test6:
+; AVX2: vpabsd {{.*}}%ymm
+; AVX2-NEXT: ret
+        %tmp1neg = sub <8 x i32> zeroinitializer, %a
+        %b = icmp sgt <8 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+        %abs = select <8 x i1> %b, <8 x i32> %a, <8 x i32> %tmp1neg
+        ret <8 x i32> %abs
+}
+
+define <8 x i32> @test7(<8 x i32> %a) nounwind {
+; SSSE3: test7:
+; SSSE3: pabsd
+; SSSE3: pabsd
+; SSSE3-NEXT: ret
+
+; AVX2: test7:
+; AVX2: vpabsd {{.*}}%ymm
+; AVX2-NEXT: ret
+        %tmp1neg = sub <8 x i32> zeroinitializer, %a
+        %b = icmp sge <8 x i32> %a, zeroinitializer
+        %abs = select <8 x i1> %b, <8 x i32> %a, <8 x i32> %tmp1neg
+        ret <8 x i32> %abs
+}
+
+define <16 x i16> @test8(<16 x i16> %a) nounwind {
+; SSSE3: test8:
+; SSSE3: pabsw
+; SSSE3: pabsw
+; SSSE3-NEXT: ret
+
+; AVX2: test8:
+; AVX2: vpabsw {{.*}}%ymm
+; AVX2-NEXT: ret
+        %tmp1neg = sub <16 x i16> zeroinitializer, %a
+        %b = icmp sgt <16 x i16> %a, zeroinitializer
+        %abs = select <16 x i1> %b, <16 x i16> %a, <16 x i16> %tmp1neg
+        ret <16 x i16> %abs
+}
+
+define <32 x i8> @test9(<32 x i8> %a) nounwind {
+; SSSE3: test9:
+; SSSE3: pabsb
+; SSSE3: pabsb
+; SSSE3-NEXT: ret
+
+; AVX2: test9:
+; AVX2: vpabsb {{.*}}%ymm
+; AVX2-NEXT: ret
+        %tmp1neg = sub <32 x i8> zeroinitializer, %a
+        %b = icmp slt <32 x i8> %a, zeroinitializer
+        %abs = select <32 x i1> %b, <32 x i8> %tmp1neg, <32 x i8> %a
+        ret <32 x i8> %abs
+}
+
+define <8 x i32> @test10(<8 x i32> %a) nounwind {
+; SSSE3: test10:
+; SSSE3: pabsd
+; SSSE3: pabsd
+; SSSE3-NEXT: ret
+
+; AVX2: test10:
+; AVX2: vpabsd {{.*}}%ymm
+; AVX2-NEXT: ret
+        %tmp1neg = sub <8 x i32> zeroinitializer, %a
+        %b = icmp sle <8 x i32> %a, zeroinitializer
+        %abs = select <8 x i1> %b, <8 x i32> %tmp1neg, <8 x i32> %a
+        ret <8 x i32> %abs
+}
diff --git a/test/CodeGen/X86/win32_sret.ll b/test/CodeGen/X86/win32_sret.ll
index 52b987e..2bfe5fb 100644
--- a/test/CodeGen/X86/win32_sret.ll
+++ b/test/CodeGen/X86/win32_sret.ll
@@ -1,7 +1,9 @@
-; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN32
+; We specify -mcpu explicitly to avoid instruction reordering that happens on
+; some setups (e.g., Atom) from affecting the output.
+; RUN: llc < %s -mcpu=core2 -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN32
 ; RUN: llc < %s -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X86
 ; RUN: llc < %s -mtriple=i386-pc-linux | FileCheck %s -check-prefix=LINUX
-; RUN: llc < %s -O0 -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN32
+; RUN: llc < %s -mcpu=core2 -O0 -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN32
 ; RUN: llc < %s -O0 -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X86
 ; RUN: llc < %s -O0 -mtriple=i386-pc-linux | FileCheck %s -check-prefix=LINUX
 
@@ -117,11 +119,8 @@ entry:
 ; WIN32:      movl %eax, (%e{{[sc][px]}})
 
 ; The this pointer goes to ECX.
-; FIXME: for some reason, the below checks fail on the Ubuntu Atom D2700 bot.
-; FIXME-NEXT: leal {{[0-9]+}}(%esp), %ecx
-; FIXME-NEXT: calll "?foo@C5@@QAE?AUS5@@XZ"
-
-; WIN32:      calll "?foo@C5@@QAE?AUS5@@XZ"
+; WIN32-NEXT: leal {{[0-9]+}}(%esp), %ecx
+; WIN32-NEXT: calll "?foo@C5@@QAE?AUS5@@XZ"
 ; WIN32:      ret
   ret void
 }
diff --git a/test/CodeGen/X86/x86-64-frameaddr.ll b/test/CodeGen/X86/x86-64-frameaddr.ll
index 57163d3..7d36a7a 100644
--- a/test/CodeGen/X86/x86-64-frameaddr.ll
+++ b/test/CodeGen/X86/x86-64-frameaddr.ll
@@ -1,4 +1,9 @@
-; RUN: llc < %s -march=x86-64 | grep movq | grep rbp
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+
+; CHECK: stack_end_address:
+; CHECK: {{movq.+rbp.*$}}
+; CHECK: {{movq.+rbp.*$}}
+; CHECK: ret
 
 define i64* @stack_end_address() nounwind  {
 entry:
diff --git a/test/CodeGen/X86/x86-64-pic-3.ll b/test/CodeGen/X86/x86-64-pic-3.ll
index ba93378..1b0ddc6 100644
--- a/test/CodeGen/X86/x86-64-pic-3.ll
+++ b/test/CodeGen/X86/x86-64-pic-3.ll
@@ -1,6 +1,9 @@
-; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
-; RUN: grep "callq	f" %t1
-; RUN: not grep "callq	f@PLT" %t1
+; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic | FileCheck %s
+
+
+; CHECK-NOT: {{callq	f@PLT}}
+; CHECK: {{callq	f}}
+; CHECK-NOT: {{callq	f@PLT}}
 
 define void @g() {
 entry:
diff --git a/test/CodeGen/X86/x86-64-shortint.ll b/test/CodeGen/X86/x86-64-shortint.ll
index cbf6588..75f8902 100644
--- a/test/CodeGen/X86/x86-64-shortint.ll
+++ b/test/CodeGen/X86/x86-64-shortint.ll
@@ -1,4 +1,6 @@
-; RUN: llc < %s | grep movswl
+; RUN: llc < %s | FileCheck %s
+
+; CHECK: movswl
 
 target datalayout = "e-p:64:64"
 target triple = "x86_64-apple-darwin8"
diff --git a/test/CodeGen/X86/zext-extract_subreg.ll b/test/CodeGen/X86/zext-extract_subreg.ll
index 4f1dde3..168b898 100644
--- a/test/CodeGen/X86/zext-extract_subreg.ll
+++ b/test/CodeGen/X86/zext-extract_subreg.ll
@@ -6,7 +6,7 @@ entry:
   br i1 undef, label %return, label %if.end.i
 
 if.end.i:                                         ; preds = %entry
-  %tmp7.i = load i32* undef, align 4, !tbaa !0
+  %tmp7.i = load i32* undef, align 4
   br i1 undef, label %return, label %if.end
 
 if.end:                                           ; preds = %if.end.i
@@ -55,7 +55,3 @@ cond.false280:                                    ; preds = %cond.true225
 return:                                           ; preds = %if.end.i, %entry
   ret void
 }
-
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/X86/zext-inreg-0.ll b/test/CodeGen/X86/zext-inreg-0.ll
index ae6221a..688b88d 100644
--- a/test/CodeGen/X86/zext-inreg-0.ll
+++ b/test/CodeGen/X86/zext-inreg-0.ll
@@ -1,9 +1,12 @@
-; RUN: llc < %s -march=x86 | not grep and
-; RUN: llc < %s -march=x86-64 > %t
-; RUN: not grep and %t
-; RUN: not grep movzbq %t
-; RUN: not grep movzwq %t
-; RUN: not grep movzlq %t
+; RUN: llc < %s -march=x86 | FileCheck -check-prefix=X86 %s
+; RUN: llc < %s -march=x86-64 | FileCheck -check-prefix=X64 %s
+
+; X86-NOT: and
+
+; X64-NOT: and
+; X64-NOT: movzbq
+; X64-NOT: movzwq
+; X64-NOT: movzlq
 
 ; These should use movzbl instead of 'and 255'.
 ; This related to not having a ZERO_EXTEND_REG opcode.
diff --git a/test/CodeGen/XCore/global_negative_offset.ll b/test/CodeGen/XCore/offset_folding.ll
index 0328fb0..30edfe6 100644
--- a/test/CodeGen/XCore/global_negative_offset.ll
+++ b/test/CodeGen/XCore/offset_folding.ll
@@ -1,23 +1,40 @@
 ; RUN: llc < %s -march=xcore | FileCheck %s
 
-; Don't fold negative offsets into cp / dp accesses to avoid a relocation
-; error if the address + addend is less than the start of the cp / dp.
-
 @a = external constant [0 x i32], section ".cp.rodata"
 @b = external global [0 x i32]
 
-define i32 *@f() nounwind {
+define i32 *@f1() nounwind {
+entry:
+; CHECK: f1:
+; CHECK: ldaw r11, cp[a+4]
+; CHECK: mov r0, r11
+	%0 = getelementptr [0 x i32]* @a, i32 0, i32 1
+	ret i32* %0
+}
+
+define i32 *@f2() nounwind {
+entry:
+; CHECK: f2:
+; CHECK: ldaw r0, dp[b+4]
+	%0 = getelementptr [0 x i32]* @b, i32 0, i32 1
+	ret i32* %0
+}
+
+; Don't fold negative offsets into cp / dp accesses to avoid a relocation
+; error if the address + addend is less than the start of the cp / dp.
+
+define i32 *@f3() nounwind {
 entry:
-; CHECK: f:
+; CHECK: f3:
 ; CHECK: ldaw r11, cp[a]
 ; CHECK: sub r0, r11, 4
 	%0 = getelementptr [0 x i32]* @a, i32 0, i32 -1
 	ret i32* %0
 }
 
-define i32 *@g() nounwind {
+define i32 *@f4() nounwind {
 entry:
-; CHECK: g:
+; CHECK: f4:
 ; CHECK: ldaw [[REG:r[0-9]+]], dp[b]
 ; CHECK: sub r0, [[REG]], 4
 	%0 = getelementptr [0 x i32]* @b, i32 0, i32 -1
diff --git a/test/CodeGen/XCore/unaligned_load.ll b/test/CodeGen/XCore/unaligned_load.ll
index 0ee8e1c..772a847 100644
--- a/test/CodeGen/XCore/unaligned_load.ll
+++ b/test/CodeGen/XCore/unaligned_load.ll
@@ -1,20 +1,19 @@
-; RUN: llc < %s -march=xcore > %t1.s
-; RUN: grep "bl __misaligned_load" %t1.s | count 1
-; RUN: grep ld16s %t1.s | count 2
-; RUN: grep ldw %t1.s | count 2
-; RUN: grep shl %t1.s | count 2
-; RUN: grep shr %t1.s | count 1
-; RUN: grep zext %t1.s | count 1
-; RUN: grep "or " %t1.s | count 2
+; RUN: llc < %s -march=xcore | FileCheck %s
 
-; Byte aligned load. Expands to call to __misaligned_load.
+; Byte aligned load.
+; CHECK: align1:
+; CHECK: bl __misaligned_load
 define i32 @align1(i32* %p) nounwind {
 entry:
 	%0 = load i32* %p, align 1		; <i32> [#uses=1]
 	ret i32 %0
 }
 
-; Half word aligned load. Expands to two 16bit loads.
+; Half word aligned load.
+; CHECK: align2:
+; CHECK: ld16s
+; CHECK: ld16s
+; CHECK: or
 define i32 @align2(i32* %p) nounwind {
 entry:
 	%0 = load i32* %p, align 2		; <i32> [#uses=1]
@@ -23,7 +22,11 @@ entry:
 
 @a = global [5 x i8] zeroinitializer, align 4
 
-; Constant offset from word aligned base. Expands to two 32bit loads.
+; Constant offset from word aligned base.
+; CHECK: align3:
+; CHECK: ldw {{r[0-9]+}}, dp
+; CHECK: ldw {{r[0-9]+}}, dp
+; CHECK: or
 define i32 @align3() nounwind {
 entry:
 	%0 = load i32* bitcast (i8* getelementptr ([5 x i8]* @a, i32 0, i32 1) to i32*), align 1
diff --git a/test/CodeGen/XCore/unaligned_store.ll b/test/CodeGen/XCore/unaligned_store.ll
index 62078e6..94e1852 100644
--- a/test/CodeGen/XCore/unaligned_store.ll
+++ b/test/CodeGen/XCore/unaligned_store.ll
@@ -1,16 +1,18 @@
-; RUN: llc < %s -march=xcore > %t1.s
-; RUN: grep "bl __misaligned_store" %t1.s | count 1
-; RUN: grep st16 %t1.s | count 2
-; RUN: grep shr %t1.s | count 1
+; RUN: llc < %s -march=xcore | FileCheck %s
 
-; Byte aligned store. Expands to call to __misaligned_store.
+; Byte aligned store.
+; CHECK: align1:
+; CHECK: bl __misaligned_store
 define void @align1(i32* %p, i32 %val) nounwind {
 entry:
 	store i32 %val, i32* %p, align 1
 	ret void
 }
 
-; Half word aligned store. Expands to two 16bit stores.
+; Half word aligned store.
+; CHECK: align2:
+; CHECK: st16
+; CHECK: st16
 define void @align2(i32* %p, i32 %val) nounwind {
 entry:
 	store i32 %val, i32* %p, align 2
diff --git a/test/DebugInfo/2010-03-19-DbgDeclare.ll b/test/DebugInfo/2010-03-19-DbgDeclare.ll
index 1f7a889..9f52d11 100644
--- a/test/DebugInfo/2010-03-19-DbgDeclare.ll
+++ b/test/DebugInfo/2010-03-19-DbgDeclare.ll
@@ -1,12 +1,17 @@
-; RUN: llvm-as < %s | opt -verify -disable-output
+; RUN: llvm-as < %s | opt -verify -S -asm-verbose | FileCheck %s
+
+; CHECK: lang 0x8001
 
 define void @Foo(i32 %a, i32 %b) {
 entry:
   call void @llvm.dbg.declare(metadata !{i32* null}, metadata !1)
   ret void
 }
-
+!llvm.dbg.cu = !{!2}
+!2 = metadata !{i32 786449, metadata !4, i32 32769, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 0, metadata !3, metadata !3, metadata !3, metadata !3,  metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/blaikie/dev/scratch/scratch.cpp] [lang 0x8001]
+!3 = metadata !{}
 !0 = metadata !{i32 662302, i32 26, metadata !1, null}
 !1 = metadata !{i32 4, metadata !"foo"}
+!4 = metadata !{metadata !"scratch.cpp", metadata !"/usr/local/google/home/blaikie/dev/scratch"}
 
 declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
diff --git a/test/DebugInfo/2010-03-24-MemberFn.ll b/test/DebugInfo/2010-03-24-MemberFn.ll
index 06c2367..15197f4 100644
--- a/test/DebugInfo/2010-03-24-MemberFn.ll
+++ b/test/DebugInfo/2010-03-24-MemberFn.ll
@@ -43,7 +43,7 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 !2 = metadata !{i32 786443, metadata !3, i32 3, i32 0} ; [ DW_TAG_lexical_block ]
 !3 = metadata !{i32 786478, metadata !4, metadata !4, metadata !"bar", metadata !"bar", metadata !"_Z3barv", i32 3, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 ()* @_Z3barv, null, null, null, i32 3} ; [ DW_TAG_subprogram ]
 !4 = metadata !{i32 786473, metadata !25} ; [ DW_TAG_file_type ]
-!5 = metadata !{i32 786449, i32 4, metadata !4, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, null, null, metadata !24, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!5 = metadata !{i32 786449, i32 4, metadata !4, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, null, null, metadata !24, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
 !6 = metadata !{i32 786453, metadata !25, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !7 = metadata !{metadata !8}
 !8 = metadata !{i32 786468, metadata !25, metadata !4, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
diff --git a/test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll b/test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll
index dd98db9..7f8e418 100644
--- a/test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll
+++ b/test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll
@@ -57,7 +57,7 @@ entry:
 !1 = metadata !{i32 786443, metadata !2, i32 15, i32 12} ; [ DW_TAG_lexical_block ]
 !2 = metadata !{i32 786478, metadata !3, metadata !"main", metadata !"main", metadata !"main", metadata !3, i32 15, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 ()* @main, null, null, null, i32 15} ; [ DW_TAG_subprogram ]
 !3 = metadata !{i32 786473, metadata !"one.cc", metadata !"/tmp", metadata !4} ; [ DW_TAG_file_type ]
-!4 = metadata !{i32 786449, i32 4, metadata !3, metadata !"clang 1.5", i1 false, metadata !"", i32 0, null, null, metadata !37, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!4 = metadata !{i32 786449, i32 4, metadata !3, metadata !"clang 1.5", i1 false, metadata !"", i32 0, null, null, metadata !37, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
 !5 = metadata !{i32 786453, metadata !3, metadata !3, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !6 = metadata !{metadata !7}
 !7 = metadata !{i32 786468, metadata !3, metadata !3, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
diff --git a/test/DebugInfo/2010-04-19-FramePtr.ll b/test/DebugInfo/2010-04-19-FramePtr.ll
index f9e90cd..88eebe6 100644
--- a/test/DebugInfo/2010-04-19-FramePtr.ll
+++ b/test/DebugInfo/2010-04-19-FramePtr.ll
@@ -25,7 +25,7 @@ return:                                           ; preds = %entry
 !0 = metadata !{i32 2, i32 0, metadata !1, null}
 !1 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 2, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 ()* @foo, null, null, null, i32 2} ; [ DW_TAG_subprogram ]
 !2 = metadata !{i32 786473, metadata !"a.c", metadata !"/tmp", metadata !3} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786449, i32 1, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, null, null, metadata !9, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786449, i32 1, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, null, null, metadata !9, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
 !4 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !5 = metadata !{metadata !6}
 !6 = metadata !{i32 786468, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
diff --git a/test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll b/test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll
index e44362d4..f5ebb2d 100644
--- a/test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll
+++ b/test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll
@@ -25,7 +25,7 @@ entry:
 
 !0 = metadata !{i32 786478, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 9, metadata !3, i1 true, i1 true, i32 0, i32 0, null, i1 false, i1 true, null, null, null, metadata !24, i32 9} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786473, metadata !27} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, i32 1, metadata !1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, null, null, metadata !25, metadata !26, metadata !""} ; [ DW_TAG_compile_unit ]
+!2 = metadata !{i32 786449, i32 1, metadata !1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, null, null, metadata !25, metadata !26,  metadata !26, metadata !""} ; [ DW_TAG_compile_unit ]
 !3 = metadata !{i32 786453, metadata !27, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !4 = metadata !{metadata !5, metadata !5}
 !5 = metadata !{i32 786468, metadata !27, metadata !1, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
diff --git a/test/DebugInfo/AArch64/dwarfdump.ll b/test/DebugInfo/AArch64/dwarfdump.ll
index 673c789..bcdd462 100644
--- a/test/DebugInfo/AArch64/dwarfdump.ll
+++ b/test/DebugInfo/AArch64/dwarfdump.ll
@@ -22,7 +22,7 @@ attributes #0 = { nounwind }
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, metadata !9, i32 12, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !2, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/home/timnor01/llvm/build/tmp.c] [DW_LANG_C99]
+!0 = metadata !{i32 786449, metadata !9, i32 12, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !2, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/home/timnor01/llvm/build/tmp.c] [DW_LANG_C99]
 !1 = metadata !{i32 0}
 !2 = metadata !{metadata !3}
 !3 = metadata !{i32 786478, metadata !4, metadata !"main", metadata !"main", metadata !"", metadata !4, i32 1, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main, null, null, metadata !1, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [main]
diff --git a/test/DebugInfo/AArch64/eh_frame.ll b/test/DebugInfo/AArch64/eh_frame.ll
deleted file mode 100644
index 2539c56..0000000
--- a/test/DebugInfo/AArch64/eh_frame.ll
+++ /dev/null
@@ -1,51 +0,0 @@
-; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu %s -filetype=obj -o %t
-; RUN: llvm-objdump -s %t | FileCheck %s
-@var = global i32 0
-
-declare void @bar()
-
-define i64 @check_largest_class(i32 %in)  {
-  %res = load i32* @var
-  call void @bar()
-  %ext = zext i32 %res to i64
-  ret i64 %ext
-}
-
-; The really key points we're checking here are:
-;  * Return register is x30.
-;  * Pointer format is 0x1b (GNU doesn't appear to understand others).
-
-; The rest is largely incidental, but not expected to change regularly.
-
-; Output is:
-
-; CHECK: Contents of section .eh_frame:
-; CHECK-NEXT: 0000 10000000 00000000 017a5200 017c1e01  .........zR..|..
-; CHECK-NEXT: 0010 1b0c1f00 18000000 18000000 00000000  ................
-
-
-; Won't check the rest, it's rather incidental.
-; 0020 24000000 00440c1f 10449e02 93040000  $....D...D......
-
-
-; The first CIE:
-; -------------------
-; 10000000: length of first CIE = 0x10
-; 00000000: This is a CIE
-; 01: version = 0x1
-; 7a 52 00: augmentation string "zR" -- pointer format is specified
-; 01: code alignment factor 1
-; 7c: data alignment factor -4
-; 1e: return address register 30 (== x30).
-; 01: 1 byte of augmentation
-; 1b: pointer format 1b: DW_EH_PE_pcrel | DW_EH_PE_sdata4
-; 0c 1f 00: initial instructions: "DW_CFA_def_cfa x31 ofs 0" in this case
-
-; Next the FDE:
-; -------------
-; 18000000: FDE length 0x18
-; 18000000: Uses CIE 0x18 backwards (only coincidentally same as above)
-; 00000000: PC begin for this FDE is at 00000000 (relocation is applied here)
-; 24000000: FDE applies up to PC begin+0x24
-; 00: Augmentation string length 0 for this FDE
-; Rest: call frame instructions
diff --git a/test/DebugInfo/AArch64/eh_frame.s b/test/DebugInfo/AArch64/eh_frame.s
new file mode 100644
index 0000000..d8d6b6d
--- /dev/null
+++ b/test/DebugInfo/AArch64/eh_frame.s
@@ -0,0 +1,48 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -filetype=obj %s -o %t
+// RUN: llvm-objdump -s %t | FileCheck %s
+        .text
+        .globl foo
+        .type foo,@function
+foo:
+        .cfi_startproc
+        ret
+        .cfi_endproc
+
+// The really key points we're checking here are:
+//  * Return register is x30.
+//  * Pointer format is 0x1b (GNU doesn't appear to understand others).
+
+// The rest is largely incidental, but not expected to change regularly.
+
+// Output is:
+
+// CHECK: Contents of section .eh_frame:
+// CHECK-NEXT: 0000 10000000 00000000 017a5200 017c1e01  .........zR..|..
+// CHECK-NEXT: 0010 1b0c1f00 10000000 18000000 00000000  ................
+
+
+// Won't check the rest, it's rather incidental.
+// 0020 04000000 00000000                    ........
+
+
+
+// The first CIE:
+// -------------------
+// 10000000: length of first CIE = 0x10
+// 00000000: This is a CIE
+// 01: version = 0x1
+// 7a 52 00: augmentation string "zR" -- pointer format is specified
+// 01: code alignment factor 1
+// 7c: data alignment factor -4
+// 1e: return address register 30 (== x30).
+// 01: 1 byte of augmentation
+// 1b: pointer format 1b: DW_EH_PE_pcrel | DW_EH_PE_sdata4
+// 0c 1f 00: initial instructions: "DW_CFA_def_cfa x31 ofs 0" in this case
+
+// Next the FDE:
+// -------------
+// 10000000: FDE length 0x10
+// 18000000: Uses CIE 0x18 bytes back from this field (i.e. the CIE at offset 0)
+// 00000000: PC begin for this FDE is at 00000000 (relocation is applied here)
+// 04000000: FDE applies up to PC begin+0x4
+// 00: Augmentation string length 0 for this FDE
diff --git a/test/DebugInfo/AArch64/variable-loc.ll b/test/DebugInfo/AArch64/variable-loc.ll
index ac3037e..ba9e13a 100644
--- a/test/DebugInfo/AArch64/variable-loc.ll
+++ b/test/DebugInfo/AArch64/variable-loc.ll
@@ -69,7 +69,7 @@ declare i32 @printf(i8*, ...)
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, metadata !29, i32 12, metadata !"clang version 3.2 ", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/home/timnor01/a64-trunk/build/simple.c] [DW_LANG_C99]
+!0 = metadata !{i32 786449, metadata !29, i32 12, metadata !"clang version 3.2 ", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/home/timnor01/a64-trunk/build/simple.c] [DW_LANG_C99]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5, metadata !11, metadata !14}
 !5 = metadata !{i32 786478, metadata !6, metadata !"populate_array", metadata !"populate_array", metadata !"", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32*, i32)* @populate_array, null, null, metadata !1, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [populate_array]
diff --git a/test/DebugInfo/Inputs/dwarfdump-test-zlib.cc b/test/DebugInfo/Inputs/dwarfdump-test-zlib.cc
new file mode 100644
index 0000000..260c3c4
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-test-zlib.cc
@@ -0,0 +1,24 @@
+class DummyClass {
+  int a_;
+ public:
+  DummyClass(int a) : a_(a) {}
+  int add(int b) {
+    return a_ + b;
+  }
+};
+
+int f(int a, int b) {
+  DummyClass c(a);
+  return c.add(b);
+}
+
+int main() {
+  return f(2, 3);
+}
+
+// Built with Clang 3.2 and ld.gold linker:
+// $ mkdir -p /tmp/dbginfo
+// $ cp dwarfdump-test-zlib.cc /tmp/dbginfo
+// $ cd /tmp/dbginfo
+// $ clang++ -g dwarfdump-test-zlib.cc -Wl,--compress-debug-sections=zlib \
+//   -o <output>
diff --git a/test/DebugInfo/Inputs/dwarfdump-test-zlib.elf-x86-64 b/test/DebugInfo/Inputs/dwarfdump-test-zlib.elf-x86-64
new file mode 100755
index 0000000..16b3153
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-test-zlib.elf-x86-64
diff --git a/test/DebugInfo/SystemZ/eh_frame.s b/test/DebugInfo/SystemZ/eh_frame.s
new file mode 100644
index 0000000..4e7afd5
--- /dev/null
+++ b/test/DebugInfo/SystemZ/eh_frame.s
@@ -0,0 +1,54 @@
+# RUN: llvm-mc -triple=s390x-linux-gnu -filetype=obj %s -o %t
+# RUN: llvm-objdump -s %t | FileCheck %s
+
+	.text
+	.globl	check_largest_class
+	.align	4
+	.type	check_largest_class,@function
+check_largest_class:
+	.cfi_startproc
+	stmg	%r13, %r15, 104(%r15)
+	.cfi_offset %r13, -56
+	.cfi_offset %r14, -48
+	.cfi_offset %r15, -40
+	aghi	%r15, -160
+	.cfi_def_cfa_offset 320
+	lmg	%r13, %r15, 264(%r15)
+	br	%r14
+	.size	check_largest_class, .-check_largest_class
+	.cfi_endproc
+
+# The readelf rendering is:
+#
+# Contents of the .eh_frame section:
+#
+# 00000000 00000014 00000000 CIE
+#   Version:               1
+#   Augmentation:          "zR"
+#   Code alignment factor: 1
+#   Data alignment factor: -8
+#   Return address column: 14
+#   Augmentation data:     1b
+#
+#   DW_CFA_def_cfa: r15 ofs 160
+#   DW_CFA_nop
+#   DW_CFA_nop
+#   DW_CFA_nop
+#
+# 00000018 0000001c 0000001c FDE cie=00000000 pc=00000000..00000012
+#   DW_CFA_advance_loc: 6 to 00000006
+#   DW_CFA_offset: r13 at cfa-56
+#   DW_CFA_offset: r14 at cfa-48
+#   DW_CFA_offset: r15 at cfa-40
+#   DW_CFA_advance_loc: 4 to 0000000a
+#   DW_CFA_def_cfa_offset: 320
+#   DW_CFA_nop
+#   DW_CFA_nop
+#   DW_CFA_nop
+#   DW_CFA_nop
+#
+# CHECK: Contents of section .eh_frame:
+# CHECK-NEXT: 0000 00000014 00000000 017a5200 01780e01  .........zR..x..
+# CHECK-NEXT: 0010 1b0c0fa0 01000000 0000001c 0000001c  ................
+# CHECK-NEXT: 0020 00000000 00000012 00468d07 8e068f05  .........F......
+# CHECK-NEXT: 0030 440ec002 00000000                    D.......
diff --git a/test/DebugInfo/SystemZ/eh_frame_personality.ll b/test/DebugInfo/SystemZ/eh_frame_personality.ll
new file mode 100644
index 0000000..92ba34d
--- /dev/null
+++ b/test/DebugInfo/SystemZ/eh_frame_personality.ll
@@ -0,0 +1,45 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=s390x-linux-gnu | FileCheck -check-prefix=CHECK-FUNC %s
+; RUN: llc < %s -verify-machineinstrs -mtriple=s390x-linux-gnu | FileCheck -check-prefix=CHECK-ET %s
+; RUN: llc < %s -verify-machineinstrs -mtriple=s390x-linux-gnu -relocation-model=pic | FileCheck -check-prefix=CHECK-REF %s
+
+declare i32 @__gxx_personality_v0(...)
+
+declare void @bar()
+
+define i64 @foo(i64 %lhs, i64 %rhs) {
+  invoke void @bar() to label %end unwind label %clean
+end:
+ ret i64 0
+
+clean:
+  %tst = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) cleanup
+  ret i64 42
+}
+
+; CHECK-FUNC: foo:
+; CHECK-FUNC: .cfi_startproc
+; CHECK-FUNC: .cfi_personality 0, __gxx_personality_v0
+; CHECK-FUNC: .cfi_lsda 0, .Lexception0
+; CHECK-FUNC: stmg	%r14, %r15, 112(%r15)
+; CHECK-FUNC: .cfi_offset %r14, -48
+; CHECK-FUNC: .cfi_offset %r15, -40
+; CHECK-FUNC: aghi	%r15, -160
+; CHECK-FUNC: .cfi_def_cfa_offset 320
+; ...main function...
+; CHECK-FUNC: .cfi_endproc
+;
+; CHECK-ET: .section	.gcc_except_table,"a",@progbits
+; CHECK-ET-NEXT: .align	4
+; CHECK-ET-NEXT: GCC_except_table0:
+; CHECK-ET-NEXT: .Lexception0:
+;
+; CHECK-REF: .cfi_personality 155, DW.ref.__gxx_personality_v0
+; CHECK-REF: .cfi_lsda 27, .Lexception0
+; CHECK-REF: .hidden	DW.ref.__gxx_personality_v0
+; CHECK-REF: .weak	DW.ref.__gxx_personality_v0
+; CHECK-REF: .section	.data.DW.ref.__gxx_personality_v0,"aGw",@progbits,DW.ref.__gxx_personality_v0,comdat
+; CHECK-REF-NEXT: .align	8
+; CHECK-REF-NEXT: .type	DW.ref.__gxx_personality_v0,@object
+; CHECK-REF-NEXT: .size	DW.ref.__gxx_personality_v0, 8
+; CHECK-REF-NEXT: DW.ref.__gxx_personality_v0:
+; CHECK-REF-NEXT: .quad	__gxx_personality_v0
diff --git a/test/DebugInfo/SystemZ/eh_frame_personality.s b/test/DebugInfo/SystemZ/eh_frame_personality.s
new file mode 100644
index 0000000..46b46db
--- /dev/null
+++ b/test/DebugInfo/SystemZ/eh_frame_personality.s
@@ -0,0 +1,67 @@
+# RUN: llvm-mc -triple=s390x-linux-gnu -filetype=obj %s -o %t
+# RUN: llvm-objdump -s %t | FileCheck %s
+
+	.text
+	.globl	foo
+	.align	4
+	.type	foo,@function
+foo:                                    # @foo
+	.cfi_startproc
+	.cfi_personality 155, DW.ref.__gxx_personality_v0
+	.cfi_lsda 27, .Lexception0
+	stmg	%r14, %r15, 112(%r15)
+	.cfi_offset %r14, -48
+	.cfi_offset %r15, -40
+	aghi	%r15, -160
+	.cfi_def_cfa_offset 320
+	lmg	%r14, %r15, 272(%r15)
+	br	%r14
+	.size	foo, .-foo
+	.cfi_endproc
+
+	.section	.gcc_except_table,"a",@progbits
+	.align	4
+.Lexception0:
+
+	.hidden	DW.ref.__gxx_personality_v0
+	.weak	DW.ref.__gxx_personality_v0
+	.section	.data.DW.ref.__gxx_personality_v0,"aGw",@progbits,DW.ref.__gxx_personality_v0,comdat
+	.align	8
+	.type	DW.ref.__gxx_personality_v0,@object
+	.size	DW.ref.__gxx_personality_v0, 8
+DW.ref.__gxx_personality_v0:
+	.quad	__gxx_personality_v0
+
+# The readelf rendering is:
+#
+# Contents of the .eh_frame section:
+#
+# 00000000 0000001c 00000000 CIE
+#   Version:               1
+#   Augmentation:          "zPLR"
+#   Code alignment factor: 1
+#   Data alignment factor: -8
+#   Return address column: 14
+#   Augmentation data:     9b ff ff ff ed 1b 1b
+#
+#   DW_CFA_def_cfa: r15 ofs 160
+#   DW_CFA_nop
+#   DW_CFA_nop
+#   DW_CFA_nop
+#
+# 00000020 0000001c 00000024 FDE cie=00000000 pc=00000000..00000012
+#   Augmentation data:     ff ff ff cf
+#
+#   DW_CFA_advance_loc: 6 to 00000006
+#   DW_CFA_offset: r14 at cfa-48
+#   DW_CFA_offset: r15 at cfa-40
+#   DW_CFA_advance_loc: 4 to 0000000a
+#   DW_CFA_def_cfa_offset: 320
+#   DW_CFA_nop
+#   DW_CFA_nop
+#
+# CHECK: Contents of section .eh_frame:
+# CHECK-NEXT: 0000 0000001c 00000000 017a504c 52000178  .........zPLR..x
+# CHECK-NEXT: 0010 0e079b00 0000001b 1b0c0fa0 01000000  ................
+# CHECK-NEXT: 0020 0000001c 00000024 00000000 00000012  .......$........
+# CHECK-NEXT: 0030 04000000 00468e06 8f05440e c0020000  .....F....D.....
diff --git a/test/DebugInfo/SystemZ/lit.local.cfg b/test/DebugInfo/SystemZ/lit.local.cfg
new file mode 100644
index 0000000..a70a685
--- /dev/null
+++ b/test/DebugInfo/SystemZ/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.s']
+
+targets = set(config.root.targets_to_build.split())
+if not 'SystemZ' in targets:
+    config.unsupported = True
+
diff --git a/test/DebugInfo/SystemZ/variable-loc.ll b/test/DebugInfo/SystemZ/variable-loc.ll
new file mode 100644
index 0000000..e6f4ff9
--- /dev/null
+++ b/test/DebugInfo/SystemZ/variable-loc.ll
@@ -0,0 +1,85 @@
+; RUN: llc -mtriple=s390x-linux-gnu -disable-fp-elim < %s | FileCheck %s
+;
+; This is a regression test making sure the location of variables is correct in
+; debugging information, even if they're addressed via the frame pointer.
+; A copy of the AArch64 test, commandeered for SystemZ.
+;
+; First make sure main_arr is where we expect it: %r11 + 164
+;
+; CHECK: main:
+; CHECK: aghi    %r15, -568
+; CHECK: la      [[MAIN_ARR:%r[0-9]+]], 164(%r11)
+; CHECK: lgr     %r2, [[MAIN_ARR]]
+; CHECK: brasl   %r14, populate_array@PLT
+;
+; Now check that the debugging information reflects this:
+; CHECK: DW_TAG_variable
+; CHECK-NEXT: .long .Linfo_string7
+;
+; Rather hard-coded, but 145 => DW_OP_fbreg and the .ascii is the sleb128
+; encoding of 164:
+; CHECK: DW_AT_location
+; CHECK-NEXT: .byte 145
+; CHECK-NEXT: .ascii "\244\001"
+;
+; CHECK: .Linfo_string7:
+; CHECK-NEXT: main_arr
+
+
+@.str = private unnamed_addr constant [13 x i8] c"Total is %d\0A\00", align 2
+
+declare void @populate_array(i32*, i32) nounwind
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+declare i32 @sum_array(i32*, i32) nounwind
+
+define i32 @main() nounwind {
+entry:
+  %retval = alloca i32, align 4
+  %main_arr = alloca [100 x i32], align 4
+  %val = alloca i32, align 4
+  store i32 0, i32* %retval
+  call void @llvm.dbg.declare(metadata !{[100 x i32]* %main_arr}, metadata !17), !dbg !22
+  call void @llvm.dbg.declare(metadata !{i32* %val}, metadata !23), !dbg !24
+  %arraydecay = getelementptr inbounds [100 x i32]* %main_arr, i32 0, i32 0, !dbg !25
+  call void @populate_array(i32* %arraydecay, i32 100), !dbg !25
+  %arraydecay1 = getelementptr inbounds [100 x i32]* %main_arr, i32 0, i32 0, !dbg !26
+  %call = call i32 @sum_array(i32* %arraydecay1, i32 100), !dbg !26
+  store i32 %call, i32* %val, align 4, !dbg !26
+  %0 = load i32* %val, align 4, !dbg !27
+  %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([13 x i8]* @.str, i32 0, i32 0), i32 %0), !dbg !27
+  ret i32 0, !dbg !28
+}
+
+declare i32 @printf(i8*, ...)
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, metadata !29, i32 12, metadata !"clang version 3.2 ", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/home/timnor01/a64-trunk/build/simple.c] [DW_LANG_C99]
+!1 = metadata !{i32 0}
+!3 = metadata !{metadata !5, metadata !11, metadata !14}
+!5 = metadata !{i32 786478, metadata !6, metadata !"populate_array", metadata !"populate_array", metadata !"", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32*, i32)* @populate_array, null, null, metadata !1, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [populate_array]
+!6 = metadata !{i32 786473, metadata !29} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = metadata !{null, metadata !9, metadata !10}
+!9 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int]
+!10 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!11 = metadata !{i32 786478, metadata !6, metadata !"sum_array", metadata !"sum_array", metadata !"", metadata !6, i32 9, metadata !12, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32*, i32)* @sum_array, null, null, metadata !1, i32 9} ; [ DW_TAG_subprogram ] [line 9] [def] [sum_array]
+!12 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !13, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!13 = metadata !{metadata !10, metadata !9, metadata !10}
+!14 = metadata !{i32 786478, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 18, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !1, i32 18} ; [ DW_TAG_subprogram ] [line 18] [def] [main]
+!15 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!16 = metadata !{metadata !10}
+!17 = metadata !{i32 786688, metadata !18, metadata !"main_arr", metadata !6, i32 19, metadata !19, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [main_arr] [line 19]
+!18 = metadata !{i32 786443, metadata !6, metadata !14, i32 18, i32 16, i32 4} ; [ DW_TAG_lexical_block ] [/home/timnor01/a64-trunk/build/simple.c]
+!19 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 3200, i64 32, i32 0, i32 0, metadata !10, metadata !20, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 3200, align 32, offset 0] [from int]
+!20 = metadata !{i32 786465, i64 0, i64 99}       ; [ DW_TAG_subrange_type ] [0, 99]
+!22 = metadata !{i32 19, i32 7, metadata !18, null}
+!23 = metadata !{i32 786688, metadata !18, metadata !"val", metadata !6, i32 20, metadata !10, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [val] [line 20]
+!24 = metadata !{i32 20, i32 7, metadata !18, null}
+!25 = metadata !{i32 22, i32 3, metadata !18, null}
+!26 = metadata !{i32 23, i32 9, metadata !18, null}
+!27 = metadata !{i32 24, i32 3, metadata !18, null}
+!28 = metadata !{i32 26, i32 3, metadata !18, null}
+!29 = metadata !{metadata !"simple.c", metadata !"/home/timnor01/a64-trunk/build"}
diff --git a/test/DebugInfo/SystemZ/variable-loc.s b/test/DebugInfo/SystemZ/variable-loc.s
new file mode 100644
index 0000000..d439a46
--- /dev/null
+++ b/test/DebugInfo/SystemZ/variable-loc.s
@@ -0,0 +1,340 @@
+# RUN: llvm-mc < %s -triple=s390x-linux-gnu -filetype=obj | llvm-dwarfdump - | FileCheck %s
+#
+# We use both R_390_32 and R_390_64 to encode the dwarf information.
+# Test that they are used correctly.  This uses the assembly output
+# for variable-loc.ll
+#
+# A couple of R_390_32s, both at 0 and elsewhere:
+#
+# CHECK: DW_AT_producer [DW_FORM_strp] ( .debug_str[0x00000000] = "clang version 3.2 ")
+# CHECK: DW_AT_name [DW_FORM_strp] ( .debug_str[0x00000013] = "simple.c")
+#
+# A couple of R_390_64s similarly:
+#
+# CHECK: DW_AT_low_pc [DW_FORM_addr] (0x0000000000000000)
+# CHECK: DW_AT_high_pc [DW_FORM_addr] (0x0000000000000050)
+
+
+	.file	"test/DebugInfo/SystemZ/variable-loc.ll"
+	.section	.debug_info,"",@progbits
+.Lsection_info:
+	.section	.debug_abbrev,"",@progbits
+.Lsection_abbrev:
+	.section	.debug_aranges,"",@progbits
+	.section	.debug_macinfo,"",@progbits
+	.section	.debug_line,"",@progbits
+.Lsection_line:
+	.section	.debug_loc,"",@progbits
+	.section	.debug_pubtypes,"",@progbits
+	.section	.debug_str,"MS",@progbits,1
+.Linfo_string:
+	.section	.debug_ranges,"",@progbits
+.Ldebug_range:
+	.section	.debug_loc,"",@progbits
+.Lsection_debug_loc:
+	.text
+.Ltext_begin:
+	.data
+	.file	1 "simple.c"
+	.file	2 "<stdin>"
+	.text
+	.globl	main
+	.align	4
+	.type	main,@function
+main:                                   # @main
+	.cfi_startproc
+.Lfunc_begin0:
+	.loc	2 18 0                  # :18:0
+# BB#0:                                 # %entry
+	stmg	%r12, %r15, 96(%r15)
+.Ltmp2:
+	.cfi_offset %r12, -64
+.Ltmp3:
+	.cfi_offset %r13, -56
+.Ltmp4:
+	.cfi_offset %r14, -48
+.Ltmp5:
+	.cfi_offset %r15, -40
+	aghi	%r15, -568
+.Ltmp6:
+	.cfi_def_cfa_offset 728
+	mvhi	564(%r15), 0
+	la	%r13, 164(%r15)
+	lhi	%r12, 100
+	.loc	2 22 3 prologue_end     # :22:3
+.Ltmp7:
+	lgr	%r2, %r13
+	lr	%r3, %r12
+	brasl	%r14, populate_array@PLT
+	.loc	2 23 9                  # :23:9
+	lgr	%r2, %r13
+	lr	%r3, %r12
+	brasl	%r14, sum_array@PLT
+	lr	%r0, %r2
+	st	%r0, 160(%r15)
+	.loc	2 24 3                  # :24:3
+	larl	%r2, .L.str
+	lr	%r3, %r0
+	brasl	%r14, printf@PLT
+	lhi	%r2, 0
+	.loc	2 26 3                  # :26:3
+	lmg	%r12, %r15, 664(%r15)
+	br	%r14
+.Ltmp8:
+.Ltmp9:
+	.size	main, .Ltmp9-main
+.Lfunc_end0:
+	.cfi_endproc
+
+	.type	.L.str,@object          # @.str
+	.section	.rodata.str1.2,"aMS",@progbits,1
+	.align	2
+.L.str:
+	.asciz	 "Total is %d\n"
+	.size	.L.str, 13
+
+	.cfi_sections .debug_frame
+	.text
+.Ltext_end:
+	.data
+.Ldata_end:
+	.text
+.Lsection_end1:
+	.section	.debug_info,"",@progbits
+.L.debug_info_begin0:
+	.long	155                     # Length of Compilation Unit Info
+	.short	2                       # DWARF version number
+	.long	.L.debug_abbrev_begin   # Offset Into Abbrev. Section
+	.byte	8                       # Address Size (in bytes)
+	.byte	1                       # Abbrev [1] 0xb:0x94 DW_TAG_compile_unit
+	.long	.Linfo_string0          # DW_AT_producer
+	.short	12                      # DW_AT_language
+	.long	.Linfo_string1          # DW_AT_name
+	.quad	0                       # DW_AT_low_pc
+	.long	.Lsection_line          # DW_AT_stmt_list
+	.long	.Linfo_string2          # DW_AT_comp_dir
+	.byte	2                       # Abbrev [2] 0x26:0x7 DW_TAG_subprogram
+	.long	.Linfo_string3          # DW_AT_name
+	.byte	2                       # DW_AT_decl_file
+	.byte	4                       # DW_AT_decl_line
+                                        # DW_AT_prototyped
+                                        # DW_AT_external
+	.byte	3                       # Abbrev [3] 0x2d:0xb DW_TAG_subprogram
+	.long	.Linfo_string4          # DW_AT_name
+	.byte	2                       # DW_AT_decl_file
+	.byte	9                       # DW_AT_decl_line
+                                        # DW_AT_prototyped
+	.long	56                      # DW_AT_type
+                                        # DW_AT_external
+	.byte	4                       # Abbrev [4] 0x38:0x7 DW_TAG_base_type
+	.long	.Linfo_string5          # DW_AT_name
+	.byte	5                       # DW_AT_encoding
+	.byte	4                       # DW_AT_byte_size
+	.byte	5                       # Abbrev [5] 0x3f:0xb DW_TAG_subprogram
+	.long	.Linfo_string6          # DW_AT_name
+	.byte	2                       # DW_AT_decl_file
+	.byte	18                      # DW_AT_decl_line
+                                        # DW_AT_prototyped
+	.long	56                      # DW_AT_type
+                                        # DW_AT_external
+                                        # DW_AT_declaration
+	.byte	6                       # Abbrev [6] 0x4a:0x7 DW_TAG_base_type
+	.long	.Linfo_string5          # DW_AT_name
+	.byte	4                       # DW_AT_byte_size
+	.byte	5                       # DW_AT_encoding
+	.byte	7                       # Abbrev [7] 0x51:0x5 DW_TAG_array_type
+	.long	56                      # DW_AT_type
+	.byte	8                       # Abbrev [8] 0x56:0x48 DW_TAG_subprogram
+	.long	63                      # DW_AT_specification
+	.quad	.Lfunc_begin0           # DW_AT_low_pc
+	.quad	.Lfunc_end0             # DW_AT_high_pc
+	.byte	1                       # DW_AT_frame_base
+	.byte	95
+                                        # DW_AT_APPLE_omit_frame_ptr
+	.byte	9                       # Abbrev [9] 0x6d:0x30 DW_TAG_lexical_block
+	.quad	.Ltmp7                  # DW_AT_low_pc
+	.quad	.Ltmp8                  # DW_AT_high_pc
+	.byte	10                      # Abbrev [10] 0x7e:0xf DW_TAG_variable
+	.long	.Linfo_string7          # DW_AT_name
+	.byte	2                       # DW_AT_decl_file
+	.byte	19                      # DW_AT_decl_line
+	.long	81                      # DW_AT_type
+	.byte	3                       # DW_AT_location
+	.byte	145
+	.ascii	 "\244\001"
+	.byte	10                      # Abbrev [10] 0x8d:0xf DW_TAG_variable
+	.long	.Linfo_string8          # DW_AT_name
+	.byte	2                       # DW_AT_decl_file
+	.byte	20                      # DW_AT_decl_line
+	.long	56                      # DW_AT_type
+	.byte	3                       # DW_AT_location
+	.byte	145
+	.ascii	 "\240\001"
+	.byte	0                       # End Of Children Mark
+	.byte	0                       # End Of Children Mark
+	.byte	0                       # End Of Children Mark
+.L.debug_info_end0:
+	.section	.debug_abbrev,"",@progbits
+.L.debug_abbrev_begin:
+	.byte	1                       # Abbreviation Code
+	.byte	17                      # DW_TAG_compile_unit
+	.byte	1                       # DW_CHILDREN_yes
+	.byte	37                      # DW_AT_producer
+	.byte	14                      # DW_FORM_strp
+	.byte	19                      # DW_AT_language
+	.byte	5                       # DW_FORM_data2
+	.byte	3                       # DW_AT_name
+	.byte	14                      # DW_FORM_strp
+	.byte	17                      # DW_AT_low_pc
+	.byte	1                       # DW_FORM_addr
+	.byte	16                      # DW_AT_stmt_list
+	.byte	6                       # DW_FORM_data4
+	.byte	27                      # DW_AT_comp_dir
+	.byte	14                      # DW_FORM_strp
+	.byte	0                       # EOM(1)
+	.byte	0                       # EOM(2)
+	.byte	2                       # Abbreviation Code
+	.byte	46                      # DW_TAG_subprogram
+	.byte	0                       # DW_CHILDREN_no
+	.byte	3                       # DW_AT_name
+	.byte	14                      # DW_FORM_strp
+	.byte	58                      # DW_AT_decl_file
+	.byte	11                      # DW_FORM_data1
+	.byte	59                      # DW_AT_decl_line
+	.byte	11                      # DW_FORM_data1
+	.byte	39                      # DW_AT_prototyped
+	.byte	25                      # DW_FORM_flag_present
+	.byte	63                      # DW_AT_external
+	.byte	25                      # DW_FORM_flag_present
+	.byte	0                       # EOM(1)
+	.byte	0                       # EOM(2)
+	.byte	3                       # Abbreviation Code
+	.byte	46                      # DW_TAG_subprogram
+	.byte	0                       # DW_CHILDREN_no
+	.byte	3                       # DW_AT_name
+	.byte	14                      # DW_FORM_strp
+	.byte	58                      # DW_AT_decl_file
+	.byte	11                      # DW_FORM_data1
+	.byte	59                      # DW_AT_decl_line
+	.byte	11                      # DW_FORM_data1
+	.byte	39                      # DW_AT_prototyped
+	.byte	25                      # DW_FORM_flag_present
+	.byte	73                      # DW_AT_type
+	.byte	19                      # DW_FORM_ref4
+	.byte	63                      # DW_AT_external
+	.byte	25                      # DW_FORM_flag_present
+	.byte	0                       # EOM(1)
+	.byte	0                       # EOM(2)
+	.byte	4                       # Abbreviation Code
+	.byte	36                      # DW_TAG_base_type
+	.byte	0                       # DW_CHILDREN_no
+	.byte	3                       # DW_AT_name
+	.byte	14                      # DW_FORM_strp
+	.byte	62                      # DW_AT_encoding
+	.byte	11                      # DW_FORM_data1
+	.byte	11                      # DW_AT_byte_size
+	.byte	11                      # DW_FORM_data1
+	.byte	0                       # EOM(1)
+	.byte	0                       # EOM(2)
+	.byte	5                       # Abbreviation Code
+	.byte	46                      # DW_TAG_subprogram
+	.byte	0                       # DW_CHILDREN_no
+	.byte	3                       # DW_AT_name
+	.byte	14                      # DW_FORM_strp
+	.byte	58                      # DW_AT_decl_file
+	.byte	11                      # DW_FORM_data1
+	.byte	59                      # DW_AT_decl_line
+	.byte	11                      # DW_FORM_data1
+	.byte	39                      # DW_AT_prototyped
+	.byte	25                      # DW_FORM_flag_present
+	.byte	73                      # DW_AT_type
+	.byte	19                      # DW_FORM_ref4
+	.byte	63                      # DW_AT_external
+	.byte	25                      # DW_FORM_flag_present
+	.byte	60                      # DW_AT_declaration
+	.byte	25                      # DW_FORM_flag_present
+	.byte	0                       # EOM(1)
+	.byte	0                       # EOM(2)
+	.byte	6                       # Abbreviation Code
+	.byte	36                      # DW_TAG_base_type
+	.byte	0                       # DW_CHILDREN_no
+	.byte	3                       # DW_AT_name
+	.byte	14                      # DW_FORM_strp
+	.byte	11                      # DW_AT_byte_size
+	.byte	11                      # DW_FORM_data1
+	.byte	62                      # DW_AT_encoding
+	.byte	11                      # DW_FORM_data1
+	.byte	0                       # EOM(1)
+	.byte	0                       # EOM(2)
+	.byte	7                       # Abbreviation Code
+	.byte	1                       # DW_TAG_array_type
+	.byte	0                       # DW_CHILDREN_no
+	.byte	73                      # DW_AT_type
+	.byte	19                      # DW_FORM_ref4
+	.byte	0                       # EOM(1)
+	.byte	0                       # EOM(2)
+	.byte	8                       # Abbreviation Code
+	.byte	46                      # DW_TAG_subprogram
+	.byte	1                       # DW_CHILDREN_yes
+	.byte	71                      # DW_AT_specification
+	.byte	19                      # DW_FORM_ref4
+	.byte	17                      # DW_AT_low_pc
+	.byte	1                       # DW_FORM_addr
+	.byte	18                      # DW_AT_high_pc
+	.byte	1                       # DW_FORM_addr
+	.byte	64                      # DW_AT_frame_base
+	.byte	10                      # DW_FORM_block1
+	.ascii	 "\347\177"             # DW_AT_APPLE_omit_frame_ptr
+	.byte	25                      # DW_FORM_flag_present
+	.byte	0                       # EOM(1)
+	.byte	0                       # EOM(2)
+	.byte	9                       # Abbreviation Code
+	.byte	11                      # DW_TAG_lexical_block
+	.byte	1                       # DW_CHILDREN_yes
+	.byte	17                      # DW_AT_low_pc
+	.byte	1                       # DW_FORM_addr
+	.byte	18                      # DW_AT_high_pc
+	.byte	1                       # DW_FORM_addr
+	.byte	0                       # EOM(1)
+	.byte	0                       # EOM(2)
+	.byte	10                      # Abbreviation Code
+	.byte	52                      # DW_TAG_variable
+	.byte	0                       # DW_CHILDREN_no
+	.byte	3                       # DW_AT_name
+	.byte	14                      # DW_FORM_strp
+	.byte	58                      # DW_AT_decl_file
+	.byte	11                      # DW_FORM_data1
+	.byte	59                      # DW_AT_decl_line
+	.byte	11                      # DW_FORM_data1
+	.byte	73                      # DW_AT_type
+	.byte	19                      # DW_FORM_ref4
+	.byte	2                       # DW_AT_location
+	.byte	10                      # DW_FORM_block1
+	.byte	0                       # EOM(1)
+	.byte	0                       # EOM(2)
+	.byte	0                       # EOM(3)
+.L.debug_abbrev_end:
+	.section	.debug_aranges,"",@progbits
+	.section	.debug_ranges,"",@progbits
+	.section	.debug_macinfo,"",@progbits
+	.section	.debug_str,"MS",@progbits,1
+.Linfo_string0:
+	.asciz	 "clang version 3.2 "
+.Linfo_string1:
+	.asciz	 "simple.c"
+.Linfo_string2:
+	.asciz	 "/home/timnor01/a64-trunk/build"
+.Linfo_string3:
+	.asciz	 "populate_array"
+.Linfo_string4:
+	.asciz	 "sum_array"
+.Linfo_string5:
+	.asciz	 "int"
+.Linfo_string6:
+	.asciz	 "main"
+.Linfo_string7:
+	.asciz	 "main_arr"
+.Linfo_string8:
+	.asciz	 "val"
+
+	.section	".note.GNU-stack","",@progbits
diff --git a/test/DebugInfo/X86/2010-04-13-PubType.ll b/test/DebugInfo/X86/2010-04-13-PubType.ll
index 5169647..5bebeaa 100644
--- a/test/DebugInfo/X86/2010-04-13-PubType.ll
+++ b/test/DebugInfo/X86/2010-04-13-PubType.ll
@@ -33,7 +33,7 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 !0 = metadata !{i32 786689, metadata !1, metadata !"x", metadata !2, i32 7, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ]
 !1 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 7, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 (%struct.X*, %struct.Y*)* @foo, null, null, null, i32 7} ; [ DW_TAG_subprogram ]
 !2 = metadata !{i32 786473, metadata !18} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786449, i32 1, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, null, null, metadata !17, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786449, i32 1, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, null, null, metadata !17, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
 !4 = metadata !{i32 786453, metadata !18, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !5 = metadata !{metadata !6, metadata !7, metadata !9}
 !6 = metadata !{i32 786468, metadata !18, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
diff --git a/test/DebugInfo/X86/2010-08-10-DbgConstant.ll b/test/DebugInfo/X86/2010-08-10-DbgConstant.ll
index d05dfc6..94eba6a 100644
--- a/test/DebugInfo/X86/2010-08-10-DbgConstant.ll
+++ b/test/DebugInfo/X86/2010-08-10-DbgConstant.ll
@@ -15,7 +15,7 @@ declare void @bar(i32)
 
 !0 = metadata !{i32 786478, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, void ()* @foo, null, null, null, i32 3} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786473, metadata !12} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, metadata !12, i32 12, metadata !"clang 2.8", i1 false, metadata !"", i32 0, null, null, metadata !10, metadata !11, metadata !""} ; [ DW_TAG_compile_unit ]
+!2 = metadata !{i32 786449, metadata !12, i32 12, metadata !"clang 2.8", i1 false, metadata !"", i32 0, null, null, metadata !10, metadata !11,  metadata !11, metadata !""} ; [ DW_TAG_compile_unit ]
 !3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !4 = metadata !{null}
 !5 = metadata !{i32 786471, i32 0, metadata !1, metadata !"ro", metadata !"ro", metadata !"ro", metadata !1, i32 1, metadata !6, i1 true, i1 true, i32 201, null} ; [ DW_TAG_constant ]
diff --git a/test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll b/test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll
index ad55db0..7b8d914 100644
--- a/test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll
+++ b/test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll
@@ -18,7 +18,7 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, metadata !20, i32 12, metadata !"clang version 3.0 (trunk)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !12, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, metadata !20, i32 12, metadata !"clang version 3.0 (trunk)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !12,  metadata !12, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
 !5 = metadata !{i32 720942, metadata !6, metadata !6, metadata !"f", metadata !"f", metadata !"", i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, i32 ()* @f, null, null, metadata !10} ; [ DW_TAG_subprogram ]
diff --git a/test/DebugInfo/X86/2011-12-16-BadStructRef.ll b/test/DebugInfo/X86/2011-12-16-BadStructRef.ll
index e248aa6..5464b87 100644
--- a/test/DebugInfo/X86/2011-12-16-BadStructRef.ll
+++ b/test/DebugInfo/X86/2011-12-16-BadStructRef.ll
@@ -88,7 +88,7 @@ entry:
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 720913, i32 4, metadata !6, metadata !"clang version 3.1 (trunk 146596)", i1 false, metadata !"", i32 0, metadata !1, metadata !3, metadata !27, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 720913, i32 4, metadata !6, metadata !"clang version 3.1 (trunk 146596)", i1 false, metadata !"", i32 0, metadata !1, metadata !3, metadata !27, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5, metadata !9}
 !5 = metadata !{i32 720898, metadata !82, null, metadata !"bar", i32 9, i64 128, i64 64, i32 0, i32 0, null, metadata !7, i32 0, null, null} ; [ DW_TAG_class_type ]
diff --git a/test/DebugInfo/X86/DW_AT_byte_size.ll b/test/DebugInfo/X86/DW_AT_byte_size.ll
index 84e3f63..dcacba1 100644
--- a/test/DebugInfo/X86/DW_AT_byte_size.ll
+++ b/test/DebugInfo/X86/DW_AT_byte_size.ll
@@ -24,7 +24,7 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 4, metadata !6, metadata !"clang version 3.1 (trunk 150996)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, i32 4, metadata !6, metadata !"clang version 3.1 (trunk 150996)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
 !5 = metadata !{i32 786478, metadata !6, metadata !"foo", metadata !"foo", metadata !"_Z3fooP1A", metadata !6, i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (%struct.A*)* @_Z3fooP1A, null, null, metadata !14, i32 3} ; [ DW_TAG_subprogram ]
diff --git a/test/DebugInfo/X86/DW_AT_location-reference.ll b/test/DebugInfo/X86/DW_AT_location-reference.ll
index 356360b..6f1aa41 100644
--- a/test/DebugInfo/X86/DW_AT_location-reference.ll
+++ b/test/DebugInfo/X86/DW_AT_location-reference.ll
@@ -49,7 +49,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 define void @f() nounwind {
 entry:
   %call = tail call i32 @g(i32 0, i32 0) nounwind, !dbg !8
-  store i32 %call, i32* @a, align 4, !dbg !8, !tbaa !9
+  store i32 %call, i32* @a, align 4, !dbg !8
   tail call void @llvm.dbg.value(metadata !12, i64 0, metadata !5), !dbg !13
   br label %while.body
 
@@ -63,7 +63,7 @@ while.body:                                       ; preds = %entry, %while.body
 while.end:                                        ; preds = %while.body
   tail call void @llvm.dbg.value(metadata !{i32 %mul}, i64 0, metadata !5), !dbg !14
   %call4 = tail call i32 @g(i32 %mul, i32 0) nounwind, !dbg !15
-  store i32 %call4, i32* @a, align 4, !dbg !15, !tbaa !9
+  store i32 %call4, i32* @a, align 4, !dbg !15
   tail call void @llvm.dbg.value(metadata !16, i64 0, metadata !5), !dbg !17
   br label %while.body9
 
@@ -77,7 +77,7 @@ while.body9:                                      ; preds = %while.end, %while.b
 while.end13:                                      ; preds = %while.body9
   tail call void @llvm.dbg.value(metadata !{i32 %mul12}, i64 0, metadata !5), !dbg !18
   %call15 = tail call i32 @g(i32 0, i32 %mul12) nounwind, !dbg !19
-  store i32 %call15, i32* @a, align 4, !dbg !19, !tbaa !9
+  store i32 %call15, i32* @a, align 4, !dbg !19
   ret void, !dbg !20
 }
 
@@ -89,16 +89,13 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !0 = metadata !{i32 786478, metadata !1, metadata !"f", metadata !"f", metadata !"", metadata !1, i32 4, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, void ()* @f, null, null, metadata !22, i32 4} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786473, metadata !23} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, i32 12, metadata !1, metadata !"clang version 3.0 (trunk)", i1 true, metadata !"", i32 0, null, null, metadata !21, null, null} ; [ DW_TAG_compile_unit ]
+!2 = metadata !{i32 786449, i32 12, metadata !1, metadata !"clang version 3.0 (trunk)", i1 true, metadata !"", i32 0, null, null, metadata !21, null,  null, null} ; [ DW_TAG_compile_unit ]
 !3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !4 = metadata !{null}
 !5 = metadata !{i32 786688, metadata !6, metadata !"x", metadata !1, i32 5, metadata !7, i32 0, null} ; [ DW_TAG_auto_variable ]
 !6 = metadata !{i32 786443, metadata !1, metadata !0, i32 4, i32 14, i32 0} ; [ DW_TAG_lexical_block ]
 !7 = metadata !{i32 786468, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !8 = metadata !{i32 6, i32 3, metadata !6, null}
-!9 = metadata !{metadata !"int", metadata !10}
-!10 = metadata !{metadata !"omnipotent char", metadata !11}
-!11 = metadata !{metadata !"Simple C/C++ TBAA", null}
 !12 = metadata !{i32 1}
 !13 = metadata !{i32 7, i32 3, metadata !6, null}
 !14 = metadata !{i32 8, i32 3, metadata !6, null}
diff --git a/test/DebugInfo/X86/DW_AT_object_pointer.ll b/test/DebugInfo/X86/DW_AT_object_pointer.ll
index a3ad26c..789f556 100644
--- a/test/DebugInfo/X86/DW_AT_object_pointer.ll
+++ b/test/DebugInfo/X86/DW_AT_object_pointer.ll
@@ -47,7 +47,7 @@ entry:
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, metadata !37, i32 4, metadata !"clang version 3.2 (trunk 163586) (llvm/trunk 163570)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/Users/echristo/debug-tests/bar.cpp] [DW_LANG_C_plus_plus]
+!0 = metadata !{i32 786449, metadata !37, i32 4, metadata !"clang version 3.2 (trunk 163586) (llvm/trunk 163570)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/Users/echristo/debug-tests/bar.cpp] [DW_LANG_C_plus_plus]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5, metadata !10, metadata !20}
 !5 = metadata !{i32 786478, metadata !6, metadata !6, metadata !"foo", metadata !"foo", metadata !"_Z3fooi", i32 7, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @_Z3fooi, null, null, metadata !1, i32 7} ; [ DW_TAG_subprogram ] [line 7] [def] [foo]
diff --git a/test/DebugInfo/X86/DW_AT_specification.ll b/test/DebugInfo/X86/DW_AT_specification.ll
index 07849f3..93e1ecf 100644
--- a/test/DebugInfo/X86/DW_AT_specification.ll
+++ b/test/DebugInfo/X86/DW_AT_specification.ll
@@ -16,7 +16,7 @@ entry:
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, metadata !27, i32 4, metadata !"clang version 3.0 ()", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !18, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, metadata !27, i32 4, metadata !"clang version 3.0 ()", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !18,  metadata !18, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
 !5 = metadata !{i32 720942, metadata !6, null, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void ()* @_ZN3foo3barEv, null, metadata !11, metadata !16, i32 4} ; [ DW_TAG_subprogram ]
diff --git a/test/DebugInfo/X86/DW_TAG_friend.ll b/test/DebugInfo/X86/DW_TAG_friend.ll
index f60175f..2e23222 100644
--- a/test/DebugInfo/X86/DW_TAG_friend.ll
+++ b/test/DebugInfo/X86/DW_TAG_friend.ll
@@ -17,7 +17,7 @@
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, metadata !28, i32 4, metadata !"clang version 3.1 (trunk 153413) (llvm/trunk 153428)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, metadata !28, i32 4, metadata !"clang version 3.1 (trunk 153413) (llvm/trunk 153428)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3,  metadata !3, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5, metadata !17}
 !5 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 10, metadata !7, i32 0, i32 1, %class.A* @a, null} ; [ DW_TAG_variable ]
diff --git a/test/DebugInfo/X86/aligned_stack_var.ll b/test/DebugInfo/X86/aligned_stack_var.ll
index a8f6cca..b99de3c 100644
--- a/test/DebugInfo/X86/aligned_stack_var.ll
+++ b/test/DebugInfo/X86/aligned_stack_var.ll
@@ -26,7 +26,7 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 4, metadata !6, metadata !"clang version 3.2 (trunk 155696:155697) (llvm/trunk 155696)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, i32 4, metadata !6, metadata !"clang version 3.2 (trunk 155696:155697) (llvm/trunk 155696)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
 !5 = metadata !{i32 786478, metadata !6, metadata !"run", metadata !"run", metadata !"_Z3runv", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z3runv, null, null, metadata !1, i32 1} ; [ DW_TAG_subprogram ]
diff --git a/test/DebugInfo/X86/block-capture.ll b/test/DebugInfo/X86/block-capture.ll
index fadea77..0046730 100644
--- a/test/DebugInfo/X86/block-capture.ll
+++ b/test/DebugInfo/X86/block-capture.ll
@@ -62,7 +62,7 @@ declare i32 @__objc_personality_v0(...)
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!35, !36, !37, !38}
 
-!0 = metadata !{i32 786449, metadata !63, i32 16, metadata !"clang version 3.1 (trunk 151227)", i1 false, metadata !"", i32 2, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, metadata !63, i32 16, metadata !"clang version 3.1 (trunk 151227)", i1 false, metadata !"", i32 2, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5, metadata !28, metadata !31, metadata !34}
 !5 = metadata !{i32 786478, metadata !6, metadata !6, metadata !"foo", metadata !"foo", metadata !"", i32 5, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, metadata !26, i32 5} ; [ DW_TAG_subprogram ]
diff --git a/test/DebugInfo/X86/concrete_out_of_line.ll b/test/DebugInfo/X86/concrete_out_of_line.ll
index 48e1def..3b9aefc 100644
--- a/test/DebugInfo/X86/concrete_out_of_line.ll
+++ b/test/DebugInfo/X86/concrete_out_of_line.ll
@@ -34,7 +34,7 @@ declare void @_Z8moz_freePv(i8*)
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, metadata !59, i32 4, metadata !"clang version 3.1 ()", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !47, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, metadata !59, i32 4, metadata !"clang version 3.1 ()", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !47,  metadata !47, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5, metadata !23, metadata !27, metadata !31}
 !5 = metadata !{i32 720942, metadata !6, null, metadata !"Release", metadata !"Release", metadata !"_ZN17nsAutoRefCnt7ReleaseEv", i32 14, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32* null, null, metadata !12, metadata !20, i32 14} ; [ DW_TAG_subprogram ]
diff --git a/test/DebugInfo/X86/dbg-value-inlined-parameter.ll b/test/DebugInfo/X86/dbg-value-inlined-parameter.ll
index e7a554f..da6423f 100644
--- a/test/DebugInfo/X86/dbg-value-inlined-parameter.ll
+++ b/test/DebugInfo/X86/dbg-value-inlined-parameter.ll
@@ -22,10 +22,10 @@ entry:
   tail call void @llvm.dbg.value(metadata !{%struct.S1* %sp}, i64 0, metadata !9), !dbg !20
   tail call void @llvm.dbg.value(metadata !{i32 %nums}, i64 0, metadata !18), !dbg !21
   %tmp2 = getelementptr inbounds %struct.S1* %sp, i64 0, i32 1, !dbg !22
-  store i32 %nums, i32* %tmp2, align 4, !dbg !22, !tbaa !24
+  store i32 %nums, i32* %tmp2, align 4, !dbg !22
   %call = tail call float* @bar(i32 %nums) nounwind optsize, !dbg !27
   %tmp5 = getelementptr inbounds %struct.S1* %sp, i64 0, i32 0, !dbg !27
-  store float* %call, float** %tmp5, align 8, !dbg !27, !tbaa !28
+  store float* %call, float** %tmp5, align 8, !dbg !27
   %cmp = icmp ne float* %call, null, !dbg !29
   %cond = zext i1 %cmp to i32, !dbg !29
   ret i32 %cond, !dbg !29
@@ -37,9 +37,9 @@ define void @foobar() nounwind optsize ssp {
 entry:
   tail call void @llvm.dbg.value(metadata !30, i64 0, metadata !9) nounwind, !dbg !31
   tail call void @llvm.dbg.value(metadata !34, i64 0, metadata !18) nounwind, !dbg !35
-  store i32 1, i32* getelementptr inbounds (%struct.S1* @p, i64 0, i32 1), align 8, !dbg !36, !tbaa !24
+  store i32 1, i32* getelementptr inbounds (%struct.S1* @p, i64 0, i32 1), align 8, !dbg !36
   %call.i = tail call float* @bar(i32 1) nounwind optsize, !dbg !37
-  store float* %call.i, float** getelementptr inbounds (%struct.S1* @p, i64 0, i32 0), align 8, !dbg !37, !tbaa !28
+  store float* %call.i, float** getelementptr inbounds (%struct.S1* @p, i64 0, i32 0), align 8, !dbg !37
   ret void, !dbg !38
 }
 
@@ -49,7 +49,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !0 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 8, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (%struct.S1*, i32)* @foo, null, null, metadata !41, i32 8} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786473, metadata !42} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, metadata !42, i32 12, metadata !"clang version 2.9 (trunk 125693)", i1 true, metadata !"", i32 0, null, null, metadata !39, metadata !40, null} ; [ DW_TAG_compile_unit ]
+!2 = metadata !{i32 786449, metadata !42, i32 12, metadata !"clang version 2.9 (trunk 125693)", i1 true, metadata !"", i32 0, null, null, metadata !39, metadata !40,  metadata !40, null} ; [ DW_TAG_compile_unit ]
 !3 = metadata !{i32 786453, metadata !42, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !4 = metadata !{metadata !5}
 !5 = metadata !{i32 786468, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
@@ -71,11 +71,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !21 = metadata !{i32 7, i32 21, metadata !0, null}
 !22 = metadata !{i32 9, i32 3, metadata !23, null}
 !23 = metadata !{i32 786443, metadata !1, metadata !0, i32 8, i32 1, i32 0} ; [ DW_TAG_lexical_block ]
-!24 = metadata !{metadata !"int", metadata !25}
-!25 = metadata !{metadata !"omnipotent char", metadata !26}
-!26 = metadata !{metadata !"Simple C/C++ TBAA", null}
 !27 = metadata !{i32 10, i32 3, metadata !23, null}
-!28 = metadata !{metadata !"any pointer", metadata !25}
 !29 = metadata !{i32 11, i32 3, metadata !23, null}
 !30 = metadata !{%struct.S1* @p}
 !31 = metadata !{i32 7, i32 13, metadata !0, metadata !32}
diff --git a/test/DebugInfo/X86/debug-info-block-captured-self.ll b/test/DebugInfo/X86/debug-info-block-captured-self.ll
index 77e02c6..7e318f6 100644
--- a/test/DebugInfo/X86/debug-info-block-captured-self.ll
+++ b/test/DebugInfo/X86/debug-info-block-captured-self.ll
@@ -77,7 +77,7 @@ define internal void @"__24-[Main initWithContext:]_block_invoke_2"(i8* %.block_
 }
 
 !llvm.dbg.cu = !{!0}
-!0 = metadata !{i32 786449, i32 16, metadata !1, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 2, metadata !2, metadata !4, metadata !23, metadata !15, metadata !""} ; [ DW_TAG_compile_unit ] [llvm/tools/clang/test/CodeGenObjC/debug-info-block-captured-self.m] [DW_LANG_ObjC]
+!0 = metadata !{i32 786449, i32 16, metadata !1, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 2, metadata !2, metadata !4, metadata !23, metadata !15,  metadata !15, metadata !""} ; [ DW_TAG_compile_unit ] [llvm/tools/clang/test/CodeGenObjC/debug-info-block-captured-self.m] [DW_LANG_ObjC]
 !1 = metadata !{i32 786473, metadata !107} ; [ DW_TAG_file_type ]
 !2 = metadata !{metadata !3}
 !3 = metadata !{i32 786436, metadata !107, null, metadata !"", i32 20, i64 32, i64 32, i32 0, i32 0, null, metadata !4, i32 0, i32 0} ; [ DW_TAG_enumeration_type ] [line 20, size 32, align 32, offset 0] [from ]
diff --git a/test/DebugInfo/X86/debug-info-blocks.ll b/test/DebugInfo/X86/debug-info-blocks.ll
index 36ab611..ae95033 100644
--- a/test/DebugInfo/X86/debug-info-blocks.ll
+++ b/test/DebugInfo/X86/debug-info-blocks.ll
@@ -260,7 +260,7 @@ attributes #3 = { nounwind }
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!56, !57, !58, !59}
 
-!0 = metadata !{i32 786449, metadata !1, i32 16, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 2, metadata !2, metadata !3, metadata !12, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [llvm/tools/clang/test/CodeGenObjC/<unknown>] [DW_LANG_ObjC]
+!0 = metadata !{i32 786449, metadata !1, i32 16, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 2, metadata !2, metadata !3, metadata !12, metadata !2,  metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [llvm/tools/clang/test/CodeGenObjC/<unknown>] [DW_LANG_ObjC]
 !1 = metadata !{metadata !"llvm/tools/clang/test/CodeGenObjC/<unknown>", metadata !"llvm/_build.ninja.Debug"}
 !2 = metadata !{i32 0}
 !3 = metadata !{metadata !4}
diff --git a/test/DebugInfo/X86/debug-info-static-member.ll b/test/DebugInfo/X86/debug-info-static-member.ll
index 50a2b3f..33485b6 100644
--- a/test/DebugInfo/X86/debug-info-static-member.ll
+++ b/test/DebugInfo/X86/debug-info-static-member.ll
@@ -58,7 +58,7 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 4, metadata !6, metadata !"clang version 3.3 (trunk 171914)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !10, metadata !""} ; [ DW_TAG_compile_unit ] [/home/probinson/projects/upstream/static-member/test/debug-info-static-member.cpp] [DW_LANG_C_plus_plus]
+!0 = metadata !{i32 786449, i32 4, metadata !6, metadata !"clang version 3.3 (trunk 171914)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !10,  metadata !10, metadata !""} ; [ DW_TAG_compile_unit ] [/home/probinson/projects/upstream/static-member/test/debug-info-static-member.cpp] [DW_LANG_C_plus_plus]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
 !5 = metadata !{i32 786478, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 18, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !1, i32 23} ; [ DW_TAG_subprogram ] [line 18] [def] [scope 23] [main]
diff --git a/test/DebugInfo/X86/elf-names.ll b/test/DebugInfo/X86/elf-names.ll
index 30e8c2e..7bc532e 100644
--- a/test/DebugInfo/X86/elf-names.ll
+++ b/test/DebugInfo/X86/elf-names.ll
@@ -21,13 +21,13 @@ define void @_ZN1DC2Ev(%class.D* nocapture %this) unnamed_addr nounwind uwtable
 entry:
   tail call void @llvm.dbg.value(metadata !{%class.D* %this}, i64 0, metadata !29), !dbg !36
   %c1 = getelementptr inbounds %class.D* %this, i64 0, i32 0, !dbg !37
-  store i32 1, i32* %c1, align 4, !dbg !37, !tbaa !39
+  store i32 1, i32* %c1, align 4, !dbg !37
   %c2 = getelementptr inbounds %class.D* %this, i64 0, i32 1, !dbg !42
-  store i32 2, i32* %c2, align 4, !dbg !42, !tbaa !39
+  store i32 2, i32* %c2, align 4, !dbg !42
   %c3 = getelementptr inbounds %class.D* %this, i64 0, i32 2, !dbg !43
-  store i32 3, i32* %c3, align 4, !dbg !43, !tbaa !39
+  store i32 3, i32* %c3, align 4, !dbg !43
   %c4 = getelementptr inbounds %class.D* %this, i64 0, i32 3, !dbg !44
-  store i32 4, i32* %c4, align 4, !dbg !44, !tbaa !39
+  store i32 4, i32* %c4, align 4, !dbg !44
   ret void, !dbg !45
 }
 
@@ -36,21 +36,21 @@ entry:
   tail call void @llvm.dbg.value(metadata !{%class.D* %this}, i64 0, metadata !34), !dbg !46
   tail call void @llvm.dbg.value(metadata !{%class.D* %d}, i64 0, metadata !35), !dbg !46
   %c1 = getelementptr inbounds %class.D* %d, i64 0, i32 0, !dbg !47
-  %0 = load i32* %c1, align 4, !dbg !47, !tbaa !39
+  %0 = load i32* %c1, align 4, !dbg !47
   %c12 = getelementptr inbounds %class.D* %this, i64 0, i32 0, !dbg !47
-  store i32 %0, i32* %c12, align 4, !dbg !47, !tbaa !39
+  store i32 %0, i32* %c12, align 4, !dbg !47
   %c2 = getelementptr inbounds %class.D* %d, i64 0, i32 1, !dbg !49
-  %1 = load i32* %c2, align 4, !dbg !49, !tbaa !39
+  %1 = load i32* %c2, align 4, !dbg !49
   %c23 = getelementptr inbounds %class.D* %this, i64 0, i32 1, !dbg !49
-  store i32 %1, i32* %c23, align 4, !dbg !49, !tbaa !39
+  store i32 %1, i32* %c23, align 4, !dbg !49
   %c3 = getelementptr inbounds %class.D* %d, i64 0, i32 2, !dbg !50
-  %2 = load i32* %c3, align 4, !dbg !50, !tbaa !39
+  %2 = load i32* %c3, align 4, !dbg !50
   %c34 = getelementptr inbounds %class.D* %this, i64 0, i32 2, !dbg !50
-  store i32 %2, i32* %c34, align 4, !dbg !50, !tbaa !39
+  store i32 %2, i32* %c34, align 4, !dbg !50
   %c4 = getelementptr inbounds %class.D* %d, i64 0, i32 3, !dbg !51
-  %3 = load i32* %c4, align 4, !dbg !51, !tbaa !39
+  %3 = load i32* %c4, align 4, !dbg !51
   %c45 = getelementptr inbounds %class.D* %this, i64 0, i32 3, !dbg !51
-  store i32 %3, i32* %c45, align 4, !dbg !51, !tbaa !39
+  store i32 %3, i32* %c45, align 4, !dbg !51
   ret void, !dbg !52
 }
 
@@ -58,7 +58,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, metadata !53, i32 4, metadata !"clang version 3.2 (trunk 167506) (llvm/trunk 167505)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/foo.cpp] [DW_LANG_C_plus_plus]
+!0 = metadata !{i32 786449, metadata !53, i32 4, metadata !"clang version 3.2 (trunk 167506) (llvm/trunk 167505)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/foo.cpp] [DW_LANG_C_plus_plus]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5, metadata !31}
 !5 = metadata !{i32 786478, metadata !6, null, metadata !"D", metadata !"D", metadata !"_ZN1DC2Ev", i32 12, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (%class.D*)* @_ZN1DC2Ev, null, metadata !17, metadata !27, i32 12} ; [ DW_TAG_subprogram ] [line 12] [def] [D]
@@ -95,9 +95,6 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !36 = metadata !{i32 12, i32 0, metadata !5, null}
 !37 = metadata !{i32 13, i32 0, metadata !38, null}
 !38 = metadata !{i32 786443, metadata !5, i32 12, i32 0, metadata !6, i32 0} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/foo.cpp]
-!39 = metadata !{metadata !"int", metadata !40}
-!40 = metadata !{metadata !"omnipotent char", metadata !41}
-!41 = metadata !{metadata !"Simple C/C++ TBAA"}
 !42 = metadata !{i32 14, i32 0, metadata !38, null}
 !43 = metadata !{i32 15, i32 0, metadata !38, null}
 !44 = metadata !{i32 16, i32 0, metadata !38, null}
diff --git a/test/DebugInfo/X86/empty-and-one-elem-array.ll b/test/DebugInfo/X86/empty-and-one-elem-array.ll
index 6e59915..ce3035e 100644
--- a/test/DebugInfo/X86/empty-and-one-elem-array.ll
+++ b/test/DebugInfo/X86/empty-and-one-elem-array.ll
@@ -59,7 +59,7 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, metadata !32, i32 12, metadata !"clang version 3.3 (trunk 169136)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/Volumes/Sandbox/llvm/test.c] [DW_LANG_C99]
+!0 = metadata !{i32 786449, metadata !32, i32 12, metadata !"clang version 3.3 (trunk 169136)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/Volumes/Sandbox/llvm/test.c] [DW_LANG_C99]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
 !5 = metadata !{i32 786478, metadata !6, metadata !6, metadata !"func", metadata !"func", metadata !"", i32 11, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @func, null, null, metadata !1, i32 11} ; [ DW_TAG_subprogram ] [line 11] [def] [func]
diff --git a/test/DebugInfo/X86/empty-array.ll b/test/DebugInfo/X86/empty-array.ll
index ace1156..1f46281 100644
--- a/test/DebugInfo/X86/empty-array.ll
+++ b/test/DebugInfo/X86/empty-array.ll
@@ -24,7 +24,7 @@
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, metadata !20, i32 4, metadata !"clang version 3.3 (trunk 169136)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] [/Volumes/Sandbox/llvm/t.cpp] [DW_LANG_C_plus_plus]
+!0 = metadata !{i32 786449, metadata !20, i32 4, metadata !"clang version 3.3 (trunk 169136)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3,  metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] [/Volumes/Sandbox/llvm/t.cpp] [DW_LANG_C_plus_plus]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
 !5 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 1, metadata !7, i32 0, i32 1, %class.A* @a, null} ; [ DW_TAG_variable ] [a] [line 1] [def]
diff --git a/test/DebugInfo/X86/ending-run.ll b/test/DebugInfo/X86/ending-run.ll
index 6de15f6..b0156b8 100644
--- a/test/DebugInfo/X86/ending-run.ll
+++ b/test/DebugInfo/X86/ending-run.ll
@@ -28,7 +28,7 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, metadata !19, i32 12, metadata !"clang version 3.1 (trunk 153921) (llvm/trunk 153916)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, metadata !19, i32 12, metadata !"clang version 3.1 (trunk 153921) (llvm/trunk 153916)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
 !5 = metadata !{i32 786478, metadata !19, metadata !"callee", metadata !"callee", metadata !"", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 (i32)* @callee, null, null, metadata !10, i32 7} ; [ DW_TAG_subprogram ]
diff --git a/test/DebugInfo/X86/enum-class.ll b/test/DebugInfo/X86/enum-class.ll
index 2272811..af6129c 100644
--- a/test/DebugInfo/X86/enum-class.ll
+++ b/test/DebugInfo/X86/enum-class.ll
@@ -7,7 +7,7 @@
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, metadata !22, i32 4, metadata !"clang version 3.2 (trunk 157269) (llvm/trunk 157264)", i1 false, metadata !"", i32 0, metadata !1, metadata !15, metadata !15, metadata !17, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, metadata !22, i32 4, metadata !"clang version 3.2 (trunk 157269) (llvm/trunk 157264)", i1 false, metadata !"", i32 0, metadata !1, metadata !15, metadata !15, metadata !17,  metadata !17, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{metadata !3, metadata !8, metadata !12}
 !3 = metadata !{i32 786436, metadata !4, null, metadata !"A", i32 1, i64 32, i64 32, i32 0, i32 0, metadata !5, metadata !6, i32 0, i32 0} ; [ DW_TAG_enumeration_type ]
 !4 = metadata !{i32 786473, metadata !22} ; [ DW_TAG_file_type ]
diff --git a/test/DebugInfo/X86/enum-fwd-decl.ll b/test/DebugInfo/X86/enum-fwd-decl.ll
index 33d807e..f4ff8b4 100644
--- a/test/DebugInfo/X86/enum-fwd-decl.ll
+++ b/test/DebugInfo/X86/enum-fwd-decl.ll
@@ -5,7 +5,7 @@
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 4, metadata !6, metadata !"clang version 3.2 (trunk 165274) (llvm/trunk 165272)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/foo.cpp] [DW_LANG_C_plus_plus]
+!0 = metadata !{i32 786449, i32 4, metadata !6, metadata !"clang version 3.2 (trunk 165274) (llvm/trunk 165272)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3,  metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/foo.cpp] [DW_LANG_C_plus_plus]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
 !5 = metadata !{i32 786484, i32 0, null, metadata !"e", metadata !"e", metadata !"", metadata !6, i32 2, metadata !7, i32 0, i32 1, i16* @e, null} ; [ DW_TAG_variable ] [e] [line 2] [def]
diff --git a/test/DebugInfo/X86/fission-cu.ll b/test/DebugInfo/X86/fission-cu.ll
index bfe2d17..8ad3c2d 100644
--- a/test/DebugInfo/X86/fission-cu.ll
+++ b/test/DebugInfo/X86/fission-cu.ll
@@ -1,11 +1,12 @@
 ; RUN: llc -split-dwarf=Enable -O0 %s -mtriple=x86_64-unknown-linux-gnu -filetype=obj -o %t
 ; RUN: llvm-dwarfdump -debug-dump=all %t | FileCheck %s
+; RUN: llvm-readobj --relocations %t | FileCheck --check-prefix=OBJ %s
 
 @a = common global i32 0, align 4
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, metadata !8, i32 12, metadata !"clang version 3.3 (trunk 169021) (llvm/trunk 169020)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3, metadata !"baz.dwo"} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/baz.c] [DW_LANG_C99]
+!0 = metadata !{i32 786449, metadata !8, i32 12, metadata !"clang version 3.3 (trunk 169021) (llvm/trunk 169020)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3,  metadata !3, metadata !"baz.dwo"} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/baz.c] [DW_LANG_C99]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
 !5 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 1, metadata !7, i32 0, i32 1, i32* @a, null} ; [ DW_TAG_variable ] [a] [line 1] [def]
@@ -48,9 +49,9 @@
 ; CHECK: DW_AT_producer  DW_FORM_GNU_str_index
 ; CHECK: DW_AT_language  DW_FORM_data2
 ; CHECK: DW_AT_name      DW_FORM_GNU_str_index
-; CHECK: DW_AT_low_pc    DW_FORM_GNU_addr_index
-; CHECK: DW_AT_stmt_list DW_FORM_data4
-; CHECK: DW_AT_comp_dir  DW_FORM_GNU_str_index
+; CHECK-NOT: DW_AT_low_pc
+; CHECK-NOT: DW_AT_stmt_list
+; CHECK-NOT: DW_AT_comp_dir
 ; CHECK: DW_AT_GNU_dwo_id        DW_FORM_data8
 
 ; CHECK: [2] DW_TAG_base_type    DW_CHILDREN_no
@@ -72,29 +73,40 @@
 ; CHECK: DW_AT_producer [DW_FORM_GNU_str_index] ( indexed (00000000) string = "clang version 3.3 (trunk 169021) (llvm/trunk 169020)")
 ; CHECK: DW_AT_language [DW_FORM_data2]        (0x000c)
 ; CHECK: DW_AT_name [DW_FORM_GNU_str_index]    ( indexed (00000001) string = "baz.c")
-; CHECK: DW_AT_low_pc [DW_FORM_GNU_addr_index]     ( indexed (00000000) address = 0x0000000000000000)
+; CHECK-NOT: DW_AT_low_pc
+; CHECK-NOT: DW_AT_stmt_list
+; CHECK-NOT: DW_AT_comp_dir
 ; CHECK: DW_AT_GNU_dwo_id [DW_FORM_data8]  (0x0000000000000000)
 ; CHECK: DW_TAG_base_type
-; CHECK: DW_AT_name [DW_FORM_GNU_str_index]     ( indexed (00000004) string = "int")
+; CHECK: DW_AT_name [DW_FORM_GNU_str_index]     ( indexed (00000003) string = "int")
 ; CHECK: DW_TAG_variable
-; CHECK: DW_AT_name [DW_FORM_GNU_str_index]     ( indexed (00000003) string = "a")
-; CHECK: DW_AT_type [DW_FORM_ref4]       (cu + 0x001e => {0x0000001e})
+; CHECK: DW_AT_name [DW_FORM_GNU_str_index]     ( indexed (00000002) string = "a")
+; CHECK: DW_AT_type [DW_FORM_ref4]       (cu + 0x0018 => {0x00000018})
 ; CHECK: DW_AT_external [DW_FORM_flag_present]   (true)
 ; CHECK: DW_AT_decl_file [DW_FORM_data1] (0x01)
 ; CHECK: DW_AT_decl_line [DW_FORM_data1] (0x01)
-; CHECK: DW_AT_location [DW_FORM_block1] (<0x02> fb 01 )
+; CHECK: DW_AT_location [DW_FORM_block1] (<0x02> fb 00 )
 
 
 ; CHECK: .debug_str.dwo contents:
 ; CHECK: 0x00000000: "clang version 3.3 (trunk 169021) (llvm/trunk 169020)"
 ; CHECK: 0x00000035: "baz.c"
-; CHECK: 0x0000003b: "/usr/local/google/home/echristo/tmp"
-; CHECK: 0x0000005f: "a"
-; CHECK: 0x00000061: "int"
+; CHECK: 0x0000003b: "a"
+; CHECK: 0x0000003d: "int"
 
 ; CHECK: .debug_str_offsets.dwo contents:
 ; CHECK: 0x00000000: 00000000
 ; CHECK: 0x00000004: 00000035
 ; CHECK: 0x00000008: 0000003b
-; CHECK: 0x0000000c: 0000005f
-; CHECK: 0x00000010: 00000061
+; CHECK: 0x0000000c: 0000003d
+
+; Object file checks
+; For x86-64 Linux, .debug_info must carry exactly the following sequence of relocations:
+;
+; OBJ: .debug_info
+; OBJ-NEXT: R_X86_64_32 .debug_abbrev
+; OBJ-NEXT: R_X86_64_32 .debug_str
+; OBJ-NEXT: R_X86_64_32 .debug_addr
+; OBJ-NEXT: R_X86_64_32 .debug_line
+; OBJ-NEXT: R_X86_64_32 .debug_str
+; OBJ-NEXT: }
diff --git a/test/DebugInfo/X86/instcombine-instrinsics.ll b/test/DebugInfo/X86/instcombine-instrinsics.ll
new file mode 100644
index 0000000..4466828
--- /dev/null
+++ b/test/DebugInfo/X86/instcombine-instrinsics.ll
@@ -0,0 +1,100 @@
+; RUN: opt < %s -O2 -S | FileCheck %s
+; Verify that the llvm.dbg.value intrinsic describing %i14 is emitted at most once.
+; CHECK: call void @llvm.dbg.value(metadata !{%struct.i14** %i14}
+; CHECK-NOT: call void @llvm.dbg.value(metadata !{%struct.i14** %i14}
+; CHECK: ret
+
+;*** IR Dump After Dead Argument Elimination ***
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+%struct.i3 = type { i32 }
+%struct.i14 = type { i32 }
+%struct.i24 = type opaque
+
+define %struct.i3* @barz(i64 %i9) nounwind {
+entry:
+  br label %while.cond
+
+while.cond:                                       ; preds = %while.body, %entry
+  br label %while.body
+
+while.body:                                       ; preds = %while.cond
+  br label %while.cond
+}
+
+declare void @llvm.dbg.declare(metadata, metadata)
+
+define void @init() nounwind {
+entry:
+  %i14 = alloca %struct.i14*, align 8
+  call void @llvm.dbg.declare(metadata !{%struct.i14** %i14}, metadata !25)
+  store %struct.i14* null, %struct.i14** %i14, align 8
+  %call = call i32 @foo(i8* bitcast (void ()* @bar to i8*), %struct.i14** %i14)
+  %0 = load %struct.i14** %i14, align 8
+  %i16 = getelementptr inbounds %struct.i14* %0, i32 0, i32 0
+  %1 = load i32* %i16, align 4
+  %or = or i32 %1, 4
+  store i32 %or, i32* %i16, align 4
+  %call4 = call i32 @foo(i8* bitcast (void ()* @baz to i8*), %struct.i14** %i14)
+  ret void
+}
+
+declare i32 @foo(i8*, %struct.i14**) nounwind
+
+define internal void @bar() nounwind {
+entry:
+  %i9 = alloca i64, align 8
+  store i64 0, i64* %i9, align 8
+  %call = call i32 @put(i64 0, i64* %i9, i64 0, %struct.i24* null)
+  ret void
+}
+
+define internal void @baz() nounwind {
+entry:
+  ret void
+}
+
+declare i32 @put(i64, i64*, i64, %struct.i24*) nounwind readnone
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.3 ", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !48, metadata !""} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !"i1", metadata !""}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4, metadata !21, metadata !33, metadata !47}
+!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"i2", metadata !"i2", metadata !"", i32 31, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, %struct.i3* (i64)* @barz, null, null, metadata !16, i32 32} ; [ DW_TAG_subprogram ] [line 31]  [scope 32]
+!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ]
+!6 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = metadata !{metadata !8, metadata !13}
+!8 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from i3]
+!9 = metadata !{i32 786451, metadata !1, null, metadata !"i3", i32 25, i64 32, i64 32, i32 0, i32 0, null, metadata !10, i32 0, null, null} ; [ DW_TAG_structure_type ]  [line 25, size 32, align 32, offset 0] [from ]
+!10 = metadata !{metadata !11}
+!11 = metadata !{i32 786445, metadata !1, metadata !9, metadata !"i4", i32 26, i64 32, i64 32, i64 0, i32 0, metadata !12} ; [ DW_TAG_member ]  [line 26, size 32, align 32, offset 0] [from i5]
+!12 = metadata !{i32 786468, null, null, metadata !"i5", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]  [line 0, size 32, align 32, offset 0, enc DW_ATE_unsigned]
+!13 = metadata !{i32 786454, metadata !1, null, metadata !"i6", i32 5, i64 0, i64 0, i64 0, i32 0, metadata !14} ; [ DW_TAG_typedef ]  [line 5, size 0, align 0, offset 0] [from i7]
+!14 = metadata !{i32 786454, metadata !1, null, metadata !"i7", i32 2, i64 0, i64 0, i64 0, i32 0, metadata !15} ; [ DW_TAG_typedef ]  [line 2, size 0, align 0, offset 0] [from i8]
+!15 = metadata !{i32 786468, null, null, metadata !"i8", i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]  [line 0, size 64, align 64, offset 0, enc DW_ATE_unsigned]
+!16 = metadata !{}
+!21 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"i13", metadata !"i13", metadata !"", i32 42, metadata !22, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void ()* @init, null, null, metadata !24, i32 43} ; [ DW_TAG_subprogram ] [line 42]  [scope 43]
+!22 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !23, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!23 = metadata !{null}
+!24 = metadata !{metadata !25}
+!25 = metadata !{i32 786688, metadata !21, metadata !"i14", metadata !5, i32 45, metadata !27, i32 0, i32 0} ; [ DW_TAG_auto_variable ]  [line 45]
+!27 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !28} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from i14]
+!28 = metadata !{i32 786451, metadata !1, null, metadata !"i14", i32 16, i64 32, i64 32, i32 0, i32 0, null, metadata !29, i32 0, null, null} ; [ DW_TAG_structure_type ]  [line 16, size 32, align 32, offset 0] [from ]
+!29 = metadata !{metadata !30}
+!30 = metadata !{i32 786445, metadata !1, metadata !28, metadata !"i16", i32 17, i64 32, i64 32, i64 0, i32 0, metadata !31} ; [ DW_TAG_member ]  [line 17, size 32, align 32, offset 0] [from i17]
+!31 = metadata !{i32 786454, metadata !1, null, metadata !"i17", i32 7, i64 0, i64 0, i64 0, i32 0, metadata !32} ; [ DW_TAG_typedef ]  [line 7, size 0, align 0, offset 0] [from int]
+!32 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]  [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!33 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"i18", metadata !"i18", metadata !"", i32 54, metadata !22, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, void ()* @bar, null, null, metadata !34, i32 55} ; [ DW_TAG_subprogram ] [line 54]   [scope 55]
+!34 = metadata !{null}
+!47 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"i29", metadata !"i29", metadata !"", i32 53, metadata !22, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, void ()* @baz, null, null, metadata !2, i32 53} ; [ DW_TAG_subprogram ] [line 53]
+!48 = metadata !{metadata !49}
+!49 = metadata !{i32 786484, i32 0, metadata !21, metadata !"i30", metadata !"i30", metadata !"", metadata !5, i32 44, metadata !50, i32 1, i32 1, null, null}
+!50 = metadata !{i32 786454, metadata !1, null, metadata !"i31", i32 6, i64 0, i64 0, i64 0, i32 0, metadata !32} ; [ DW_TAG_typedef ]  [line 6, size 0, align 0, offset 0] [from int]
+!52 = metadata !{i64 0}
+!55 = metadata !{%struct.i3* null}
+!72 = metadata !{%struct.i24* null}
diff --git a/test/DebugInfo/X86/line-info.ll b/test/DebugInfo/X86/line-info.ll
index 0c0a7ab..fd813b31 100644
--- a/test/DebugInfo/X86/line-info.ll
+++ b/test/DebugInfo/X86/line-info.ll
@@ -37,7 +37,7 @@ attributes #1 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/blaikie/dev/scratch/list0.c] [DW_LANG_C99]
+!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2,  metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/blaikie/dev/scratch/list0.c] [DW_LANG_C99]
 !1 = metadata !{metadata !"list0.c", metadata !"/usr/local/google/home/blaikie/dev/scratch"}
 !2 = metadata !{i32 0}
 !3 = metadata !{metadata !4, metadata !10}
diff --git a/test/DebugInfo/X86/linkage-name.ll b/test/DebugInfo/X86/linkage-name.ll
index 9440f3a..c9bd2cf 100644
--- a/test/DebugInfo/X86/linkage-name.ll
+++ b/test/DebugInfo/X86/linkage-name.ll
@@ -26,7 +26,7 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 4, metadata !6, metadata !"clang version 3.1 (trunk 152691) (llvm/trunk 152692)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !18, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, i32 4, metadata !6, metadata !"clang version 3.1 (trunk 152691) (llvm/trunk 152692)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !18,  metadata !18, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
 !5 = metadata !{i32 786478, metadata !6, null, metadata !"a", metadata !"a", metadata !"_ZN1A1aEi", i32 5, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (%class.A*, i32)* @_ZN1A1aEi, null, metadata !13, metadata !16, i32 5} ; [ DW_TAG_subprogram ]
diff --git a/test/DebugInfo/X86/low-pc-cu.ll b/test/DebugInfo/X86/low-pc-cu.ll
index 4dd5aaf..77f69b9 100644
--- a/test/DebugInfo/X86/low-pc-cu.ll
+++ b/test/DebugInfo/X86/low-pc-cu.ll
@@ -14,7 +14,7 @@ entry:
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 4, metadata !6, metadata !"clang version 3.1 (trunk 153454) (llvm/trunk 153471)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, i32 4, metadata !6, metadata !"clang version 3.1 (trunk 153454) (llvm/trunk 153471)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5, metadata !12}
 !5 = metadata !{i32 786478, metadata !"_Z1qv", i32 0, metadata !6, metadata !"q", metadata !"q", metadata !6, i32 5, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z1qv, null, null, metadata !10} ; [ DW_TAG_subprogram ]
diff --git a/test/DebugInfo/X86/misched-dbg-value.ll b/test/DebugInfo/X86/misched-dbg-value.ll
index 0980e23..4b78c88 100644
--- a/test/DebugInfo/X86/misched-dbg-value.ll
+++ b/test/DebugInfo/X86/misched-dbg-value.ll
@@ -43,15 +43,15 @@ entry:
   tail call void @llvm.dbg.value(metadata !{i32 %add}, i64 0, metadata !27), !dbg !68
   %idxprom = sext i32 %add to i64, !dbg !69
   %arrayidx = getelementptr inbounds i32* %Array1Par, i64 %idxprom, !dbg !69
-  store i32 %IntParI2, i32* %arrayidx, align 4, !dbg !69, !tbaa !70
+  store i32 %IntParI2, i32* %arrayidx, align 4, !dbg !69
   %add3 = add nsw i32 %IntParI1, 6, !dbg !73
   %idxprom4 = sext i32 %add3 to i64, !dbg !73
   %arrayidx5 = getelementptr inbounds i32* %Array1Par, i64 %idxprom4, !dbg !73
-  store i32 %IntParI2, i32* %arrayidx5, align 4, !dbg !73, !tbaa !70
+  store i32 %IntParI2, i32* %arrayidx5, align 4, !dbg !73
   %add6 = add nsw i32 %IntParI1, 35, !dbg !74
   %idxprom7 = sext i32 %add6 to i64, !dbg !74
   %arrayidx8 = getelementptr inbounds i32* %Array1Par, i64 %idxprom7, !dbg !74
-  store i32 %add, i32* %arrayidx8, align 4, !dbg !74, !tbaa !70
+  store i32 %add, i32* %arrayidx8, align 4, !dbg !74
   tail call void @llvm.dbg.value(metadata !{i32 %add}, i64 0, metadata !28), !dbg !75
   br label %for.body, !dbg !75
 
@@ -59,7 +59,7 @@ for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %idxprom, %entry ], [ %indvars.iv.next, %for.body ]
   %IntIndex.046 = phi i32 [ %add, %entry ], [ %inc, %for.body ]
   %arrayidx13 = getelementptr inbounds [51 x i32]* %Array2Par, i64 %idxprom, i64 %indvars.iv, !dbg !77
-  store i32 %add, i32* %arrayidx13, align 4, !dbg !77, !tbaa !70
+  store i32 %add, i32* %arrayidx13, align 4, !dbg !77
   %inc = add nsw i32 %IntIndex.046, 1, !dbg !75
   tail call void @llvm.dbg.value(metadata !{i32 %inc}, i64 0, metadata !28), !dbg !75
   %cmp = icmp sgt i32 %inc, %add3, !dbg !75
@@ -70,15 +70,15 @@ for.end:                                          ; preds = %for.body
   %sub = add nsw i32 %IntParI1, 4, !dbg !78
   %idxprom14 = sext i32 %sub to i64, !dbg !78
   %arrayidx17 = getelementptr inbounds [51 x i32]* %Array2Par, i64 %idxprom, i64 %idxprom14, !dbg !78
-  %0 = load i32* %arrayidx17, align 4, !dbg !78, !tbaa !70
+  %0 = load i32* %arrayidx17, align 4, !dbg !78
   %inc18 = add nsw i32 %0, 1, !dbg !78
-  store i32 %inc18, i32* %arrayidx17, align 4, !dbg !78, !tbaa !70
-  %1 = load i32* %arrayidx, align 4, !dbg !79, !tbaa !70
+  store i32 %inc18, i32* %arrayidx17, align 4, !dbg !78
+  %1 = load i32* %arrayidx, align 4, !dbg !79
   %add22 = add nsw i32 %IntParI1, 25, !dbg !79
   %idxprom23 = sext i32 %add22 to i64, !dbg !79
   %arrayidx25 = getelementptr inbounds [51 x i32]* %Array2Par, i64 %idxprom23, i64 %idxprom, !dbg !79
-  store i32 %1, i32* %arrayidx25, align 4, !dbg !79, !tbaa !70
-  store i32 5, i32* @IntGlob, align 4, !dbg !80, !tbaa !70
+  store i32 %1, i32* %arrayidx25, align 4, !dbg !79
+  store i32 5, i32* @IntGlob, align 4, !dbg !80
   ret void, !dbg !81
 }
 
@@ -89,7 +89,7 @@ attributes #1 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 12, metadata !3, metadata !"clang version 3.3 (trunk 175015)", i1 true, metadata !"", i32 0, metadata !1, metadata !10, metadata !11, metadata !29, metadata !""} ; [ DW_TAG_compile_unit ] [/Users/manmanren/test-Nov/rdar_13183203/test2/dry.c] [DW_LANG_C99]
+!0 = metadata !{i32 786449, i32 12, metadata !3, metadata !"clang version 3.3 (trunk 175015)", i1 true, metadata !"", i32 0, metadata !1, metadata !10, metadata !11, metadata !29,  metadata !29, metadata !""} ; [ DW_TAG_compile_unit ] [/Users/manmanren/test-Nov/rdar_13183203/test2/dry.c] [DW_LANG_C99]
 !1 = metadata !{metadata !2}
 !2 = metadata !{i32 786436, metadata !82, null, metadata !"", i32 128, i64 32, i64 32, i32 0, i32 0, null, metadata !4, i32 0, i32 0} ; [ DW_TAG_enumeration_type ] [line 128, size 32, align 32, offset 0] [from ]
 !3 = metadata !{i32 786473, metadata !82} ; [ DW_TAG_file_type ]
@@ -159,9 +159,6 @@ attributes #1 = { nounwind readnone }
 !67 = metadata !{i32 184, i32 0, metadata !12, null}
 !68 = metadata !{i32 189, i32 0, metadata !12, null}
 !69 = metadata !{i32 190, i32 0, metadata !12, null}
-!70 = metadata !{metadata !"int", metadata !71}
-!71 = metadata !{metadata !"omnipotent char", metadata !72}
-!72 = metadata !{metadata !"Simple C/C++ TBAA"}
 !73 = metadata !{i32 191, i32 0, metadata !12, null}
 !74 = metadata !{i32 192, i32 0, metadata !12, null}
 !75 = metadata !{i32 193, i32 0, metadata !76, null}
diff --git a/test/DebugInfo/X86/multiple-at-const-val.ll b/test/DebugInfo/X86/multiple-at-const-val.ll
index f6ca10b..7779d1e 100644
--- a/test/DebugInfo/X86/multiple-at-const-val.ll
+++ b/test/DebugInfo/X86/multiple-at-const-val.ll
@@ -31,7 +31,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 4, metadata !961, metadata !"clang version 3.3 (trunk 174207)", i1 true, metadata !"", i32 0, metadata !1, metadata !955, metadata !956, metadata !1786, metadata !""} ; [ DW_TAG_compile_unit ] [/privite/tmp/student2.cpp] [DW_LANG_C_plus_plus]
+!0 = metadata !{i32 786449, i32 4, metadata !961, metadata !"clang version 3.3 (trunk 174207)", i1 true, metadata !"", i32 0, metadata !1, metadata !955, metadata !956, metadata !1786,  metadata !1786, metadata !""} ; [ DW_TAG_compile_unit ] [/privite/tmp/student2.cpp] [DW_LANG_C_plus_plus]
 !1 = metadata !{metadata !26}
 !4 = metadata !{i32 786489, null, metadata !"std", metadata !5, i32 48} ; [ DW_TAG_namespace ]
 !5 = metadata !{i32 786473, metadata !1801} ; [ DW_TAG_file_type ]
diff --git a/test/DebugInfo/X86/nondefault-subrange-array.ll b/test/DebugInfo/X86/nondefault-subrange-array.ll
index 33a6f8b..a5f786c 100644
--- a/test/DebugInfo/X86/nondefault-subrange-array.ll
+++ b/test/DebugInfo/X86/nondefault-subrange-array.ll
@@ -27,7 +27,7 @@
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, metadata !20, i32 4, metadata !"clang version 3.3 (trunk 169136)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] [/Volumes/Sandbox/llvm/t.cpp] [DW_LANG_C_plus_plus]
+!0 = metadata !{i32 786449, metadata !20, i32 4, metadata !"clang version 3.3 (trunk 169136)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3,  metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] [/Volumes/Sandbox/llvm/t.cpp] [DW_LANG_C_plus_plus]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
 !5 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 1, metadata !7, i32 0, i32 1, %class.A* @a, null} ; [ DW_TAG_variable ] [a] [line 1] [def]
diff --git a/test/DebugInfo/X86/objc-fwd-decl.ll b/test/DebugInfo/X86/objc-fwd-decl.ll
index 1847d2c..3070ff8 100644
--- a/test/DebugInfo/X86/objc-fwd-decl.ll
+++ b/test/DebugInfo/X86/objc-fwd-decl.ll
@@ -12,7 +12,7 @@
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!9, !10, !11, !12}
 
-!0 = metadata !{i32 786449, metadata !13, i32 16, metadata !"clang version 3.1 (trunk 152054 trunk 152094)", i1 false, metadata !"", i32 2, metadata !1, metadata !1, metadata !1, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, metadata !13, i32 16, metadata !"clang version 3.1 (trunk 152054 trunk 152094)", i1 false, metadata !"", i32 2, metadata !1, metadata !1, metadata !1, metadata !3,  metadata !3, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
 !5 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 3, metadata !7, i32 0, i32 1, %0** @a, null} ; [ DW_TAG_variable ]
diff --git a/test/DebugInfo/X86/op_deref.ll b/test/DebugInfo/X86/op_deref.ll
index 3bb93e7..c3580a7 100644
--- a/test/DebugInfo/X86/op_deref.ll
+++ b/test/DebugInfo/X86/op_deref.ll
@@ -59,7 +59,7 @@ declare void @llvm.stackrestore(i8*) nounwind
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, metadata !28, i32 12, metadata !"clang version 3.2 (trunk 156005) (llvm/trunk 156000)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, metadata !28, i32 12, metadata !"clang version 3.2 (trunk 156005) (llvm/trunk 156000)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
 !5 = metadata !{i32 786478, metadata !6, metadata !"testVLAwithSize", metadata !"testVLAwithSize", metadata !"", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32)* @testVLAwithSize, null, null, metadata !1, i32 2} ; [ DW_TAG_subprogram ]
diff --git a/test/DebugInfo/X86/pointer-type-size.ll b/test/DebugInfo/X86/pointer-type-size.ll
index aa56058..b065353 100644
--- a/test/DebugInfo/X86/pointer-type-size.ll
+++ b/test/DebugInfo/X86/pointer-type-size.ll
@@ -10,7 +10,7 @@
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 12, metadata !6, metadata !"clang version 3.1 (trunk 147882)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, i32 12, metadata !6, metadata !"clang version 3.1 (trunk 147882)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3,  metadata !3, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
 !5 = metadata !{i32 720948, i32 0, null, metadata !"crass", metadata !"crass", metadata !"", metadata !6, i32 1, metadata !7, i32 0, i32 1, %struct.crass* @crass, null} ; [ DW_TAG_variable ]
diff --git a/test/DebugInfo/X86/pr11300.ll b/test/DebugInfo/X86/pr11300.ll
index 61df4ad..54e0c8b 100644
--- a/test/DebugInfo/X86/pr11300.ll
+++ b/test/DebugInfo/X86/pr11300.ll
@@ -31,7 +31,7 @@ entry:
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, metadata !32, i32 4, metadata !"clang version 3.0 ()", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, metadata !32, i32 4, metadata !"clang version 3.0 ()", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5, metadata !20}
 !5 = metadata !{i32 720942, metadata !6, metadata !6, metadata !"zed", metadata !"zed", metadata !"_Z3zedP3foo", i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.foo*)* @_Z3zedP3foo, null, null, metadata !18, i32 4} ; [ DW_TAG_subprogram ]
diff --git a/test/DebugInfo/X86/pr13303.ll b/test/DebugInfo/X86/pr13303.ll
index 3495623..63ddfa7 100644
--- a/test/DebugInfo/X86/pr13303.ll
+++ b/test/DebugInfo/X86/pr13303.ll
@@ -14,7 +14,7 @@ entry:
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 12, metadata !6, metadata !"clang version 3.2 (trunk 160143)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/home/probinson/PR13303.c] [DW_LANG_C99]
+!0 = metadata !{i32 786449, i32 12, metadata !6, metadata !"clang version 3.2 (trunk 160143)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/home/probinson/PR13303.c] [DW_LANG_C99]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
 !5 = metadata !{i32 786478, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main, null, null, metadata !1, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [main]
diff --git a/test/DebugInfo/X86/prologue-stack.ll b/test/DebugInfo/X86/prologue-stack.ll
index 6e49177..00ee7a0 100644
--- a/test/DebugInfo/X86/prologue-stack.ll
+++ b/test/DebugInfo/X86/prologue-stack.ll
@@ -20,7 +20,7 @@ declare i32 @callme(i32)
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 12, metadata !6, metadata !"clang version 3.2 (trunk 164980) (llvm/trunk 164979)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/bar.c] [DW_LANG_C99]
+!0 = metadata !{i32 786449, i32 12, metadata !6, metadata !"clang version 3.2 (trunk 164980) (llvm/trunk 164979)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/bar.c] [DW_LANG_C99]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
 !5 = metadata !{i32 786478, metadata !6, metadata !"isel_line_test2", metadata !"isel_line_test2", metadata !"", metadata !6, i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @isel_line_test2, null, null, metadata !1, i32 4} ; [ DW_TAG_subprogram ] [line 3] [def] [scope 4] [isel_line_test2]
diff --git a/test/DebugInfo/X86/rvalue-ref.ll b/test/DebugInfo/X86/rvalue-ref.ll
index ae2e3d4..b5aa4f64 100644
--- a/test/DebugInfo/X86/rvalue-ref.ll
+++ b/test/DebugInfo/X86/rvalue-ref.ll
@@ -22,7 +22,7 @@ declare i32 @printf(i8*, ...)
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 4, metadata !6, metadata !"clang version 3.2 (trunk 157054) (llvm/trunk 157060)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, i32 4, metadata !6, metadata !"clang version 3.2 (trunk 157054) (llvm/trunk 157060)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
 !5 = metadata !{i32 786478, metadata !6, metadata !"foo", metadata !"foo", metadata !"_Z3fooOi", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32*)* @_Z3fooOi, null, null, metadata !1, i32 5} ; [ DW_TAG_subprogram ]
diff --git a/test/DebugInfo/X86/stmt-list-multiple-compile-units.ll b/test/DebugInfo/X86/stmt-list-multiple-compile-units.ll
index 39a026c..620478a 100644
--- a/test/DebugInfo/X86/stmt-list-multiple-compile-units.ll
+++ b/test/DebugInfo/X86/stmt-list-multiple-compile-units.ll
@@ -1,5 +1,6 @@
 ; RUN: llc -O0 %s -mtriple=x86_64-apple-darwin -filetype=obj -o %t
 ; RUN: llvm-dwarfdump %t | FileCheck %s
+; RUN: llc < %s -O0 -mtriple=x86_64-apple-macosx10.7 | FileCheck %s -check-prefix=ASM
 
 ; rdar://13067005
 ; CHECK: .debug_info contents:
@@ -20,6 +21,11 @@
 ; CHECK: file_names[  1]    0 0x00000000 0x00000000 simple2.c
 ; CHECK-NOT: file_names
 
+; PR15408
+; ASM: L__DWARF__debug_info_begin0:
+; ASM: .long   0                       ## DW_AT_stmt_list
+; ASM: L__DWARF__debug_info_begin1:
+; ASM: .long   0                       ## DW_AT_stmt_list
 define i32 @test(i32 %a) nounwind uwtable ssp {
 entry:
   %a.addr = alloca i32, align 4
@@ -42,7 +48,7 @@ entry:
 }
 
 !llvm.dbg.cu = !{!0, !10}
-!0 = metadata !{i32 786449, metadata !23, i32 12, metadata !"clang version 3.3", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, metadata !23, i32 12, metadata !"clang version 3.3", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{metadata !2}
 !2 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
@@ -51,7 +57,7 @@ entry:
 !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
 !8 = metadata !{metadata !9, metadata !9}
 !9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!10 = metadata !{i32 786449, metadata !24, i32 12, metadata !"clang version 3.3 (trunk 172862)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !11, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
+!10 = metadata !{i32 786449, metadata !24, i32 12, metadata !"clang version 3.3 (trunk 172862)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !11, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
 !11 = metadata !{metadata !13}
 !13 = metadata !{i32 786478, metadata !24, metadata !"fn", metadata !"fn", metadata !"", metadata !14, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @fn, null, null, metadata !1, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [fn]
 !14 = metadata !{i32 786473, metadata !24} ; [ DW_TAG_file_type ]
diff --git a/test/DebugInfo/X86/stringpool.ll b/test/DebugInfo/X86/stringpool.ll
index 8df281d..d9604de 100644
--- a/test/DebugInfo/X86/stringpool.ll
+++ b/test/DebugInfo/X86/stringpool.ll
@@ -5,7 +5,7 @@
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, metadata !8, i32 12, metadata !"clang version 3.1 (trunk 143009)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, metadata !8, i32 12, metadata !"clang version 3.1 (trunk 143009)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3,  metadata !3, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
 !5 = metadata !{i32 720948, i32 0, null, metadata !"yyyy", metadata !"yyyy", metadata !"", metadata !6, i32 1, metadata !7, i32 0, i32 1, i32* @yyyy, null} ; [ DW_TAG_variable ]
diff --git a/test/DebugInfo/X86/struct-loc.ll b/test/DebugInfo/X86/struct-loc.ll
index bdf104f..76cb1f7 100644
--- a/test/DebugInfo/X86/struct-loc.ll
+++ b/test/DebugInfo/X86/struct-loc.ll
@@ -13,7 +13,7 @@
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 12, metadata !6, metadata !"clang version 3.1 (trunk 152837) (llvm/trunk 152845)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, i32 12, metadata !6, metadata !"clang version 3.1 (trunk 152837) (llvm/trunk 152845)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3,  metadata !3, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
 !5 = metadata !{i32 786484, i32 0, null, metadata !"f", metadata !"f", metadata !"", metadata !6, i32 5, metadata !7, i32 0, i32 1, %struct.foo* @f, null} ; [ DW_TAG_variable ]
diff --git a/test/DebugInfo/X86/subrange-type.ll b/test/DebugInfo/X86/subrange-type.ll
index efc5bf0..da95893 100644
--- a/test/DebugInfo/X86/subrange-type.ll
+++ b/test/DebugInfo/X86/subrange-type.ll
@@ -20,7 +20,7 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, metadata !17, i32 12, metadata !"clang version 3.3 (trunk 171472) (llvm/trunk 171487)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/foo.c] [DW_LANG_C99]
+!0 = metadata !{i32 786449, metadata !17, i32 12, metadata !"clang version 3.3 (trunk 171472) (llvm/trunk 171487)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/foo.c] [DW_LANG_C99]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
 !5 = metadata !{i32 786478, metadata !6, metadata !6, metadata !"main", metadata !"main", metadata !"", i32 2, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !1, i32 3} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 3] [main]
diff --git a/test/DebugInfo/X86/subreg.ll b/test/DebugInfo/X86/subreg.ll
index 027589b..c7f8638 100644
--- a/test/DebugInfo/X86/subreg.ll
+++ b/test/DebugInfo/X86/subreg.ll
@@ -22,7 +22,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !0 = metadata !{i32 786689, metadata !1, metadata !"zzz", metadata !2, i32 16777219, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
 !1 = metadata !{i32 786478, metadata !2, metadata !"f", metadata !"f", metadata !"", metadata !2, i32 3, metadata !4, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i16 (i16)* @f, null, null, null, i32 3} ; [ DW_TAG_subprogram ]
 !2 = metadata !{i32 786473, metadata !"/home/espindola/llvm/test.c", metadata !"/home/espindola/tmpfs/build", metadata !3} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786449, i32 12, metadata !2, metadata !"clang version 3.0 ()", i1 false, metadata !"", i32 0, null, null, metadata !9, null, metadata !""} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 786449, i32 12, metadata !2, metadata !"clang version 3.0 ()", i1 false, metadata !"", i32 0, null, null, metadata !9, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
 !4 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !5, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !5 = metadata !{null}
 !6 = metadata !{i32 786468, metadata !3, metadata !"short", null, i32 0, i64 16, i64 16, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
diff --git a/test/DebugInfo/X86/union-template.ll b/test/DebugInfo/X86/union-template.ll
index 0f5538e..8d23cae 100644
--- a/test/DebugInfo/X86/union-template.ll
+++ b/test/DebugInfo/X86/union-template.ll
@@ -28,7 +28,7 @@ attributes #1 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.3 (trunk 178499) (llvm/trunk 178472)", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !9, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/foo.cc] [DW_LANG_C_plus_plus]
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.3 (trunk 178499) (llvm/trunk 178472)", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !9,  metadata !9, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/foo.cc] [DW_LANG_C_plus_plus]
 !1 = metadata !{metadata !"foo.cc", metadata !"/usr/local/google/home/echristo/tmp"}
 !2 = metadata !{i32 0}
 !3 = metadata !{metadata !4}
diff --git a/test/DebugInfo/X86/vector.ll b/test/DebugInfo/X86/vector.ll
index 570adf9..658303a 100644
--- a/test/DebugInfo/X86/vector.ll
+++ b/test/DebugInfo/X86/vector.ll
@@ -11,7 +11,7 @@
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 12, metadata !6, metadata !"clang version 3.3 (trunk 171825) (llvm/trunk 171822)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] [/Users/echristo/foo.c] [DW_LANG_C99]
+!0 = metadata !{i32 786449, i32 12, metadata !6, metadata !"clang version 3.3 (trunk 171825) (llvm/trunk 171822)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3,  metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] [/Users/echristo/foo.c] [DW_LANG_C99]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
 !5 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 3, metadata !7, i32 0, i32 1, <4 x i32>* @a, null} ; [ DW_TAG_variable ] [a] [line 3] [def]
diff --git a/test/DebugInfo/array.ll b/test/DebugInfo/array.ll
index 3077110..7dd57d7 100644
--- a/test/DebugInfo/array.ll
+++ b/test/DebugInfo/array.ll
@@ -16,7 +16,7 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 
 !0 = metadata !{i32 786478, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, i32 ()* @main, null, null, null, i32 3} ; [ DW_TAG_subprogram ]
 !1 = metadata !{i32 786473, metadata !14} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.0 (trunk 129138)", i1 false, metadata !"", i32 0, null, null, metadata !13, null, null} ; [ DW_TAG_compile_unit ]
+!2 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.0 (trunk 129138)", i1 false, metadata !"", i32 0, null, null, metadata !13, null,  null, null} ; [ DW_TAG_compile_unit ]
 !3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !4 = metadata !{metadata !5}
 !5 = metadata !{i32 786468, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
diff --git a/test/DebugInfo/dwarf-public-names.ll b/test/DebugInfo/dwarf-public-names.ll
index 52b2397..5d33048 100644
--- a/test/DebugInfo/dwarf-public-names.ll
+++ b/test/DebugInfo/dwarf-public-names.ll
@@ -1,6 +1,7 @@
+; REQUIRES: object-emission
+
 ; RUN: llc -generate-dwarf-pubnames -filetype=obj -o %t.o < %s
 ; RUN: llvm-dwarfdump -debug-dump=pubnames %t.o | FileCheck %s
-; XFAIL: hexagon
 ; ModuleID = 'dwarf-public-names.cpp'
 ;
 ; Generated from:
@@ -85,7 +86,7 @@ attributes #1 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 4, metadata !4, metadata !"clang version 3.3 (http://llvm.org/git/clang.git a09cd8103a6a719cb2628cdf0c91682250a17bd2) (http://llvm.org/git/llvm.git 47d03cec0afca0c01ae42b82916d1d731716cd20)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !2, metadata !24, metadata !""} ; [ DW_TAG_compile_unit ] [/usr2/kparzysz/s.hex/t/dwarf-public-names.cpp] [DW_LANG_C_plus_plus]
+!0 = metadata !{i32 786449, i32 4, metadata !4, metadata !"clang version 3.3 (http://llvm.org/git/clang.git a09cd8103a6a719cb2628cdf0c91682250a17bd2) (http://llvm.org/git/llvm.git 47d03cec0afca0c01ae42b82916d1d731716cd20)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !2, metadata !24,  metadata !24, metadata !""} ; [ DW_TAG_compile_unit ] [/usr2/kparzysz/s.hex/t/dwarf-public-names.cpp] [DW_LANG_C_plus_plus]
 !1 = metadata !{i32 0}
 !2 = metadata !{metadata !3, metadata !18, metadata !19, metadata !20}
 !3 = metadata !{i32 786478, metadata !4, null, metadata !"member_function", metadata !"member_function", metadata !"_ZN1C15member_functionEv", i32 9, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.C*)* @_ZN1C15member_functionEv, null, metadata !12, metadata !1, i32 9} ; [ DW_TAG_subprogram ] [line 9] [def] [member_function]
diff --git a/test/DebugInfo/dwarfdump-zlib.test b/test/DebugInfo/dwarfdump-zlib.test
new file mode 100644
index 0000000..8ce2cf7
--- /dev/null
+++ b/test/DebugInfo/dwarfdump-zlib.test
@@ -0,0 +1,12 @@
+REQUIRES: zlib
+
+RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test-zlib.elf-x86-64  \
+RUN:   | FileCheck %s -check-prefix FULLDUMP
+RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test-zlib.elf-x86-64  \
+RUN:   --address=0x400559 --functions | FileCheck %s -check-prefix MAIN
+
+FULLDUMP: .debug_abbrev contents
+FULLDUMP: .debug_info contents
+
+MAIN: main
+MAIN-NEXT: /tmp/dbginfo{{[/\\]}}dwarfdump-test-zlib.cc:16
diff --git a/test/DebugInfo/inline-debug-info-multiret.ll b/test/DebugInfo/inline-debug-info-multiret.ll
new file mode 100644
index 0000000..108f212
--- /dev/null
+++ b/test/DebugInfo/inline-debug-info-multiret.ll
@@ -0,0 +1,154 @@
+; RUN: opt -inline -S < %s | FileCheck %s
+;
+; A hand-edited version of inline-debug-info.ll to test inlining of a
+; function with multiple returns.
+;
+; Make sure the branch instructions created during inlining have a debug location,
+; so the range of the inlined function is correct.
+; CHECK: br label %_Z4testi.exit, !dbg ![[MD:[0-9]+]]
+; CHECK: br label %_Z4testi.exit, !dbg ![[MD]]
+; CHECK: br label %invoke.cont, !dbg ![[MD]]
+; The branch instruction has the source location of line 9 and its inlined location
+; has the source location of line 14.
+; CHECK: ![[INL:[0-9]+]] = metadata !{i32 14, i32 0, metadata {{.*}}, null}
+; CHECK: ![[MD]] = metadata !{i32 9, i32 0, metadata {{.*}}, metadata ![[INL]]}
+
+; ModuleID = 'test.cpp'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-darwin12.0.0"
+
+@_ZTIi = external constant i8*
+@global_var = external global i32
+
+; Copy of @_Z4testi from inline-debug-info.ll, hand-edited so %if.end returns directly (multiple returns)
+define i32 @_Z4testi(i32 %k)  {
+entry:
+  %retval = alloca i32, align 4
+  %k.addr = alloca i32, align 4
+  %k2 = alloca i32, align 4
+  store i32 %k, i32* %k.addr, align 4
+  call void @llvm.dbg.declare(metadata !{i32* %k.addr}, metadata !13), !dbg !14
+  call void @llvm.dbg.declare(metadata !{i32* %k2}, metadata !15), !dbg !16
+  %0 = load i32* %k.addr, align 4, !dbg !16
+  %call = call i32 @_Z8test_exti(i32 %0), !dbg !16
+  store i32 %call, i32* %k2, align 4, !dbg !16
+  %1 = load i32* %k2, align 4, !dbg !17
+  %cmp = icmp sgt i32 %1, 100, !dbg !17
+  br i1 %cmp, label %if.then, label %if.end, !dbg !17
+
+if.then:                                          ; preds = %entry
+  %2 = load i32* %k2, align 4, !dbg !18
+  store i32 %2, i32* %retval, !dbg !18
+  br label %return, !dbg !18
+
+if.end:                                           ; preds = %entry
+  store i32 0, i32* %retval, !dbg !19
+  %3 = load i32* %retval, !dbg !20                ; hand-edited
+  ret i32 %3, !dbg !20                            ; hand-edited
+
+return:                                           ; preds = %if.then
+  %4 = load i32* %retval, !dbg !20
+  ret i32 %4, !dbg !20
+}
+
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata) #1
+
+declare i32 @_Z8test_exti(i32)
+
+define i32 @_Z5test2v()  {  ; caller ("test2" in !10): its invoke of @_Z4testi is the call site that -inline expands
+entry:
+  %exn.slot = alloca i8*
+  %ehselector.slot = alloca i32
+  %e = alloca i32, align 4
+  %0 = load i32* @global_var, align 4, !dbg !21
+  %call = invoke i32 @_Z4testi(i32 %0)
+          to label %invoke.cont unwind label %lpad, !dbg !21
+
+invoke.cont:                                      ; preds = %entry
+  br label %try.cont, !dbg !23
+
+lpad:                                             ; preds = %entry
+  %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          catch i8* bitcast (i8** @_ZTIi to i8*), !dbg !21
+  %2 = extractvalue { i8*, i32 } %1, 0, !dbg !21
+  store i8* %2, i8** %exn.slot, !dbg !21
+  %3 = extractvalue { i8*, i32 } %1, 1, !dbg !21
+  store i32 %3, i32* %ehselector.slot, !dbg !21
+  br label %catch.dispatch, !dbg !21
+
+catch.dispatch:                                   ; preds = %lpad
+  %sel = load i32* %ehselector.slot, !dbg !23
+  %4 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) #2, !dbg !23
+  %matches = icmp eq i32 %sel, %4, !dbg !23
+  br i1 %matches, label %catch, label %eh.resume, !dbg !23
+
+catch:                                            ; preds = %catch.dispatch
+  call void @llvm.dbg.declare(metadata !{i32* %e}, metadata !24), !dbg !25
+  %exn = load i8** %exn.slot, !dbg !23
+  %5 = call i8* @__cxa_begin_catch(i8* %exn) #2, !dbg !23
+  %6 = bitcast i8* %5 to i32*, !dbg !23
+  %7 = load i32* %6, align 4, !dbg !23
+  store i32 %7, i32* %e, align 4, !dbg !23
+  store i32 0, i32* @global_var, align 4, !dbg !26
+  call void @__cxa_end_catch() #2, !dbg !28
+  br label %try.cont, !dbg !28
+
+try.cont:                                         ; preds = %catch, %invoke.cont
+  store i32 1, i32* @global_var, align 4, !dbg !29
+  ret i32 0, !dbg !30
+
+eh.resume:                                        ; preds = %catch.dispatch
+  %exn1 = load i8** %exn.slot, !dbg !23
+  %sel2 = load i32* %ehselector.slot, !dbg !23
+  %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn1, 0, !dbg !23
+  %lpad.val3 = insertvalue { i8*, i32 } %lpad.val, i32 %sel2, 1, !dbg !23
+  resume { i8*, i32 } %lpad.val3, !dbg !23
+}
+
+declare i32 @__gxx_personality_v0(...)
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.eh.typeid.for(i8*) #1
+
+declare i8* @__cxa_begin_catch(i8*)
+
+declare void @__cxa_end_catch()
+
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [<unknown>] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"<unknown>", metadata !""}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4, metadata !10}
+!4 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"test", metadata !"test", metadata !"_Z4testi", i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @_Z4testi, null, null, metadata !2, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [test]
+!5 = metadata !{metadata !"test.cpp", metadata !""}
+!6 = metadata !{i32 786473, metadata !5}          ; [ DW_TAG_file_type ] [test.cpp]
+!7 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = metadata !{metadata !9, metadata !9}
+!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"test2", metadata !"test2", metadata !"_Z5test2v", i32 11, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z5test2v, null, null, metadata !2, i32 11} ; [ DW_TAG_subprogram ] [line 11] [def] [test2]
+!11 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!12 = metadata !{metadata !9}
+!13 = metadata !{i32 786689, metadata !4, metadata !"k", metadata !6, i32 16777220, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [k] [line 4]
+!14 = metadata !{i32 4, i32 0, metadata !4, null}
+!15 = metadata !{i32 786688, metadata !4, metadata !"k2", metadata !6, i32 5, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [k2] [line 5]
+!16 = metadata !{i32 5, i32 0, metadata !4, null}
+!17 = metadata !{i32 6, i32 0, metadata !4, null}
+!18 = metadata !{i32 7, i32 0, metadata !4, null}
+!19 = metadata !{i32 8, i32 0, metadata !4, null}
+!20 = metadata !{i32 9, i32 0, metadata !4, null}
+!21 = metadata !{i32 14, i32 0, metadata !22, null}
+!22 = metadata !{i32 786443, metadata !5, metadata !10, i32 13, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [test.cpp]
+!23 = metadata !{i32 15, i32 0, metadata !22, null}
+!24 = metadata !{i32 786688, metadata !10, metadata !"e", metadata !6, i32 16, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [e] [line 16]
+!25 = metadata !{i32 16, i32 0, metadata !10, null}
+!26 = metadata !{i32 17, i32 0, metadata !27, null}
+!27 = metadata !{i32 786443, metadata !5, metadata !10, i32 16, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [test.cpp]
+!28 = metadata !{i32 18, i32 0, metadata !27, null}
+!29 = metadata !{i32 19, i32 0, metadata !10, null}
+!30 = metadata !{i32 20, i32 0, metadata !10, null}
diff --git a/test/DebugInfo/inline-debug-info.ll b/test/DebugInfo/inline-debug-info.ll
new file mode 100644
index 0000000..7c3267a
--- /dev/null
+++ b/test/DebugInfo/inline-debug-info.ll
@@ -0,0 +1,172 @@
+; RUN: opt -inline -S < %s | FileCheck %s
+
+; Created from source
+;
+;
+;  1 // test.cpp
+;  2 extern int global_var;
+;  3 extern int test_ext(int k);
+;  4 int test (int k) {
+;  5   int k2 = test_ext(k);
+;  6   if (k2 > 100)
+;  7     return k2;
+;  8   return 0;
+;  9 }
+; 10
+; 11 int test2() {
+; 12   try
+; 13   {
+; 14     test(global_var);
+; 15   }
+; 16   catch (int e) {
+; 17     global_var = 0;
+; 18   }
+; 19   global_var = 1;
+; 20   return 0;
+; 21 }
+
+; CHECK: _Z4testi.exit:
+; Make sure the branch instruction created during inlining has a debug location,
+; so the range of the inlined function is correct.
+; CHECK: br label %invoke.cont, !dbg ![[MD:[0-9]+]]
+; The branch instruction has the source location of line 9 and its inlined location
+; has the source location of line 14.
+; CHECK: ![[INL:[0-9]+]] = metadata !{i32 14, i32 0, metadata {{.*}}, null}
+; CHECK: ![[MD]] = metadata !{i32 9, i32 0, metadata {{.*}}, metadata ![[INL]]}
+
+; ModuleID = 'test.cpp'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-darwin12.0.0"
+
+@_ZTIi = external constant i8*
+@global_var = external global i32
+
+define i32 @_Z4testi(i32 %k)  {  ; test() from the source listing; %if.then and %if.end both branch to the single %return block
+entry:
+  %retval = alloca i32, align 4
+  %k.addr = alloca i32, align 4
+  %k2 = alloca i32, align 4
+  store i32 %k, i32* %k.addr, align 4
+  call void @llvm.dbg.declare(metadata !{i32* %k.addr}, metadata !13), !dbg !14
+  call void @llvm.dbg.declare(metadata !{i32* %k2}, metadata !15), !dbg !16
+  %0 = load i32* %k.addr, align 4, !dbg !16
+  %call = call i32 @_Z8test_exti(i32 %0), !dbg !16
+  store i32 %call, i32* %k2, align 4, !dbg !16
+  %1 = load i32* %k2, align 4, !dbg !17
+  %cmp = icmp sgt i32 %1, 100, !dbg !17
+  br i1 %cmp, label %if.then, label %if.end, !dbg !17
+
+if.then:                                          ; preds = %entry
+  %2 = load i32* %k2, align 4, !dbg !18
+  store i32 %2, i32* %retval, !dbg !18
+  br label %return, !dbg !18
+
+if.end:                                           ; preds = %entry
+  store i32 0, i32* %retval, !dbg !19
+  br label %return, !dbg !19
+
+return:                                           ; preds = %if.end, %if.then
+  %3 = load i32* %retval, !dbg !20
+  ret i32 %3, !dbg !20
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata) #1
+
+declare i32 @_Z8test_exti(i32)
+
+define i32 @_Z5test2v()  {  ; test2() from the source listing; its invoke of @_Z4testi is the call site that -inline expands
+entry:
+  %exn.slot = alloca i8*
+  %ehselector.slot = alloca i32
+  %e = alloca i32, align 4
+  %0 = load i32* @global_var, align 4, !dbg !21
+  %call = invoke i32 @_Z4testi(i32 %0)
+          to label %invoke.cont unwind label %lpad, !dbg !21
+
+invoke.cont:                                      ; preds = %entry
+  br label %try.cont, !dbg !23
+
+lpad:                                             ; preds = %entry
+  %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          catch i8* bitcast (i8** @_ZTIi to i8*), !dbg !21
+  %2 = extractvalue { i8*, i32 } %1, 0, !dbg !21
+  store i8* %2, i8** %exn.slot, !dbg !21
+  %3 = extractvalue { i8*, i32 } %1, 1, !dbg !21
+  store i32 %3, i32* %ehselector.slot, !dbg !21
+  br label %catch.dispatch, !dbg !21
+
+catch.dispatch:                                   ; preds = %lpad
+  %sel = load i32* %ehselector.slot, !dbg !23
+  %4 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) #2, !dbg !23
+  %matches = icmp eq i32 %sel, %4, !dbg !23
+  br i1 %matches, label %catch, label %eh.resume, !dbg !23
+
+catch:                                            ; preds = %catch.dispatch
+  call void @llvm.dbg.declare(metadata !{i32* %e}, metadata !24), !dbg !25
+  %exn = load i8** %exn.slot, !dbg !23
+  %5 = call i8* @__cxa_begin_catch(i8* %exn) #2, !dbg !23
+  %6 = bitcast i8* %5 to i32*, !dbg !23
+  %7 = load i32* %6, align 4, !dbg !23
+  store i32 %7, i32* %e, align 4, !dbg !23
+  store i32 0, i32* @global_var, align 4, !dbg !26
+  call void @__cxa_end_catch() #2, !dbg !28
+  br label %try.cont, !dbg !28
+
+try.cont:                                         ; preds = %catch, %invoke.cont
+  store i32 1, i32* @global_var, align 4, !dbg !29
+  ret i32 0, !dbg !30
+
+eh.resume:                                        ; preds = %catch.dispatch
+  %exn1 = load i8** %exn.slot, !dbg !23
+  %sel2 = load i32* %ehselector.slot, !dbg !23
+  %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn1, 0, !dbg !23
+  %lpad.val3 = insertvalue { i8*, i32 } %lpad.val, i32 %sel2, 1, !dbg !23
+  resume { i8*, i32 } %lpad.val3, !dbg !23
+}
+
+declare i32 @__gxx_personality_v0(...)
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.eh.typeid.for(i8*) #1
+
+declare i8* @__cxa_begin_catch(i8*)
+
+declare void @__cxa_end_catch()
+
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [<unknown>] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"<unknown>", metadata !""}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4, metadata !10}
+!4 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"test", metadata !"test", metadata !"_Z4testi", i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @_Z4testi, null, null, metadata !2, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [test]
+!5 = metadata !{metadata !"test.cpp", metadata !""}
+!6 = metadata !{i32 786473, metadata !5}          ; [ DW_TAG_file_type ] [test.cpp]
+!7 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = metadata !{metadata !9, metadata !9}
+!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"test2", metadata !"test2", metadata !"_Z5test2v", i32 11, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z5test2v, null, null, metadata !2, i32 11} ; [ DW_TAG_subprogram ] [line 11] [def] [test2]
+!11 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!12 = metadata !{metadata !9}
+!13 = metadata !{i32 786689, metadata !4, metadata !"k", metadata !6, i32 16777220, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [k] [line 4]
+!14 = metadata !{i32 4, i32 0, metadata !4, null}
+!15 = metadata !{i32 786688, metadata !4, metadata !"k2", metadata !6, i32 5, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [k2] [line 5]
+!16 = metadata !{i32 5, i32 0, metadata !4, null}
+!17 = metadata !{i32 6, i32 0, metadata !4, null}
+!18 = metadata !{i32 7, i32 0, metadata !4, null}
+!19 = metadata !{i32 8, i32 0, metadata !4, null}
+!20 = metadata !{i32 9, i32 0, metadata !4, null}
+!21 = metadata !{i32 14, i32 0, metadata !22, null}
+!22 = metadata !{i32 786443, metadata !5, metadata !10, i32 13, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [test.cpp]
+!23 = metadata !{i32 15, i32 0, metadata !22, null}
+!24 = metadata !{i32 786688, metadata !10, metadata !"e", metadata !6, i32 16, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [e] [line 16]
+!25 = metadata !{i32 16, i32 0, metadata !10, null}
+!26 = metadata !{i32 17, i32 0, metadata !27, null}
+!27 = metadata !{i32 786443, metadata !5, metadata !10, i32 16, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [test.cpp]
+!28 = metadata !{i32 18, i32 0, metadata !27, null}
+!29 = metadata !{i32 19, i32 0, metadata !10, null}
+!30 = metadata !{i32 20, i32 0, metadata !10, null}
diff --git a/test/DebugInfo/inlined-vars.ll b/test/DebugInfo/inlined-vars.ll
index f302294..841daaa 100644
--- a/test/DebugInfo/inlined-vars.ll
+++ b/test/DebugInfo/inlined-vars.ll
@@ -17,7 +17,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 4, metadata !6, metadata !"clang version 3.2 (trunk 159419)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, i32 4, metadata !6, metadata !"clang version 3.2 (trunk 159419)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5, metadata !10}
 !5 = metadata !{i32 786478, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 10, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 ()* @main, null, null, metadata !1, i32 10} ; [ DW_TAG_subprogram ]
diff --git a/test/DebugInfo/llvm-symbolizer.test b/test/DebugInfo/llvm-symbolizer.test
index 842a5e6..163bd8e 100644
--- a/test/DebugInfo/llvm-symbolizer.test
+++ b/test/DebugInfo/llvm-symbolizer.test
@@ -1,7 +1,7 @@
 RUN: echo "%p/Inputs/dwarfdump-test.elf-x86-64 0x400559" > %t.input
 RUN: echo "%p/Inputs/dwarfdump-test4.elf-x86-64 0x62c" >> %t.input
 RUN: echo "%p/Inputs/dwarfdump-inl-test.elf-x86-64 0x710" >> %t.input
-RUN: echo '"%p/Inputs/dwarfdump-test3.elf-x86-64 space" 0x633' >> %t.input
+RUN: echo "\"%p/Inputs/dwarfdump-test3.elf-x86-64 space\" 0x633" >> %t.input
 
 RUN: llvm-symbolizer --functions --inlining --demangle=false < %t.input \
 RUN:    | FileCheck %s
diff --git a/test/DebugInfo/member-pointers.ll b/test/DebugInfo/member-pointers.ll
index 4b77189..20f4e68 100644
--- a/test/DebugInfo/member-pointers.ll
+++ b/test/DebugInfo/member-pointers.ll
@@ -1,14 +1,16 @@
+; REQUIRES: object-emission
+; XFAIL: hexagon
+
 ; RUN: llc -filetype=obj -O0 < %s > %t
 ; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
 ; CHECK: DW_TAG_ptr_to_member_type
 ; CHECK: [[TYPE:.*]]:   DW_TAG_subroutine_type
 ; CHECK: DW_TAG_formal_parameter
 ; CHECK-NEXT: DW_AT_type
-; CHECK-NEXT: DW_AT_artificial [DW_FORM_flag_present]
+; CHECK-NEXT: DW_AT_artificial [DW_FORM_flag
 ; CHECK: DW_TAG_ptr_to_member_type
 ; CHECK-NEXT: DW_AT_type [DW_FORM_ref4]       (cu + {{.*}} => {[[TYPE]]})
 ; IR generated from clang -g with the following source:
-; XFAIL: hexagon
 ; struct S {
 ; };
 ;
@@ -20,7 +22,7 @@
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 4, metadata !6, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] [/home/blaikie/Development/scratch/simple.cpp] [DW_LANG_C_plus_plus]
+!0 = metadata !{i32 786449, i32 4, metadata !6, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3,  metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] [/home/blaikie/Development/scratch/simple.cpp] [DW_LANG_C_plus_plus]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5, metadata !10}
 !5 = metadata !{i32 786484, i32 0, null, metadata !"x", metadata !"x", metadata !"", metadata !6, i32 4, metadata !7, i32 0, i32 1, i64* @x, null} ; [ DW_TAG_variable ] [x] [line 4] [def]
diff --git a/test/DebugInfo/namespace.ll b/test/DebugInfo/namespace.ll
index 8d59b52..a7dcf7c 100644
--- a/test/DebugInfo/namespace.ll
+++ b/test/DebugInfo/namespace.ll
@@ -1,18 +1,45 @@
+; REQUIRES: object-emission
+
 ; RUN: llc -O0 -filetype=obj < %s > %t
 ; RUN: llvm-dwarfdump %t | FileCheck %s
 ; CHECK: debug_info contents
-; CHECK: DW_TAG_namespace
+; CHECK: [[NS1:0x[0-9a-f]*]]:{{ *}}DW_TAG_namespace
 ; CHECK-NEXT: DW_AT_name{{.*}} = "A"
 ; CHECK-NEXT: DW_AT_decl_file{{.*}}(0x0[[F1:[0-9]]])
 ; CHECK-NEXT: DW_AT_decl_line{{.*}}(0x03)
 ; CHECK-NOT: NULL
-; CHECK: DW_TAG_namespace
+; CHECK: [[NS2:0x[0-9a-f]*]]:{{ *}}DW_TAG_namespace
 ; CHECK-NEXT: DW_AT_name{{.*}} = "B"
 ; CHECK-NEXT: DW_AT_decl_file{{.*}}(0x0[[F2:[0-9]]])
 ; CHECK-NEXT: DW_AT_decl_line{{.*}}(0x01)
 ; CHECK-NOT: NULL
 ; CHECK: DW_TAG_variable
 ; CHECK-NEXT: DW_AT_name{{.*}}= "i"
+; CHECK: NULL
+; CHECK-NOT: NULL
+; CHECK: DW_TAG_imported_module
+; FIXME: This is a bug: the decl_file should be F2, but the imported module
+; inherits the file of its enclosing scope (F1).
+; CHECK-NEXT: DW_AT_decl_file{{.*}}(0x0[[F1]])
+; CHECK-NEXT: DW_AT_decl_line{{.*}}(0x04)
+; CHECK-NEXT: DW_AT_import{{.*}}=> {[[NS2]]})
+
+; CHECK: DW_TAG_subprogram
+; CHECK-NEXT: DW_AT_MIPS_linkage_name
+; CHECK-NEXT: DW_AT_name{{.*}}= "func"
+; CHECK-NOT: NULL
+; CHECK: DW_TAG_imported_module
+; CHECK-NEXT: DW_AT_decl_file{{.*}}(0x0[[F2]])
+; CHECK-NEXT: DW_AT_decl_line{{.*}}(0x0e)
+; CHECK-NEXT: DW_AT_import{{.*}}=> {[[NS1]]})
+; CHECK-NOT: NULL
+; CHECK: DW_TAG_lexical_block
+; CHECK-NOT: NULL
+; CHECK: DW_TAG_imported_module
+; CHECK-NEXT: DW_AT_decl_file{{.*}}(0x0[[F2]])
+; CHECK-NEXT: DW_AT_decl_line{{.*}}(0x0b)
+; CHECK-NEXT: DW_AT_import{{.*}}=> {[[NS2]]})
+
 ; CHECK: file_names[  [[F1]]]{{.*}}debug-info-namespace.cpp
 ; CHECK: file_names[  [[F2]]]{{.*}}foo.cpp
 
@@ -23,20 +50,81 @@
 ; namespace B {
 ; int i;
 ; }
+; using namespace B;
+; }
+;
+; using namespace A;
+; 
+; int func(bool b) {
+;   if (b) {
+;     using namespace A::B;
+;     return i;
+;   }
+;   using namespace A;
+;   return B::i;
 ; }
 
 @_ZN1A1B1iE = global i32 0, align 4
 
+; Function Attrs: nounwind uwtable
+define i32 @_Z4funcb(i1 zeroext %b) #0 {
+entry:
+  %retval = alloca i32, align 4
+  %b.addr = alloca i8, align 1
+  %frombool = zext i1 %b to i8
+  store i8 %frombool, i8* %b.addr, align 1
+  call void @llvm.dbg.declare(metadata !{i8* %b.addr}, metadata !21), !dbg !22
+  %0 = load i8* %b.addr, align 1, !dbg !23
+  %tobool = trunc i8 %0 to i1, !dbg !23
+  br i1 %tobool, label %if.then, label %if.end, !dbg !23
+
+if.then:                                          ; preds = %entry
+  %1 = load i32* @_ZN1A1B1iE, align 4, !dbg !24
+  store i32 %1, i32* %retval, !dbg !24
+  br label %return, !dbg !24
+
+if.end:                                           ; preds = %entry
+  %2 = load i32* @_ZN1A1B1iE, align 4, !dbg !25
+  store i32 %2, i32* %retval, !dbg !25
+  br label %return, !dbg !25
+
+return:                                           ; preds = %if.end, %if.then
+  %3 = load i32* %retval, !dbg !26
+  ret i32 %3, !dbg !26
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata) #1
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, metadata !2, i32 4, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 0, metadata !3, metadata !3, metadata !3, metadata !4, metadata !""} ; [ DW_TAG_compile_unit ] [/home/foo/debug-info-namespace.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{i32 786473, metadata !2}          ; [ DW_TAG_file_type ] [/home/foo/debug-info-namespace.cpp]
-!2 = metadata !{metadata !"debug-info-namespace.cpp", metadata !"/home/foo"}
-!3 = metadata !{i32 0}
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 786484, i32 0, metadata !6, metadata !"i", metadata !"i", metadata !"_ZN1A1B1iE", metadata !7, i32 2, metadata !10, i32 0, i32 1, i32* @_ZN1A1B1iE, null} ; [ DW_TAG_variable ] [i] [line 2] [def]
-!6 = metadata !{i32 786489, metadata !8, metadata !9, metadata !"B", i32 1} ; [ DW_TAG_namespace ] [B] [line 1]
-!7 = metadata !{i32 786473, metadata !8}          ; [ DW_TAG_file_type ] [/home/foo/foo.cpp]
-!8 = metadata !{metadata !"foo.cpp", metadata !"/home/foo"}
-!9 = metadata !{i32 786489, metadata !2, null, metadata !"A", i32 3} ; [ DW_TAG_namespace ] [A] [line 3]
-!10 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !11, metadata !15, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/blaikie/dev/llvm/src/tools/clang//usr/local/google/home/blaikie/dev/llvm/src/tools/clang/test/CodeGenCXX/debug-info-namespace.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !"/usr/local/google/home/blaikie/dev/llvm/src/tools/clang/test/CodeGenCXX/debug-info-namespace.cpp", metadata !"/usr/local/google/home/blaikie/dev/llvm/src/tools/clang"}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"func", metadata !"func", metadata !"_Z4funcb", i32 9, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i1)* @_Z4funcb, null, null, metadata !2, i32 9} ; [ DW_TAG_subprogram ] [line 9] [def] [func]
+!5 = metadata !{metadata !"foo.cpp", metadata !"/usr/local/google/home/blaikie/dev/llvm/src/tools/clang"}
+!6 = metadata !{i32 786473, metadata !5}          ; [ DW_TAG_file_type ] [/usr/local/google/home/blaikie/dev/llvm/build/clang/debug/foo.cpp]
+!7 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = metadata !{metadata !9, metadata !10}
+!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = metadata !{i32 786468, null, null, metadata !"bool", i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ] [bool] [line 0, size 8, align 8, offset 0, enc DW_ATE_boolean]
+!11 = metadata !{metadata !12}
+!12 = metadata !{i32 786484, i32 0, metadata !13, metadata !"i", metadata !"i", metadata !"_ZN1A1B1iE", metadata !6, i32 2, metadata !9, i32 0, i32 1, i32* @_ZN1A1B1iE, null} ; [ DW_TAG_variable ] [i] [line 2] [def]
+!13 = metadata !{i32 786489, metadata !5, metadata !14, metadata !"B", i32 1} ; [ DW_TAG_namespace ] [B] [line 1]
+!14 = metadata !{i32 786489, metadata !1, null, metadata !"A", i32 3} ; [ DW_TAG_namespace ] [A] [line 3]
+!15 = metadata !{metadata !16, metadata !17, metadata !18, metadata !20}
+!16 = metadata !{i32 786490, metadata !14, metadata !13, i32 4} ; [ DW_TAG_imported_module ]
+!17 = metadata !{i32 786490, metadata !0, metadata !14, i32 7} ; [ DW_TAG_imported_module ]
+!18 = metadata !{i32 786490, metadata !19, metadata !13, i32 11} ; [ DW_TAG_imported_module ]
+!19 = metadata !{i32 786443, metadata !5, metadata !4, i32 10, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/blaikie/dev/llvm/build/clang/debug/foo.cpp]
+!20 = metadata !{i32 786490, metadata !4, metadata !14, i32 14} ; [ DW_TAG_imported_module ]
+!21 = metadata !{i32 786689, metadata !4, metadata !"b", metadata !6, i32 16777225, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [b] [line 9]
+!22 = metadata !{i32 9, i32 0, metadata !4, null}
+!23 = metadata !{i32 10, i32 0, metadata !4, null}
+!24 = metadata !{i32 12, i32 0, metadata !19, null}
+!25 = metadata !{i32 15, i32 0, metadata !4, null}
+!26 = metadata !{i32 16, i32 0, metadata !4, null}
diff --git a/test/DebugInfo/two-cus-from-same-file.ll b/test/DebugInfo/two-cus-from-same-file.ll
index 58671d5..22cf4eb 100644
--- a/test/DebugInfo/two-cus-from-same-file.ll
+++ b/test/DebugInfo/two-cus-from-same-file.ll
@@ -3,10 +3,11 @@
 ;   blow llc up and produces something reasonable.
 ;
 
+; REQUIRES: object-emission
+
 ; RUN: llc %s -o %t -filetype=obj -O0
 ; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
 
-; XFAIL: hexagon
 ; ModuleID = 'test.bc'
 
 @str = private unnamed_addr constant [4 x i8] c"FOO\00"
@@ -33,14 +34,14 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0, !9}
 
-!0 = metadata !{i32 786449, metadata !32, i32 12, metadata !"clang version 3.2 (trunk 156513)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
+!0 = metadata !{i32 786449, metadata !32, i32 12, metadata !"clang version 3.2 (trunk 156513)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5}
 !5 = metadata !{i32 786478, metadata !6, metadata !"foo", metadata !"foo", metadata !"", metadata !6, i32 5, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void ()* @foo, null, null, metadata !1, i32 5} ; [ DW_TAG_subprogram ]
 !6 = metadata !{i32 786473, metadata !32} ; [ DW_TAG_file_type ]
 !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !8 = metadata !{null}
-!9 = metadata !{i32 786449, metadata !32, i32 12, metadata !"clang version 3.2 (trunk 156513)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !10, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
+!9 = metadata !{i32 786449, metadata !32, i32 12, metadata !"clang version 3.2 (trunk 156513)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !10, metadata !1, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
 !10 = metadata !{metadata !12}
 !12 = metadata !{i32 786478, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 11, metadata !13, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !19, i32 11} ; [ DW_TAG_subprogram ]
 !13 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !14, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
diff --git a/test/ExecutionEngine/2008-06-05-APInt-OverAShr.ll b/test/ExecutionEngine/2008-06-05-APInt-OverAShr.ll
index 0ab0274..349db69 100644
--- a/test/ExecutionEngine/2008-06-05-APInt-OverAShr.ll
+++ b/test/ExecutionEngine/2008-06-05-APInt-OverAShr.ll
@@ -1,4 +1,5 @@
-; RUN: %lli -force-interpreter=true %s | grep 1
+; RUN: %lli -force-interpreter=true %s | FileCheck %s
+; CHECK: 1
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
 target triple = "i686-pc-linux-gnu"
diff --git a/test/ExecutionEngine/MCJIT/2008-06-05-APInt-OverAShr.ll b/test/ExecutionEngine/MCJIT/2008-06-05-APInt-OverAShr.ll
index 0912897..9897602 100644
--- a/test/ExecutionEngine/MCJIT/2008-06-05-APInt-OverAShr.ll
+++ b/test/ExecutionEngine/MCJIT/2008-06-05-APInt-OverAShr.ll
@@ -1,4 +1,5 @@
-; RUN: %lli_mcjit -force-interpreter=true %s | grep 1
+; RUN: %lli_mcjit -force-interpreter=true %s | FileCheck %s
+; CHECK: 1
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
 target triple = "i686-pc-linux-gnu"
diff --git a/test/ExecutionEngine/MCJIT/2013-04-04-RelocAddend.ll b/test/ExecutionEngine/MCJIT/2013-04-04-RelocAddend.ll
new file mode 100644
index 0000000..3f402c5
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/2013-04-04-RelocAddend.ll
@@ -0,0 +1,25 @@
+; RUN: %lli_mcjit %s
+;
+; Verify that relocations against global symbols with an addend work correctly.
+;
+; Compiled from this C code:
+;
+; int test[2] = { -1, 0 };
+; int *p = &test[1];
+; 
+; int main (void)
+; {
+;   return *p;
+; }
+; 
+
+@test = global [2 x i32] [i32 -1, i32 0], align 4
+@p = global i32* getelementptr inbounds ([2 x i32]* @test, i64 0, i64 1), align 8
+
+define i32 @main() {
+entry:
+  %0 = load i32** @p, align 8
+  %1 = load i32* %0, align 4
+  ret i32 %1
+}
+
diff --git a/test/ExecutionEngine/MCJIT/eh.ll b/test/ExecutionEngine/MCJIT/eh.ll
new file mode 100644
index 0000000..c213573
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/eh.ll
@@ -0,0 +1,32 @@
+; RUN: %lli_mcjit %s
+; XFAIL: arm, cygwin, win32, mingw
+declare i8* @__cxa_allocate_exception(i64)
+declare void @__cxa_throw(i8*, i8*, i8*)
+declare i32 @__gxx_personality_v0(...)
+declare void @__cxa_end_catch()
+declare i8* @__cxa_begin_catch(i8*)
+
+@_ZTIi = external constant i8*
+
+define void @throwException() {
+  %exception = tail call i8* @__cxa_allocate_exception(i64 4)
+  call void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTIi to i8*), i8* null)
+  unreachable
+}
+
+define i32 @main() {
+entry:
+  invoke void @throwException()
+          to label %try.cont unwind label %lpad
+
+lpad:
+  %p = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          catch i8* bitcast (i8** @_ZTIi to i8*)
+  %e = extractvalue { i8*, i32 } %p, 0
+  call i8* @__cxa_begin_catch(i8* %e)
+  call void @__cxa_end_catch()
+  br label %try.cont
+
+try.cont:
+  ret i32 0
+}
diff --git a/test/ExecutionEngine/MCJIT/fpbitcast.ll b/test/ExecutionEngine/MCJIT/fpbitcast.ll
index fb5ab6f..ea39617 100644
--- a/test/ExecutionEngine/MCJIT/fpbitcast.ll
+++ b/test/ExecutionEngine/MCJIT/fpbitcast.ll
@@ -1,5 +1,6 @@
-; RUN: %lli_mcjit -force-interpreter=true %s | grep 40091eb8
-;
+; RUN: %lli_mcjit -force-interpreter=true %s | FileCheck %s
+; CHECK: 40091eb8
+
 define i32 @test(double %x) {
 entry:
 	%x46.i = bitcast double %x to i64	
diff --git a/test/ExecutionEngine/MCJIT/lit.local.cfg b/test/ExecutionEngine/MCJIT/lit.local.cfg
index fc29f65..30ed4e8 100644
--- a/test/ExecutionEngine/MCJIT/lit.local.cfg
+++ b/test/ExecutionEngine/MCJIT/lit.local.cfg
@@ -8,16 +8,17 @@ def getRoot(config):
 root = getRoot(config)
 
 targets = set(root.targets_to_build.split())
-if ('X86' in targets) | ('ARM' in targets) | ('Mips' in targets) | \
-   ('PowerPC' in targets):
+if ('X86' in targets) | ('AArch64' in targets) | ('ARM' in targets) | \
+   ('Mips' in targets) | ('PowerPC' in targets) | ('SystemZ' in targets):
     config.unsupported = False
 else:
     config.unsupported = True
 
-if root.host_arch not in ['x86', 'x86_64', 'ARM', 'Mips', 'PowerPC']:
+if root.host_arch not in ['i386', 'x86', 'x86_64',
+                          'AArch64', 'ARM', 'Mips', 'PowerPC', 'SystemZ']:
     config.unsupported = True
 
-if root.host_os in ['Darwin']:
+if 'i386-apple-darwin'  in root.target_triple:
     config.unsupported = True
 
 if 'powerpc' in root.target_triple and not 'powerpc64' in root.target_triple:
diff --git a/test/ExecutionEngine/MCJIT/non-extern-addend.ll b/test/ExecutionEngine/MCJIT/non-extern-addend.ll
new file mode 100644
index 0000000..3a6e634
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/non-extern-addend.ll
@@ -0,0 +1,12 @@
+; RUN: %lli_mcjit %s > /dev/null
+
+define i32 @foo(i32 %X, i32 %Y, double %A) {
+	%cond212 = fcmp ueq double %A, 2.000000e+00		; <i1> [#uses=1]
+	%cast110 = zext i1 %cond212 to i32		; <i32> [#uses=1]
+	ret i32 %cast110
+}
+
+define i32 @main() {
+	%reg212 = call i32 @foo( i32 0, i32 1, double 1.000000e+00 )		; <i32> [#uses=1]
+	ret i32 %reg212
+}
diff --git a/test/ExecutionEngine/MCJIT/test-global-ctors.ll b/test/ExecutionEngine/MCJIT/test-global-ctors.ll
index 4510d9b..947d8f5 100644
--- a/test/ExecutionEngine/MCJIT/test-global-ctors.ll
+++ b/test/ExecutionEngine/MCJIT/test-global-ctors.ll
@@ -1,4 +1,5 @@
 ; RUN: %lli_mcjit %s > /dev/null
+; XFAIL: darwin
 @var = global i32 1, align 4
 @llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @ctor_func }]
 @llvm.global_dtors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @dtor_func }]
diff --git a/test/ExecutionEngine/fpbitcast.ll b/test/ExecutionEngine/fpbitcast.ll
index fa84be4..e6d06f8 100644
--- a/test/ExecutionEngine/fpbitcast.ll
+++ b/test/ExecutionEngine/fpbitcast.ll
@@ -1,5 +1,6 @@
-; RUN: %lli -force-interpreter=true %s | grep 40091eb8
-;
+; RUN: %lli -force-interpreter=true %s | FileCheck %s
+; CHECK: 40091eb8
+
 define i32 @test(double %x) {
 entry:
 	%x46.i = bitcast double %x to i64	
diff --git a/test/ExecutionEngine/lit.local.cfg b/test/ExecutionEngine/lit.local.cfg
index 1f8ae69..b6945ad 100644
--- a/test/ExecutionEngine/lit.local.cfg
+++ b/test/ExecutionEngine/lit.local.cfg
@@ -7,7 +7,7 @@ def getRoot(config):
 
 root = getRoot(config)
 
-if root.host_arch in ['PowerPC', 'AArch64']:
+if root.host_arch in ['PowerPC', 'AArch64', 'SystemZ']:
     config.unsupported = True
 
 if 'hexagon' in root.target_triple:
diff --git a/test/ExecutionEngine/test-interp-vec-arithm_float.ll b/test/ExecutionEngine/test-interp-vec-arithm_float.ll
new file mode 100644
index 0000000..d7f4ac9
--- /dev/null
+++ b/test/ExecutionEngine/test-interp-vec-arithm_float.ll
@@ -0,0 +1,20 @@
+; RUN: %lli %s > /dev/null
+
+
+define i32 @main() {
+
+    %A_float = fadd <4 x float> <float 0.0, float 11.0, float 22.0, float 33.0>, <float 44.0, float 55.0, float 66.0, float 77.0>
+    %B_float = fsub <4 x float> %A_float, <float 88.0, float 99.0, float 100.0, float 111.0>
+    %C_float = fmul <4 x float> %B_float, %B_float
+    %D_float = fdiv <4 x float> %C_float, %B_float
+    %E_float = frem <4 x float> %D_float, %A_float
+
+
+    %A_double = fadd <3 x double> <double 0.0, double 111.0, double 222.0>, <double 444.0, double 555.0, double 665.0>
+    %B_double = fsub <3 x double> %A_double, <double 888.0, double 999.0, double 1001.0>
+    %C_double = fmul <3 x double> %B_double, %B_double
+    %D_double = fdiv <3 x double> %C_double, %B_double
+    %E_double = frem <3 x double> %D_double, %A_double
+
+    ret i32 0
+}
diff --git a/test/ExecutionEngine/test-interp-vec-arithm_int.ll b/test/ExecutionEngine/test-interp-vec-arithm_int.ll
new file mode 100644
index 0000000..0ee14fe
--- /dev/null
+++ b/test/ExecutionEngine/test-interp-vec-arithm_int.ll
@@ -0,0 +1,37 @@
+; RUN: %lli %s > /dev/null
+
+define i32 @main() {
+    %A_i8 = add <5 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4>, <i8 12, i8 34, i8 56, i8 78, i8 89>
+    %B_i8 = sub <5 x i8> %A_i8, <i8 11, i8 22, i8 33, i8 44, i8 55>
+    %C_i8 = mul <5 x i8> %B_i8, %B_i8
+    %D_i8 = sdiv <5 x i8> %C_i8, %C_i8
+    %E_i8 = srem <5 x i8> %D_i8, %D_i8
+    %F_i8 = udiv <5 x i8> <i8 5, i8 6, i8 7, i8 8, i8 9>, <i8 6, i8 5, i8 4, i8 3, i8 2>
+    %G_i8 = urem <5 x i8> <i8 6, i8 7, i8 8, i8 9, i8 10>, <i8 5, i8 4, i8 2, i8 2, i8 1>
+
+    %A_i16 = add <4 x i16> <i16 0, i16 1, i16 2, i16 3>, <i16 123, i16 345, i16 567, i16 789>
+    %B_i16 = sub <4 x i16> %A_i16, <i16 111, i16 222, i16 333, i16 444>
+    %C_i16 = mul <4 x i16> %B_i16, %B_i16
+    %D_i16 = sdiv <4 x i16> %C_i16, %C_i16
+    %E_i16 = srem <4 x i16> %D_i16, %D_i16
+    %F_i16 = udiv <4 x i16> <i16 5, i16 6, i16 7, i16 8>, <i16 6, i16 5, i16 4, i16 3>
+    %G_i16 = urem <4 x i16> <i16 6, i16 7, i16 8, i16 9>, <i16 5, i16 4, i16 3, i16 2>
+
+    %A_i32 = add <3 x i32> <i32 0, i32 1, i32 2>, <i32 1234, i32 3456, i32 5678>
+    %B_i32 = sub <3 x i32> %A_i32, <i32 1111, i32 2222, i32 3333>
+    %C_i32 = mul <3 x i32> %B_i32, %B_i32
+    %D_i32 = sdiv <3 x i32> %C_i32, %C_i32
+    %E_i32 = srem <3 x i32> %D_i32, %D_i32
+    %F_i32 = udiv <3 x i32> <i32 5, i32 6, i32 7>, <i32 6, i32 5, i32 4>
+    %G_i32 = urem <3 x i32> <i32 6, i32 7, i32 8>, <i32 5, i32 4, i32 3>
+
+    %A_i64 = add <2 x i64> <i64 0, i64 1>, <i64 12455, i64 34567>
+    %B_i64 = sub <2 x i64> %A_i64, <i64 11111, i64 22222>
+    %C_i64 = mul <2 x i64> %B_i64, %B_i64
+    %D_i64 = sdiv <2 x i64> %C_i64, %C_i64
+    %E_i64 = srem <2 x i64> %D_i64, %D_i64
+    %F_i64 = udiv <2 x i64> <i64 5, i64 6>, <i64 6, i64 5>
+    %G_i64 = urem <2 x i64> <i64 6, i64 7>, <i64 5, i64 3>
+ 
+    ret i32 0
+}
diff --git a/test/ExecutionEngine/test-interp-vec-loadstore.ll b/test/ExecutionEngine/test-interp-vec-loadstore.ll
index e9f5b44..e500711 100644
--- a/test/ExecutionEngine/test-interp-vec-loadstore.ll
+++ b/test/ExecutionEngine/test-interp-vec-loadstore.ll
@@ -1,4 +1,5 @@
 ; RUN: %lli -force-interpreter=true %s | FileCheck %s
+; XFAIL: mips
 ; CHECK: 1
 ; CHECK: 2
 ; CHECK: 3
diff --git a/test/ExecutionEngine/test-interp-vec-logical.ll b/test/ExecutionEngine/test-interp-vec-logical.ll
new file mode 100644
index 0000000..f8f1f0d
--- /dev/null
+++ b/test/ExecutionEngine/test-interp-vec-logical.ll
@@ -0,0 +1,22 @@
+; RUN: %lli %s > /dev/null
+
+define i32 @main() {
+    %A_i8 = and <5 x i8> <i8 4, i8 4, i8 4, i8 4, i8 4>, <i8 8, i8 8, i8 8, i8 8, i8 8>
+    %B_i8 = or <5 x i8> %A_i8, <i8 7, i8 7, i8 7, i8 7, i8 7>
+    %C_i8 = xor <5 x i8> %B_i8, %A_i8
+
+    %A_i16 = and <4 x i16> <i16 4, i16 4, i16 4, i16 4>, <i16 8, i16 8, i16 8, i16 8>
+    %B_i16 = or <4 x i16> %A_i16, <i16 7, i16 7, i16 7, i16 7>
+    %C_i16 = xor <4 x i16> %B_i16, %A_i16
+
+    %A_i32 = and <3 x i32> <i32 4, i32 4, i32 4>, <i32 8, i32 8, i32 8>
+    %B_i32 = or <3 x i32> %A_i32, <i32 7, i32 7, i32 7>
+    %C_i32 = xor <3 x i32> %B_i32, %A_i32
+
+    %A_i64 = and <2 x i64> <i64 4, i64 4>, <i64 8, i64 8>
+    %B_i64 = or <2 x i64> %A_i64, <i64 7, i64 7>
+    %C_i64 = xor <2 x i64> %B_i64, %A_i64
+
+    ret i32 0
+}
+
diff --git a/test/ExecutionEngine/test-interp-vec-setcond-fp.ll b/test/ExecutionEngine/test-interp-vec-setcond-fp.ll
new file mode 100644
index 0000000..8b9b7c7
--- /dev/null
+++ b/test/ExecutionEngine/test-interp-vec-setcond-fp.ll
@@ -0,0 +1,25 @@
+; RUN: %lli %s > /dev/null
+
+define i32 @main() {
+    %double1 = fadd <2 x double> <double 0.0, double 0.0>, <double 0.0, double 0.0>
+    %double2 = fadd <2 x double> <double 0.0, double 0.0>, <double 0.0, double 0.0>
+    %float1 = fadd <3 x float> <float 0.0, float 0.0, float 0.0>, <float 0.0, float 0.0, float 0.0>
+    %float2 = fadd <3 x float> <float 0.0, float 0.0, float 0.0>, <float 0.0, float 0.0, float 0.0>
+    %test49 = fcmp oeq <3 x float> %float1, %float2
+    %test50 = fcmp oge <3 x float> %float1, %float2
+    %test51 = fcmp ogt <3 x float> %float1, %float2
+    %test52 = fcmp ole <3 x float> %float1, %float2
+    %test53 = fcmp olt <3 x float> %float1, %float2
+    %test54 = fcmp une <3 x float> %float1, %float2
+
+    %test55 = fcmp oeq <2 x double> %double1, %double2
+    %test56 = fcmp oge <2 x double> %double1, %double2
+    %test57 = fcmp ogt <2 x double> %double1, %double2
+    %test58 = fcmp ole <2 x double> %double1, %double2
+    %test59 = fcmp olt <2 x double> %double1, %double2
+    %test60 = fcmp une <2 x double> %double1, %double2
+
+    ret i32 0
+}
+
+
diff --git a/test/ExecutionEngine/test-interp-vec-setcond-int.ll b/test/ExecutionEngine/test-interp-vec-setcond-int.ll
new file mode 100644
index 0000000..4c89109
--- /dev/null
+++ b/test/ExecutionEngine/test-interp-vec-setcond-int.ll
@@ -0,0 +1,69 @@
+; RUN: %lli %s > /dev/null
+
+define i32 @main() {
+    %int1 = add <3 x i32> <i32 0, i32 0, i32 0>, <i32 0, i32 0, i32 0>
+    %int2 = add <3 x i32> <i32 0, i32 0, i32 0>, <i32 0, i32 0, i32 0>
+    %long1 = add <2 x i64> <i64 0, i64 0>, <i64 0, i64 0>
+    %long2 = add <2 x i64> <i64 0, i64 0>, <i64 0, i64 0>
+    %sbyte1 = add <5 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0>, <i8 0, i8 0, i8 0, i8 0, i8 0>
+    %sbyte2 = add <5 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0>, <i8 0, i8 0, i8 0, i8 0, i8 0>
+    %short1 = add <4 x i16> <i16 0, i16 0, i16 0, i16 0>, <i16 0, i16 0, i16 0, i16 0>
+    %short2 = add <4 x i16> <i16 0, i16 0, i16 0, i16 0>, <i16 0, i16 0, i16 0, i16 0>
+    %ubyte1 = add <5 x i8>  <i8 0, i8 0, i8 0, i8 0, i8 0>, <i8 0, i8 0, i8 0, i8 0, i8 0>
+    %ubyte2 = add <5 x i8>  <i8 0, i8 0, i8 0, i8 0, i8 0>, <i8 0, i8 0, i8 0, i8 0, i8 0>
+    %uint1 = add <3 x i32> <i32 0, i32 0, i32 0>, <i32 0, i32 0, i32 0>
+    %uint2 = add <3 x i32> <i32 0, i32 0, i32 0>, <i32 0, i32 0, i32 0>
+    %ulong1 = add <2 x i64> <i64 0, i64 0>, <i64 0, i64 0>
+    %ulong2 = add <2 x i64> <i64 0, i64 0>, <i64 0, i64 0>
+    %ushort1 = add <4 x i16> <i16 0, i16 0, i16 0, i16 0>, <i16 0, i16 0, i16 0, i16 0>
+    %ushort2 = add <4 x i16> <i16 0, i16 0, i16 0, i16 0>, <i16 0, i16 0, i16 0, i16 0>
+    %test1 = icmp eq <5 x i8> %ubyte1, %ubyte2
+    %test2 = icmp uge <5 x i8> %ubyte1, %ubyte2
+    %test3 = icmp ugt <5 x i8> %ubyte1, %ubyte2
+    %test4 = icmp ule <5 x i8> %ubyte1, %ubyte2
+    %test5 = icmp ult <5 x i8> %ubyte1, %ubyte2
+    %test6 = icmp ne <5 x i8> %ubyte1, %ubyte2
+    %test7 = icmp eq <4 x i16> %ushort1, %ushort2
+    %test8 = icmp uge <4 x i16> %ushort1, %ushort2
+    %test9 = icmp ugt <4 x i16> %ushort1, %ushort2
+    %test10 = icmp ule <4 x i16> %ushort1, %ushort2
+    %test11 = icmp ult <4 x i16> %ushort1, %ushort2
+    %test12 = icmp ne <4 x i16> %ushort1, %ushort2 
+    %test13 = icmp eq <3 x i32> %uint1, %uint2
+    %test14 = icmp uge <3 x i32> %uint1, %uint2
+    %test15 = icmp ugt <3 x i32> %uint1, %uint2
+    %test16 = icmp ule <3 x i32> %uint1, %uint2
+    %test17 = icmp ult <3 x i32> %uint1, %uint2
+    %test18 = icmp ne <3 x i32> %uint1, %uint2
+    %test19 = icmp eq <2 x i64> %ulong1, %ulong2
+    %test20 = icmp uge <2 x i64> %ulong1, %ulong2
+    %test21 = icmp ugt <2 x i64> %ulong1, %ulong2
+    %test22 = icmp ule <2 x i64> %ulong1, %ulong2
+    %test23 = icmp ult <2 x i64> %ulong1, %ulong2
+    %test24 = icmp ne <2 x i64> %ulong1, %ulong2
+    %test25 = icmp eq <5 x i8> %sbyte1, %sbyte2
+    %test26 = icmp sge <5 x i8> %sbyte1, %sbyte2
+    %test27 = icmp sgt <5 x i8> %sbyte1, %sbyte2
+    %test28 = icmp sle <5 x i8> %sbyte1, %sbyte2
+    %test29 = icmp slt <5 x i8> %sbyte1, %sbyte2
+    %test30 = icmp ne <5 x i8> %sbyte1, %sbyte2
+    %test31 = icmp eq <4 x i16> %short1, %short2
+    %test32 = icmp sge <4 x i16> %short1, %short2
+    %test33 = icmp sgt <4 x i16> %short1, %short2
+    %test34 = icmp sle <4 x i16> %short1, %short2
+    %test35 = icmp slt <4 x i16> %short1, %short2
+    %test36 = icmp ne <4 x i16> %short1, %short2
+    %test37 = icmp eq <3 x i32> %int1, %int2
+    %test38 = icmp sge <3 x i32> %int1, %int2
+    %test39 = icmp sgt <3 x i32> %int1, %int2
+    %test40 = icmp sle <3 x i32> %int1, %int2
+    %test41 = icmp slt <3 x i32> %int1, %int2
+    %test42 = icmp ne <3 x i32> %int1, %int2
+    %test43 = icmp eq <2 x i64> %long1, %long2
+    %test44 = icmp sge <2 x i64> %long1, %long2
+    %test45 = icmp sgt <2 x i64> %long1, %long2
+    %test46 = icmp sle <2 x i64> %long1, %long2
+    %test47 = icmp slt <2 x i64> %long1, %long2
+    %test48 = icmp ne <2 x i64> %long1, %long2
+    ret i32 0
+}
diff --git a/test/Feature/aliases.ll b/test/Feature/aliases.ll
index d44dff4..13938121 100644
--- a/test/Feature/aliases.ll
+++ b/test/Feature/aliases.ll
@@ -2,6 +2,8 @@
 ; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
 ; RUN: diff %t1.ll %t2.ll
 
+@llvm.used = appending global [1 x i8*] [i8* bitcast (i32* @foo1 to i8*)], section "llvm.metadata"
+
 @bar = external global i32
 @foo1 = alias i32* @bar
 @foo2 = alias i32* @bar
diff --git a/test/FileCheck/check-not-diaginfo.txt b/test/FileCheck/check-not-diaginfo.txt
new file mode 100644
index 0000000..a4c3ca8
--- /dev/null
+++ b/test/FileCheck/check-not-diaginfo.txt
@@ -0,0 +1,7 @@
+; RUN: FileCheck -input-file %s %s 2>&1 | FileCheck -check-prefix DIAG %s
+
+CHECK-NOT: test
+
+DIAG:         CHECK-NOT: pattern specified here
+DIAG-NEXT:    CHECK-NOT: test
+DIAG-NEXT: {{^           \^}}
diff --git a/test/Instrumentation/ThreadSanitizer/tsan_basic.ll b/test/Instrumentation/ThreadSanitizer/tsan_basic.ll
index 0ecff40..19dd45b 100644
--- a/test/Instrumentation/ThreadSanitizer/tsan_basic.ll
+++ b/test/Instrumentation/ThreadSanitizer/tsan_basic.ll
@@ -49,7 +49,7 @@ define void @MemSetTest(i8* nocapture %x)  {
 entry:
     tail call void @llvm.memset.p0i8.i64(i8* %x, i8 77, i64 16, i32 4, i1 false)
     ret void
-; CHECK define void @MemSetTest
+; CHECK: define void @MemSetTest
 ; CHECK: call i8* @memset
 ; CHECK: ret void
 }
diff --git a/test/Integer/2007-01-19-TruncSext.ll b/test/Integer/2007-01-19-TruncSext.ll
index 3fee6bc..e6d89dd 100644
--- a/test/Integer/2007-01-19-TruncSext.ll
+++ b/test/Integer/2007-01-19-TruncSext.ll
@@ -1,7 +1,8 @@
 ; RUN: llvm-as %s -o - | llvm-dis > %t1.ll
 ; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
 ; RUN: diff %t1.ll %t2.ll
-; RUN: llvm-as < %s | lli --force-interpreter=true | grep -- -255
+; RUN: llvm-as < %s | lli --force-interpreter=true | FileCheck %s
+; CHECK: -255
 
 @ARRAY   = global [ 20 x i17 ] zeroinitializer
 @FORMAT  = constant [ 4 x i8 ] c"%d\0A\00"
diff --git a/test/Integer/fold-fpcast_bt.ll b/test/Integer/fold-fpcast_bt.ll
index 8e5f838..0ce776d 100644
--- a/test/Integer/fold-fpcast_bt.ll
+++ b/test/Integer/fold-fpcast_bt.ll
@@ -1,4 +1,5 @@
-; RUN: llvm-as < %s | llvm-dis | not grep bitcast
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+; CHECK-NOT: bitcast
 
 define i60 @test1() {
    ret i60 fptoui(float 0x400D9999A0000000 to i60)
diff --git a/test/Integer/packed_struct_bt.ll b/test/Integer/packed_struct_bt.ll
index 257c1c6..b8301ba 100644
--- a/test/Integer/packed_struct_bt.ll
+++ b/test/Integer/packed_struct_bt.ll
@@ -1,9 +1,9 @@
 ; RUN: llvm-as < %s | llvm-dis > %t1.ll
 ; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
 ; RUN: diff %t1.ll %t2.ll
-; RUN: not grep cast %t2.ll
-; RUN: grep "}>" %t2.ll
-; END.
+; RUN: FileCheck %s --input-file=%t2.ll
+; CHECK-NOT: cast
+; CHECK: }>
 
 %struct.anon = type <{ i8, i35, i35, i35 }>
 @foos = external global %struct.anon 
diff --git a/test/Linker/2003-01-30-LinkerRename.ll b/test/Linker/2003-01-30-LinkerRename.ll
index e7431ec..cbf7541 100644
--- a/test/Linker/2003-01-30-LinkerRename.ll
+++ b/test/Linker/2003-01-30-LinkerRename.ll
@@ -3,7 +3,8 @@
 
 ; RUN: echo "define internal i32 @foo() { ret i32 7 } " | llvm-as > %t.1.bc
 ; RUN: llvm-as %s -o %t.2.bc
-; RUN: llvm-link %t.1.bc %t.2.bc -S | grep "@foo()" | grep -v internal
+; RUN: llvm-link %t.1.bc %t.2.bc -S | FileCheck %s
+; CHECK: internal{{.*}}@foo{{[0-9]}}()
 
 define i32 @foo() { ret i32 0 }
 
diff --git a/test/Linker/2003-01-30-LinkerTypeRename.ll b/test/Linker/2003-01-30-LinkerTypeRename.ll
index 94fb5e0..d61eb6d 100644
--- a/test/Linker/2003-01-30-LinkerTypeRename.ll
+++ b/test/Linker/2003-01-30-LinkerTypeRename.ll
@@ -3,8 +3,9 @@
 
 ; RUN: echo "%%Ty = type opaque @GV = external global %%Ty*" | llvm-as > %t.1.bc
 ; RUN: llvm-as < %s > %t.2.bc
-; RUN: llvm-link %t.1.bc %t.2.bc -S | grep "%%Ty " | not grep opaque
+; RUN: llvm-link %t.1.bc %t.2.bc -S | FileCheck %s
+; CHECK: = global %Ty
 
 %Ty = type {i32}
 
-@GV = global %Ty* null
-\ No newline at end of file
+@GV = global %Ty* null
diff --git a/test/Linker/2003-04-23-LinkOnceLost.ll b/test/Linker/2003-04-23-LinkOnceLost.ll
index 98a943a..e452890 100644
--- a/test/Linker/2003-04-23-LinkOnceLost.ll
+++ b/test/Linker/2003-04-23-LinkOnceLost.ll
@@ -4,7 +4,8 @@
 ; RUN: echo " define linkonce void @foo() { ret void } " | \
 ; RUN:   llvm-as -o %t.2.bc
 ; RUN: llvm-as %s -o %t.1.bc
-; RUN: llvm-link %t.1.bc %t.2.bc -S | grep foo | grep linkonce
+; RUN: llvm-link %t.1.bc %t.2.bc -S | FileCheck %s
+; CHECK: linkonce{{.*}}foo
 
 declare void @foo()
 
diff --git a/test/Linker/2003-05-31-LinkerRename.ll b/test/Linker/2003-05-31-LinkerRename.ll
index dff861d..2e734be 100644
--- a/test/Linker/2003-05-31-LinkerRename.ll
+++ b/test/Linker/2003-05-31-LinkerRename.ll
@@ -6,7 +6,8 @@
 
 ; RUN: echo " define internal i32 @foo() { ret i32 7 } " | llvm-as > %t.1.bc
 ; RUN: llvm-as < %s > %t.2.bc
-; RUN: llvm-link %t.1.bc %t.2.bc -S | grep internal | not grep "@foo("
+; RUN: llvm-link %t.1.bc %t.2.bc -S | FileCheck %s
+; CHECK: internal {{.*}} @foo{{[0-9]}}(
 
 declare i32 @foo() 
 
diff --git a/test/Linker/2003-08-23-GlobalVarLinking.ll b/test/Linker/2003-08-23-GlobalVarLinking.ll
index e934836..122bc41 100644
--- a/test/Linker/2003-08-23-GlobalVarLinking.ll
+++ b/test/Linker/2003-08-23-GlobalVarLinking.ll
@@ -1,7 +1,8 @@
 ; RUN: llvm-as < %s > %t.out1.bc
 ; RUN: echo "%%T1 = type opaque %%T2 = type opaque @S = external global { i32, %%T1* } declare void @F(%%T2*)"\
 ; RUN:   | llvm-as > %t.out2.bc
-; RUN: llvm-link %t.out1.bc %t.out2.bc -S | not grep opaque
+; RUN: llvm-link %t.out1.bc %t.out2.bc -S | FileCheck %s
+; CHECK-NOT: opaque
 
 ; After linking this testcase, there should be no opaque types left.  The two
 ; S's should cause the opaque type to be resolved to 'int'.
diff --git a/test/Linker/2003-08-24-InheritPtrSize.ll b/test/Linker/2003-08-24-InheritPtrSize.ll
index 51d544b..dbaf9bc 100644
--- a/test/Linker/2003-08-24-InheritPtrSize.ll
+++ b/test/Linker/2003-08-24-InheritPtrSize.ll
@@ -3,7 +3,8 @@
 
 ; RUN: llvm-as < %s > %t.out1.bc
 ; RUN: echo "" | llvm-as > %t.out2.bc
-; RUN: llvm-link %t.out1.bc %t.out2.bc 2>&1 | not grep warning
+; RUN: llvm-link %t.out1.bc %t.out2.bc 2>&1 | FileCheck %s 
+; CHECK-NOT: warning
 
 target datalayout = "e-p:64:64"
 
diff --git a/test/Linker/2004-12-03-DisagreeingType.ll b/test/Linker/2004-12-03-DisagreeingType.ll
index 73d7a40..63e1529 100644
--- a/test/Linker/2004-12-03-DisagreeingType.ll
+++ b/test/Linker/2004-12-03-DisagreeingType.ll
@@ -1,7 +1,8 @@
 ; RUN: echo "@G = weak global {{{{double}}}} zeroinitializer " | \
 ; RUN:   llvm-as > %t.out2.bc
 ; RUN: llvm-as < %s > %t.out1.bc
-; RUN: llvm-link %t.out1.bc %t.out2.bc -S | not grep "}"
+; RUN: llvm-link %t.out1.bc %t.out2.bc -S | FileCheck %s
+; CHECK-NOT: }
 
 ; When linked, the global above should be eliminated, being merged with the 
 ; global below.
diff --git a/test/Linker/2005-02-12-ConstantGlobals-2.ll b/test/Linker/2005-02-12-ConstantGlobals-2.ll
index 30bfafe..7d2e813 100644
--- a/test/Linker/2005-02-12-ConstantGlobals-2.ll
+++ b/test/Linker/2005-02-12-ConstantGlobals-2.ll
@@ -3,6 +3,7 @@
 
 ; RUN: echo "@X = external constant i32" | llvm-as > %t.2.bc
 ; RUN: llvm-as < %s > %t.1.bc
-; RUN: llvm-link %t.1.bc %t.2.bc -S | grep "global i32 7"
+; RUN: llvm-link %t.1.bc %t.2.bc -S | FileCheck %s
+; CHECK: global i32 7
 
 @X = global i32 7
diff --git a/test/Linker/2005-02-12-ConstantGlobals.ll b/test/Linker/2005-02-12-ConstantGlobals.ll
index 93709cf..db99060 100644
--- a/test/Linker/2005-02-12-ConstantGlobals.ll
+++ b/test/Linker/2005-02-12-ConstantGlobals.ll
@@ -3,6 +3,7 @@
 
 ; RUN: echo "@X = global i32 7" | llvm-as > %t.2.bc
 ; RUN: llvm-as < %s > %t.1.bc
-; RUN: llvm-link %t.1.bc %t.2.bc -S | grep "global i32 7"
+; RUN: llvm-link %t.1.bc %t.2.bc -S | FileCheck %s
+; CHECK: global i32 7
 
 @X = external constant i32		; <i32*> [#uses=0]
diff --git a/test/Linker/2005-12-06-AppendingZeroLengthArrays.ll b/test/Linker/2005-12-06-AppendingZeroLengthArrays.ll
index d7a34c8..b99b3a8 100644
--- a/test/Linker/2005-12-06-AppendingZeroLengthArrays.ll
+++ b/test/Linker/2005-12-06-AppendingZeroLengthArrays.ll
@@ -1,7 +1,8 @@
 ; RUN: echo " @G = appending global [0 x i32] zeroinitializer " | \
 ; RUN:   llvm-as > %t.out2.bc
 ; RUN: llvm-as < %s > %t.out1.bc
-; RUN: llvm-link %t.out1.bc %t.out2.bc -S | grep "@G ="
+; RUN: llvm-link %t.out1.bc %t.out2.bc -S | FileCheck %s
+; CHECK: @G =
 
 ; When linked, the globals should be merged, and the result should still 
 ; be named '@G'.
diff --git a/test/Linker/2006-06-15-GlobalVarAlignment.ll b/test/Linker/2006-06-15-GlobalVarAlignment.ll
index eec8f63..c9f9b0e 100644
--- a/test/Linker/2006-06-15-GlobalVarAlignment.ll
+++ b/test/Linker/2006-06-15-GlobalVarAlignment.ll
@@ -2,6 +2,7 @@
 
 ; RUN: echo "@X = global i32 7, align 8" | llvm-as > %t.2.bc
 ; RUN: llvm-as < %s > %t.1.bc
-; RUN: llvm-link %t.1.bc %t.2.bc -S | grep "align 8"
+; RUN: llvm-link %t.1.bc %t.2.bc -S | FileCheck %s
+; CHECK: align 8
 
 @X = weak global i32 7, align 4
diff --git a/test/Linker/2008-03-07-DroppedSection_a.ll b/test/Linker/2008-03-07-DroppedSection_a.ll
index ec9d5c2..58baad9 100644
--- a/test/Linker/2008-03-07-DroppedSection_a.ll
+++ b/test/Linker/2008-03-07-DroppedSection_a.ll
@@ -1,7 +1,8 @@
 ; RUN: llvm-as < %s > %t.bc
 ; RUN: llvm-as < %p/2008-03-07-DroppedSection_b.ll > %t2.bc
 ; RUN: llvm-link %t.bc %t2.bc -o %t3.bc
-; RUN: llvm-dis < %t3.bc | grep ".data.init_task"
+; RUN: llvm-dis < %t3.bc | FileCheck %s
+; CHECK: .data.init_task
 
 ; ModuleID = 't.bc'
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
diff --git a/test/Linker/2008-03-07-DroppedSection_b.ll b/test/Linker/2008-03-07-DroppedSection_b.ll
index 63b64f6..9bcb80d 100644
--- a/test/Linker/2008-03-07-DroppedSection_b.ll
+++ b/test/Linker/2008-03-07-DroppedSection_b.ll
@@ -1,7 +1,8 @@
 ; RUN: llvm-as < %s > %t.bc
 ; RUN: llvm-as < %p/2008-03-07-DroppedSection_a.ll > %t2.bc
 ; RUN: llvm-link %t.bc %t2.bc -o %t3.bc
-; RUN: llvm-dis < %t3.bc | grep ".data.init_task"
+; RUN: llvm-dis < %t3.bc | FileCheck %s
+; CHECK: .data.init_task
 
 ; ModuleID = 'u.bc'
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
diff --git a/test/Linker/2008-06-26-AddressSpace.ll b/test/Linker/2008-06-26-AddressSpace.ll
index e1d3574..d4310bc 100644
--- a/test/Linker/2008-06-26-AddressSpace.ll
+++ b/test/Linker/2008-06-26-AddressSpace.ll
@@ -2,8 +2,9 @@
 ; in different modules.
 ; RUN: llvm-as %s -o %t.foo1.bc
 ; RUN: echo | llvm-as -o %t.foo2.bc
-; RUN: llvm-link %t.foo2.bc %t.foo1.bc -S | grep "addrspace(2)"
-; RUN: llvm-link %t.foo1.bc %t.foo2.bc -S | grep "addrspace(2)"
+; RUN: llvm-link %t.foo2.bc %t.foo1.bc -S | FileCheck %s
+; RUN: llvm-link %t.foo1.bc %t.foo2.bc -S | FileCheck %s
+; CHECK: addrspace(2)
 ; rdar://6038021
 
 @G = addrspace(2) global i32 256 
diff --git a/test/Linker/2011-08-18-unique-class-type.ll b/test/Linker/2011-08-18-unique-class-type.ll
index cae1245..328e83b 100644
--- a/test/Linker/2011-08-18-unique-class-type.ll
+++ b/test/Linker/2011-08-18-unique-class-type.ll
@@ -1,4 +1,6 @@
-; RUN: llvm-link %s %p/2011-08-18-unique-class-type2.ll -S -o - | grep DW_TAG_class_type | count 1
+; RUN: llvm-link %s %p/2011-08-18-unique-class-type2.ll -S -o - | FileCheck %s
+; CHECK: DW_TAG_class_type
+; CHECK-NOT: DW_TAG_class_type
 ; Test to check there is only one MDNode for class A after linking.
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
diff --git a/test/Linker/2011-08-18-unique-debug-type.ll b/test/Linker/2011-08-18-unique-debug-type.ll
index 696fdb3..cc0df4d 100644
--- a/test/Linker/2011-08-18-unique-debug-type.ll
+++ b/test/Linker/2011-08-18-unique-debug-type.ll
@@ -1,6 +1,6 @@
-
-; RUN: llvm-link %s %p/2011-08-18-unique-debug-type2.ll -S -o - | grep "int" | grep -v "^; ModuleID" | count 1
+; RUN: llvm-link %s %p/2011-08-18-unique-debug-type2.ll -S -o - | FileCheck %s
 ; Test to check only one MDNode for "int" after linking.
+; CHECK: !"int"
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-macosx10.7.0"
 
diff --git a/test/Linker/AppendingLinkage.ll b/test/Linker/AppendingLinkage.ll
index 014ead9..5beff5a 100644
--- a/test/Linker/AppendingLinkage.ll
+++ b/test/Linker/AppendingLinkage.ll
@@ -3,7 +3,8 @@
 ; RUN: echo "@X = appending global [1 x i32] [i32 8] " | \
 ; RUN:   llvm-as > %t.2.bc
 ; RUN: llvm-as < %s > %t.1.bc
-; RUN: llvm-link %t.1.bc %t.2.bc -S | grep 7 | grep 4 | grep 8
+; RUN: llvm-link %t.1.bc %t.2.bc -S | FileCheck %s
+; CHECK: [i32 7, i32 4, i32 8]
 
 @X = appending global [2 x i32] [ i32 7, i32 4 ]		; <[2 x i32]*> [#uses=2]
 @Y = global i32* getelementptr ([2 x i32]* @X, i64 0, i64 0)		; <i32**> [#uses=0]
diff --git a/test/Linker/AppendingLinkage2.ll b/test/Linker/AppendingLinkage2.ll
index 7385efb..341ca16 100644
--- a/test/Linker/AppendingLinkage2.ll
+++ b/test/Linker/AppendingLinkage2.ll
@@ -3,6 +3,7 @@
 ; RUN: echo "@X = appending global [1 x i32] [i32 8] " | \
 ; RUN:   llvm-as > %t.2.bc
 ; RUN: llvm-as < %s > %t.1.bc
-; RUN: llvm-link %t.1.bc %t.2.bc -S | grep 7 | grep 8
+; RUN: llvm-link %t.1.bc %t.2.bc -S | FileCheck %s
+; CHECK: [i32 7, i32 8]
 
 @X = appending global [1 x i32] [ i32 7 ]		; <[1 x i32]*> [#uses=0]
diff --git a/test/Linker/ConstantGlobals1.ll b/test/Linker/ConstantGlobals1.ll
index 716eb3d..a2bb6fb 100644
--- a/test/Linker/ConstantGlobals1.ll
+++ b/test/Linker/ConstantGlobals1.ll
@@ -3,7 +3,8 @@
 ; RUN: echo "@X = constant [1 x i32] [i32 8] " | \
 ; RUN:   llvm-as > %t.2.bc
 ; RUN: llvm-as < %s > %t.1.bc
-; RUN: llvm-link %t.1.bc %t.2.bc -S | grep constant
+; RUN: llvm-link %t.1.bc %t.2.bc -S | FileCheck %s
+; CHECK: constant
 
 @X = external global [1 x i32]		; <[1 x i32]*> [#uses=0]
 
diff --git a/test/Linker/ConstantGlobals2.ll b/test/Linker/ConstantGlobals2.ll
index ad0f8e2..4713779 100644
--- a/test/Linker/ConstantGlobals2.ll
+++ b/test/Linker/ConstantGlobals2.ll
@@ -3,7 +3,8 @@
 ; RUN: echo "@X = external global [1 x i32] " | \
 ; RUN:   llvm-as > %t.2.bc
 ; RUN: llvm-as < %s > %t.1.bc
-; RUN: llvm-link %t.1.bc %t.2.bc -S | grep constant
+; RUN: llvm-link %t.1.bc %t.2.bc -S | FileCheck %s
+; CHECK: constant
 
 @X = constant [1 x i32] [ i32 12 ]		; <[1 x i32]*> [#uses=0]
 
diff --git a/test/Linker/ConstantGlobals3.ll b/test/Linker/ConstantGlobals3.ll
index 5aa26bc..6b4ed24 100644
--- a/test/Linker/ConstantGlobals3.ll
+++ b/test/Linker/ConstantGlobals3.ll
@@ -3,6 +3,7 @@
 ; RUN: echo "@X = external constant [1 x i32] " | \
 ; RUN:   llvm-as > %t.2.bc
 ; RUN: llvm-as < %s > %t.1.bc
-; RUN: llvm-link %t.1.bc %t.2.bc -S | grep constant
+; RUN: llvm-link %t.1.bc %t.2.bc -S | FileCheck %s
+; CHECK: constant
 
 @X = external global [1 x i32]		; <[1 x i32]*> [#uses=0]
diff --git a/test/Linker/link-global-to-func.ll b/test/Linker/link-global-to-func.ll
index 9d969d7..4d83fe5 100644
--- a/test/Linker/link-global-to-func.ll
+++ b/test/Linker/link-global-to-func.ll
@@ -1,7 +1,8 @@
 ; RUN: llvm-as %s -o %t1.bc
 ; RUN: echo "declare void @__eprintf(i8*, i8*, i32, i8*) noreturn     define void @foo() {      tail call void @__eprintf( i8* undef, i8* undef, i32 4, i8* null ) noreturn nounwind       unreachable }" | llvm-as -o %t2.bc
-; RUN: llvm-link %t2.bc %t1.bc -S | grep __eprintf
-; RUN: llvm-link %t1.bc %t2.bc -S | grep __eprintf
+; RUN: llvm-link %t2.bc %t1.bc -S | FileCheck %s
+; RUN: llvm-link %t1.bc %t2.bc -S | FileCheck %s
+; CHECK: __eprintf
 
 ; rdar://6072702
 
diff --git a/test/Linker/linknamedmdnode.ll b/test/Linker/linknamedmdnode.ll
index e6b779f..73e7554 100644
--- a/test/Linker/linknamedmdnode.ll
+++ b/test/Linker/linknamedmdnode.ll
@@ -1,6 +1,7 @@
 ; RUN: llvm-as < %s > %t.bc
 ; RUN: llvm-as < %p/linknamedmdnode2.ll > %t2.bc
-; RUN: llvm-link %t.bc %t2.bc -S | grep "!llvm.stuff = !{!0, !1}"
+; RUN: llvm-link %t.bc %t2.bc -S | FileCheck %s
+; CHECK: !llvm.stuff = !{!0, !1}
 
 !0 = metadata !{i32 42}
 !llvm.stuff = !{!0}
diff --git a/test/Linker/redefinition.ll b/test/Linker/redefinition.ll
index 23ba6a1..64a8c34 100644
--- a/test/Linker/redefinition.ll
+++ b/test/Linker/redefinition.ll
@@ -3,8 +3,7 @@
 ; RUN: llvm-as %s -o %t.foo1.bc
 ; RUN: llvm-as %s -o %t.foo2.bc
 ; RUN: echo "define void @foo(i32 %x) { ret void }" | llvm-as -o %t.foo3.bc
-; RUN: not llvm-link %t.foo1.bc %t.foo2.bc -o %t.bc 2>&1 | \
-; RUN:   grep "symbol multiply defined"
-; RUN: not llvm-link %t.foo1.bc %t.foo3.bc -o %t.bc 2>&1 | \
-; RUN:   grep "symbol multiply defined"
+; RUN: not llvm-link %t.foo1.bc %t.foo2.bc -o %t.bc 2>&1 | FileCheck %s
+; RUN: not llvm-link %t.foo1.bc %t.foo3.bc -o %t.bc 2>&1 | FileCheck %s
+; CHECK: symbol multiply defined
 define void @foo() { ret void }
diff --git a/test/Linker/weakextern.ll b/test/Linker/weakextern.ll
index 3a72a48..b9f2584 100644
--- a/test/Linker/weakextern.ll
+++ b/test/Linker/weakextern.ll
@@ -1,9 +1,10 @@
 ; RUN: llvm-as < %s > %t.bc
 ; RUN: llvm-as < %p/testlink1.ll > %t2.bc
 ; RUN: llvm-link %t.bc %t.bc %t2.bc -o %t1.bc
-; RUN: llvm-dis < %t1.bc | grep "kallsyms_names = extern_weak"
-; RUN: llvm-dis < %t1.bc | grep "MyVar = external global i32"
-; RUN: llvm-dis < %t1.bc | grep "Inte = global i32"
+; RUN: llvm-dis < %t1.bc | FileCheck %s
+; CHECK: kallsyms_names = extern_weak
+; CHECK: Inte = global i32
+; CHECK: MyVar = external global i32
 
 @kallsyms_names = extern_weak global [0 x i8]		; <[0 x i8]*> [#uses=0]
 @MyVar = extern_weak global i32		; <i32*> [#uses=0]
diff --git a/test/MC/AArch64/elf-globaladdress.ll b/test/MC/AArch64/elf-globaladdress.ll
index 190439d..942920b 100644
--- a/test/MC/AArch64/elf-globaladdress.ll
+++ b/test/MC/AArch64/elf-globaladdress.ll
@@ -1,10 +1,10 @@
 ;; RUN: llc -mtriple=aarch64-none-linux-gnu -filetype=obj %s -o - | \
-;; RUN:   elf-dump | FileCheck -check-prefix=OBJ %s
+;; RUN:   llvm-readobj -h -r | FileCheck -check-prefix=OBJ %s
 
 ; Also take it on a round-trip through llvm-mc to stretch assembly-parsing's legs:
 ;; RUN: llc -mtriple=aarch64-none-linux-gnu %s -o - | \
-;; RUN:     llvm-mc -arch=aarch64 -filetype=obj -o - | \
-;; RUN:     elf-dump | FileCheck -check-prefix=OBJ %s
+;; RUN:     llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj -o - | \
+;; RUN:     llvm-readobj -h -r | FileCheck -check-prefix=OBJ %s
 
 @var8 = global i8 0
 @var16 = global i16 0
@@ -35,77 +35,28 @@ define void @address() {
 }
 
 ; Check we're using EM_AARCH64
-; OBJ: 'e_machine', 0x00
-
-; OBJ: .rela.text
-
-; var8
-; R_AARCH64_ADR_PREL_PG_HI21 against var8
-; OBJ: 'r_sym', 0x0000000f
-; OBJ-NEXT: 'r_type', 0x00000113
-
-; R_AARCH64_LDST8_ABS_LO12_NC against var8
-; OBJ: 'r_sym', 0x0000000f
-; OBJ-NEXT: 'r_type', 0x00000116
-
-
-; var16
-; R_AARCH64_ADR_PREL_PG_HI21 against var16
-; OBJ: 'r_sym', 0x0000000c
-; OBJ-NEXT: 'r_type', 0x00000113
-
-; R_AARCH64_LDST16_ABS_LO12_NC against var16
-; OBJ: 'r_sym', 0x0000000c
-; OBJ-NEXT: 'r_type', 0x0000011c
-
-
-; var32
-; R_AARCH64_ADR_PREL_PG_HI21 against var32
-; OBJ: 'r_sym', 0x0000000d
-; OBJ-NEXT: 'r_type', 0x00000113
-
-; R_AARCH64_LDST32_ABS_LO12_NC against var32
-; OBJ: 'r_sym', 0x0000000d
-; OBJ-NEXT: 'r_type', 0x0000011d
-
-
-; var64
-; R_AARCH64_ADR_PREL_PG_HI21 against var64
-; OBJ: 'r_sym', 0x0000000e
-; OBJ-NEXT: 'r_type', 0x00000113
-
-; R_AARCH64_LDST64_ABS_LO12_NC against var64
-; OBJ: 'r_sym', 0x0000000e
-; OBJ-NEXT: 'r_type', 0x0000011e
+; OBJ: ElfHeader {
+; OBJ:   Machine: EM_AARCH64
+; OBJ: }
+
+; OBJ: Relocations [
+; OBJ:   Section (1) .text {
+; OBJ:     0x{{[0-9,A-F]+}} R_AARCH64_ADR_PREL_PG_HI21   var8
+; OBJ:     0x{{[0-9,A-F]+}} R_AARCH64_LDST8_ABS_LO12_NC  var8
+; OBJ:     0x{{[0-9,A-F]+}} R_AARCH64_ADR_PREL_PG_HI21   var16
+; OBJ:     0x{{[0-9,A-F]+}} R_AARCH64_LDST16_ABS_LO12_NC var16
+; OBJ:     0x{{[0-9,A-F]+}} R_AARCH64_ADR_PREL_PG_HI21   var32
+; OBJ:     0x{{[0-9,A-F]+}} R_AARCH64_LDST32_ABS_LO12_NC var32
+; OBJ:     0x{{[0-9,A-F]+}} R_AARCH64_ADR_PREL_PG_HI21   var64
+; OBJ:     0x{{[0-9,A-F]+}} R_AARCH64_LDST64_ABS_LO12_NC var64
 
 ; This is on the store, so not really important, but it stops the next
 ; match working.
-; R_AARCH64_LDST64_ABS_LO12_NC against var64
-; OBJ: 'r_sym', 0x0000000e
-; OBJ-NEXT: 'r_type', 0x0000011e
-
+; OBJ:     0x{{[0-9,A-F]+}} R_AARCH64_LDST64_ABS_LO12_NC var64
 
 ; Pure address-calculation against var64
-; R_AARCH64_ADR_PREL_PG_HI21 against var64
-; OBJ: 'r_sym', 0x0000000e
-; OBJ-NEXT: 'r_type', 0x00000113
-
-; R_AARCH64_ADD_ABS_LO12_NC against var64
-; OBJ: 'r_sym', 0x0000000e
-; OBJ-NEXT: 'r_type', 0x00000115
-
-
-; Make sure the symbols don't move around, otherwise relocation info
-; will be wrong:
-
-; OBJ: Symbol 12
-; OBJ-NEXT: var16
-
-; OBJ: Symbol 13
-; OBJ-NEXT: var32
-
-; OBJ: Symbol 14
-; OBJ-NEXT: var64
+; OBJ:     0x{{[0-9,A-F]+}} R_AARCH64_ADR_PREL_PG_HI21   var64
+; OBJ:     0x{{[0-9,A-F]+}} R_AARCH64_ADD_ABS_LO12_NC    var64
 
-; OBJ: Symbol 15
-; OBJ-NEXT: var8
+; OBJ:   }
+; OBJ: ]
diff --git a/test/MC/AArch64/elf-objdump.s b/test/MC/AArch64/elf-objdump.s
index c5aa5b1..51d444a 100644
--- a/test/MC/AArch64/elf-objdump.s
+++ b/test/MC/AArch64/elf-objdump.s
@@ -1,5 +1,5 @@
 // 64 bit little endian
-// RUN: llvm-mc -filetype=obj -arch=aarch64 -triple aarch64-none-linux-gnu %s -o - | llvm-objdump -d
+// RUN: llvm-mc -filetype=obj -triple aarch64-none-linux-gnu %s -o - | llvm-objdump -d
 
 // We just want to see if llvm-objdump works at all.
 // CHECK: .text
diff --git a/test/MC/AArch64/elf-reloc-addsubimm.s b/test/MC/AArch64/elf-reloc-addsubimm.s
index 7fa6e90..0321dda 100644
--- a/test/MC/AArch64/elf-reloc-addsubimm.s
+++ b/test/MC/AArch64/elf-reloc-addsubimm.s
@@ -1,13 +1,10 @@
-// RUN: llvm-mc -arch=aarch64 -filetype=obj %s -o - | \
-// RUN:   elf-dump | FileCheck -check-prefix=OBJ %s
+// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj %s -o - | \
+// RUN:   llvm-readobj -r | FileCheck -check-prefix=OBJ %s
 
         add x2, x3, #:lo12:some_label
-// OBJ: .rela.text
 
-// OBJ: 'r_offset', 0x0000000000000000
-// OBJ-NEXT:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x00000115
-
-// OBJ: .symtab
-// OBJ: Symbol 5
-// OBJ-NEXT: some_label
-\ No newline at end of file
+// OBJ:      Relocations [
+// OBJ-NEXT:   Section (1) .text {
+// OBJ-NEXT:     0x0 R_AARCH64_ADD_ABS_LO12_NC some_label 0x0
+// OBJ-NEXT:   }
+// OBJ-NEXT: ]
diff --git a/test/MC/AArch64/elf-reloc-condbr.s b/test/MC/AArch64/elf-reloc-condbr.s
index 283d3b9..684e75a 100644
--- a/test/MC/AArch64/elf-reloc-condbr.s
+++ b/test/MC/AArch64/elf-reloc-condbr.s
@@ -1,13 +1,10 @@
-// RUN: llvm-mc -arch=aarch64 -filetype=obj %s -o - | \
-// RUN:   elf-dump | FileCheck -check-prefix=OBJ %s
+// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj %s -o - | \
+// RUN:   llvm-readobj -r | FileCheck -check-prefix=OBJ %s
 
         b.eq somewhere
-// OBJ: .rela.text
 
-// OBJ: 'r_offset', 0x0000000000000000
-// OBJ-NEXT:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x00000118
-
-// OBJ: .symtab
-// OBJ: Symbol 5
-// OBJ-NEXT: somewhere
-\ No newline at end of file
+// OBJ:      Relocations [
+// OBJ-NEXT:   Section (1) .text {
+// OBJ-NEXT:     0x0 R_AARCH64_CONDBR19 somewhere 0x0
+// OBJ-NEXT:   }
+// OBJ-NEXT: ]
diff --git a/test/MC/AArch64/elf-reloc-ldrlit.s b/test/MC/AArch64/elf-reloc-ldrlit.s
index ce9ff49..de43c4f 100644
--- a/test/MC/AArch64/elf-reloc-ldrlit.s
+++ b/test/MC/AArch64/elf-reloc-ldrlit.s
@@ -1,28 +1,16 @@
-// RUN: llvm-mc -arch=aarch64 -filetype=obj %s -o - | \
-// RUN:   elf-dump | FileCheck -check-prefix=OBJ %s
+// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj %s -o - | \
+// RUN:   llvm-readobj -r | FileCheck -check-prefix=OBJ %s
 
         ldr x0, some_label
         ldr w3, some_label
         ldrsw x9, some_label
         prfm pldl3keep, some_label
-// OBJ: .rela.text
 
-// OBJ: 'r_offset', 0x0000000000000000
-// OBJ:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x00000111
-
-// OBJ: 'r_offset', 0x0000000000000004
-// OBJ:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x00000111
-
-// OBJ: 'r_offset', 0x0000000000000008
-// OBJ:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x00000111
-
-// OBJ: 'r_offset', 0x000000000000000c
-// OBJ:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x00000111
-
-// OBJ: .symtab
-// OBJ: Symbol 5
-// OBJ-NEXT: some_label
-\ No newline at end of file
+// OBJ:      Relocations [
+// OBJ-NEXT:   Section (1) .text {
+// OBJ-NEXT:     0x0 R_AARCH64_LD_PREL_LO19 some_label 0x0
+// OBJ-NEXT:     0x4 R_AARCH64_LD_PREL_LO19 some_label 0x0
+// OBJ-NEXT:     0x8 R_AARCH64_LD_PREL_LO19 some_label 0x0
+// OBJ-NEXT:     0xC R_AARCH64_LD_PREL_LO19 some_label 0x0
+// OBJ-NEXT:   }
+// OBJ-NEXT: ]
diff --git a/test/MC/AArch64/elf-reloc-ldstunsimm.s b/test/MC/AArch64/elf-reloc-ldstunsimm.s
index 345fc82..e1f841bd 100644
--- a/test/MC/AArch64/elf-reloc-ldstunsimm.s
+++ b/test/MC/AArch64/elf-reloc-ldstunsimm.s
@@ -1,5 +1,5 @@
-// RUN: llvm-mc -arch=aarch64 -filetype=obj %s -o - | \
-// RUN:   elf-dump | FileCheck -check-prefix=OBJ %s
+// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj %s -o - | \
+// RUN:   llvm-readobj -r | FileCheck -check-prefix=OBJ %s
 
         ldrb w0, [sp, #:lo12:some_label]
         ldrh w0, [sp, #:lo12:some_label]
@@ -7,28 +7,12 @@
         ldr x0, [sp, #:lo12:some_label]
         str q0, [sp, #:lo12:some_label]
 
-// OBJ: .rela.text
-
-// OBJ: 'r_offset', 0x0000000000000000
-// OBJ-NEXT:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x00000116
-
-// OBJ: 'r_offset', 0x0000000000000004
-// OBJ-NEXT:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x0000011c
-
-// OBJ: 'r_offset', 0x0000000000000008
-// OBJ-NEXT:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x0000011d
-
-// OBJ: 'r_offset', 0x000000000000000c
-// OBJ-NEXT:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x0000011e
-
-// OBJ: 'r_offset', 0x0000000000000010
-// OBJ-NEXT:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x0000012b
-
-// OBJ: .symtab
-// OBJ: Symbol 5
-// OBJ-NEXT: some_label
+// OBJ:      Relocations [
+// OBJ-NEXT:   Section (1) .text {
+// OBJ-NEXT:     0x0  R_AARCH64_LDST8_ABS_LO12_NC   some_label 0x0
+// OBJ-NEXT:     0x4  R_AARCH64_LDST16_ABS_LO12_NC  some_label 0x0
+// OBJ-NEXT:     0x8  R_AARCH64_LDST32_ABS_LO12_NC  some_label 0x0
+// OBJ-NEXT:     0xC  R_AARCH64_LDST64_ABS_LO12_NC  some_label 0x0
+// OBJ-NEXT:     0x10 R_AARCH64_LDST128_ABS_LO12_NC some_label 0x0
+// OBJ-NEXT:   }
+// OBJ-NEXT: ]
diff --git a/test/MC/AArch64/elf-reloc-movw.s b/test/MC/AArch64/elf-reloc-movw.s
index cb7dc67..8a7e532 100644
--- a/test/MC/AArch64/elf-reloc-movw.s
+++ b/test/MC/AArch64/elf-reloc-movw.s
@@ -1,5 +1,5 @@
-// RUN: llvm-mc -arch=aarch64 -filetype=obj %s -o - | \
-// RUN:   elf-dump | FileCheck -check-prefix=OBJ %s
+// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj %s -o - | \
+// RUN:   llvm-readobj -r | FileCheck -check-prefix=OBJ %s
 
         movz x0, #:abs_g0:some_label
         movk x0, #:abs_g0_nc:some_label
@@ -21,78 +21,22 @@
 
         movz x19, #:abs_g2_s:some_label
         movn x19, #:abs_g2_s:some_label
-// OBJ: .rela.text
 
-// :abs_g0: => R_AARCH64_MOVW_UABS_G0
-// OBJ: 'r_offset', 0x0000000000000000
-// OBJ:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x00000107
-
-// :abs_g0_nc: => R_AARCH64_MOVW_UABS_G0_NC
-// OBJ: 'r_offset', 0x0000000000000004
-// OBJ:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x00000108
-
-// :abs_g1: => R_AARCH64_MOVW_UABS_G1
-// OBJ: 'r_offset', 0x0000000000000008
-// OBJ:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x00000109
-
-// :abs_g1_nc: => R_AARCH64_MOVW_UABS_G1_NC
-// OBJ: 'r_offset', 0x000000000000000c
-// OBJ:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x0000010a
-
-// :abs_g2: => R_AARCH64_MOVW_UABS_G2
-// OBJ: 'r_offset', 0x0000000000000010
-// OBJ:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x0000010b
-
-// :abs_g2_nc: => R_AARCH64_MOVW_UABS_G2_NC
-// OBJ: 'r_offset', 0x0000000000000014
-// OBJ:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x0000010c
-
-// :abs_g3: => R_AARCH64_MOVW_UABS_G3
-// OBJ: 'r_offset', 0x0000000000000018
-// OBJ:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x0000010d
-
-// :abs_g3: => R_AARCH64_MOVW_UABS_G3
-// OBJ: 'r_offset', 0x000000000000001c
-// OBJ:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x0000010d
-
-// :abs_g0_s: => R_AARCH64_MOVW_SABS_G0
-// OBJ: 'r_offset', 0x0000000000000020
-// OBJ:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x0000010e
-
-// :abs_g0_s: => R_AARCH64_MOVW_SABS_G0
-// OBJ: 'r_offset', 0x0000000000000024
-// OBJ:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x0000010e
-
-// :abs_g1_s: => R_AARCH64_MOVW_SABS_G1
-// OBJ: 'r_offset', 0x0000000000000028
-// OBJ:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x0000010f
-
-// :abs_g1_s: => R_AARCH64_MOVW_SABS_G1
-// OBJ: 'r_offset', 0x000000000000002c
-// OBJ:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x0000010f
-
-// :abs_g2_s: => R_AARCH64_MOVW_SABS_G2
-// OBJ: 'r_offset', 0x0000000000000030
-// OBJ:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x00000110
-
-// :abs_g2_s: => R_AARCH64_MOVW_SABS_G2
-// OBJ: 'r_offset', 0x0000000000000034
-// OBJ:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x00000110
-
-// OBJ: .symtab
-// OBJ: Symbol 5
-// OBJ-NEXT: some_label
+// OBJ:      Relocations [
+// OBJ-NEXT:   Section (1) .text {
+// OBJ-NEXT:     0x0  R_AARCH64_MOVW_UABS_G0    some_label 0x0
+// OBJ-NEXT:     0x4  R_AARCH64_MOVW_UABS_G0_NC some_label 0x0
+// OBJ-NEXT:     0x8  R_AARCH64_MOVW_UABS_G1    some_label 0x0
+// OBJ-NEXT:     0xC  R_AARCH64_MOVW_UABS_G1_NC some_label 0x0
+// OBJ-NEXT:     0x10 R_AARCH64_MOVW_UABS_G2    some_label 0x0
+// OBJ-NEXT:     0x14 R_AARCH64_MOVW_UABS_G2_NC some_label 0x0
+// OBJ-NEXT:     0x18 R_AARCH64_MOVW_UABS_G3    some_label 0x0
+// OBJ-NEXT:     0x1C R_AARCH64_MOVW_UABS_G3    some_label 0x0
+// OBJ-NEXT:     0x20 R_AARCH64_MOVW_SABS_G0    some_label 0x0
+// OBJ-NEXT:     0x24 R_AARCH64_MOVW_SABS_G0    some_label 0x0
+// OBJ-NEXT:     0x28 R_AARCH64_MOVW_SABS_G1    some_label 0x0
+// OBJ-NEXT:     0x2C R_AARCH64_MOVW_SABS_G1    some_label 0x0
+// OBJ-NEXT:     0x30 R_AARCH64_MOVW_SABS_G2    some_label 0x0
+// OBJ-NEXT:     0x34 R_AARCH64_MOVW_SABS_G2    some_label 0x0
+// OBJ-NEXT:   }
+// OBJ-NEXT: ]
diff --git a/test/MC/AArch64/elf-reloc-pcreladdressing.s b/test/MC/AArch64/elf-reloc-pcreladdressing.s
index 39a8ba9..b5f0727 100644
--- a/test/MC/AArch64/elf-reloc-pcreladdressing.s
+++ b/test/MC/AArch64/elf-reloc-pcreladdressing.s
@@ -1,29 +1,17 @@
-// RUN: llvm-mc -arch=aarch64 -filetype=obj %s -o - | \
-// RUN:   elf-dump | FileCheck -check-prefix=OBJ %s
+// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj %s -o - | \
+// RUN:   llvm-readobj -r | FileCheck -check-prefix=OBJ %s
 
         adr x2, some_label
         adrp x5, some_label
 
         adrp x5, :got:some_label
         ldr x0, [x5, #:got_lo12:some_label]
-// OBJ: .rela.text
 
-// OBJ: 'r_offset', 0x0000000000000000
-// OBJ-NEXT:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x00000112
-
-// OBJ: 'r_offset', 0x0000000000000004
-// OBJ-NEXT:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x00000113
-
-// OBJ: 'r_offset', 0x0000000000000008
-// OBJ-NEXT:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x00000137
-
-// OBJ: 'r_offset', 0x000000000000000c
-// OBJ-NEXT:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x00000138
-
-// OBJ: .symtab
-// OBJ: Symbol 5
-// OBJ-NEXT: some_label
-\ No newline at end of file
+// OBJ:      Relocations [
+// OBJ-NEXT:   Section (1) .text {
+// OBJ-NEXT:     0x0 R_AARCH64_ADR_PREL_LO21    some_label 0x0
+// OBJ-NEXT:     0x4 R_AARCH64_ADR_PREL_PG_HI21 some_label 0x0
+// OBJ-NEXT:     0x8 R_AARCH64_ADR_GOT_PAGE     some_label 0x0
+// OBJ-NEXT:     0xC R_AARCH64_LD64_GOT_LO12_NC some_label 0x0
+// OBJ-NEXT:   }
+// OBJ-NEXT: ]
diff --git a/test/MC/AArch64/elf-reloc-tstb.s b/test/MC/AArch64/elf-reloc-tstb.s
index c5e2981..037e896 100644
--- a/test/MC/AArch64/elf-reloc-tstb.s
+++ b/test/MC/AArch64/elf-reloc-tstb.s
@@ -1,18 +1,12 @@
-// RUN: llvm-mc -arch=aarch64 -filetype=obj %s -o - | \
-// RUN:   elf-dump | FileCheck -check-prefix=OBJ %s
+// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj %s -o - | \
+// RUN:   llvm-readobj -r | FileCheck -check-prefix=OBJ %s
 
         tbz x6, #45, somewhere
         tbnz w3, #15, somewhere
-// OBJ: .rela.text
 
-// OBJ: 'r_offset', 0x0000000000000000
-// OBJ-NEXT:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x00000117
-
-// OBJ: 'r_offset', 0x0000000000000004
-// OBJ-NEXT:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x00000117
-
-// OBJ: .symtab
-// OBJ: Symbol 5
-// OBJ-NEXT: somewhere
+// OBJ:      Relocations [
+// OBJ-NEXT:   Section (1) .text {
+// OBJ-NEXT:     0x0  R_AARCH64_TSTBR14 somewhere 0x0
+// OBJ-NEXT:     0x4  R_AARCH64_TSTBR14 somewhere 0x0
+// OBJ-NEXT:   }
+// OBJ-NEXT: ]
diff --git a/test/MC/AArch64/elf-reloc-uncondbrimm.s b/test/MC/AArch64/elf-reloc-uncondbrimm.s
index 0e97bc6..bead07c 100644
--- a/test/MC/AArch64/elf-reloc-uncondbrimm.s
+++ b/test/MC/AArch64/elf-reloc-uncondbrimm.s
@@ -1,18 +1,12 @@
-// RUN: llvm-mc -arch=aarch64 -filetype=obj %s -o - | \
-// RUN:   elf-dump | FileCheck -check-prefix=OBJ %s
+// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj %s -o - | \
+// RUN:   llvm-readobj -r | FileCheck -check-prefix=OBJ %s
 
         b somewhere
         bl somewhere
-// OBJ: .rela.text
 
-// OBJ: 'r_offset', 0x0000000000000000
-// OBJ-NEXT:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x0000011a
-
-// OBJ: 'r_offset', 0x0000000000000004
-// OBJ-NEXT:  'r_sym', 0x00000005
-// OBJ-NEXT: 'r_type', 0x0000011b
-
-// OBJ: .symtab
-// OBJ: Symbol 5
-// OBJ-NEXT: somewhere
-\ No newline at end of file
+// OBJ:      Relocations [
+// OBJ-NEXT:   Section (1) .text {
+// OBJ-NEXT:     0x0 R_AARCH64_JUMP26 somewhere 0x0
+// OBJ-NEXT:     0x4 R_AARCH64_CALL26 somewhere 0x0
+// OBJ-NEXT:   }
+// OBJ-NEXT: ]
diff --git a/test/MC/AArch64/tls-relocs.s b/test/MC/AArch64/tls-relocs.s
index 690fa8c..d0e336e 100644
--- a/test/MC/AArch64/tls-relocs.s
+++ b/test/MC/AArch64/tls-relocs.s
@@ -1,9 +1,6 @@
-// RUN: llvm-mc -arch=aarch64 -show-encoding < %s | FileCheck %s
-// RUN: llvm-mc -arch=aarch64 -filetype=obj < %s -o %t
-// RUN: elf-dump %t | FileCheck --check-prefix=CHECK-ELF %s
-// RUN: llvm-objdump -r %t | FileCheck --check-prefix=CHECK-ELF-NAMES %s
-
-// CHECK-ELF:  .rela.text
+// RUN: llvm-mc -triple=aarch64-none-linux-gnu -show-encoding < %s | FileCheck %s
+// RUN: llvm-mc -triple=aarch64-none-linux-gnu -filetype=obj < %s -o - | \
+// RUN:   llvm-readobj -r -t | FileCheck --check-prefix=CHECK-ELF %s
 
         // TLS local-dynamic forms
         movz x1, #:dtprel_g2:var
@@ -12,34 +9,20 @@
         movn x4, #:dtprel_g2:var
 // CHECK: movz    x1, #:dtprel_g2:var     // encoding: [0x01'A',A,0xc0'A',0x92'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_a64_movw_dtprel_g2
-// CHECK-NEXT: movn    x2, #:dtprel_g2:var     // encoding: [0x02'A',A,0xc0'A',0x92'A']
+// CHECK: movn    x2, #:dtprel_g2:var     // encoding: [0x02'A',A,0xc0'A',0x92'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_a64_movw_dtprel_g2
-// CHECK-NEXT: movz    x3, #:dtprel_g2:var     // encoding: [0x03'A',A,0xc0'A',0x92'A']
+// CHECK: movz    x3, #:dtprel_g2:var     // encoding: [0x03'A',A,0xc0'A',0x92'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_a64_movw_dtprel_g2
-// CHECK-NEXT: movn    x4, #:dtprel_g2:var     // encoding: [0x04'A',A,0xc0'A',0x92'A']
+// CHECK: movn    x4, #:dtprel_g2:var     // encoding: [0x04'A',A,0xc0'A',0x92'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_g2:var, kind: fixup_a64_movw_dtprel_g2
 
-// CHECK-ELF: # Relocation 0
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000000)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM:0x[0-9a-f]+]])
-// CHECK-ELF-NEXT:  ('r_type', 0x0000020b)
-// CHECK-ELF: # Relocation 1
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000004)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x0000020b)
-// CHECK-ELF: # Relocation 2
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000008)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x0000020b)
-// CHECK-ELF: # Relocation 3
-// CHECK-ELF-NEXT: (('r_offset', 0x000000000000000c)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x0000020b)
-
-// CHECK-ELF-NAMES: 0 R_AARCH64_TLSLD_MOVW_DTPREL_G2
-// CHECK-ELF-NAMES: 4 R_AARCH64_TLSLD_MOVW_DTPREL_G2
-// CHECK-ELF-NAMES: 8 R_AARCH64_TLSLD_MOVW_DTPREL_G2
-// CHECK-ELF-NAMES: 12 R_AARCH64_TLSLD_MOVW_DTPREL_G2
+// CHECK-ELF:      Relocations [
+// CHECK-ELF-NEXT:   Section (1) .text {
+// CHECK-ELF-NEXT:     0x0 R_AARCH64_TLSLD_MOVW_DTPREL_G2 [[VARSYM:[^ ]+]]
+// CHECK-ELF-NEXT:     0x4 R_AARCH64_TLSLD_MOVW_DTPREL_G2 [[VARSYM]]
+// CHECK-ELF-NEXT:     0x8 R_AARCH64_TLSLD_MOVW_DTPREL_G2 [[VARSYM]]
+// CHECK-ELF-NEXT:     0xC R_AARCH64_TLSLD_MOVW_DTPREL_G2 [[VARSYM]]
+
 
         movz x5, #:dtprel_g1:var
         movn x6, #:dtprel_g1:var
@@ -54,46 +37,22 @@
 // CHECK-NEXT: movn    w8, #:dtprel_g1:var     // encoding: [0x08'A',A,0xa0'A',0x12'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_g1:var, kind: fixup_a64_movw_dtprel_g1
 
-// CHECK-ELF: # Relocation 4
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000010)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x0000020c)
-// CHECK-ELF: # Relocation 5
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000014)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x0000020c)
-// CHECK-ELF: # Relocation 6
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000018)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x0000020c)
-// CHECK-ELF: # Relocation 7
-// CHECK-ELF-NEXT: (('r_offset', 0x000000000000001c)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x0000020c)
-
-// CHECK-ELF-NAMES: 16 R_AARCH64_TLSLD_MOVW_DTPREL_G1
-// CHECK-ELF-NAMES: 20 R_AARCH64_TLSLD_MOVW_DTPREL_G1
-// CHECK-ELF-NAMES: 24 R_AARCH64_TLSLD_MOVW_DTPREL_G1
-// CHECK-ELF-NAMES: 28 R_AARCH64_TLSLD_MOVW_DTPREL_G1
+// CHECK-ELF-NEXT:     0x10 R_AARCH64_TLSLD_MOVW_DTPREL_G1 [[VARSYM]]
+// CHECK-ELF-NEXT:     0x14 R_AARCH64_TLSLD_MOVW_DTPREL_G1 [[VARSYM]]
+// CHECK-ELF-NEXT:     0x18 R_AARCH64_TLSLD_MOVW_DTPREL_G1 [[VARSYM]]
+// CHECK-ELF-NEXT:     0x1C R_AARCH64_TLSLD_MOVW_DTPREL_G1 [[VARSYM]]
+
 
         movk x9, #:dtprel_g1_nc:var
         movk w10, #:dtprel_g1_nc:var
 // CHECK: movk    x9, #:dtprel_g1_nc:var  // encoding: [0x09'A',A,0xa0'A',0xf2'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_g1_nc:var, kind: fixup_a64_movw_dtprel_g1_nc
-// CHECK-NEXT: movk    w10, #:dtprel_g1_nc:var // encoding: [0x0a'A',A,0xa0'A',0x72'A']
+// CHECK: movk    w10, #:dtprel_g1_nc:var // encoding: [0x0a'A',A,0xa0'A',0x72'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_g1_nc:var, kind: fixup_a64_movw_dtprel_g1_nc
 
-// CHECK-ELF: # Relocation 8
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000020)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x0000020d)
-// CHECK-ELF: # Relocation 9
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000024)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x0000020d)
+// CHECK-ELF-NEXT:     0x20 R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC [[VARSYM]]
+// CHECK-ELF-NEXT:     0x24 R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC [[VARSYM]]
 
-// CHECK-ELF-NAMES: 32 R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC
-// CHECK-ELF-NAMES: 36 R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC
 
         movz x11, #:dtprel_g0:var
         movn x12, #:dtprel_g0:var
@@ -101,275 +60,156 @@
         movn w14, #:dtprel_g0:var
 // CHECK: movz    x11, #:dtprel_g0:var    // encoding: [0x0b'A',A,0x80'A',0x92'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_a64_movw_dtprel_g0
-// CHECK-NEXT: movn    x12, #:dtprel_g0:var    // encoding: [0x0c'A',A,0x80'A',0x92'A']
+// CHECK: movn    x12, #:dtprel_g0:var    // encoding: [0x0c'A',A,0x80'A',0x92'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_a64_movw_dtprel_g0
-// CHECK-NEXT: movz    w13, #:dtprel_g0:var    // encoding: [0x0d'A',A,0x80'A',0x12'A']
+// CHECK: movz    w13, #:dtprel_g0:var    // encoding: [0x0d'A',A,0x80'A',0x12'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_g0:var, kind: fixup_a64_movw_dtprel_g0
-// CHECK-NEXT: movn    w14, #:dtprel_g0:var    // encoding: [0x0e'A',A,0x80'A',0x12'A']
-
-
-// CHECK-ELF: # Relocation 10
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000028)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x0000020e)
-// CHECK-ELF: # Relocation 11
-// CHECK-ELF-NEXT: (('r_offset', 0x000000000000002c)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x0000020e)
-// CHECK-ELF: # Relocation 12
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000030)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x0000020e)
-// CHECK-ELF: # Relocation 13
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000034)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x0000020e)
-
-// CHECK-ELF-NAMES: 40 R_AARCH64_TLSLD_MOVW_DTPREL_G0
-// CHECK-ELF-NAMES: 44 R_AARCH64_TLSLD_MOVW_DTPREL_G0
-// CHECK-ELF-NAMES: 48 R_AARCH64_TLSLD_MOVW_DTPREL_G0
-// CHECK-ELF-NAMES: 52 R_AARCH64_TLSLD_MOVW_DTPREL_G0
+// CHECK: movn    w14, #:dtprel_g0:var    // encoding: [0x0e'A',A,0x80'A',0x12'A']
+
+// CHECK-ELF-NEXT:     0x28 R_AARCH64_TLSLD_MOVW_DTPREL_G0 [[VARSYM]]
+// CHECK-ELF-NEXT:     0x2C R_AARCH64_TLSLD_MOVW_DTPREL_G0 [[VARSYM]]
+// CHECK-ELF-NEXT:     0x30 R_AARCH64_TLSLD_MOVW_DTPREL_G0 [[VARSYM]]
+// CHECK-ELF-NEXT:     0x34 R_AARCH64_TLSLD_MOVW_DTPREL_G0 [[VARSYM]]
 
 
         movk x15, #:dtprel_g0_nc:var
         movk w16, #:dtprel_g0_nc:var
 // CHECK: movk    x15, #:dtprel_g0_nc:var // encoding: [0x0f'A',A,0x80'A',0xf2'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_g0_nc:var, kind: fixup_a64_movw_dtprel_g0_nc
-// CHECK-NEXT: movk    w16, #:dtprel_g0_nc:var // encoding: [0x10'A',A,0x80'A',0x72'A']
+// CHECK: movk    w16, #:dtprel_g0_nc:var // encoding: [0x10'A',A,0x80'A',0x72'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_g0_nc:var, kind: fixup_a64_movw_dtprel_g0_nc
 
-// CHECK-ELF: # Relocation 14
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000038)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x0000020f)
-// CHECK-ELF: # Relocation 15
-// CHECK-ELF-NEXT: (('r_offset', 0x000000000000003c)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x0000020f)
+// CHECK-ELF-NEXT:     0x38 R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC [[VARSYM]]
+// CHECK-ELF-NEXT:     0x3C R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC [[VARSYM]]
 
-// CHECK-ELF-NAMES: 56 R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC
-// CHECK-ELF-NAMES: 60 R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC
 
         add x17, x18, #:dtprel_hi12:var, lsl #12
         add w19, w20, #:dtprel_hi12:var, lsl #12
 // CHECK: add     x17, x18, #:dtprel_hi12:var, lsl #12 // encoding: [0x51'A',0x02'A',0x40'A',0x91'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_hi12:var, kind: fixup_a64_add_dtprel_hi12
-// CHECK-NEXT: add     w19, w20, #:dtprel_hi12:var, lsl #12 // encoding: [0x93'A',0x02'A',0x40'A',0x11'A']
+// CHECK: add     w19, w20, #:dtprel_hi12:var, lsl #12 // encoding: [0x93'A',0x02'A',0x40'A',0x11'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_hi12:var, kind: fixup_a64_add_dtprel_hi12
 
-// CHECK-ELF: # Relocation 16
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000040)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000210)
-// CHECK-ELF: # Relocation 17
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000044)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000210)
-
-// CHECK-ELF-NAMES: 64 R_AARCH64_TLSLD_ADD_DTPREL_HI12
-// CHECK-ELF-NAMES: 68 R_AARCH64_TLSLD_ADD_DTPREL_HI12
+// CHECK-ELF-NEXT:     0x40 R_AARCH64_TLSLD_ADD_DTPREL_HI12 [[VARSYM]]
+// CHECK-ELF-NEXT:     0x44 R_AARCH64_TLSLD_ADD_DTPREL_HI12 [[VARSYM]]
 
 
         add x21, x22, #:dtprel_lo12:var
         add w23, w24, #:dtprel_lo12:var
 // CHECK: add     x21, x22, #:dtprel_lo12:var // encoding: [0xd5'A',0x02'A',A,0x91'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_a64_add_dtprel_lo12
-// CHECK-NEXT: add     w23, w24, #:dtprel_lo12:var // encoding: [0x17'A',0x03'A',A,0x11'A']
+// CHECK: add     w23, w24, #:dtprel_lo12:var // encoding: [0x17'A',0x03'A',A,0x11'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_a64_add_dtprel_lo12
 
-// CHECK-ELF: # Relocation 18
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000048)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000211)
-// CHECK-ELF: # Relocation 19
-// CHECK-ELF-NEXT: (('r_offset', 0x000000000000004c)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000211)
+// CHECK-ELF-NEXT:     0x48 R_AARCH64_TLSLD_ADD_DTPREL_LO12 [[VARSYM]]
+// CHECK-ELF-NEXT:     0x4C R_AARCH64_TLSLD_ADD_DTPREL_LO12 [[VARSYM]]
 
-// CHECK-ELF-NAMES: 72 R_AARCH64_TLSLD_ADD_DTPREL_LO12
-// CHECK-ELF-NAMES: 76 R_AARCH64_TLSLD_ADD_DTPREL_LO12
 
         add x25, x26, #:dtprel_lo12_nc:var
         add w27, w28, #:dtprel_lo12_nc:var
 // CHECK: add     x25, x26, #:dtprel_lo12_nc:var // encoding: [0x59'A',0x03'A',A,0x91'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_a64_add_dtprel_lo12_nc
-// CHECK-NEXT: add     w27, w28, #:dtprel_lo12_nc:var // encoding: [0x9b'A',0x03'A',A,0x11'A']
+// CHECK: add     w27, w28, #:dtprel_lo12_nc:var // encoding: [0x9b'A',0x03'A',A,0x11'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_a64_add_dtprel_lo12_nc
 
-// CHECK-ELF: # Relocation 20
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000050)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000212)
-// CHECK-ELF: # Relocation 21
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000054)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000212)
+// CHECK-ELF-NEXT:     0x50 R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC [[VARSYM]]
+// CHECK-ELF-NEXT:     0x54 R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC [[VARSYM]]
 
-// CHECK-ELF-NAMES: 80 R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC
-// CHECK-ELF-NAMES: 84 R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC
 
         ldrb w29, [x30, #:dtprel_lo12:var]
         ldrsb x29, [x28, #:dtprel_lo12_nc:var]
 // CHECK: ldrb    w29, [x30, #:dtprel_lo12:var] // encoding: [0xdd'A',0x03'A',0x40'A',0x39'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_a64_ldst8_dtprel_lo12
-// CHECK-NEXT: ldrsb   x29, [x28, #:dtprel_lo12_nc:var] // encoding: [0x9d'A',0x03'A',0x80'A',0x39'A']
+// CHECK: ldrsb   x29, [x28, #:dtprel_lo12_nc:var] // encoding: [0x9d'A',0x03'A',0x80'A',0x39'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_a64_ldst8_dtprel_lo12_nc
 
-// CHECK-ELF: # Relocation 22
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000058)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000213)
-// CHECK-ELF: # Relocation 23
-// CHECK-ELF-NEXT: (('r_offset', 0x000000000000005c)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000214)
+// CHECK-ELF-NEXT:     0x58 R_AARCH64_TLSLD_LDST8_DTPREL_LO12 [[VARSYM]]
+// CHECK-ELF-NEXT:     0x5C R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC [[VARSYM]]
 
-// CHECK-ELF-NAMES: 88 R_AARCH64_TLSLD_LDST8_DTPREL_LO12
-// CHECK-ELF-NAMES: 92 R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC
 
         strh w27, [x26, #:dtprel_lo12:var]
         ldrsh x25, [x24, #:dtprel_lo12_nc:var]
 // CHECK: strh    w27, [x26, #:dtprel_lo12:var] // encoding: [0x5b'A',0x03'A',A,0x79'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_a64_ldst16_dtprel_lo12
-// CHECK-NEXT: ldrsh   x25, [x24, #:dtprel_lo12_nc:var] // encoding: [0x19'A',0x03'A',0x80'A',0x79'A']
+// CHECK: ldrsh   x25, [x24, #:dtprel_lo12_nc:var] // encoding: [0x19'A',0x03'A',0x80'A',0x79'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_a64_ldst16_dtprel_lo12_n
 
-// CHECK-ELF: # Relocation 24
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000060)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000215)
-// CHECK-ELF: # Relocation 25
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000064)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000216)
+// CHECK-ELF-NEXT:     0x60 R_AARCH64_TLSLD_LDST16_DTPREL_LO12 [[VARSYM]]
+// CHECK-ELF-NEXT:     0x64 R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC [[VARSYM]]
 
-// CHECK-ELF-NAMES: 96 R_AARCH64_TLSLD_LDST16_DTPREL_LO12
-// CHECK-ELF-NAMES: 100 R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC
 
         ldr w23, [x22, #:dtprel_lo12:var]
         ldrsw x21, [x20, #:dtprel_lo12_nc:var]
 // CHECK: ldr     w23, [x22, #:dtprel_lo12:var] // encoding: [0xd7'A',0x02'A',0x40'A',0xb9'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_a64_ldst32_dtprel_lo12
-// CHECK-NEXT: ldrsw   x21, [x20, #:dtprel_lo12_nc:var] // encoding: [0x95'A',0x02'A',0x80'A',0xb9'A']
+// CHECK: ldrsw   x21, [x20, #:dtprel_lo12_nc:var] // encoding: [0x95'A',0x02'A',0x80'A',0xb9'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_a64_ldst32_dtprel_lo12_n
 
-// CHECK-ELF: # Relocation 26
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000068)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000217)
-// CHECK-ELF: # Relocation 27
-// CHECK-ELF-NEXT: (('r_offset', 0x000000000000006c)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000218)
+// CHECK-ELF-NEXT:     0x68 R_AARCH64_TLSLD_LDST32_DTPREL_LO12 [[VARSYM]]
+// CHECK-ELF-NEXT:     0x6C R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC [[VARSYM]]
 
-// CHECK-ELF-NAMES: 104 R_AARCH64_TLSLD_LDST32_DTPREL_LO12
-// CHECK-ELF-NAMES: 108 R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC
 
         ldr x19, [x18, #:dtprel_lo12:var]
         str x17, [x16, #:dtprel_lo12_nc:var]
 // CHECK: ldr     x19, [x18, #:dtprel_lo12:var] // encoding: [0x53'A',0x02'A',0x40'A',0xf9'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_lo12:var, kind: fixup_a64_ldst64_dtprel_lo12
-// CHECK-NEXT: str     x17, [x16, #:dtprel_lo12_nc:var] // encoding: [0x11'A',0x02'A',A,0xf9'A']
+// CHECK: str     x17, [x16, #:dtprel_lo12_nc:var] // encoding: [0x11'A',0x02'A',A,0xf9'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :dtprel_lo12_nc:var, kind: fixup_a64_ldst64_dtprel_lo12_nc
 
 
-// CHECK-ELF: # Relocation 28
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000070)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000219)
-// CHECK-ELF: # Relocation 29
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000074)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x0000021a)
+// CHECK-ELF-NEXT:     0x70 R_AARCH64_TLSLD_LDST64_DTPREL_LO12 [[VARSYM]]
+// CHECK-ELF-NEXT:     0x74 R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC [[VARSYM]]
 
-// CHECK-ELF-NAMES: 112 R_AARCH64_TLSLD_LDST64_DTPREL_LO12
-// CHECK-ELF-NAMES: 116 R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC
 
         // TLS initial-exec forms
         movz x15, #:gottprel_g1:var
         movz w14, #:gottprel_g1:var
 // CHECK: movz    x15, #:gottprel_g1:var  // encoding: [0x0f'A',A,0xa0'A',0x92'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :gottprel_g1:var, kind: fixup_a64_movw_gottprel_g1
-// CHECK-NEXT: movz    w14, #:gottprel_g1:var  // encoding: [0x0e'A',A,0xa0'A',0x12'A']
+// CHECK: movz    w14, #:gottprel_g1:var  // encoding: [0x0e'A',A,0xa0'A',0x12'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :gottprel_g1:var, kind: fixup_a64_movw_gottprel_g1
 
-// CHECK-ELF: # Relocation 30
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000078)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x0000021b)
-// CHECK-ELF: # Relocation 31
-// CHECK-ELF-NEXT: (('r_offset', 0x000000000000007c)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x0000021b)
+// CHECK-ELF-NEXT:     0x78 R_AARCH64_TLSIE_MOVW_GOTTPREL_G1 [[VARSYM]]
+// CHECK-ELF-NEXT:     0x7C R_AARCH64_TLSIE_MOVW_GOTTPREL_G1 [[VARSYM]]
 
-// CHECK-ELF-NAMES: 120 R_AARCH64_TLSIE_MOVW_GOTTPREL_G1
-// CHECK-ELF-NAMES: 124 R_AARCH64_TLSIE_MOVW_GOTTPREL_G1
 
         movk x13, #:gottprel_g0_nc:var
         movk w12, #:gottprel_g0_nc:var
 // CHECK: movk    x13, #:gottprel_g0_nc:var // encoding: [0x0d'A',A,0x80'A',0xf2'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :gottprel_g0_nc:var, kind: fixup_a64_movw_gottprel_g0_nc
-// CHECK-NEXT: movk    w12, #:gottprel_g0_nc:var // encoding: [0x0c'A',A,0x80'A',0x72'A']
+// CHECK: movk    w12, #:gottprel_g0_nc:var // encoding: [0x0c'A',A,0x80'A',0x72'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :gottprel_g0_nc:var, kind: fixup_a64_movw_gottprel_g0_nc
 
-// CHECK-ELF: # Relocation 32
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000080)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x0000021c)
-// CHECK-ELF: # Relocation 33
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000084)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x0000021c)
+// CHECK-ELF-NEXT:     0x80 R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC [[VARSYM]]
+// CHECK-ELF-NEXT:     0x84 R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC [[VARSYM]]
 
-// CHECK-ELF-NAMES: 128 R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC
-// CHECK-ELF-NAMES: 132 R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC
 
         adrp x11, :gottprel:var
         ldr x10, [x0, #:gottprel_lo12:var]
         ldr x9, :gottprel:var
 // CHECK: adrp    x11, :gottprel:var      // encoding: [0x0b'A',A,A,0x90'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :gottprel:var, kind: fixup_a64_adr_gottprel_page
-// CHECK-NEXT: ldr     x10, [x0, #:gottprel_lo12:var] // encoding: [0x0a'A',A,0x40'A',0xf9'A']
+// CHECK: ldr     x10, [x0, #:gottprel_lo12:var] // encoding: [0x0a'A',A,0x40'A',0xf9'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :gottprel_lo12:var, kind: fixup_a64_ld64_gottprel_lo12_nc
-// CHECK-NEXT: ldr     x9, :gottprel:var       // encoding: [0x09'A',A,A,0x58'A']
+// CHECK: ldr     x9, :gottprel:var       // encoding: [0x09'A',A,A,0x58'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :gottprel:var, kind: fixup_a64_ld_gottprel_prel19
 
-// CHECK-ELF: # Relocation 34
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000088)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x0000021d)
-// CHECK-ELF: # Relocation 35
-// CHECK-ELF-NEXT: (('r_offset', 0x000000000000008c)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x0000021e)
-// CHECK-ELF: # Relocation 36
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000090)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x0000021f)
-
-// CHECK-ELF-NAMES: 136 R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE
-// CHECK-ELF-NAMES: 140 R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC
-// CHECK-ELF-NAMES: 144 R_AARCH64_TLSIE_LD_GOTTPREL_PREL19
+// CHECK-ELF-NEXT:     0x88 R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 [[VARSYM]]
+// CHECK-ELF-NEXT:     0x8C R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC [[VARSYM]]
+// CHECK-ELF-NEXT:     0x90 R_AARCH64_TLSIE_LD_GOTTPREL_PREL19 [[VARSYM]]
+
 
         // TLS local-exec forms
         movz x3, #:tprel_g2:var
         movn x4, #:tprel_g2:var
 // CHECK: movz    x3, #:tprel_g2:var      // encoding: [0x03'A',A,0xc0'A',0x92'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_g2:var, kind: fixup_a64_movw_tprel_g2
-// CHECK-NEXT: movn    x4, #:tprel_g2:var      // encoding: [0x04'A',A,0xc0'A',0x92'A']
+// CHECK: movn    x4, #:tprel_g2:var      // encoding: [0x04'A',A,0xc0'A',0x92'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_g2:var, kind: fixup_a64_movw_tprel_g2
 
-// CHECK-ELF: # Relocation 37
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000094)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000220)
-// CHECK-ELF: # Relocation 38
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000098)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000220)
+// CHECK-ELF-NEXT:     0x94 R_AARCH64_TLSLE_MOVW_TPREL_G2 [[VARSYM]]
+// CHECK-ELF-NEXT:     0x98 R_AARCH64_TLSLE_MOVW_TPREL_G2 [[VARSYM]]
 
-// CHECK-ELF-NAMES: 148 R_AARCH64_TLSLE_MOVW_TPREL_G2
-// CHECK-ELF-NAMES: 152 R_AARCH64_TLSLE_MOVW_TPREL_G2
 
         movz x5, #:tprel_g1:var
         movn x6, #:tprel_g1:var
@@ -377,53 +217,29 @@
         movn w8, #:tprel_g1:var
 // CHECK: movz    x5, #:tprel_g1:var      // encoding: [0x05'A',A,0xa0'A',0x92'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_a64_movw_tprel_g1
-// CHECK-NEXT: movn    x6, #:tprel_g1:var      // encoding: [0x06'A',A,0xa0'A',0x92'A']
+// CHECK: movn    x6, #:tprel_g1:var      // encoding: [0x06'A',A,0xa0'A',0x92'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_a64_movw_tprel_g1
-// CHECK-NEXT: movz    w7, #:tprel_g1:var      // encoding: [0x07'A',A,0xa0'A',0x12'A']
+// CHECK: movz    w7, #:tprel_g1:var      // encoding: [0x07'A',A,0xa0'A',0x12'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_a64_movw_tprel_g1
-// CHECK-NEXT: movn    w8, #:tprel_g1:var      // encoding: [0x08'A',A,0xa0'A',0x12'A']
+// CHECK: movn    w8, #:tprel_g1:var      // encoding: [0x08'A',A,0xa0'A',0x12'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_g1:var, kind: fixup_a64_movw_tprel_g1
 
-// CHECK-ELF: # Relocation 39
-// CHECK-ELF-NEXT: (('r_offset', 0x000000000000009c)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000221)
-// CHECK-ELF: # Relocation 40
-// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000a0)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000221)
-// CHECK-ELF: # Relocation 41
-// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000a4)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000221)
-// CHECK-ELF: # Relocation 42
-// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000a8)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000221)
-
-// CHECK-ELF-NAMES: 156 R_AARCH64_TLSLE_MOVW_TPREL_G1
-// CHECK-ELF-NAMES: 160 R_AARCH64_TLSLE_MOVW_TPREL_G1
-// CHECK-ELF-NAMES: 164 R_AARCH64_TLSLE_MOVW_TPREL_G1
-// CHECK-ELF-NAMES: 168 R_AARCH64_TLSLE_MOVW_TPREL_G1
+// CHECK-ELF-NEXT:     0x9C R_AARCH64_TLSLE_MOVW_TPREL_G1 [[VARSYM]]
+// CHECK-ELF-NEXT:     0xA0 R_AARCH64_TLSLE_MOVW_TPREL_G1 [[VARSYM]]
+// CHECK-ELF-NEXT:     0xA4 R_AARCH64_TLSLE_MOVW_TPREL_G1 [[VARSYM]]
+// CHECK-ELF-NEXT:     0xA8 R_AARCH64_TLSLE_MOVW_TPREL_G1 [[VARSYM]]
+
 
         movk x9, #:tprel_g1_nc:var
         movk w10, #:tprel_g1_nc:var
 // CHECK: movk    x9, #:tprel_g1_nc:var   // encoding: [0x09'A',A,0xa0'A',0xf2'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_g1_nc:var, kind: fixup_a64_movw_tprel_g1_nc
-// CHECK-NEXT: movk    w10, #:tprel_g1_nc:var  // encoding: [0x0a'A',A,0xa0'A',0x72'A']
+// CHECK: movk    w10, #:tprel_g1_nc:var  // encoding: [0x0a'A',A,0xa0'A',0x72'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_g1_nc:var, kind: fixup_a64_movw_tprel_g1_nc
 
-// CHECK-ELF: # Relocation 43
-// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000ac)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000222)
-// CHECK-ELF: # Relocation 44
-// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000b0)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000222)
+// CHECK-ELF-NEXT:     0xAC R_AARCH64_TLSLE_MOVW_TPREL_G1_NC [[VARSYM]]
+// CHECK-ELF-NEXT:     0xB0 R_AARCH64_TLSLE_MOVW_TPREL_G1_NC [[VARSYM]]
 
-// CHECK-ELF-NAMES: 172 R_AARCH64_TLSLE_MOVW_TPREL_G1_NC
-// CHECK-ELF-NAMES: 176 R_AARCH64_TLSLE_MOVW_TPREL_G1_NC
 
         movz x11, #:tprel_g0:var
         movn x12, #:tprel_g0:var
@@ -431,187 +247,104 @@
         movn w14, #:tprel_g0:var
 // CHECK: movz    x11, #:tprel_g0:var     // encoding: [0x0b'A',A,0x80'A',0x92'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_a64_movw_tprel_g0
-// CHECK-NEXT: movn    x12, #:tprel_g0:var     // encoding: [0x0c'A',A,0x80'A',0x92'A']
+// CHECK: movn    x12, #:tprel_g0:var     // encoding: [0x0c'A',A,0x80'A',0x92'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_a64_movw_tprel_g0
-// CHECK-NEXT: movz    w13, #:tprel_g0:var     // encoding: [0x0d'A',A,0x80'A',0x12'A']
+// CHECK: movz    w13, #:tprel_g0:var     // encoding: [0x0d'A',A,0x80'A',0x12'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_a64_movw_tprel_g0
-// CHECK-NEXT: movn    w14, #:tprel_g0:var     // encoding: [0x0e'A',A,0x80'A',0x12'A']
+// CHECK: movn    w14, #:tprel_g0:var     // encoding: [0x0e'A',A,0x80'A',0x12'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_g0:var, kind: fixup_a64_movw_tprel_g0
 
-// CHECK-ELF: # Relocation 45
-// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000b4)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000223)
-// CHECK-ELF: # Relocation 46
-// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000b8)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000223)
-// CHECK-ELF: # Relocation 47
-// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000bc)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000223)
-// CHECK-ELF: # Relocation 48
-// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000c0)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000223)
-
-// CHECK-ELF-NAMES: 180 R_AARCH64_TLSLE_MOVW_TPREL_G0
-// CHECK-ELF-NAMES: 184 R_AARCH64_TLSLE_MOVW_TPREL_G0
-// CHECK-ELF-NAMES: 188 R_AARCH64_TLSLE_MOVW_TPREL_G0
-// CHECK-ELF-NAMES: 192 R_AARCH64_TLSLE_MOVW_TPREL_G0
+// CHECK-ELF-NEXT:     0xB4 R_AARCH64_TLSLE_MOVW_TPREL_G0 [[VARSYM]]
+// CHECK-ELF-NEXT:     0xB8 R_AARCH64_TLSLE_MOVW_TPREL_G0 [[VARSYM]]
+// CHECK-ELF-NEXT:     0xBC R_AARCH64_TLSLE_MOVW_TPREL_G0 [[VARSYM]]
+// CHECK-ELF-NEXT:     0xC0 R_AARCH64_TLSLE_MOVW_TPREL_G0 [[VARSYM]]
+
 
         movk x15, #:tprel_g0_nc:var
         movk w16, #:tprel_g0_nc:var
 // CHECK: movk    x15, #:tprel_g0_nc:var  // encoding: [0x0f'A',A,0x80'A',0xf2'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_g0_nc:var, kind: fixup_a64_movw_tprel_g0_nc
-// CHECK-NEXT: movk    w16, #:tprel_g0_nc:var  // encoding: [0x10'A',A,0x80'A',0x72'A']
+// CHECK: movk    w16, #:tprel_g0_nc:var  // encoding: [0x10'A',A,0x80'A',0x72'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_g0_nc:var, kind: fixup_a64_movw_tprel_g0_nc
 
-// CHECK-ELF: # Relocation 49
-// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000c4)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000224)
-// CHECK-ELF: # Relocation 50
-// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000c8)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000224)
+// CHECK-ELF-NEXT:     0xC4 R_AARCH64_TLSLE_MOVW_TPREL_G0_NC [[VARSYM]]
+// CHECK-ELF-NEXT:     0xC8 R_AARCH64_TLSLE_MOVW_TPREL_G0_NC [[VARSYM]]
 
-// CHECK-ELF-NAMES: 196 R_AARCH64_TLSLE_MOVW_TPREL_G0_NC
-// CHECK-ELF-NAMES: 200 R_AARCH64_TLSLE_MOVW_TPREL_G0_NC
 
         add x17, x18, #:tprel_hi12:var, lsl #12
         add w19, w20, #:tprel_hi12:var, lsl #12
 // CHECK: add     x17, x18, #:tprel_hi12:var, lsl #12 // encoding: [0x51'A',0x02'A',0x40'A',0x91'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_hi12:var, kind: fixup_a64_add_tprel_hi12
-// CHECK-NEXT: add     w19, w20, #:tprel_hi12:var, lsl #12 // encoding: [0x93'A',0x02'A',0x40'A',0x11'A']
+// CHECK: add     w19, w20, #:tprel_hi12:var, lsl #12 // encoding: [0x93'A',0x02'A',0x40'A',0x11'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_hi12:var, kind: fixup_a64_add_tprel_hi12
 
-// CHECK-ELF: # Relocation 51
-// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000cc)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000225)
-// CHECK-ELF: # Relocation 52
-// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000d0)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000225)
+// CHECK-ELF-NEXT:     0xCC R_AARCH64_TLSLE_ADD_TPREL_HI12 [[VARSYM]]
+// CHECK-ELF-NEXT:     0xD0 R_AARCH64_TLSLE_ADD_TPREL_HI12 [[VARSYM]]
 
-// CHECK-ELF-NAMES: 204 R_AARCH64_TLSLE_ADD_TPREL_HI12
-// CHECK-ELF-NAMES: 208 R_AARCH64_TLSLE_ADD_TPREL_HI12
 
         add x21, x22, #:tprel_lo12:var
         add w23, w24, #:tprel_lo12:var
 // CHECK: add     x21, x22, #:tprel_lo12:var // encoding: [0xd5'A',0x02'A',A,0x91'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_a64_add_tprel_lo12
-// CHECK-NEXT: add     w23, w24, #:tprel_lo12:var // encoding: [0x17'A',0x03'A',A,0x11'A']
+// CHECK: add     w23, w24, #:tprel_lo12:var // encoding: [0x17'A',0x03'A',A,0x11'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_a64_add_tprel_lo12
 
-// CHECK-ELF: # Relocation 53
-// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000d4)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000226)
-// CHECK-ELF: # Relocation 54
-// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000d8)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000226)
+// CHECK-ELF-NEXT:     0xD4 R_AARCH64_TLSLE_ADD_TPREL_LO12 [[VARSYM]]
+// CHECK-ELF-NEXT:     0xD8 R_AARCH64_TLSLE_ADD_TPREL_LO12 [[VARSYM]]
 
-// CHECK-ELF-NAMES: 212 R_AARCH64_TLSLE_ADD_TPREL_LO12
-// CHECK-ELF-NAMES: 216 R_AARCH64_TLSLE_ADD_TPREL_LO12
 
         add x25, x26, #:tprel_lo12_nc:var
         add w27, w28, #:tprel_lo12_nc:var
 // CHECK: add     x25, x26, #:tprel_lo12_nc:var // encoding: [0x59'A',0x03'A',A,0x91'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_a64_add_tprel_lo12_nc
-// CHECK-NEXT: add     w27, w28, #:tprel_lo12_nc:var // encoding: [0x9b'A',0x03'A',A,0x11'A']
+// CHECK: add     w27, w28, #:tprel_lo12_nc:var // encoding: [0x9b'A',0x03'A',A,0x11'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_a64_add_tprel_lo12_nc
 
-// CHECK-ELF: # Relocation 55
-// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000dc)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000227)
-// CHECK-ELF: # Relocation 56
-// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000e0)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000227)
-
+// CHECK-ELF-NEXT:     0xDC R_AARCH64_TLSLE_ADD_TPREL_LO12_NC [[VARSYM]]
+// CHECK-ELF-NEXT:     0xE0 R_AARCH64_TLSLE_ADD_TPREL_LO12_NC [[VARSYM]]
 
-// CHECK-ELF-NAMES: 220 R_AARCH64_TLSLE_ADD_TPREL_LO12_NC
-// CHECK-ELF-NAMES: 224 R_AARCH64_TLSLE_ADD_TPREL_LO12_NC
 
         ldrb w29, [x30, #:tprel_lo12:var]
         ldrsb x29, [x28, #:tprel_lo12_nc:var]
 // CHECK: ldrb    w29, [x30, #:tprel_lo12:var] // encoding: [0xdd'A',0x03'A',0x40'A',0x39'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_a64_ldst8_tprel_lo12
-// CHECK-NEXT: ldrsb   x29, [x28, #:tprel_lo12_nc:var] // encoding: [0x9d'A',0x03'A',0x80'A',0x39'A']
+// CHECK: ldrsb   x29, [x28, #:tprel_lo12_nc:var] // encoding: [0x9d'A',0x03'A',0x80'A',0x39'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_a64_ldst8_tprel_lo12_nc
 
-// CHECK-ELF: # Relocation 57
-// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000e4)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000228)
-// CHECK-ELF: # Relocation 58
-// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000e8)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000229)
+// CHECK-ELF-NEXT:     0xE4 R_AARCH64_TLSLE_LDST8_TPREL_LO12 [[VARSYM]]
+// CHECK-ELF-NEXT:     0xE8 R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC [[VARSYM]]
 
-// CHECK-ELF-NAMES: 228 R_AARCH64_TLSLE_LDST8_TPREL_LO12
-// CHECK-ELF-NAMES: 232 R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC
 
         strh w27, [x26, #:tprel_lo12:var]
         ldrsh x25, [x24, #:tprel_lo12_nc:var]
 // CHECK: strh    w27, [x26, #:tprel_lo12:var] // encoding: [0x5b'A',0x03'A',A,0x79'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_a64_ldst16_tprel_lo12
-// CHECK-NEXT: ldrsh   x25, [x24, #:tprel_lo12_nc:var] // encoding: [0x19'A',0x03'A',0x80'A',0x79'A']
+// CHECK: ldrsh   x25, [x24, #:tprel_lo12_nc:var] // encoding: [0x19'A',0x03'A',0x80'A',0x79'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_a64_ldst16_tprel_lo12_n
 
-// CHECK-ELF: # Relocation 59
-// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000ec)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x0000022a)
-// CHECK-ELF: # Relocation 60
-// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000f0)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x0000022b)
+// CHECK-ELF-NEXT:     0xEC R_AARCH64_TLSLE_LDST16_TPREL_LO12 [[VARSYM]]
+// CHECK-ELF-NEXT:     0xF0 R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC [[VARSYM]]
 
-// CHECK-ELF-NAMES: 236 R_AARCH64_TLSLE_LDST16_TPREL_LO12
-// CHECK-ELF-NAMES: 240 R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC
 
         ldr w23, [x22, #:tprel_lo12:var]
         ldrsw x21, [x20, #:tprel_lo12_nc:var]
 // CHECK: ldr     w23, [x22, #:tprel_lo12:var] // encoding: [0xd7'A',0x02'A',0x40'A',0xb9'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_a64_ldst32_tprel_lo12
-// CHECK-NEXT: ldrsw   x21, [x20, #:tprel_lo12_nc:var] // encoding: [0x95'A',0x02'A',0x80'A',0xb9'A']
+// CHECK: ldrsw   x21, [x20, #:tprel_lo12_nc:var] // encoding: [0x95'A',0x02'A',0x80'A',0xb9'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_a64_ldst32_tprel_lo12_n
 
-// CHECK-ELF: # Relocation 61
-// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000f4)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x0000022c)
-// CHECK-ELF: # Relocation 62
-// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000f8)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x0000022d)
-
-// CHECK-ELF-NAMES: 244 R_AARCH64_TLSLE_LDST32_TPREL_LO12
-// CHECK-ELF-NAMES: 248 R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC
+// CHECK-ELF-NEXT:     0xF4 R_AARCH64_TLSLE_LDST32_TPREL_LO12 [[VARSYM]]
+// CHECK-ELF-NEXT:     0xF8 R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC [[VARSYM]]
 
         ldr x19, [x18, #:tprel_lo12:var]
         str x17, [x16, #:tprel_lo12_nc:var]
 // CHECK: ldr     x19, [x18, #:tprel_lo12:var] // encoding: [0x53'A',0x02'A',0x40'A',0xf9'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_lo12:var, kind: fixup_a64_ldst64_tprel_lo12
-// CHECK-NEXT: str     x17, [x16, #:tprel_lo12_nc:var] // encoding: [0x11'A',0x02'A',A,0xf9'A']
+// CHECK: str     x17, [x16, #:tprel_lo12_nc:var] // encoding: [0x11'A',0x02'A',A,0xf9'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tprel_lo12_nc:var, kind: fixup_a64_ldst64_tprel_lo12_nc
 
-// CHECK-ELF: # Relocation 63
-// CHECK-ELF-NEXT: (('r_offset', 0x00000000000000fc)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x0000022e)
-// CHECK-ELF: # Relocation 64
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000100)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x0000022f)
-
-// CHECK-ELF-NAMES: 252 R_AARCH64_TLSLE_LDST64_TPREL_LO12
-// CHECK-ELF-NAMES: 256 R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC
+// CHECK-ELF-NEXT:     0xFC  R_AARCH64_TLSLE_LDST64_TPREL_LO12 [[VARSYM]]
+// CHECK-ELF-NEXT:     0x100 R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC [[VARSYM]]
 
         // TLS descriptor forms
         adrp x8, :tlsdesc:var
@@ -622,41 +355,27 @@
 
 // CHECK: adrp    x8, :tlsdesc:var        // encoding: [0x08'A',A,A,0x90'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tlsdesc:var, kind: fixup_a64_tlsdesc_adr_page
-// CHECK-NEXT: ldr     x7, [x6, #:tlsdesc_lo12:var] // encoding: [0xc7'A',A,0x40'A',0xf9'A']
+// CHECK: ldr     x7, [x6, #:tlsdesc_lo12:var] // encoding: [0xc7'A',A,0x40'A',0xf9'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tlsdesc_lo12:var, kind: fixup_a64_tlsdesc_ld64_lo12_nc
-// CHECK-NEXT: add     x5, x4, #:tlsdesc_lo12:var // encoding: [0x85'A',A,A,0x91'A']
+// CHECK: add     x5, x4, #:tlsdesc_lo12:var // encoding: [0x85'A',A,A,0x91'A']
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tlsdesc_lo12:var, kind: fixup_a64_tlsdesc_add_lo12_nc
-// CHECK-NEXT: .tlsdesccall var                // encoding: []
+// CHECK: .tlsdesccall var                // encoding: []
 // CHECK-NEXT:                                 //   fixup A - offset: 0, value: :tlsdesc:var, kind: fixup_a64_tlsdesc_call
 // CHECK: blr     x3                      // encoding: [0x60,0x00,0x3f,0xd6]
 
 
-// CHECK-ELF: # Relocation 65
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000104)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000232)
-// CHECK-ELF: # Relocation 66
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000108)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000233)
-// CHECK-ELF: # Relocation 67
-// CHECK-ELF-NEXT: (('r_offset', 0x000000000000010c)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000234)
-// CHECK-ELF: # Relocation 68
-// CHECK-ELF-NEXT: (('r_offset', 0x0000000000000110)
-// CHECK-ELF-NEXT:  ('r_sym', [[VARSYM]])
-// CHECK-ELF-NEXT:  ('r_type', 0x00000239)
-
-// CHECK-ELF-NAMES: 260 R_AARCH64_TLSDESC_ADR_PAGE
-// CHECK-ELF-NAMES: 264 R_AARCH64_TLSDESC_LD64_LO12_NC
-// CHECK-ELF-NAMES: 268 R_AARCH64_TLSDESC_ADD_LO12_NC
-// CHECK-ELF-NAMES: 272 R_AARCH64_TLSDESC_CALL
+// CHECK-ELF-NEXT:     0x104 R_AARCH64_TLSDESC_ADR_PAGE [[VARSYM]]
+// CHECK-ELF-NEXT:     0x108 R_AARCH64_TLSDESC_LD64_LO12_NC [[VARSYM]]
+// CHECK-ELF-NEXT:     0x10C R_AARCH64_TLSDESC_ADD_LO12_NC [[VARSYM]]
+// CHECK-ELF-NEXT:     0x110 R_AARCH64_TLSDESC_CALL [[VARSYM]]
 
 
 // Make sure symbol 5 has type STT_TLS:
 
-// CHECK-ELF: # Symbol 5
-// CHECK-ELF-NEXT: (('st_name', 0x00000006) # 'var'
-// CHECK-ELF-NEXT:  ('st_bind', 0x1)
-// CHECK-ELF-NEXT:  ('st_type', 0x6)
+// CHECK-ELF:      Symbols [
+// CHECK-ELF:        Symbol {
+// CHECK-ELF:          Name: var (6)
+// CHECK-ELF-NEXT:     Value:
+// CHECK-ELF-NEXT:     Size:
+// CHECK-ELF-NEXT:     Binding: Global
+// CHECK-ELF-NEXT:     Type: TLS
diff --git a/test/MC/ARM/arm-thumb-trustzone.s b/test/MC/ARM/arm-thumb-trustzone.s
new file mode 100644
index 0000000..a080b3e
--- /dev/null
+++ b/test/MC/ARM/arm-thumb-trustzone.s
@@ -0,0 +1,25 @@
+@ RUN: llvm-mc -triple=thumbv7-apple-darwin -mcpu=cortex-a8 -show-encoding -mattr=-trustzone < %s | FileCheck %s -check-prefix=NOTZ
+@ RUN: llvm-mc -triple=thumbv7-apple-darwin -mcpu=cortex-a8 -show-encoding -mattr=trustzone < %s | FileCheck %s -check-prefix=TZ
+
+  .syntax unified
+  .globl _func
+
+@ Check that the assembler processes SMC instructions when TrustZone support is 
+@ active and that it rejects them when this feature is not enabled
+
+_func:
+@ CHECK: _func
+
+
+@------------------------------------------------------------------------------
+@ SMC
+@------------------------------------------------------------------------------
+        smc #0xf
+        ite eq
+        smceq #0
+
+@ NOTZ-NOT: smc 	#15
+@ NOTZ-NOT: smceq	#0
+@ TZ: smc	#15                     @ encoding: [0xff,0xf7,0x00,0x80]
+@ TZ: ite	eq                      @ encoding: [0x0c,0xbf]
+@ TZ: smceq	#0                      @ encoding: [0xf0,0xf7,0x00,0x80]
diff --git a/test/MC/ARM/arm-trustzone.s b/test/MC/ARM/arm-trustzone.s
new file mode 100644
index 0000000..69157f6
--- /dev/null
+++ b/test/MC/ARM/arm-trustzone.s
@@ -0,0 +1,24 @@
+@ RUN: llvm-mc -triple=armv7-apple-darwin -mcpu=cortex-a8 -show-encoding -mattr=-trustzone < %s | FileCheck %s -check-prefix=NOTZ
+@ RUN: llvm-mc -triple=armv7-apple-darwin -mcpu=cortex-a8 -show-encoding -mattr=trustzone < %s | FileCheck %s -check-prefix=TZ
+
+  .syntax unified
+  .globl _func
+
+@ Check that the assembler processes SMC instructions when TrustZone support is 
+@ active and that it rejects them when this feature is not enabled
+
+_func:
+@ CHECK: _func
+
+
+@------------------------------------------------------------------------------
+@ SMC
+@------------------------------------------------------------------------------
+        smc #0xf
+        smceq #0
+
+@ NOTZ-NOT: smc 	#15
+@ NOTZ-NOT: smceq	#0
+@ TZ: smc	#15                     @ encoding: [0x7f,0x00,0x60,0xe1]
+@ TZ: smceq	#0                      @ encoding: [0x70,0x00,0x60,0x01]
+
diff --git a/test/MC/ARM/basic-arm-instructions.s b/test/MC/ARM/basic-arm-instructions.s
index 560a0d6..71b5b5d 100644
--- a/test/MC/ARM/basic-arm-instructions.s
+++ b/test/MC/ARM/basic-arm-instructions.s
@@ -1791,15 +1791,6 @@ Lforward:
 @ CHECK: shsub8gt	r4, r8, r2      @ encoding: [0xf2,0x4f,0x38,0xc6]
 
 @------------------------------------------------------------------------------
-@ SMC
-@------------------------------------------------------------------------------
-        smc #0xf
-        smceq #0
-
-@ CHECK: smc	#15                     @ encoding: [0x7f,0x00,0x60,0xe1]
-@ CHECK: smceq	#0                      @ encoding: [0x70,0x00,0x60,0x01]
-
-@------------------------------------------------------------------------------
 @ SMLABB/SMLABT/SMLATB/SMLATT
 @------------------------------------------------------------------------------
         smlabb r3, r1, r9, r0
@@ -2318,7 +2309,7 @@ Lforward:
         strpl	r3, [r10, #0]!
 
 @ CHECK: strpl	r3, [r10, #-0]!         @ encoding: [0x00,0x30,0x2a,0x55]
-@ CHECK: strpl	r3, [r10]!              @ encoding: [0x00,0x30,0xaa,0x55]
+@ CHECK: strpl	r3, [r10, #0]!          @ encoding: [0x00,0x30,0xaa,0x55]
 
 @------------------------------------------------------------------------------
 @ SUB
@@ -2879,7 +2870,6 @@ Lforward:
         wfilt
         yield
         yieldne
-        hint #5
         hint #4
         hint #3
         hint #2
@@ -2892,7 +2882,6 @@ Lforward:
 @ CHECK: wfilt                          @ encoding: [0x03,0xf0,0x20,0xb3]
 @ CHECK: yield                          @ encoding: [0x01,0xf0,0x20,0xe3]
 @ CHECK: yieldne                        @ encoding: [0x01,0xf0,0x20,0x13]
-@ CHECK: hint	#5                      @ encoding: [0x05,0xf0,0x20,0xe3]
 @ CHECK: sev                            @ encoding: [0x04,0xf0,0x20,0xe3]
 @ CHECK: wfi                            @ encoding: [0x03,0xf0,0x20,0xe3]
 @ CHECK: wfe                            @ encoding: [0x02,0xf0,0x20,0xe3]
diff --git a/test/MC/ARM/basic-thumb2-instructions.s b/test/MC/ARM/basic-thumb2-instructions.s
index 9278a2a..8127feba 100644
--- a/test/MC/ARM/basic-thumb2-instructions.s
+++ b/test/MC/ARM/basic-thumb2-instructions.s
@@ -3486,8 +3486,6 @@ _func:
         wfelt
         wfige
         yieldlt
-        hint #5
-        hint.w #5
         hint.w #4
         hint #3
         hint #2
@@ -3501,8 +3499,6 @@ _func:
 @ CHECK: wfelt                          @ encoding: [0x20,0xbf]
 @ CHECK: wfige                          @ encoding: [0x30,0xbf]
 @ CHECK: yieldlt                        @ encoding: [0x10,0xbf]
-@ CHECK: hint	#5                      @ encoding: [0xaf,0xf3,0x05,0x80]
-@ CHECK: hint	#5                      @ encoding: [0xaf,0xf3,0x05,0x80]
 @ CHECK: sev.w                          @ encoding: [0xaf,0xf3,0x04,0x80]
 @ CHECK: wfi.w                          @ encoding: [0xaf,0xf3,0x03,0x80]
 @ CHECK: wfe.w                          @ encoding: [0xaf,0xf3,0x02,0x80]
diff --git a/test/MC/ARM/cxx-global-constructor.ll b/test/MC/ARM/cxx-global-constructor.ll
index e06d2c7..4afd1e1 100644
--- a/test/MC/ARM/cxx-global-constructor.ll
+++ b/test/MC/ARM/cxx-global-constructor.ll
@@ -1,5 +1,5 @@
 ; RUN: llc %s -mtriple=armv7-linux-gnueabi -relocation-model=pic \
-; RUN: -filetype=obj -o - | elf-dump --dump-section-data | FileCheck %s
+; RUN: -filetype=obj -o - | llvm-readobj -r | FileCheck %s
 
 
 @llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @f }]
@@ -9,4 +9,5 @@ define void @f() {
 }
 
 ; Check for a relocation of type R_ARM_TARGET1.
-; CHECK: ('r_type', 0x26)
+; CHECK: Relocations [
+; CHECK:   0x{{[0-9,A-F]+}} R_ARM_TARGET1
diff --git a/test/MC/ARM/data-in-code.ll b/test/MC/ARM/data-in-code.ll
index c2feec5..e3325b6 100644
--- a/test/MC/ARM/data-in-code.ll
+++ b/test/MC/ARM/data-in-code.ll
@@ -1,8 +1,8 @@
 ;; RUN: llc -O0 -mtriple=armv7-linux-gnueabi -filetype=obj %s -o - | \
-;; RUN:   elf-dump | FileCheck -check-prefix=ARM %s
+;; RUN:   llvm-readobj -t | FileCheck -check-prefix=ARM %s
 
 ;; RUN: llc -O0 -mtriple=thumbv7-linux-gnueabi -filetype=obj %s -o - | \
-;; RUN:   elf-dump --dump-section-data | FileCheck -check-prefix=TMB %s
+;; RUN:   llvm-readobj -t | FileCheck -check-prefix=TMB %s
 
 ;; Ensure that if a jump table is generated that it has Mapping Symbols
 ;; marking the data-in-code region.
@@ -108,68 +108,68 @@ exit:
   ret void
 }
 
-;; ARM:         # Symbol 2
-;; ARM-NEXT:    $a
-;; ARM-NEXT:   'st_value', 0x00000000
-;; ARM-NEXT:   'st_size', 0x00000000
-;; ARM-NEXT:   'st_bind', 0x0
-;; ARM-NEXT:   'st_type', 0x0
-;; ARM-NEXT:   'st_other'
-;; ARM-NEXT:   'st_shndx', [[MIXED_SECT:0x[0-9a-f]+]]
-
-;; ARM:         # Symbol 3
-;; ARM-NEXT:    $a
-;; ARM-NEXT:   'st_value', 0x000000ac
-;; ARM-NEXT:   'st_size', 0x00000000
-;; ARM-NEXT:   'st_bind', 0x0
-;; ARM-NEXT:   'st_type', 0x0
-;; ARM-NEXT:   'st_other'
-;; ARM-NEXT:   'st_shndx', [[MIXED_SECT]]
-
-;; ARM:         # Symbol 4
-;; ARM-NEXT:    $d
-;; ARM-NEXT:    'st_value', 0x00000000
-;; ARM-NEXT:    'st_size', 0x00000000
-;; ARM-NEXT:    'st_bind', 0x0
-;; ARM-NEXT:    'st_type', 0x0
-
-;; ARM:         # Symbol 5
-;; ARM-NEXT:    $d
-;; ARM-NEXT:   'st_value', 0x00000030
-;; ARM-NEXT:   'st_size', 0x00000000
-;; ARM-NEXT:   'st_bind', 0x0
-;; ARM-NEXT:   'st_type', 0x0
-;; ARM-NEXT:   'st_other'
-;; ARM-NEXT:   'st_shndx', [[MIXED_SECT]]
+;; ARM:        Symbol {
+;; ARM:          Name: $a
+;; ARM-NEXT:     Value: 0x0
+;; ARM-NEXT:     Size: 0
+;; ARM-NEXT:     Binding: Local
+;; ARM-NEXT:     Type: None
+;; ARM-NEXT:     Other:
+;; ARM-NEXT:     Section: [[MIXED_SECT:[^ ]+]]
+
+;; ARM:        Symbol {
+;; ARM:          Name: $a
+;; ARM-NEXT:     Value: 0xAC
+;; ARM-NEXT:     Size: 0
+;; ARM-NEXT:     Binding: Local
+;; ARM-NEXT:     Type: None
+;; ARM-NEXT:     Other:
+;; ARM-NEXT:     Section: [[MIXED_SECT]]
+
+;; ARM:        Symbol {
+;; ARM:          Name: $d
+;; ARM-NEXT:     Value: 0
+;; ARM-NEXT:     Size: 0
+;; ARM-NEXT:     Binding: Local
+;; ARM-NEXT:     Type: None
+
+;; ARM:        Symbol {
+;; ARM:          Name: $d
+;; ARM-NEXT:     Value: 0x30
+;; ARM-NEXT:     Size: 0
+;; ARM-NEXT:     Binding: Local
+;; ARM-NEXT:     Type: None
+;; ARM-NEXT:     Other:
+;; ARM-NEXT:     Section: [[MIXED_SECT]]
 
 ;; ARM-NOT:     ${{[atd]}}
 
-;; TMB:         # Symbol 3
-;; TMB-NEXT:    $d
-;; TMB-NEXT:   'st_value', 0x00000016
-;; TMB-NEXT:   'st_size', 0x00000000
-;; TMB-NEXT:   'st_bind', 0x0
-;; TMB-NEXT:   'st_type', 0x0
-;; TMB-NEXT:   'st_other'
-;; TMB-NEXT:   'st_shndx', [[MIXED_SECT:0x[0-9a-f]+]]
-
-;; TMB:         # Symbol 4
-;; TMB-NEXT:    $t
-;; TMB-NEXT:   'st_value', 0x00000000
-;; TMB-NEXT:   'st_size', 0x00000000
-;; TMB-NEXT:   'st_bind', 0x0
-;; TMB-NEXT:   'st_type', 0x0
-;; TMB-NEXT:   'st_other'
-;; TMB-NEXT:   'st_shndx', [[MIXED_SECT]]
-
-;; TMB:         # Symbol 5
-;; TMB-NEXT:    $t
-;; TMB-NEXT:   'st_value', 0x00000036
-;; TMB-NEXT:   'st_size', 0x00000000
-;; TMB-NEXT:   'st_bind', 0x0
-;; TMB-NEXT:   'st_type', 0x0
-;; TMB-NEXT:   'st_other'
-;; TMB-NEXT:   'st_shndx', [[MIXED_SECT]]
+;; TMB:        Symbol {
+;; TMB:          Name: $d.2
+;; TMB-NEXT:     Value: 0x16
+;; TMB-NEXT:     Size: 0
+;; TMB-NEXT:     Binding: Local
+;; TMB-NEXT:     Type: None
+;; TMB-NEXT:     Other:
+;; TMB-NEXT:     Section: [[MIXED_SECT:[^ ]+]]
+
+;; TMB:        Symbol {
+;; TMB:          Name: $t
+;; TMB-NEXT:     Value: 0x0
+;; TMB-NEXT:     Size: 0
+;; TMB-NEXT:     Binding: Local
+;; TMB-NEXT:     Type: None
+;; TMB-NEXT:     Other:
+;; TMB-NEXT:     Section: [[MIXED_SECT]]
+
+;; TMB:        Symbol {
+;; TMB:          Name: $t
+;; TMB-NEXT:     Value: 0x36
+;; TMB-NEXT:     Size: 0
+;; TMB-NEXT:     Binding: Local
+;; TMB-NEXT:     Type: None
+;; TMB-NEXT:     Other:
+;; TMB-NEXT:     Section: [[MIXED_SECT]]
 
 
 ;; TMB-NOT:     ${{[atd]}}
diff --git a/test/MC/ARM/elf-eflags-eabi-cg.ll b/test/MC/ARM/elf-eflags-eabi-cg.ll
index 2e86a0f..0b9de7f 100644
--- a/test/MC/ARM/elf-eflags-eabi-cg.ll
+++ b/test/MC/ARM/elf-eflags-eabi-cg.ll
@@ -1,7 +1,7 @@
 ; Codegen version to check for ELF header flags.
 ;
 ; RUN: llc %s -mtriple=thumbv7-linux-gnueabi -relocation-model=pic \
-; RUN: -filetype=obj -o - | elf-dump --dump-section-data | \
+; RUN: -filetype=obj -o - | llvm-readobj -h | \
 ; RUN: FileCheck %s
 
 define void @bar() nounwind {
@@ -10,4 +10,5 @@ entry:
 }
 
 ; For now the only e_flag set is EF_ARM_EABI_VER5
-;CHECK:    'e_flags', 0x05000000
+; CHECK: ElfHeader {
+; CHECK:   Flags [ (0x5000000)
diff --git a/test/MC/ARM/elf-eflags-eabi.s b/test/MC/ARM/elf-eflags-eabi.s
index ea89eac..fe0b6c0 100644
--- a/test/MC/ARM/elf-eflags-eabi.s
+++ b/test/MC/ARM/elf-eflags-eabi.s
@@ -1,5 +1,5 @@
 @ RUN: llvm-mc %s -triple=armv7-linux-gnueabi -filetype=obj -o - | \
-@ RUN:    elf-dump --dump-section-data  | FileCheck -check-prefix=OBJ %s
+@ RUN:    llvm-readobj -h | FileCheck -check-prefix=OBJ %s
 	.syntax unified
 	.text
 	.globl	barf
@@ -10,4 +10,5 @@ barf:                                   @ @barf
         b foo
 
 @@@ make sure the EF_ARM_EABIMASK comes out OK
-@OBJ:    'e_flags', 0x05000000
+@OBJ: ElfHeader {
+@OBJ:   Flags [ (0x5000000)
diff --git a/test/MC/ARM/elf-movt.s b/test/MC/ARM/elf-movt.s
index 02bb5a6..74b3c9f 100644
--- a/test/MC/ARM/elf-movt.s
+++ b/test/MC/ARM/elf-movt.s
@@ -1,6 +1,6 @@
 @ RUN: llvm-mc %s -triple=armv7-linux-gnueabi | FileCheck -check-prefix=ASM %s
 @ RUN: llvm-mc %s -triple=armv7-linux-gnueabi -filetype=obj -o - | \
-@ RUN:    elf-dump --dump-section-data | FileCheck -check-prefix=OBJ %s
+@ RUN:    llvm-readobj -s -sd -sr | FileCheck -check-prefix=OBJ %s
 	.syntax unified
 	.text
 	.globl	barf
@@ -15,25 +15,24 @@ barf:                                   @ @barf
 @ ASM-NEXT:     movt    r0, :upper16:(GOT-(.LPC0_2+8))
 
 @@ make sure that the text section fixups are sane too
-@ OBJ:                 '.text'
-@ OBJ-NEXT:            'sh_type', 0x00000001
-@ OBJ-NEXT:            'sh_flags', 0x00000006
-@ OBJ-NEXT:            'sh_addr', 0x00000000
-@ OBJ-NEXT:            'sh_offset', 0x00000034
-@ OBJ-NEXT:            'sh_size', 0x00000008
-@ OBJ-NEXT:            'sh_link', 0x00000000
-@ OBJ-NEXT:            'sh_info', 0x00000000
-@ OBJ-NEXT:            'sh_addralign', 0x00000004
-@ OBJ-NEXT:            'sh_entsize', 0x00000000
-@ OBJ-NEXT:            '_section_data', 'f00f0fe3 f40f4fe3'
-
-@ OBJ:              Relocation 0
-@ OBJ-NEXT:         'r_offset', 0x00000000
-@ OBJ-NEXT:         'r_sym'
-@ OBJ-NEXT:         'r_type', 0x2d
-
-@ OBJ:              Relocation 1
-@ OBJ-NEXT:         'r_offset', 0x00000004
-@ OBJ-NEXT:         'r_sym'
-@ OBJ-NEXT:         'r_type', 0x2e
-
+@ OBJ:        Section {
+@ OBJ:          Name: .text
+@ OBJ-NEXT:     Type: SHT_PROGBITS
+@ OBJ-NEXT:     Flags [ (0x6)
+@ OBJ-NEXT:       SHF_ALLOC
+@ OBJ-NEXT:       SHF_EXECINSTR
+@ OBJ-NEXT:     ]
+@ OBJ-NEXT:     Address: 0x0
+@ OBJ-NEXT:     Offset: 0x34
+@ OBJ-NEXT:     Size: 8
+@ OBJ-NEXT:     Link: 0
+@ OBJ-NEXT:     Info: 0
+@ OBJ-NEXT:     AddressAlignment: 4
+@ OBJ-NEXT:     EntrySize: 0
+@ OBJ-NEXT:     Relocations [
+@ OBJ-NEXT:       0x0 R_ARM_MOVW_PREL_NC
+@ OBJ-NEXT:       0x4 R_ARM_MOVT_PREL
+@ OBJ-NEXT:     ]
+@ OBJ-NEXT:     SectionData (
+@ OBJ-NEXT:       0000: F00F0FE3 F40F4FE3
+@ OBJ-NEXT:     )
diff --git a/test/MC/ARM/elf-reloc-01.ll b/test/MC/ARM/elf-reloc-01.ll
index 3ebd7c6..9b5dbd9 100644
--- a/test/MC/ARM/elf-reloc-01.ll
+++ b/test/MC/ARM/elf-reloc-01.ll
@@ -1,7 +1,7 @@
 ;; RUN: llc -mtriple=armv7-linux-gnueabi -O3  \
 ;; RUN:    -mcpu=cortex-a8 -mattr=-neon -mattr=+vfp2  -arm-reserve-r9  \
 ;; RUN:    -filetype=obj %s -o - | \
-;; RUN:   elf-dump --dump-section-data | FileCheck -check-prefix=OBJ %s
+;; RUN:   llvm-readobj -r | FileCheck -check-prefix=OBJ %s
 
 ;; FIXME: This file needs to be in .s form!
 ;; The args to llc are there to constrain the codegen only.
@@ -60,11 +60,8 @@ bb3:                                              ; preds = %bb, %entry
 
 declare void @exit(i32) noreturn nounwind
 
-;; OBJ:          Relocation 1
-;; OBJ-NEXT:     'r_offset',
-;; OBJ-NEXT:     'r_sym', 0x000007
-;; OBJ-NEXT:     'r_type', 0x2b
-
-;; OBJ:         Symbol 7
-;; OBJ-NEXT:    '_MergedGlobals'
-;; OBJ-NEXT:    'st_value', 0x00000010
+; OBJ: Relocations [
+; OBJ:   Section (1) .text {
+; OBJ:     0x{{[0-9,A-F]+}} R_ARM_MOVW_ABS_NC _MergedGlobals
+; OBJ:   }
+; OBJ: ]
diff --git a/test/MC/ARM/elf-reloc-02.ll b/test/MC/ARM/elf-reloc-02.ll
index 6b6b03c..f021764 100644
--- a/test/MC/ARM/elf-reloc-02.ll
+++ b/test/MC/ARM/elf-reloc-02.ll
@@ -1,7 +1,7 @@
 ;; RUN: llc -mtriple=armv7-linux-gnueabi -O3  \
 ;; RUN:    -mcpu=cortex-a8 -mattr=-neon -mattr=+vfp2  -arm-reserve-r9  \
 ;; RUN:    -filetype=obj %s -o - | \
-;; RUN:   elf-dump --dump-section-data | FileCheck -check-prefix=OBJ %s
+;; RUN:   llvm-readobj -r | FileCheck -check-prefix=OBJ %s
 
 ;; FIXME: This file needs to be in .s form!
 ;; The args to llc are there to constrain the codegen only.
@@ -41,10 +41,8 @@ declare i32 @write(...)
 
 declare void @exit(i32) noreturn nounwind
 
-;; OBJ:        Relocation 0
-;; OBJ-NEXT:    'r_offset',
-;; OBJ-NEXT:    'r_sym', 0x000005
-;; OBJ-NEXT:    'r_type', 0x2b
-
-;; OBJ:          Symbol 5
-;; OBJ-NEXT:    '.L.str'
+;; OBJ:      Relocations [
+;; OBJ:        Section (1) .text {
+;; OBJ-NEXT:     0x{{[0-9,A-F]+}} R_ARM_MOVW_ABS_NC .L.str
+;; OBJ:        }
+;; OBJ:      ]
diff --git a/test/MC/ARM/elf-reloc-03.ll b/test/MC/ARM/elf-reloc-03.ll
index 87f91c1..ac46e69 100644
--- a/test/MC/ARM/elf-reloc-03.ll
+++ b/test/MC/ARM/elf-reloc-03.ll
@@ -1,7 +1,7 @@
 ;; RUN: llc -mtriple=armv7-linux-gnueabi -O3  \
 ;; RUN:    -mcpu=cortex-a8 -mattr=-neon -mattr=+vfp2  -arm-reserve-r9  \
 ;; RUN:    -filetype=obj %s -o - | \
-;; RUN:   elf-dump --dump-section-data | FileCheck -check-prefix=OBJ %s
+;; RUN:   llvm-readobj -r | FileCheck -check-prefix=OBJ %s
 
 ;; FIXME: This file needs to be in .s form!
 ;; The args to llc are there to constrain the codegen only.
@@ -88,10 +88,8 @@ entry:
 
 declare void @exit(i32) noreturn nounwind
 
-;; OBJ:           Relocation 1
-;; OBJ-NEXT:     'r_offset',
-;; OBJ-NEXT:     'r_sym', 0x000010
-;; OBJ-NEXT:     'r_type', 0x2b
-
-;; OBJ:      Symbol 16
-;; OBJ-NEXT:    'vtable'
+;; OBJ: Relocations [
+;; OBJ:   Section (1) .text {
+;; OBJ:     0x{{[0-9,A-F]+}} R_ARM_MOVW_ABS_NC vtable
+;; OBJ:   }
+;; OBJ: ]
diff --git a/test/MC/ARM/elf-reloc-condcall.s b/test/MC/ARM/elf-reloc-condcall.s
index 3fafb43..612942f 100644
--- a/test/MC/ARM/elf-reloc-condcall.s
+++ b/test/MC/ARM/elf-reloc-condcall.s
@@ -1,33 +1,18 @@
 // RUN: llvm-mc -triple=armv7-linux-gnueabi -filetype=obj %s -o - | \
-// RUN:   elf-dump | FileCheck -check-prefix=OBJ %s
+// RUN:   llvm-readobj -r | FileCheck -check-prefix=OBJ %s
 
         bleq some_label
         bl some_label
         blx some_label
         beq some_label
         b some_label
-// OBJ: .rel.text
 
-// OBJ: 'r_offset', 0x00000000
-// OBJ-NEXT:  'r_sym', 0x000005
-// OBJ-NEXT: 'r_type', 0x1d
-
-// OBJ: 'r_offset', 0x00000004
-// OBJ-NEXT:  'r_sym', 0x000005
-// OBJ-NEXT: 'r_type', 0x1c
-
-// OBJ: 'r_offset', 0x00000008
-// OBJ-NEXT:  'r_sym', 0x000005
-// OBJ-NEXT: 'r_type', 0x1c
-
-// OBJ: 'r_offset', 0x0000000c
-// OBJ-NEXT:  'r_sym', 0x000005
-// OBJ-NEXT: 'r_type', 0x1d
-
-// OBJ: 'r_offset', 0x00000010
-// OBJ-NEXT:  'r_sym', 0x000005
-// OBJ-NEXT: 'r_type', 0x1d
-
-// OBJ: .symtab
-// OBJ: Symbol 5
-// OBJ-NEXT: some_label
+// OBJ:      Relocations [
+// OBJ-NEXT:   Section (1) .text {
+// OBJ-NEXT:     0x0  R_ARM_JUMP24 some_label 0x0
+// OBJ-NEXT:     0x4  R_ARM_CALL   some_label 0x0
+// OBJ-NEXT:     0x8  R_ARM_CALL   some_label 0x0
+// OBJ-NEXT:     0xC  R_ARM_JUMP24 some_label 0x0
+// OBJ-NEXT:     0x10 R_ARM_JUMP24 some_label 0x0
+// OBJ-NEXT:   }
+// OBJ-NEXT: ]
diff --git a/test/MC/ARM/elf-thumbfunc-reloc.ll b/test/MC/ARM/elf-thumbfunc-reloc.ll
index b2f253d..e7d2c34 100644
--- a/test/MC/ARM/elf-thumbfunc-reloc.ll
+++ b/test/MC/ARM/elf-thumbfunc-reloc.ll
@@ -1,5 +1,5 @@
 ; RUN: llc %s -mtriple=thumbv7-linux-gnueabi -relocation-model=pic \
-; RUN: -filetype=obj -o - | elf-dump --dump-section-data | \
+; RUN: -filetype=obj -o - | llvm-readobj -s -sd -r -t | \
 ; RUN: FileCheck %s
 
 ; FIXME: This file needs to be in .s form!
@@ -22,16 +22,20 @@ entry:
 
 
 ; make sure that bl 0 <foo> (fff7feff) is correctly encoded
-; CHECK: '_section_data', '704700bf 2de90048 fff7feff bde80088'
-
-;  Offset     Info    Type            Sym.Value  Sym. Name
-; 00000008  0000070a R_ARM_THM_CALL    00000001   foo
-; CHECK:           Relocation 0
-; CHECK-NEXT:      'r_offset', 0x00000008
-; CHECK-NEXT:      'r_sym', 0x000009
-; CHECK-NEXT:      'r_type', 0x0a
+; CHECK: Sections [
+; CHECK:   SectionData (
+; CHECK:     0000: 704700BF 2DE90048 FFF7FEFF BDE80088
+; CHECK:   )
+; CHECK: ]
+
+; CHECK:      Relocations [
+; CHECK-NEXT:   Section (1) .text {
+; CHECK-NEXT:     0x8 R_ARM_THM_CALL foo 0x0
+; CHECK-NEXT:   }
+; CHECK-NEXT: ]
 
 ; make sure foo is thumb function: bit 0 = 1
-; CHECK:           Symbol 9
-; CHECK-NEXT:      'foo'
-; CHECK-NEXT:      'st_value', 0x00000001
+; CHECK:      Symbols [
+; CHECK:        Symbol {
+; CHECK:          Name: foo
+; CHECK-NEXT:     Value: 0x1
diff --git a/test/MC/ARM/elf-thumbfunc-reloc.s b/test/MC/ARM/elf-thumbfunc-reloc.s
index 4a311dd..87a26d8 100644
--- a/test/MC/ARM/elf-thumbfunc-reloc.s
+++ b/test/MC/ARM/elf-thumbfunc-reloc.s
@@ -1,6 +1,6 @@
 @@ test st_value bit 0 of thumb function
 @ RUN: llvm-mc %s -triple=arm-freebsd-eabi -filetype=obj -o - | \
-@ RUN: elf-dump  | FileCheck %s
+@ RUN: llvm-readobj -r  | FileCheck %s
 
 
 	.syntax unified
@@ -17,7 +17,8 @@ f:
         pop     {r7, pc}
 
 @@ make sure an R_ARM_THM_CALL relocation is generated for the call to g
-@CHECK:        ('_relocations', [
-@CHECK:         (('r_offset', 0x00000004)
-@CHECK-NEXT:     ('r_sym', 0x{{[0-9a-fA-F]+}})
-@CHECK-NEXT:     ('r_type', 0x0a)
+@CHECK:      Relocations [
+@CHECK-NEXT:   Section (1) .text {
+@CHECK-NEXT:     0x4 R_ARM_THM_CALL g 0x0
+@CHECK-NEXT:   }
+@CHECK-NEXT: ]
diff --git a/test/MC/ARM/elf-thumbfunc.s b/test/MC/ARM/elf-thumbfunc.s
index 91b2eee..26f5f0b 100644
--- a/test/MC/ARM/elf-thumbfunc.s
+++ b/test/MC/ARM/elf-thumbfunc.s
@@ -1,6 +1,6 @@
 @@ test st_value bit 0 of thumb function
 @ RUN: llvm-mc %s -triple=thumbv7-linux-gnueabi -filetype=obj -o - | \
-@ RUN: elf-dump  | FileCheck %s
+@ RUN: llvm-readobj -t | FileCheck %s
 	.syntax unified
 	.text
 	.globl	foo
@@ -12,9 +12,9 @@ foo:
 	bx	lr
 
 @@ make sure foo is thumb function: bit 0 = 1 (st_value)
-@CHECK:           Symbol 5
-@CHECK-NEXT:      'st_name', 0x00000001
-@CHECK-NEXT:      'st_value', 0x00000001
-@CHECK-NEXT:      'st_size', 0x00000000
-@CHECK-NEXT:      'st_bind', 0x1
-@CHECK-NEXT:      'st_type', 0x2
+@CHECK:        Symbol {
+@CHECK:          Name: foo
+@CHECK-NEXT:     Value: 0x1
+@CHECK-NEXT:     Size: 0
+@CHECK-NEXT:     Binding: Global
+@CHECK-NEXT:     Type: Function
diff --git a/test/MC/ARM/invalid-hint-arm.s b/test/MC/ARM/invalid-hint-arm.s
new file mode 100644
index 0000000..e0cd97a
--- /dev/null
+++ b/test/MC/ARM/invalid-hint-arm.s
@@ -0,0 +1,7 @@
+@ RUN: llvm-mc -triple=armv7-apple-darwin -mcpu=cortex-a8 < %s 2>&1 | FileCheck %s
+
+hint #5
+hint #100
+
+@ CHECK: error: immediate operand must be in the range [0,4]
+@ CHECK: error: immediate operand must be in the range [0,4]
diff --git a/test/MC/ARM/invalid-hint-thumb.s b/test/MC/ARM/invalid-hint-thumb.s
new file mode 100644
index 0000000..fd0a761
--- /dev/null
+++ b/test/MC/ARM/invalid-hint-thumb.s
@@ -0,0 +1,9 @@
+@ RUN: llvm-mc -triple=thumbv7-apple-darwin -mcpu=cortex-a8 < %s 2>&1 | FileCheck %s
+
+hint #5
+hint.w #5
+hint #100
+
+@ CHECK: error: immediate operand must be in the range [0,4]
+@ CHECK: error: immediate operand must be in the range [0,4]
+@ CHECK: error: immediate operand must be in the range [0,4]
diff --git a/test/MC/ARM/neon-cmp-encoding.s b/test/MC/ARM/neon-cmp-encoding.s
index b3aedb8..cffbeab 100644
--- a/test/MC/ARM/neon-cmp-encoding.s
+++ b/test/MC/ARM/neon-cmp-encoding.s
@@ -174,3 +174,24 @@
 @ CHECK: vcge.u16	q8, q9, q8      @ encoding: [0xf0,0x03,0x52,0xf3]
 @ CHECK: vcge.u32	q8, q9, q8      @ encoding: [0xf0,0x03,0x62,0xf3]
 @ CHECK: vcge.f32	q8, q9, q8      @ encoding: [0xe0,0x0e,0x42,0xf3]
+
+
+@ VACLT is an alias for VACGT w/ the source operands reversed.
+@ VACLE is an alias for VACGE w/ the source operands reversed.
+	vaclt.f32 q9, q11, q12
+	vaclt.f32 d9, d11, d12
+	vaclt.f32 q11, q12
+	vaclt.f32 d11, d12
+	vacle.f32 q9, q11, q12
+	vacle.f32 d9, d11, d12
+	vacle.f32 q11, q12
+	vacle.f32 d11, d12
+
+@ CHECK: vacgt.f32	q9, q12, q11    @ encoding: [0xf6,0x2e,0x68,0xf3]
+@ CHECK: vacgt.f32	d9, d12, d11    @ encoding: [0x1b,0x9e,0x2c,0xf3]
+@ CHECK: vacgt.f32	q11, q12, q11   @ encoding: [0xf6,0x6e,0x68,0xf3]
+@ CHECK: vacgt.f32	d11, d12, d11   @ encoding: [0x1b,0xbe,0x2c,0xf3]
+@ CHECK: vacge.f32	q9, q12, q11    @ encoding: [0xf6,0x2e,0x48,0xf3]
+@ CHECK: vacge.f32	d9, d12, d11    @ encoding: [0x1b,0x9e,0x0c,0xf3]
+@ CHECK: vacge.f32	q11, q12, q11   @ encoding: [0xf6,0x6e,0x48,0xf3]
+@ CHECK: vacge.f32	d11, d12, d11   @ encoding: [0x1b,0xbe,0x0c,0xf3]
diff --git a/test/MC/ARM/xscale-attributes.ll b/test/MC/ARM/xscale-attributes.ll
index 3ccf02b..d1e9931 100644
--- a/test/MC/ARM/xscale-attributes.ll
+++ b/test/MC/ARM/xscale-attributes.ll
@@ -2,7 +2,7 @@
 ; RUN: FileCheck -check-prefix=ASM %s
 
 ; RUN: llc %s -mtriple=thumbv5-linux-gnueabi -filetype=obj \
-; RUN: -mcpu=xscale -o - | elf-dump --dump-section-data | \
+; RUN: -mcpu=xscale -o - | llvm-readobj -s -sd | \
 ; RUN: FileCheck -check-prefix=OBJ %s
 
 ; FIXME: The OBJ test should be a .s to .o test and the ASM test should
@@ -17,15 +17,22 @@ entry:
 ; ASM-NEXT:      .eabi_attribute 8, 1
 ; ASM-NEXT:      .eabi_attribute 9, 1
 
-; OBJ:           Section 4
-; OBJ-NEXT:      'sh_name', 0x0000000c
-; OBJ-NEXT:      'sh_type', 0x70000003
-; OBJ-NEXT:	   'sh_flags', 0x00000000
-; OBJ-NEXT:	   'sh_addr', 0x00000000
-; OBJ-NEXT:	   'sh_offset', 0x00000038
-; OBJ-NEXT:	   'sh_size', 0x00000020
-; OBJ-NEXT:	   'sh_link', 0x00000000
-; OBJ-NEXT:	   'sh_info', 0x00000000
-; OBJ-NEXT:	   'sh_addralign', 0x00000001
-; OBJ-NEXT:	   'sh_entsize', 0x00000000
-; OBJ-NEXT:      '_section_data', '411f0000 00616561 62690001 15000000 06050801 09011401 15011703 18011901'
+; OBJ:      Sections [
+; OBJ:        Section {
+; OBJ:          Index: 4
+; OBJ-NEXT:     Name: .ARM.attributes (12)
+; OBJ-NEXT:     Type: SHT_ARM_ATTRIBUTES
+; OBJ-NEXT:     Flags [ (0x0)
+; OBJ-NEXT:     ]
+; OBJ-NEXT:     Address: 0x0
+; OBJ-NEXT:     Offset: 0x38
+; OBJ-NEXT:     Size: 32
+; OBJ-NEXT:     Link: 0
+; OBJ-NEXT:     Info: 0
+; OBJ-NEXT:     AddressAlignment: 1
+; OBJ-NEXT:     EntrySize: 0
+; OBJ-NEXT:     SectionData (
+; OBJ-NEXT:       0000: 411F0000 00616561 62690001 15000000
+; OBJ-NEXT:       0010: 06050801 09011401 15011703 18011901
+; OBJ-NEXT:     )
+; OBJ-NEXT:   }
diff --git a/test/MC/AsmParser/exprs.s b/test/MC/AsmParser/exprs.s
index df075f8..a7e1002 100644
--- a/test/MC/AsmParser/exprs.s
+++ b/test/MC/AsmParser/exprs.s
@@ -45,6 +45,7 @@ k:
         check_expr 0 || 0, 0
         check_expr 1 + 2 < 3 + 4, 1
         check_expr 1 << 8 - 1, 128
+        check_expr 3 * 9 - 2 * 9 + 1, 10
 
         .set c, 10
         check_expr c + 1, 11
diff --git a/test/MC/AsmParser/section.s b/test/MC/AsmParser/section.s
index 5abacc7..0c3828d 100644
--- a/test/MC/AsmParser/section.s
+++ b/test/MC/AsmParser/section.s
@@ -1,5 +1,5 @@
 # RUN: llvm-mc -triple i386-pc-linux-gnu -filetype=obj -o %t %s
-# RUN: elf-dump --dump-section-data < %t | FileCheck %s
+# RUN: llvm-readobj -s -sd < %t | FileCheck %s
 .section test1
 .byte 1
 .section test2
@@ -45,63 +45,85 @@
 .previous
 .byte 1
 .previous
-# CHECK:       (('sh_name', 0x00000044) # 'test1'
-# CHECK-NEXT:   ('sh_type', 0x00000001)
-# CHECK-NEXT:   ('sh_flags', 0x00000000)
-# CHECK-NEXT:   ('sh_addr', 0x00000000)
-# CHECK-NEXT:   ('sh_offset', 0x00000034)
-# CHECK-NEXT:   ('sh_size', 0x00000007)
-# CHECK-NEXT:   ('sh_link', 0x00000000)
-# CHECK-NEXT:   ('sh_info', 0x00000000)
-# CHECK-NEXT:   ('sh_addralign', 0x00000001)
-# CHECK-NEXT:   ('sh_entsize', 0x00000000)
-# CHECK-NEXT:   ('_section_data', '01010101 010101')
-# CHECK-NEXT:  ),
-# CHECK:       (('sh_name', 0x0000003e) # 'test2'
-# CHECK-NEXT:   ('sh_type', 0x00000001)
-# CHECK-NEXT:   ('sh_flags', 0x00000000)
-# CHECK-NEXT:   ('sh_addr', 0x00000000)
-# CHECK-NEXT:   ('sh_offset', 0x0000003b)
-# CHECK-NEXT:   ('sh_size', 0x00000006)
-# CHECK-NEXT:   ('sh_link', 0x00000000)
-# CHECK-NEXT:   ('sh_info', 0x00000000)
-# CHECK-NEXT:   ('sh_addralign', 0x00000001)
-# CHECK-NEXT:   ('sh_entsize', 0x00000000)
-# CHECK-NEXT:   ('_section_data', '02020202 0202')
-# CHECK-NEXT:  ),
-# CHECK:       (('sh_name', 0x00000038) # 'test3'
-# CHECK-NEXT:   ('sh_type', 0x00000001)
-# CHECK-NEXT:   ('sh_flags', 0x00000000)
-# CHECK-NEXT:   ('sh_addr', 0x00000000)
-# CHECK-NEXT:   ('sh_offset', 0x00000041)
-# CHECK-NEXT:   ('sh_size', 0x00000005)
-# CHECK-NEXT:   ('sh_link', 0x00000000)
-# CHECK-NEXT:   ('sh_info', 0x00000000)
-# CHECK-NEXT:   ('sh_addralign', 0x00000001)
-# CHECK-NEXT:   ('sh_entsize', 0x00000000)
-# CHECK-NEXT:   ('_section_data', '03030303 03')
-# CHECK-NEXT:  ),
-# CHECK:       (('sh_name', 0x00000032) # 'test4'
-# CHECK-NEXT:   ('sh_type', 0x00000001)
-# CHECK-NEXT:   ('sh_flags', 0x00000000)
-# CHECK-NEXT:   ('sh_addr', 0x00000000)
-# CHECK-NEXT:   ('sh_offset', 0x00000046)
-# CHECK-NEXT:   ('sh_size', 0x00000003)
-# CHECK-NEXT:   ('sh_link', 0x00000000)
-# CHECK-NEXT:   ('sh_info', 0x00000000)
-# CHECK-NEXT:   ('sh_addralign', 0x00000001)
-# CHECK-NEXT:   ('sh_entsize', 0x00000000)
-# CHECK-NEXT:   ('_section_data', '040404')
-# CHECK-NEXT:  ),
-# CHECK:       (('sh_name', 0x0000002c) # 'test5'
-# CHECK-NEXT:   ('sh_type', 0x00000001)
-# CHECK-NEXT:   ('sh_flags', 0x00000000)
-# CHECK-NEXT:   ('sh_addr', 0x00000000)
-# CHECK-NEXT:   ('sh_offset', 0x00000049)
-# CHECK-NEXT:   ('sh_size', 0x00000001)
-# CHECK-NEXT:   ('sh_link', 0x00000000)
-# CHECK-NEXT:   ('sh_info', 0x00000000)
-# CHECK-NEXT:   ('sh_addralign', 0x00000001)
-# CHECK-NEXT:   ('sh_entsize', 0x00000000)
-# CHECK-NEXT:   ('_section_data', '05')
-# CHECK-NEXT:  ),
+
+# CHECK:      Sections [
+# CHECK:        Section {
+# CHECK:          Name: test1 (68)
+# CHECK-NEXT:     Type: SHT_PROGBITS
+# CHECK-NEXT:     Flags [ (0x0)
+# CHECK-NEXT:     ]
+# CHECK-NEXT:     Address: 0x0
+# CHECK-NEXT:     Offset: 0x34
+# CHECK-NEXT:     Size: 7
+# CHECK-NEXT:     Link: 0
+# CHECK-NEXT:     Info: 0
+# CHECK-NEXT:     AddressAlignment: 1
+# CHECK-NEXT:     EntrySize: 0
+# CHECK-NEXT:     SectionData (
+# CHECK-NEXT:       0000: 01010101 010101
+# CHECK-NEXT:     )
+# CHECK-NEXT:   }
+# CHECK:        Section {
+# CHECK:          Name: test2 (62)
+# CHECK-NEXT:     Type: SHT_PROGBITS
+# CHECK-NEXT:     Flags [ (0x0)
+# CHECK-NEXT:     ]
+# CHECK-NEXT:     Address: 0x0
+# CHECK-NEXT:     Offset: 0x3B
+# CHECK-NEXT:     Size: 6
+# CHECK-NEXT:     Link: 0
+# CHECK-NEXT:     Info: 0
+# CHECK-NEXT:     AddressAlignment: 1
+# CHECK-NEXT:     EntrySize: 0
+# CHECK-NEXT:     SectionData (
+# CHECK-NEXT:       0000: 02020202 0202
+# CHECK-NEXT:     )
+# CHECK-NEXT:   }
+# CHECK:        Section {
+# CHECK:          Name: test3 (56)
+# CHECK-NEXT:     Type: SHT_PROGBITS
+# CHECK-NEXT:     Flags [ (0x0)
+# CHECK-NEXT:     ]
+# CHECK-NEXT:     Address: 0x0
+# CHECK-NEXT:     Offset: 0x41
+# CHECK-NEXT:     Size: 5
+# CHECK-NEXT:     Link: 0
+# CHECK-NEXT:     Info: 0
+# CHECK-NEXT:     AddressAlignment: 1
+# CHECK-NEXT:     EntrySize: 0
+# CHECK-NEXT:     SectionData (
+# CHECK-NEXT:       0000: 03030303 03
+# CHECK-NEXT:     )
+# CHECK-NEXT:   }
+# CHECK:        Section {
+# CHECK:          Name: test4 (50)
+# CHECK-NEXT:     Type: SHT_PROGBITS
+# CHECK-NEXT:     Flags [ (0x0)
+# CHECK-NEXT:     ]
+# CHECK-NEXT:     Address: 0x0
+# CHECK-NEXT:     Offset: 0x46
+# CHECK-NEXT:     Size: 3
+# CHECK-NEXT:     Link: 0
+# CHECK-NEXT:     Info: 0
+# CHECK-NEXT:     AddressAlignment: 1
+# CHECK-NEXT:     EntrySize: 0
+# CHECK-NEXT:     SectionData (
+# CHECK-NEXT:       0000: 040404
+# CHECK-NEXT:     )
+# CHECK-NEXT:   }
+# CHECK:        Section {
+# CHECK:          Name: test5 (44)
+# CHECK-NEXT:     Type: SHT_PROGBITS
+# CHECK-NEXT:     Flags [ (0x0)
+# CHECK-NEXT:     ]
+# CHECK-NEXT:     Address: 0x0
+# CHECK-NEXT:     Offset: 0x49
+# CHECK-NEXT:     Size: 1
+# CHECK-NEXT:     Link: 0
+# CHECK-NEXT:     Info: 0
+# CHECK-NEXT:     AddressAlignment: 1
+# CHECK-NEXT:     EntrySize: 0
+# CHECK-NEXT:     SectionData (
+# CHECK-NEXT:       0000: 05
+# CHECK-NEXT:     )
+# CHECK-NEXT:   }
diff --git a/test/MC/AsmParser/section_names.s b/test/MC/AsmParser/section_names.s
index 332cdbe..3883e15 100644
--- a/test/MC/AsmParser/section_names.s
+++ b/test/MC/AsmParser/section_names.s
@@ -1,5 +1,5 @@
 # RUN: llvm-mc -triple i386-pc-linux-gnu -filetype=obj -o %t %s
-# RUN: elf-dump --dump-section-data < %t | FileCheck %s
+# RUN: llvm-readobj -s < %t | FileCheck %s
 .section .nobits
 .byte 1
 .section .nobits2
@@ -30,33 +30,33 @@
 .byte 1
 .section .notefoo
 .byte 1
-# CHECK:      (('sh_name', 0x00000{{...}}) # '.nobits'
-# CHECK-NEXT:  ('sh_type', 0x00000001)
-# CHECK:      (('sh_name', 0x00000{{...}}) # '.nobits2'
-# CHECK-NEXT:  ('sh_type', 0x00000001)
-# CHECK:      (('sh_name', 0x00000{{...}}) # '.nobitsfoo'
-# CHECK-NEXT:  ('sh_type', 0x00000001)
-# CHECK:      (('sh_name', 0x00000{{...}}) # '.init_array'
-# CHECK-NEXT:  ('sh_type', 0x0000000e)
-# CHECK:      (('sh_name', 0x00000{{...}}) # '.init_array2'
-# CHECK-NEXT:  ('sh_type', 0x00000001)
-# CHECK:      (('sh_name', 0x00000{{...}}) # '.init_arrayfoo'
-# CHECK-NEXT:  ('sh_type', 0x00000001)
-# CHECK:      (('sh_name', 0x00000{{...}}) # '.fini_array'
-# CHECK-NEXT:  ('sh_type', 0x0000000f)
-# CHECK:      (('sh_name', 0x00000{{...}}) # '.fini_array2'
-# CHECK-NEXT:  ('sh_type', 0x00000001)
-# CHECK:      (('sh_name', 0x00000{{...}}) # '.fini_arrayfoo'
-# CHECK-NEXT:  ('sh_type', 0x00000001)
-# CHECK:      (('sh_name', 0x00000{{...}}) # '.preinit_array'
-# CHECK-NEXT:  ('sh_type', 0x00000010)
-# CHECK:      (('sh_name', 0x00000{{...}}) # '.preinit_array2'
-# CHECK-NEXT:  ('sh_type', 0x00000001)
-# CHECK:      (('sh_name', 0x00000{{...}}) # '.preinit_arrayfoo'
-# CHECK-NEXT:  ('sh_type', 0x00000001)
-# CHECK:      (('sh_name', 0x00000{{...}}) # '.note'
-# CHECK-NEXT:  ('sh_type', 0x00000007)
-# CHECK:      (('sh_name', 0x00000{{...}}) # '.note2'
-# CHECK-NEXT:  ('sh_type', 0x00000007)
-#CHECK:       (('sh_name', 0x00000{{...}}) # '.notefoo'
-# CHECK-NEXT:  ('sh_type', 0x00000007)
+# CHECK:        Name: .nobits
+# CHECK-NEXT:   Type: SHT_PROGBITS
+# CHECK:        Name: .nobits2
+# CHECK-NEXT:   Type: SHT_PROGBITS
+# CHECK:        Name: .nobitsfoo
+# CHECK-NEXT:   Type: SHT_PROGBITS
+# CHECK:        Name: .init_array
+# CHECK-NEXT:   Type:  SHT_INIT_ARRAY
+# CHECK:        Name: .init_array2
+# CHECK-NEXT:   Type: SHT_PROGBITS
+# CHECK:        Name: .init_arrayfoo
+# CHECK-NEXT:   Type: SHT_PROGBITS
+# CHECK:        Name: .fini_array
+# CHECK-NEXT:   Type: SHT_FINI_ARRAY
+# CHECK:        Name: .fini_array2
+# CHECK-NEXT:   Type: SHT_PROGBITS
+# CHECK:        Name: .fini_arrayfoo
+# CHECK-NEXT:   Type: SHT_PROGBITS
+# CHECK:        Name: .preinit_array
+# CHECK-NEXT:   Type: SHT_PREINIT_ARRAY
+# CHECK:        Name: .preinit_array2
+# CHECK-NEXT:   Type: SHT_PROGBITS
+# CHECK:        Name: .preinit_arrayfoo
+# CHECK-NEXT:   Type: SHT_PROGBITS
+# CHECK:        Name: .note
+# CHECK-NEXT:   Type: SHT_NOTE
+# CHECK:        Name: .note2
+# CHECK-NEXT:   Type: SHT_NOTE
+# CHECK:        Name: .notefoo
+# CHECK-NEXT:   Type: SHT_NOTE
diff --git a/test/MC/COFF/align-nops.s b/test/MC/COFF/align-nops.s
index 2971ec6..02b4884 100644
--- a/test/MC/COFF/align-nops.s
+++ b/test/MC/COFF/align-nops.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s -o - | coff-dump.py | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s | llvm-readobj -s -sd | FileCheck %s
 
 // Test that we get optimal nops in text
     .text
@@ -15,36 +15,40 @@ f0:
     .long 0
     .align  8
 
-//CHECK:         Name                     = .text
-//CHECK-NEXT:    VirtualSize
-//CHECK-NEXT:    VirtualAddress
-//CHECK-NEXT:    SizeOfRawData            = 16
-//CHECK-NEXT:    PointerToRawData
-//CHECK-NEXT:    PointerToRelocations
-//CHECK-NEXT:    PointerToLineNumbers
-//CHECK-NEXT:    NumberOfRelocations
-//CHECK-NEXT:    NumberOfLineNumbers
-//CHECK-NEXT:    Charateristics           = 0x60400020
-//CHECK-NEXT:        IMAGE_SCN_CNT_CODE
+//CHECK:          Name: .text
+//CHECK-NEXT:     VirtualSize
+//CHECK-NEXT:     VirtualAddress
+//CHECK-NEXT:     RawDataSize: 16
+//CHECK-NEXT:     PointerToRawData
+//CHECK-NEXT:     PointerToRelocations
+//CHECK-NEXT:     PointerToLineNumbers
+//CHECK-NEXT:     RelocationCount
+//CHECK-NEXT:     LineNumberCount
+//CHECK-NEXT:     Characteristics [ (0x60400020)
 //CHECK-NEXT:        IMAGE_SCN_ALIGN_8BYTES
+//CHECK-NEXT:        IMAGE_SCN_CNT_CODE
 //CHECK-NEXT:        IMAGE_SCN_MEM_EXECUTE
 //CHECK-NEXT:        IMAGE_SCN_MEM_READ
-//CHECK-NEXT:      SectionData              =
-//CHECK-NEXT:        00 00 00 00 0F 1F 40 00 - 00 00 00 00 0F 1F 40 00
+//CHECK-NEXT:     ]
+//CHECK-NEXT:     SectionData (
+//CHECK-NEXT:       0000: 00000000 0F1F4000 00000000 0F1F4000
+//CHECK-NEXT:     )
 
-//CHECK:         Name                     = .data
-//CHECK-NEXT:      VirtualSize
-//CHECK-NEXT:      VirtualAddress
-//CHECK-NEXT:      SizeOfRawData            = 16
-//CHECK-NEXT:      PointerToRawData
-//CHECK-NEXT:      PointerToRelocations
-//CHECK-NEXT:      PointerToLineNumbers
-//CHECK-NEXT:      NumberOfRelocations
-//CHECK-NEXT:      NumberOfLineNumbers
-//CHECK-NEXT:      Charateristics           = 0xC0400040
-//CHECK-NEXT:        IMAGE_SCN_CNT_INITIALIZED_DATA
-//CHECK-NEXT:        IMAGE_SCN_ALIGN_8BYTES
-//CHECK-NEXT:        IMAGE_SCN_MEM_READ
-//CHECK-NEXT:        IMAGE_SCN_MEM_WRITE
-//CHECK-NEXT:      SectionData              =
-//CHECK-NEXT:        00 00 00 00 90 90 90 90 - 00 00 00 00 00 00 00 00
+//CHECK:          Name: .data
+//CHECK-NEXT:     VirtualSize:
+//CHECK-NEXT:     VirtualAddress:
+//CHECK-NEXT:     RawDataSize: 16
+//CHECK-NEXT:     PointerToRawData:
+//CHECK-NEXT:     PointerToRelocations:
+//CHECK-NEXT:     PointerToLineNumbers:
+//CHECK-NEXT:     RelocationCount:
+//CHECK-NEXT:     LineNumberCount:
+//CHECK-NEXT:     Characteristics [ (0xC0400040)
+//CHECK-NEXT:       IMAGE_SCN_ALIGN_8BYTES
+//CHECK-NEXT:       IMAGE_SCN_CNT_INITIALIZED_DATA
+//CHECK-NEXT:       IMAGE_SCN_MEM_READ
+//CHECK-NEXT:       IMAGE_SCN_MEM_WRITE
+//CHECK-NEXT:     ]
+//CHECK-NEXT:     SectionData (
+//CHECK-NEXT:       0000: 00000000 90909090 00000000 00000000
+//CHECK-NEXT:     )
diff --git a/test/MC/COFF/basic-coff-64.s b/test/MC/COFF/basic-coff-64.s
new file mode 100644
index 0000000..89d1745
--- /dev/null
+++ b/test/MC/COFF/basic-coff-64.s
@@ -0,0 +1,137 @@
+// This test checks that the COFF object emitter works for the most basic
+// programs.
+
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-win32 %s | llvm-readobj -h -s -sr -sd -t | FileCheck %s
+
+.def	 _main;
+	.scl	2;
+	.type	32;
+	.endef
+	.text
+	.globl	_main
+	.align	16, 0x90
+_main:                                  # @main
+# BB#0:                                 # %entry
+	subl	$4, %esp
+	movl	$.L_.str, (%esp)
+	call	_printf
+	xorl	%eax, %eax
+	addl	$4, %esp
+	ret
+
+	.data
+.L_.str:                                # @.str
+	.asciz	"Hello World"
+
+// CHECK: ImageFileHeader {
+// CHECK:   Machine: IMAGE_FILE_MACHINE_AMD64
+// CHECK:   SectionCount: 2
+// CHECK:   TimeDateStamp: {{[0-9]+}}
+// CHECK:   PointerToSymbolTable: 0x{{[0-9A-F]+}}
+// CHECK:   SymbolCount: 6
+// CHECK:   OptionalHeaderSize: 0
+// CHECK:   Characteristics [ (0x0)
+// CHECK:   ]
+// CHECK: }
+// CHECK: Sections [
+// CHECK:   Section {
+// CHECK:     Number:               [[TextNum:[0-9]+]]
+// CHECK:     Name:                 .text
+// CHECK:     VirtualSize:          0
+// CHECK:     VirtualAddress:       0
+// CHECK:     RawDataSize:          [[TextSize:[0-9]+]]
+// CHECK:     PointerToRawData:     0x{{[0-9A-F]+}}
+// CHECK:     PointerToRelocations: 0x{{[0-9A-F]+}}
+// CHECK:     PointerToLineNumbers: 0x0
+// CHECK:     RelocationCount:      2
+// CHECK:     LineNumberCount:      0
+// CHECK:     Characteristics [ (0x60500020)
+// CHECK:       IMAGE_SCN_ALIGN_16BYTES
+// CHECK:       IMAGE_SCN_CNT_CODE
+// CHECK:       IMAGE_SCN_MEM_EXECUTE
+// CHECK:       IMAGE_SCN_MEM_READ
+// CHECK:     ]
+// CHECK:     Relocations [
+// CHECK:       0x{{[0-9A-F]+}} IMAGE_REL_AMD64_ADDR32 .data
+// CHECK:       0x{{[0-9A-F]+}} IMAGE_REL_AMD64_REL32 _printf
+// CHECK:     ]
+// CHECK:   }
+// CHECK:   Section {
+// CHECK:     Number:               [[DataNum:[0-9]+]]
+// CHECK:     Name:                 .data
+// CHECK:     VirtualSize:          0
+// CHECK:     VirtualAddress:       0
+// CHECK:     RawDataSize:          [[DataSize:[0-9]+]]
+// CHECK:     PointerToRawData:     0x{{[0-9A-F]+}}
+// CHECK:     PointerToRelocations: 0x0
+// CHECK:     PointerToLineNumbers: 0x0
+// CHECK:     RelocationCount:      0
+// CHECK:     LineNumberCount:      0
+// CHECK:     Characteristics [ (0xC0300040)
+// CHECK:       IMAGE_SCN_ALIGN_4BYTES
+// CHECK:       IMAGE_SCN_CNT_INITIALIZED_DATA
+// CHECK:       IMAGE_SCN_MEM_READ
+// CHECK:       IMAGE_SCN_MEM_WRITE
+// CHECK:     ]
+// CHECK:     Relocations [
+// CHECK:     ]
+// CHECK:     SectionData (
+// CHECK:       0000: 48656C6C 6F20576F 726C6400             |Hello World.|
+// CHECK:     )
+// CHECK:   }
+// CHECK: ]
+// CHECK: Symbols [
+// CHECK:   Symbol {
+// CHECK:     Name:           .text
+// CHECK:     Value:          0
+// CHECK:     Section:        .text
+// CHECK:     BaseType:       Null
+// CHECK:     ComplexType:    Null
+// CHECK:     StorageClass:   Static
+// CHECK:     AuxSymbolCount: 1
+// CHECK:     AuxSectionDef {
+// CHECK:       Length: [[TextSize]]
+// CHECK:       RelocationCount: 2
+// CHECK:       LineNumberCount: 0
+// CHECK:       Checksum: 0x0
+// CHECK:       Number: [[TextNum]]
+// CHECK:       Selection: 0x0
+// CHECK:     }
+// CHECK:   }
+// CHECK:   Symbol {
+// CHECK:     Name:           .data
+// CHECK:     Value:          0
+// CHECK:     Section:        .data
+// CHECK:     BaseType:       Null
+// CHECK:     ComplexType:    Null
+// CHECK:     StorageClass:   Static
+// CHECK:     AuxSymbolCount: 1
+// CHECK:     AuxSectionDef {
+// CHECK:       Length: [[DataSize]]
+// CHECK:       RelocationCount: 0
+// CHECK:       LineNumberCount: 0
+// CHECK:       Checksum: 0x0
+// CHECK:       Number: [[DataNum]]
+// CHECK:       Selection: 0x0
+// CHECK:       Unused: (00 00 00)
+// CHECK:     }
+// CHECK:   }
+// CHECK:   Symbol {
+// CHECK:     Name:           _main
+// CHECK:     Value:          0
+// CHECK:     Section:        .text
+// CHECK:     BaseType:       Null
+// CHECK:     ComplexType:    Function
+// CHECK:     StorageClass:   External
+// CHECK:     AuxSymbolCount: 0
+// CHECK:   }
+// CHECK:   Symbol {
+// CHECK:     Name:           _printf
+// CHECK:     Value:          0
+// CHECK:     Section:        (0)
+// CHECK:     BaseType:       Null
+// CHECK:     ComplexType:    Null
+// CHECK:     StorageClass:   External
+// CHECK:     AuxSymbolCount: 0
+// CHECK:   }
+// CHECK: ]
diff --git a/test/MC/COFF/basic-coff.s b/test/MC/COFF/basic-coff.s
index 23156b8..9b29970 100644
--- a/test/MC/COFF/basic-coff.s
+++ b/test/MC/COFF/basic-coff.s
@@ -1,8 +1,7 @@
 // This test checks that the COFF object emitter works for the most basic
 // programs.
 
-// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s | coff-dump.py | FileCheck %s
-// I WOULD RUN, BUT THIS FAILS: llvm-mc -filetype=obj -triple x86_64-pc-win32 %s
+// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s | llvm-readobj -h -s -sr -sd -t | FileCheck %s
 
 .def	 _main;
 	.scl	2;
@@ -15,119 +14,124 @@ _main:                                  # @main
 # BB#0:                                 # %entry
 	subl	$4, %esp
 	movl	$L_.str, (%esp)
-	calll	_printf
+	call	_printf
 	xorl	%eax, %eax
 	addl	$4, %esp
 	ret
 
 	.data
 L_.str:                                 # @.str
-	.asciz	 "Hello World"
+	.asciz	"Hello World"
 
-// CHECK: {
-// CHECK:   MachineType              = IMAGE_FILE_MACHINE_I386 (0x14C)
-// CHECK:   NumberOfSections         = 2
-// CHECK:   TimeDateStamp            = {{[0-9]+}}
-// CHECK:   PointerToSymbolTable     = 0x{{[0-9A-F]+}}
-// CHECK:   NumberOfSymbols          = 6
-// CHECK:   SizeOfOptionalHeader     = 0
-// CHECK:   Characteristics          = 0x0
-// CHECK:   Sections                 = [
-// CHECK:     1 = {
-// CHECK:       Name                     = .text
-// CHECK:       VirtualSize              = 0
-// CHECK:       VirtualAddress           = 0
-// CHECK:       SizeOfRawData            = {{[0-9]+}}
-// CHECK:       PointerToRawData         = 0x{{[0-9A-F]+}}
-// CHECK:       PointerToRelocations     = 0x{{[0-9A-F]+}}
-// CHECK:       PointerToLineNumbers     = 0x0
-// CHECK:       NumberOfRelocations      = 2
-// CHECK:       NumberOfLineNumbers      = 0
-// CHECK:       Charateristics           = 0x60500020
-// CHECK:         IMAGE_SCN_CNT_CODE
-// CHECK:         IMAGE_SCN_ALIGN_16BYTES
-// CHECK:         IMAGE_SCN_MEM_EXECUTE
-// CHECK:         IMAGE_SCN_MEM_READ
-// CHECK:       SectionData              =
-// CHECK:       Relocations              = [
-// CHECK:         0 = {
-// CHECK:           VirtualAddress           = 0x{{[0-9A-F]+}}
-// CHECK:           SymbolTableIndex         = 2
-// CHECK:           Type                     = IMAGE_REL_I386_DIR32 (6)
-// CHECK:           SymbolName               = .data
-// CHECK:         }
-// CHECK:         1 = {
-// CHECK:           VirtualAddress           = 0x{{[0-9A-F]+}}
-// CHECK:           SymbolTableIndex         = 5
-// CHECK:           Type                     = IMAGE_REL_I386_REL32 (20)
-// CHECK:           SymbolName               = _printf
-// CHECK:         }
-// CHECK:       ]
-// CHECK:     }
-// CHECK:     2 = {
-// CHECK:       Name                     = .data
-// CHECK:       VirtualSize              = 0
-// CHECK:       VirtualAddress           = 0
-// CHECK:       SizeOfRawData            = {{[0-9]+}}
-// CHECK:       PointerToRawData         = 0x{{[0-9A-F]+}}
-// CHECK:       PointerToRelocations     = 0x0
-// CHECK:       PointerToLineNumbers     = 0x0
-// CHECK:       NumberOfRelocations      = 0
-// CHECK:       NumberOfLineNumbers      = 0
-// CHECK:       Charateristics           = 0xC0300040
-// CHECK:         IMAGE_SCN_CNT_INITIALIZED_DATA
-// CHECK:         IMAGE_SCN_ALIGN_4BYTES
-// CHECK:         IMAGE_SCN_MEM_READ
-// CHECK:         IMAGE_SCN_MEM_WRITE
-// CHECK:       SectionData              =
-// CHECK:         48 65 6C 6C 6F 20 57 6F - 72 6C 64 00             |Hello World.|
-// CHECK:       Relocations              = None
-// CHECK:     }
+// CHECK: ImageFileHeader {
+// CHECK:   Machine: IMAGE_FILE_MACHINE_I386
+// CHECK:   SectionCount: 2
+// CHECK:   TimeDateStamp: {{[0-9]+}}
+// CHECK:   PointerToSymbolTable: 0x{{[0-9A-F]+}}
+// CHECK:   SymbolCount: 6
+// CHECK:   OptionalHeaderSize: 0
+// CHECK:   Characteristics [ (0x0)
 // CHECK:   ]
-// CHECK:   Symbols                  = [
-// CHECK:     0 = {
-// CHECK:       Name                     = .text
-// CHECK:       Value                    = 0
-// CHECK:       SectionNumber            = 1
-// CHECK:       SimpleType               = IMAGE_SYM_TYPE_NULL (0)
-// CHECK:       ComplexType              = IMAGE_SYM_DTYPE_NULL (0)
-// CHECK:       StorageClass             = IMAGE_SYM_CLASS_STATIC (3)
-// CHECK:       NumberOfAuxSymbols       = 1
-// CHECK:       AuxillaryData            =
-// CHECK:         15 00 00 00 02 00 00 00 - 00 00 00 00 01 00 00 00 |................|
-// CHECK:         00 00                                             |..|
-// CHECK:     }
-// CHECK:     2 = {
-// CHECK:       Name                     = .data
-// CHECK:       Value                    = 0
-// CHECK:       SectionNumber            = 2
-// CHECK:       SimpleType               = IMAGE_SYM_TYPE_NULL (0)
-// CHECK:       ComplexType              = IMAGE_SYM_DTYPE_NULL (0)
-// CHECK:       StorageClass             = IMAGE_SYM_CLASS_STATIC (3)
-// CHECK:       NumberOfAuxSymbols       = 1
-// CHECK:       AuxillaryData            =
-// CHECK:         0C 00 00 00 00 00 00 00 - 00 00 00 00 02 00 00 00 |................|
-// CHECK:         00 00                                             |..|
-// CHECK:     }
-// CHECK:     4 = {
-// CHECK:       Name                     = _main
-// CHECK:       Value                    = 0
-// CHECK:       SectionNumber            = 1
-// CHECK:       SimpleType               = IMAGE_SYM_TYPE_NULL (0)
-// CHECK:       ComplexType              = IMAGE_SYM_DTYPE_FUNCTION (2)
-// CHECK:       StorageClass             = IMAGE_SYM_CLASS_EXTERNAL (2)
-// CHECK:       NumberOfAuxSymbols       = 0
-// CHECK:       AuxillaryData            =
+// CHECK: }
+// CHECK: Sections [
+// CHECK:   Section {
+// CHECK:     Number:               [[TextNum:[0-9]+]]
+// CHECK:     Name:                 .text
+// CHECK:     VirtualSize:          0
+// CHECK:     VirtualAddress:       0
+// CHECK:     RawDataSize:          {{[0-9]+}}
+// CHECK:     PointerToRawData:     0x{{[0-9A-F]+}}
+// CHECK:     PointerToRelocations: 0x{{[0-9A-F]+}}
+// CHECK:     PointerToLineNumbers: 0x0
+// CHECK:     RelocationCount:      2
+// CHECK:     LineNumberCount:      0
+// CHECK:     Characteristics [ (0x60500020)
+// CHECK:       IMAGE_SCN_ALIGN_16BYTES
+// CHECK:       IMAGE_SCN_CNT_CODE
+// CHECK:       IMAGE_SCN_MEM_EXECUTE
+// CHECK:       IMAGE_SCN_MEM_READ
+// CHECK:     ]
+// CHECK:     Relocations [
+// CHECK:       0x{{[0-9A-F]+}} IMAGE_REL_I386_DIR32 .data
+// CHECK:       0x{{[0-9A-F]+}} IMAGE_REL_I386_REL32 _printf
+// CHECK:     ]
+// CHECK:   }
+// CHECK:   Section {
+// CHECK:     Number:               [[DataNum:[0-9]+]]
+// CHECK:     Name:                 .data
+// CHECK:     VirtualSize:          0
+// CHECK:     VirtualAddress:       0
+// CHECK:     RawDataSize:          {{[0-9]+}}
+// CHECK:     PointerToRawData:     0x{{[0-9A-F]+}}
+// CHECK:     PointerToRelocations: 0x0
+// CHECK:     PointerToLineNumbers: 0x0
+// CHECK:     RelocationCount:      0
+// CHECK:     LineNumberCount:      0
+// CHECK:     Characteristics [ (0xC0300040)
+// CHECK:       IMAGE_SCN_ALIGN_4BYTES
+// CHECK:       IMAGE_SCN_CNT_INITIALIZED_DATA
+// CHECK:       IMAGE_SCN_MEM_READ
+// CHECK:       IMAGE_SCN_MEM_WRITE
+// CHECK:     ]
+// CHECK:     Relocations [
+// CHECK:     ]
+// CHECK:     SectionData (
+// CHECK:       0000: 48656C6C 6F20576F 726C6400             |Hello World.|
+// CHECK:     )
+// CHECK:   }
+// CHECK: ]
+// CHECK: Symbols [
+// CHECK:   Symbol {
+// CHECK:     Name:           .text
+// CHECK:     Value:          0
+// CHECK:     Section:        .text
+// CHECK:     BaseType:       Null
+// CHECK:     ComplexType:    Null
+// CHECK:     StorageClass:   Static
+// CHECK:     AuxSymbolCount: 1
+// CHECK:     AuxSectionDef {
+// CHECK:       Length: 21
+// CHECK:       RelocationCount: 2
+// CHECK:       LineNumberCount: 0
+// CHECK:       Checksum: 0x0
+// CHECK:       Number: 1
+// CHECK:       Selection: 0x0
 // CHECK:     }
-// CHECK:     5 = {
-// CHECK:       Name                     = _printf
-// CHECK:       Value                    = 0
-// CHECK:       SectionNumber            = 0
-// CHECK:       SimpleType               = IMAGE_SYM_TYPE_NULL (0)
-// CHECK:       ComplexType              = IMAGE_SYM_DTYPE_NULL (0)
-// CHECK:       StorageClass             = IMAGE_SYM_CLASS_EXTERNAL (2)
-// CHECK:       NumberOfAuxSymbols       = 0
-// CHECK:       AuxillaryData            =
+// CHECK:   }
+// CHECK:   Symbol {
+// CHECK:     Name:           .data
+// CHECK:     Value:          0
+// CHECK:     Section:        .data
+// CHECK:     BaseType:       Null
+// CHECK:     ComplexType:    Null
+// CHECK:     StorageClass:   Static
+// CHECK:     AuxSymbolCount: 1
+// CHECK:     AuxSectionDef {
+// CHECK:       Length: 12
+// CHECK:       RelocationCount: 0
+// CHECK:       LineNumberCount: 0
+// CHECK:       Checksum: 0x0
+// CHECK:       Number: 2
+// CHECK:       Selection: 0x0
+// CHECK:       Unused: (00 00 00)
 // CHECK:     }
-// CHECK:   ]
-// CHECK: }
+// CHECK:   }
+// CHECK:   Symbol {
+// CHECK:     Name:           _main
+// CHECK:     Value:          0
+// CHECK:     Section:        .text
+// CHECK:     BaseType:       Null
+// CHECK:     ComplexType:    Function
+// CHECK:     StorageClass:   External
+// CHECK:     AuxSymbolCount: 0
+// CHECK:   }
+// CHECK:   Symbol {
+// CHECK:     Name:           _printf
+// CHECK:     Value:          0
+// CHECK:     Section:        (0)
+// CHECK:     BaseType:       Null
+// CHECK:     ComplexType:    Null
+// CHECK:     StorageClass:   External
+// CHECK:     AuxSymbolCount: 0
+// CHECK:   }
+// CHECK: ]
diff --git a/test/MC/COFF/bss.s b/test/MC/COFF/bss.s
index 3bed13d..86294c1 100644
--- a/test/MC/COFF/bss.s
+++ b/test/MC/COFF/bss.s
@@ -1,7 +1,7 @@
 // The purpose of this test is to verify that bss sections are emited correctly.
 
-// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s | coff-dump.py | FileCheck %s
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-win32 %s | coff-dump.py | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s | llvm-readobj -s | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-win32 %s | llvm-readobj -s | FileCheck %s
 
     .bss
     .globl _g0
@@ -9,7 +9,7 @@
 _g0:
     .long 0
 
-// CHECK:      Name           = .bss
-// CHECK-NEXT: VirtualSize    = 0
-// CHECK-NEXT: VirtualAddress = 0
-// CHECK-NEXT: SizeOfRawData  = 4
+// CHECK:      Name:            .bss
+// CHECK-NEXT: VirtualSize:     0
+// CHECK-NEXT: VirtualAddress:  0
+// CHECK-NEXT: RawDataSize:     4
diff --git a/test/MC/COFF/diff.s b/test/MC/COFF/diff.s
index aa683f2..820272a 100644
--- a/test/MC/COFF/diff.s
+++ b/test/MC/COFF/diff.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple i686-pc-mingw32 %s | coff-dump.py | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple i686-pc-mingw32 %s | llvm-readobj -s -sr -sd | FileCheck %s
 
 	.def	 _foobar;
 	.scl	2;
@@ -21,26 +21,12 @@ _rust_crate:
 	.long	_foobar-_rust_crate
 	.long	_foobar-_rust_crate
 
-// CHECK:      Name                     = .data
-// CHECK:      SectionData              =
-// CHECK-NEXT:   00 00 00 00 00 00 00 00 - 1C 00 00 00 20 00 00 00 |............ ...|
-// CHECK:        Relocations              = [
-// CHECK-NEXT:   0 = {
-// CHECK-NEXT:     VirtualAddress           = 0x4
-// CHECK-NEXT:     SymbolTableIndex         =
-// CHECK-NEXT:     Type                     = IMAGE_REL_I386_DIR32 (6)
-// CHECK-NEXT:     SymbolName               = _foobar
-// CHECK-NEXT:   }
-// CHECK-NEXT:   1 = {
-// CHECK-NEXT:     VirtualAddress           = 0x8
-// CHECK-NEXT:     SymbolTableIndex         = 0
-// CHECK-NEXT:     Type                     = IMAGE_REL_I386_REL32 (20)
-// CHECK-NEXT:     SymbolName               = .text
-// CHECK-NEXT:   }
-// CHECK-NEXT:   2 = {
-// CHECK-NEXT:     VirtualAddress           = 0xC
-// CHECK-NEXT:     SymbolTableIndex         = 0
-// CHECK-NEXT:     Type                     = IMAGE_REL_I386_REL32 (20)
-// CHECK-NEXT:     SymbolName               = .text
-// CHECK-NEXT:   }
-// CHECK-NEXT: ]
+// CHECK:        Name: .data
+// CHECK:        Relocations [
+// CHECK-NEXT:     0x4 IMAGE_REL_I386_DIR32 _foobar
+// CHECK-NEXT:     0x8 IMAGE_REL_I386_REL32 .text
+// CHECK-NEXT:     0xC IMAGE_REL_I386_REL32 .text
+// CHECK-NEXT:   ]
+// CHECK:        SectionData (
+// CHECK-NEXT:     0000: 00000000 00000000 1C000000 20000000
+// CHECK-NEXT:   )
diff --git a/test/MC/COFF/linker-options.ll b/test/MC/COFF/linker-options.ll
new file mode 100755
index 0000000..de11941
--- /dev/null
+++ b/test/MC/COFF/linker-options.ll
@@ -0,0 +1,21 @@
+; RUN: llc -O0 -mtriple=i386-pc-win32 -filetype=asm -o - %s | FileCheck %s
+
+!0 = metadata !{ i32 6, metadata !"Linker Options",
+   metadata !{
+      metadata !{ metadata !"/DEFAULTLIB:msvcrt.lib" },
+      metadata !{ metadata !"/DEFAULTLIB:msvcrt.lib",
+                  metadata !"/DEFAULTLIB:secur32.lib" },
+      metadata !{ metadata !"/with spaces" } } }
+
+!llvm.module.flags = !{ !0 }
+
+define dllexport void @foo() {
+  ret void
+}
+
+; CHECK: .section        .drectve,"r"
+; CHECK: .ascii   " /DEFAULTLIB:msvcrt.lib"
+; CHECK: .ascii   " /DEFAULTLIB:msvcrt.lib"
+; CHECK: .ascii   " /DEFAULTLIB:secur32.lib"
+; CHECK: .ascii   " \"/with spaces\""
+; CHECK: .ascii   " /EXPORT:_foo"
diff --git a/test/MC/COFF/module-asm.ll b/test/MC/COFF/module-asm.ll
index 9c6d00d..bf14dc6 100644
--- a/test/MC/COFF/module-asm.ll
+++ b/test/MC/COFF/module-asm.ll
@@ -1,26 +1,28 @@
 ; The purpose of this test is to verify that various module level assembly
 ; constructs work.
 
-; RUN: llc -filetype=obj -mtriple i686-pc-win32 %s -o - | coff-dump.py | FileCheck %s
-; RUN: llc -filetype=obj -mtriple x86_64-pc-win32 %s -o - | coff-dump.py | FileCheck %s
+; RUN: llc -filetype=obj -mtriple i686-pc-win32 %s -o - | llvm-readobj -s -sd | FileCheck %s
+; RUN: llc -filetype=obj -mtriple x86_64-pc-win32 %s -o - | llvm-readobj -s -sd | FileCheck %s
 
 module asm ".text"
 module asm "_foo:"
 module asm "  ret"
 
-; CHECK:            Name                     = .text
-; CHECK-NEXT:       VirtualSize              = 0
-; CHECK-NEXT:       VirtualAddress           = 0
-; CHECK-NEXT:       SizeOfRawData            = {{[0-9]+}}
-; CHECK-NEXT:       PointerToRawData         = 0x{{[0-9A-F]+}}
-; CHECK-NEXT:       PointerToRelocations     = 0x{{[0-9A-F]+}}
-; CHECK-NEXT:       PointerToLineNumbers     = 0x0
-; CHECK-NEXT:       NumberOfRelocations      = 0
-; CHECK-NEXT:       NumberOfLineNumbers      = 0
-; CHECK-NEXT:       Charateristics           = 0x60300020
-; CHECK-NEXT:         IMAGE_SCN_CNT_CODE
+; CHECK:            Name:                      .text
+; CHECK-NEXT:       VirtualSize:               0
+; CHECK-NEXT:       VirtualAddress:            0
+; CHECK-NEXT:       RawDataSize:               {{[0-9]+}}
+; CHECK-NEXT:       PointerToRawData:          0x{{[0-9A-F]+}}
+; CHECK-NEXT:       PointerToRelocations:      0x{{[0-9A-F]+}}
+; CHECK-NEXT:       PointerToLineNumbers:      0x0
+; CHECK-NEXT:       RelocationCount:           0
+; CHECK-NEXT:       LineNumberCount:           0
+; CHECK-NEXT:       Characteristics [ (0x60300020)
 ; CHECK-NEXT:         IMAGE_SCN_ALIGN_4BYTES
+; CHECK-NEXT:         IMAGE_SCN_CNT_CODE
 ; CHECK-NEXT:         IMAGE_SCN_MEM_EXECUTE
 ; CHECK-NEXT:         IMAGE_SCN_MEM_READ
-; CHECK-NEXT:       SectionData              =
-; CHECK-NEXT:         C3
+; CHECK-NEXT:       ]
+; CHECK-NEXT:       SectionData (
+; CHECK-NEXT:         0000: C3
+; CHECK-NEXT:       )
diff --git a/test/MC/COFF/relocation-imgrel.s b/test/MC/COFF/relocation-imgrel.s
new file mode 100644
index 0000000..ccd19ee
--- /dev/null
+++ b/test/MC/COFF/relocation-imgrel.s
@@ -0,0 +1,29 @@
+// COFF Image-relative relocations
+//
+// Test that we produce image-relative relocations (IMAGE_REL_I386_DIR32NB
+// and IMAGE_REL_AMD64_ADDR32NB) when accessing foo@imgrel.
+
+// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s | llvm-readobj -r | FileCheck --check-prefix=W32 %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-win32 %s | llvm-readobj -r | FileCheck --check-prefix=W64 %s
+
+.data
+foo:
+    .long 1
+
+.text
+    mov foo@IMGREL(%ebx, %ecx, 4), %eax
+    mov foo@imgrel(%ebx, %ecx, 4), %eax
+
+// W32:      Relocations [
+// W32-NEXT:   Section (1) .text {
+// W32-NEXT:     0x3 IMAGE_REL_I386_DIR32NB foo
+// W32-NEXT:     0xA IMAGE_REL_I386_DIR32NB foo
+// W32-NEXT:   }
+// W32-NEXT: ]
+
+// W64:      Relocations [
+// W64-NEXT:   Section (1) .text {
+// W64-NEXT:     0x4 IMAGE_REL_AMD64_ADDR32NB foo
+// W64-NEXT:     0xC IMAGE_REL_AMD64_ADDR32NB foo
+// W64-NEXT:   }
+// W64-NEXT: ]
diff --git a/test/MC/COFF/secrel-variant.s b/test/MC/COFF/secrel-variant.s
new file mode 100644
index 0000000..1061bd4
--- /dev/null
+++ b/test/MC/COFF/secrel-variant.s
@@ -0,0 +1,19 @@
+// COFF section-relative relocations
+
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-win32 %s | llvm-readobj -r | FileCheck %s
+
+.data
+values:
+    .long 1
+    .long 0
+
+.text
+    movq    values@SECREL32(%rax), %rcx
+    movq    values@SECREL32+8(%rax), %rax
+
+// CHECK:      Relocations [
+// CHECK-NEXT:   Section (1) .text {
+// CHECK-NEXT:     0x3 IMAGE_REL_AMD64_SECREL values
+// CHECK-NEXT:     0xA IMAGE_REL_AMD64_SECREL values
+// CHECK-NEXT:   }
+// CHECK-NEXT: ]
diff --git a/test/MC/COFF/secrel32.s b/test/MC/COFF/secrel32.s
index ce148db..deadfe0 100644
--- a/test/MC/COFF/secrel32.s
+++ b/test/MC/COFF/secrel32.s
@@ -1,14 +1,10 @@
-// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s | coff-dump.py | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s | llvm-readobj -s -sr | FileCheck %s
 
 // check that we produce the correct relocation for .secrel32
 
 Lfoo:
 	.secrel32	Lfoo
 
-// CHECK:       Relocations              = [
-// CHECK-NEXT:    0 = {
-// CHECK-NEXT:       VirtualAddress           = 0x0
-// CHECK-NEXT:       SymbolTableIndex         = 0
-// CHECK-NEXT:       Type                     = IMAGE_REL_I386_SECREL (11)
-// CHECK-NEXT:       SymbolName               = .text
-// CHECK-NEXT:     }
+// CHECK:       Relocations [
+// CHECK-NEXT:    0x0 IMAGE_REL_I386_SECREL .text
+// CHECK-NEXT:  ]
diff --git a/test/MC/COFF/seh-section.s b/test/MC/COFF/seh-section.s
index 802cba5..7f05cc3 100644
--- a/test/MC/COFF/seh-section.s
+++ b/test/MC/COFF/seh-section.s
@@ -1,24 +1,26 @@
 // This test ensures that, if the section containing a function has a suffix
 // (e.g. .text$foo), its unwind info section also has a suffix (.xdata$foo).
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-win32 %s | coff-dump.py | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-win32 %s | llvm-readobj -s -sd | FileCheck %s
 // XFAIL: *
 
-// CHECK:      Name                 = .xdata$foo
+// CHECK:      Name: .xdata$foo
 // CHECK-NEXT: VirtualSize
 // CHECK-NEXT: VirtualAddress
-// CHECK-NEXT: SizeOfRawData        = 8
+// CHECK-NEXT: RawDataSize: 8
 // CHECK-NEXT: PointerToRawData
 // CHECK-NEXT: PointerToRelocations
 // CHECK-NEXT: PointerToLineNumbers
-// CHECK-NEXT: NumberOfRelocations  = 0
-// CHECK-NEXT: NumberOfLineNumbers  = 0
-// CHECK-NEXT: Charateristics
-// CHECK-NEXT:   IMAGE_SCN_CNT_INITIALIZED_DATA
+// CHECK-NEXT: RelocationCount: 0
+// CHECK-NEXT: LineNumberCount: 0
+// CHECK-NEXT: Characteristics [
 // CHECK-NEXT:   IMAGE_SCN_ALIGN_4BYTES
+// CHECK-NEXT:   IMAGE_SCN_CNT_INITIALIZED_DATA
 // CHECK-NEXT:   IMAGE_SCN_MEM_READ
 // CHECK-NEXT:   IMAGE_SCN_MEM_WRITE
-// CHECK-NEXT: SectionData
-// CHECK-NEXT:   01 05 02 00 05 50 04 02
+// CHECK-NEXT: ]
+// CHECK-NEXT: SectionData (
+// CHECK-NEXT:   0000: 01050200 05500402
+// CHECK-NEXT: )
 
     .section .text$foo,"x"
     .globl foo
diff --git a/test/MC/COFF/seh.s b/test/MC/COFF/seh.s
index 3f72805..bef425e 100644
--- a/test/MC/COFF/seh.s
+++ b/test/MC/COFF/seh.s
@@ -1,24 +1,105 @@
 // This test checks that the SEH directives emit the correct unwind data.
-// RUN: llvm-mc -triple x86_64-pc-win32 -filetype=obj %s | coff-dump.py | FileCheck %s
 
-// CHECK:      Name                 = .xdata
-// CHECK-NEXT: VirtualSize
-// CHECK-NEXT: VirtualAddress
-// CHECK-NEXT: SizeOfRawData        = 52
-// CHECK-NEXT: PointerToRawData
-// CHECK-NEXT: PointerToRelocations
-// CHECK-NEXT: PointerToLineNumbers
-// CHECK-NEXT: NumberOfRelocations  = 4
-// CHECK-NEXT: NumberOfLineNumbers  = 0
-// CHECK-NEXT: Charateristics
-// CHECK-NEXT:   IMAGE_SCN_CNT_INITIALIZED_DATA
-// CHECK-NEXT:   IMAGE_SCN_ALIGN_4BYTES
-// CHECK-NEXT:   IMAGE_SCN_MEM_READ
-// CHECK-NEXT: SectionData
-// CHECK-NEXT:   09 12 08 03 00 03 0F 30 - 0E 88 00 00 09 64 02 00
-// CHECK-NEXT:   04 22 00 1A 00 00 00 00 - 00 00 00 00 21 00 00 00
-// CHECK-NEXT:   00 00 00 00 1B 00 00 00 - 00 00 00 00 01 00 00 00
-// CHECK-NEXT:   00 00 00 00
+// TODO: Expected fail because SET_FPREG has a wrong offset.
+// XFAIL: *
+// RUN: llvm-mc -triple x86_64-pc-win32 -filetype=obj %s | llvm-readobj -s -u | FileCheck %s
+
+// CHECK:      Sections [
+// CHECK:        Section {
+// CHECK:          Name: .text
+// CHECK:          RelocationCount: 0
+// CHECK:          Characteristics [
+// CHECK-NEXT:       ALIGN_4BYTES
+// CHECK-NEXT:       CNT_CODE
+// CHECK-NEXT:       MEM_EXECUTE
+// CHECK-NEXT:       MEM_READ
+// CHECK-NEXT:     ]
+// CHECK-NEXT:   }
+// CHECK:        Section {
+// CHECK:          Name: .xdata
+// CHECK:          RawDataSize: 52
+// CHECK:          RelocationCount: 4
+// CHECK:          Characteristics [
+// CHECK-NEXT:       ALIGN_4BYTES
+// CHECK-NEXT:       CNT_INITIALIZED_DATA
+// CHECK-NEXT:       MEM_READ
+// CHECK-NEXT:     ]
+// CHECK-NEXT:   }
+// CHECK:        Section {
+// CHECK:          Name: .pdata
+// CHECK:          RelocationCount: 9
+// CHECK:          Characteristics [
+// CHECK-NEXT:       ALIGN_4BYTES
+// CHECK-NEXT:       CNT_INITIALIZED_DATA
+// CHECK-NEXT:       MEM_READ
+// CHECK-NEXT:     ]
+// CHECK-NEXT:   }
+// CHECK-NEXT: ]
+
+// CHECK:      UnwindInformation [
+// CHECK-NEXT:   RuntimeFunction {
+// CHECK-NEXT:     StartAddress: [[CodeSect1:[^ ]+]] [[BeginDisp1:(\+0x[A-F0-9]+)?]]
+// CHECK-NEXT:     EndAddress: [[CodeSect1]] [[EndDisp1:(\+0x[A-F0-9]+)?]]
+// CHECK-NEXT:     UnwindInfoAddress:
+// CHECK-NEXT:     UnwindInfo {
+// CHECK-NEXT:       Version: 1
+// CHECK-NEXT:       Flags [
+// CHECK-NEXT:         ExceptionHandler
+// CHECK-NEXT:       ]
+// CHECK-NEXT:       PrologSize: 18
+// CHECK-NEXT:       FrameRegister: RBX
+// CHECK-NEXT:       FrameOffset: 0x0
+// CHECK-NEXT:       UnwindCodeCount: 8
+// CHECK-NEXT:       UnwindCodes [
+// CHECK-NEXT:         0x12: SET_FPREG reg=RBX, offset=0x0
+// CHECK-NEXT:         0x0F: PUSH_NONVOL reg=RBX
+// CHECK-NEXT:         0x0E: SAVE_XMM128 reg=XMM8, offset=0x0
+// CHECK-NEXT:         0x09: SAVE_NONVOL reg=RSI, offset=0x10
+// CHECK-NEXT:         0x04: ALLOC_SMALL size=24
+// CHECK-NEXT:         0x00: PUSH_MACHFRAME errcode=yes
+// CHECK-NEXT:       ]
+// CHECK-NEXT:       Handler: __C_specific_handler
+// CHECK-NEXT:     }
+// CHECK-NEXT:   }
+// CHECK-NEXT:   RuntimeFunction {
+// CHECK-NEXT:     StartAddress: [[CodeSect2:[^ ]+]] [[BeginDisp2:(\+0x[A-F0-9]+)?]]
+// CHECK-NEXT:     EndAddress: [[CodeSect2]] [[EndDisp2:(\+0x[A-F0-9]+)?]]
+// CHECK-NEXT:     UnwindInfoAddress:
+// CHECK-NEXT:     UnwindInfo {
+// CHECK-NEXT:       Version: 1
+// CHECK-NEXT:       Flags [
+// CHECK-NEXT:         ChainInfo
+// CHECK-NEXT:       ]
+// CHECK-NEXT:       PrologSize: 0
+// CHECK-NEXT:       FrameRegister: -
+// CHECK-NEXT:       FrameOffset: -
+// CHECK-NEXT:       UnwindCodeCount: 0
+// CHECK-NEXT:       UnwindCodes [
+// CHECK-NEXT:       ]
+// CHECK-NEXT:       Chained {
+// CHECK-NEXT:         StartAddress: [[CodeSect1]] [[BeginDisp1]]
+// CHECK-NEXT:         EndAddress: [[CodeSect1]] [[EndDisp1]]
+// CHECK-NEXT:         UnwindInfoAddress:
+// CHECK-NEXT:       }
+// CHECK-NEXT:     }
+// CHECK-NEXT:   }
+// CHECK-NEXT:   RuntimeFunction {
+// CHECK-NEXT:     StartAddress: [[CodeSect3:[^ ]+]] [[BeginDisp3:(\+0x[A-F0-9]+)?]]
+// CHECK-NEXT:     EndAddress: [[CodeSect3]] [[EndDisp3:(\+0x[A-F0-9]+)?]]
+// CHECK-NEXT:     UnwindInfoAddress:
+// CHECK-NEXT:     UnwindInfo {
+// CHECK-NEXT:       Version: 1
+// CHECK-NEXT:       Flags [
+// CHECK-NEXT:       ]
+// CHECK-NEXT:       PrologSize: 0
+// CHECK-NEXT:       FrameRegister: -
+// CHECK-NEXT:       FrameOffset: -
+// CHECK-NEXT:       UnwindCodeCount: 0
+// CHECK-NEXT:       UnwindCodes [
+// CHECK-NEXT:       ]
+// CHECK-NEXT:     }
+// CHECK-NEXT:   }
+// CHECK-NEXT: ]
 
     .text
     .globl func
diff --git a/test/MC/COFF/simple-fixups.s b/test/MC/COFF/simple-fixups.s
index 4c9b4d4..2a74f21 100644
--- a/test/MC/COFF/simple-fixups.s
+++ b/test/MC/COFF/simple-fixups.s
@@ -1,8 +1,8 @@
 // The purpose of this test is to verify that we do not produce unneeded
 // relocations when symbols are in the same section and we know their offset.
 
-// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s | coff-dump.py | FileCheck %s
-// I WOULD RUN, BUT THIS FAILS: llvm-mc -filetype=obj -triple x86_64-pc-win32 %s | coff-dump.py | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s | llvm-readobj -s | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-win32 %s | llvm-readobj -s | FileCheck %s
 
 	.def	 _foo;
 	.scl	2;
@@ -41,10 +41,9 @@ _baz:                                   # @baz
 # BB#0:                                 # %e
 	subl	$4, %esp
 Ltmp0:
-	calll	_baz
+	call	_baz
 	addl	$4, %esp
 	ret
 
-// CHECK:     Sections = [
-// CHECK-NOT: NumberOfRelocations = {{[^0]}}
-// CHECK:     Symbols = [
+// CHECK:     Sections [
+// CHECK-NOT: RelocationCount: {{[^0]}}
diff --git a/test/MC/COFF/symbol-alias.s b/test/MC/COFF/symbol-alias.s
index 4b1772c..ccada37 100644
--- a/test/MC/COFF/symbol-alias.s
+++ b/test/MC/COFF/symbol-alias.s
@@ -1,9 +1,9 @@
 // The purpose of this test is to verify that symbol aliases
-// (@foo = alias <type> @bar) generate the correct entries in the symbol table.
+// (@foo = alias <type> @bar) generate the correct entries in the symbol table.
 // They should be identical except for the name.
 
-// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s | coff-dump.py | FileCheck %s
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-win32 %s | coff-dump.py | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s | llvm-readobj -t | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-win32 %s | llvm-readobj -t | FileCheck %s
 
 	.def	 _foo;
 	.scl	2;
@@ -31,43 +31,43 @@ _bar_alias_alias = _bar_alias
 	.globl	_bar_alias
 _bar_alias = _bar
 
-// CHECK:      Name               = {{_?}}foo
-// CHECK-NEXT: Value              = [[FOO_VALUE:.*$]]
-// CHECK-NEXT: SectionNumber      = [[FOO_SECTION_NUMBER:.*$]]
-// CHECK-NEXT: SimpleType         = [[FOO_SIMPLE_TYPE:.*$]]
-// CHECK-NEXT: ComplexType        = [[FOO_COMPLEX_TYPE:.*$]]
-// CHECK-NEXT: StorageClass       = [[FOO_STORAGE_CLASS:.*$]]
-// CHECK-NEXT: NumberOfAuxSymbols = [[FOO_NUMBER_OF_AUX_SYMBOLS:.*$]]
+// CHECK:      Name:                {{_?}}foo
+// CHECK-NEXT: Value:               [[FOO_VALUE:.*$]]
+// CHECK-NEXT: Section:             [[FOO_SECTION_NUMBER:.*$]]
+// CHECK-NEXT: BaseType:            [[FOO_SIMPLE_TYPE:.*$]]
+// CHECK-NEXT: ComplexType:         [[FOO_COMPLEX_TYPE:.*$]]
+// CHECK-NEXT: StorageClass:        [[FOO_STORAGE_CLASS:.*$]]
+// CHECK-NEXT: AuxSymbolCount:      [[FOO_NUMBER_OF_AUX_SYMBOLS:.*$]]
 
-// CHECK:      Name               = {{_?}}bar
-// CHECK-NEXT: Value              = [[BAR_VALUE:.*$]]
-// CHECK-NEXT: SectionNumber      = [[BAR_SECTION_NUMBER:.*$]]
-// CHECK-NEXT: SimpleType         = [[BAR_SIMPLE_TYPE:.*$]]
-// CHECK-NEXT: ComplexType        = [[BAR_COMPLEX_TYPE:.*$]]
-// CHECK-NEXT: StorageClass       = [[BAR_STORAGE_CLASS:.*$]]
-// CHECK-NEXT: NumberOfAuxSymbols = [[BAR_NUMBER_OF_AUX_SYMBOLS:.*$]]
+// CHECK:      Name:                {{_?}}bar
+// CHECK-NEXT: Value:               [[BAR_VALUE:.*$]]
+// CHECK-NEXT: Section:             [[BAR_SECTION_NUMBER:.*$]]
+// CHECK-NEXT: BaseType:            [[BAR_SIMPLE_TYPE:.*$]]
+// CHECK-NEXT: ComplexType:         [[BAR_COMPLEX_TYPE:.*$]]
+// CHECK-NEXT: StorageClass:        [[BAR_STORAGE_CLASS:.*$]]
+// CHECK-NEXT: AuxSymbolCount:      [[BAR_NUMBER_OF_AUX_SYMBOLS:.*$]]
 
-// CHECK:      Name               = {{_?}}foo_alias
-// CHECK-NEXT: Value              = [[FOO_VALUE]]
-// CHECK-NEXT: SectionNumber      = [[FOO_SECTION_NUMBER]]
-// CHECK-NEXT: SimpleType         = [[FOO_SIMPLE_TYPE]]
-// CHECK-NEXT: ComplexType        = [[FOO_COMPLEX_TYPE]]
-// CHECK-NEXT: StorageClass       = [[FOO_STORAGE_CLASS]]
-// CHECK-NEXT: NumberOfAuxSymbols = [[FOO_NUMBER_OF_AUX_SYMBOLS]]
+// CHECK:      Name:                {{_?}}foo_alias
+// CHECK-NEXT: Value:               [[FOO_VALUE]]
+// CHECK-NEXT: Section:             [[FOO_SECTION_NUMBER]]
+// CHECK-NEXT: BaseType:            [[FOO_SIMPLE_TYPE]]
+// CHECK-NEXT: ComplexType:         [[FOO_COMPLEX_TYPE]]
+// CHECK-NEXT: StorageClass:        [[FOO_STORAGE_CLASS]]
+// CHECK-NEXT: AuxSymbolCount:      [[FOO_NUMBER_OF_AUX_SYMBOLS]]
 
-// CHECK:      Name               = {{_?}}bar_alias_alias
-// CHECK-NEXT: Value              = [[BAR_VALUE]]
-// CHECK-NEXT: SectionNumber      = [[BAR_SECTION_NUMBER]]
-// CHECK-NEXT: SimpleType         = [[BAR_SIMPLE_TYPE]]
-// CHECK-NEXT: ComplexType        = [[BAR_COMPLEX_TYPE]]
-// CHECK-NEXT: StorageClass       = [[BAR_STORAGE_CLASS]]
-// CHECK-NEXT: NumberOfAuxSymbols = [[BAR_NUMBER_OF_AUX_SYMBOLS]]
+// CHECK:      Name:                {{_?}}bar_alias_alias
+// CHECK-NEXT: Value:               [[BAR_VALUE]]
+// CHECK-NEXT: Section:             [[BAR_SECTION_NUMBER]]
+// CHECK-NEXT: BaseType:            [[BAR_SIMPLE_TYPE]]
+// CHECK-NEXT: ComplexType:         [[BAR_COMPLEX_TYPE]]
+// CHECK-NEXT: StorageClass:        [[BAR_STORAGE_CLASS]]
+// CHECK-NEXT: AuxSymbolCount:      [[BAR_NUMBER_OF_AUX_SYMBOLS]]
 
-// CHECK:      Name               = {{_?}}bar_alias
-// CHECK-NEXT: Value              = [[BAR_VALUE]]
-// CHECK-NEXT: SectionNumber      = [[BAR_SECTION_NUMBER]]
-// CHECK-NEXT: SimpleType         = [[BAR_SIMPLE_TYPE]]
-// CHECK-NEXT: ComplexType        = [[BAR_COMPLEX_TYPE]]
-// CHECK-NEXT: StorageClass       = [[BAR_STORAGE_CLASS]]
-// CHECK-NEXT: NumberOfAuxSymbols = [[BAR_NUMBER_OF_AUX_SYMBOLS]]
+// CHECK:      Name:                {{_?}}bar_alias
+// CHECK-NEXT: Value:               [[BAR_VALUE]]
+// CHECK-NEXT: Section:             [[BAR_SECTION_NUMBER]]
+// CHECK-NEXT: BaseType:            [[BAR_SIMPLE_TYPE]]
+// CHECK-NEXT: ComplexType:         [[BAR_COMPLEX_TYPE]]
+// CHECK-NEXT: StorageClass:        [[BAR_STORAGE_CLASS]]
+// CHECK-NEXT: AuxSymbolCount:      [[BAR_NUMBER_OF_AUX_SYMBOLS]]
 
diff --git a/test/MC/COFF/symbol-fragment-offset-64.s b/test/MC/COFF/symbol-fragment-offset-64.s
new file mode 100644
index 0000000..b824470
--- /dev/null
+++ b/test/MC/COFF/symbol-fragment-offset-64.s
@@ -0,0 +1,168 @@
+// The purpose of this test is to see if the COFF object writer is emitting the
+// proper relocations for multiple pieces of data in a single data fragment.
+
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-win32 %s | llvm-readobj -h -s -sr -sd -t | FileCheck %s
+
+.def	 _main;
+	.scl	2;
+	.type	32;
+	.endef
+	.text
+	.globl	_main
+	.align	16, 0x90
+_main:                                  # @main
+# BB#0:                                 # %entry
+	subl	$4, %esp
+	movl	$.L_.str0, (%esp)
+	callq	_printf
+	movl	$.L_.str1, (%esp)
+	callq	_puts
+	movl	$.L_.str2, (%esp)
+	callq	_puts
+	xorl	%eax, %eax
+	addl	$4, %esp
+	ret
+
+	.data
+.L_.str0:                                # @.str0
+	.asciz	 "Hello "
+
+.L_.str1:                                # @.str1
+	.asciz	 "World!"
+
+	.align	16                      # @.str2
+.L_.str2:
+	.asciz	 "I'm The Last Line."
+
+// CHECK: {
+// CHECK:   Machine:                   IMAGE_FILE_MACHINE_AMD64
+// CHECK:   SectionCount:              2
+// CHECK:   TimeDateStamp:             {{[0-9]+}}
+// CHECK:   PointerToSymbolTable:      0x{{[0-9A-F]+}}
+// CHECK:   SymbolCount:               7
+// CHECK:   OptionalHeaderSize:        0
+// CHECK:   Characteristics [ (0x0)
+// CHECK:   ]
+// CHECK: }
+// CHECK: Sections [
+// CHECK:   Section {
+// CHECK:     Number:                    1
+// CHECK:     Name:                      .text
+// CHECK:     VirtualSize:               0
+// CHECK:     VirtualAddress:            0
+// CHECK:     RawDataSize:               {{[0-9]+}}
+// CHECK:     PointerToRawData:          0x{{[0-9A-F]+}}
+// CHECK:     PointerToRelocations:      0x{{[0-9A-F]+}}
+// CHECK:     PointerToLineNumbers:      0x0
+// CHECK:     RelocationCount:           6
+// CHECK:     LineNumberCount:           0
+// CHECK:     Characteristics [ (0x60500020)
+// CHECK:       IMAGE_SCN_ALIGN_16BYTES
+// CHECK:       IMAGE_SCN_CNT_CODE
+// CHECK:       IMAGE_SCN_MEM_EXECUTE
+// CHECK:       IMAGE_SCN_MEM_READ
+// CHECK:     ]
+// CHECK:     Relocations [
+// CHECK:       0x7  IMAGE_REL_AMD64_ADDR32 .data
+// CHECK:       0xC  IMAGE_REL_AMD64_REL32 _printf
+// CHECK:       0x14 IMAGE_REL_AMD64_ADDR32 .data
+// CHECK:       0x19 IMAGE_REL_AMD64_REL32 _puts
+// CHECK:       0x21 IMAGE_REL_AMD64_ADDR32 .data
+// CHECK:       0x26 IMAGE_REL_AMD64_REL32 _puts
+// CHECK:     ]
+// CHECK:     SectionData (
+// CHECK:       0000: 83EC0467 C7042400 000000E8 00000000
+// CHECK:       0010: 67C70424 07000000 E8000000 0067C704
+// CHECK:       0020: 24100000 00E80000 000031C0 83C404C3
+// CHECK:     )
+// CHECK:   }
+// CHECK:   Section {
+// CHECK:     Number:                    2
+// CHECK:     Name:                      .data
+// CHECK:     VirtualSize:               0
+// CHECK:     VirtualAddress:            0
+// CHECK:     RawDataSize:               {{[0-9]+}}
+// CHECK:     PointerToRawData:          0x{{[0-9A-F]+}}
+// CHECK:     PointerToRelocations:      0x0
+// CHECK:     PointerToLineNumbers:      0x0
+// CHECK:     RelocationCount:           0
+// CHECK:     LineNumberCount:           0
+// CHECK:     Characteristics [ (0xC0500040)
+// CHECK:       IMAGE_SCN_ALIGN_16BYTES
+// CHECK:       IMAGE_SCN_CNT_INITIALIZED_DATA
+// CHECK:       IMAGE_SCN_MEM_READ
+// CHECK:       IMAGE_SCN_MEM_WRITE
+// CHECK:     Relocations [
+// CHECK:     ]
+// CHECK:     SectionData (
+// CHECK:       0000: 48656C6C 6F200057 6F726C64 21000000 |Hello .World!...|
+// CHECK:       0010: 49276D20 54686520 4C617374 204C696E |I'm The Last Lin|
+// CHECK:       0020: 652E00                              |e..|
+// CHECK:     )
+// CHECK:   }
+// CHECK: ]
+// CHECK: Symbols [
+// CHECK:   Symbol {
+// CHECK:     Name:                      .text
+// CHECK:     Value:                     0
+// CHECK:     Section:                   .text
+// CHECK:     BaseType:                  Null
+// CHECK:     ComplexType:               Null
+// CHECK:     StorageClass:              Static
+// CHECK:     AuxSymbolCount:            1
+// CHECK:     AuxSectionDef {
+// CHECK:       Length: 48
+// CHECK:       RelocationCount: 6
+// CHECK:       LineNumberCount: 0
+// CHECK:       Checksum: 0x0
+// CHECK:       Number: 1
+// CHECK:       Selection: 0x0
+// CHECK:       Unused: (00 00 00)
+// CHECK:     }
+// CHECK:   }
+// CHECK:   Symbol {
+// CHECK:     Name:                      .data
+// CHECK:     Value:                     0
+// CHECK:     Section:                   .data
+// CHECK:     BaseType:                  Null
+// CHECK:     ComplexType:               Null
+// CHECK:     StorageClass:              Static
+// CHECK:     AuxSymbolCount:            1
+// CHECK:     AuxSectionDef {
+// CHECK:       Length: 35
+// CHECK:       RelocationCount: 0
+// CHECK:       LineNumberCount: 0
+// CHECK:       Checksum: 0x0
+// CHECK:       Number: 2
+// CHECK:       Selection: 0x0
+// CHECK:       Unused: (00 00 00)
+// CHECK:     }
+// CHECK:   }
+// CHECK:   Symbol {
+// CHECK:     Name:                      _main
+// CHECK:     Value:                     0
+// CHECK:     Section:                   .text
+// CHECK:     BaseType:                  Null
+// CHECK:     ComplexType:               Function
+// CHECK:     StorageClass:              External
+// CHECK:     AuxSymbolCount:            0
+// CHECK:   }
+// CHECK:   Symbol {
+// CHECK:     Name:                      _printf
+// CHECK:     Value:                     0
+// CHECK:     Section:                   (0)
+// CHECK:     BaseType:                  Null
+// CHECK:     ComplexType:               Null
+// CHECK:     StorageClass:              External
+// CHECK:     AuxSymbolCount:            0
+// CHECK:   }
+// CHECK:   Symbol {
+// CHECK:     Name:                      _puts
+// CHECK:     Value:                     0
+// CHECK:     Section:                   (0)
+// CHECK:     BaseType:                  Null
+// CHECK:     ComplexType:               Null
+// CHECK:     StorageClass:              External
+// CHECK:     AuxSymbolCount:            0
+// CHECK:   }
+// CHECK: ]
diff --git a/test/MC/COFF/symbol-fragment-offset.s b/test/MC/COFF/symbol-fragment-offset.s
index 1df8baa..71b1703 100644
--- a/test/MC/COFF/symbol-fragment-offset.s
+++ b/test/MC/COFF/symbol-fragment-offset.s
@@ -1,8 +1,7 @@
 // The purpose of this test is to see if the COFF object writer is emitting the
 // proper relocations for multiple pieces of data in a single data fragment.
 
-// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s | coff-dump.py | FileCheck %s
-// I WOULD RUN, BUT THIS FAILS: llvm-mc -filetype=obj -triple x86_64-pc-win32 %s
+// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s | llvm-readobj -h -s -sr -sd -t | FileCheck %s
 
 .def	 _main;
 	.scl	2;
@@ -36,152 +35,134 @@ L_.str2:
 	.asciz	 "I'm The Last Line."
 
 // CHECK: {
-// CHECK:   MachineType              = IMAGE_FILE_MACHINE_I386 (0x14C)
-// CHECK:   NumberOfSections         = 2
-// CHECK:   TimeDateStamp            = {{[0-9]+}}
-// CHECK:   PointerToSymbolTable     = 0x{{[0-9A-F]+}}
-// CHECK:   NumberOfSymbols          = 7
-// CHECK:   SizeOfOptionalHeader     = 0
-// CHECK:   Characteristics          = 0x0
-// CHECK:   Sections                 = [
-// CHECK:     1 = {
-// CHECK:       Name                     = .text
-// CHECK:       VirtualSize              = 0
-// CHECK:       VirtualAddress           = 0
-// CHECK:       SizeOfRawData            = {{[0-9]+}}
-// CHECK:       PointerToRawData         = 0x{{[0-9A-F]+}}
-// CHECK:       PointerToRelocations     = 0x{{[0-9A-F]+}}
-// CHECK:       PointerToLineNumbers     = 0x0
-// CHECK:       NumberOfRelocations      = 6
-// CHECK:       NumberOfLineNumbers      = 0
-// CHECK:       Charateristics           = 0x60500020
-// CHECK:         IMAGE_SCN_CNT_CODE
-// CHECK:         IMAGE_SCN_ALIGN_16BYTES
-// CHECK:         IMAGE_SCN_MEM_EXECUTE
-// CHECK:         IMAGE_SCN_MEM_READ
-// CHECK:       SectionData              =
-// CHECK:         83 EC 04 C7 04 24 00 00 - 00 00 E8 00 00 00 00 C7 |.....$..........|
-// CHECK:         04 24 07 00 00 00 E8 00 - 00 00 00 C7 04 24 10 00 |.$...........$..|
-// CHECK:         00 00 E8 00 00 00 00 31 - C0 83 C4 04 C3 |.......1.....|
-// CHECK:       Relocations              = [
-// CHECK:         0 = {
-// CHECK:           VirtualAddress           = 0x6
-// CHECK:           SymbolTableIndex         = 2
-// CHECK:           Type                     = IMAGE_REL_I386_DIR32 (6)
-// CHECK:           SymbolName               = .data
-// CHECK:         }
-// CHECK:         1 = {
-// CHECK:           VirtualAddress           = 0xB
-// CHECK:           SymbolTableIndex         = 5
-// CHECK:           Type                     = IMAGE_REL_I386_REL32 (20)
-// CHECK:           SymbolName               = _printf
-// CHECK:         }
-// CHECK:         2 = {
-// CHECK:           VirtualAddress           = 0x12
-// CHECK:           SymbolTableIndex         = 2
-// CHECK:           Type                     = IMAGE_REL_I386_DIR32 (6)
-// CHECK:           SymbolName               = .data
-// CHECK:         }
-// CHECK:         3 = {
-// CHECK:           VirtualAddress           = 0x17
-// CHECK:           SymbolTableIndex         = 6
-// CHECK:           Type                     = IMAGE_REL_I386_REL32 (20)
-// CHECK:           SymbolName               = _puts
-// CHECK:         }
-// CHECK:         4 = {
-// CHECK:           VirtualAddress           = 0x1E
-// CHECK:           SymbolTableIndex         = 2
-// CHECK:           Type                     = IMAGE_REL_I386_DIR32 (6)
-// CHECK:           SymbolName               = .data
-// CHECK:         }
-// CHECK:         5 = {
-// CHECK:           VirtualAddress           = 0x23
-// CHECK:           SymbolTableIndex         = 6
-// CHECK:           Type                     = IMAGE_REL_I386_REL32 (20)
-// CHECK:           SymbolName               = _puts
-// CHECK:         }
-// CHECK:       ]
-// CHECK:     }
-// CHECK:     2 = {
-// CHECK:       Name                     = .data
-// CHECK:       VirtualSize              = 0
-// CHECK:       VirtualAddress           = 0
-// CHECK:       SizeOfRawData            = {{[0-9]+}}
-// CHECK:       PointerToRawData         = 0x{{[0-9A-F]+}}
-// CHECK:       PointerToRelocations     = 0x0
-// CHECK:       PointerToLineNumbers     = 0x0
-// CHECK:       NumberOfRelocations      = 0
-// CHECK:       NumberOfLineNumbers      = 0
-// CHECK:       Charateristics           = 0xC0500040
-// CHECK:         IMAGE_SCN_CNT_INITIALIZED_DATA
-// CHECK:         IMAGE_SCN_ALIGN_16BYTES
-// CHECK:         IMAGE_SCN_MEM_READ
-// CHECK:         IMAGE_SCN_MEM_WRITE
-// CHECK:       SectionData              =
-// CHECK:         48 65 6C 6C 6F 20 00 57 - 6F 72 6C 64 21 00 00 00 |Hello .World!...|
-// CHECK:         49 27 6D 20 54 68 65 20 - 4C 61 73 74 20 4C 69 6E |I'm The Last Lin|
-// CHECK:         65 2E 00                                          |e..|
-// CHECK:       Relocations              = None
-// CHECK:     }
+// CHECK:   Machine:                   IMAGE_FILE_MACHINE_I386 (0x14C)
+// CHECK:   SectionCount:              2
+// CHECK:   TimeDateStamp:             {{[0-9]+}}
+// CHECK:   PointerToSymbolTable:      0x{{[0-9A-F]+}}
+// CHECK:   SymbolCount:               7
+// CHECK:   OptionalHeaderSize:        0
+// CHECK:   Characteristics [ (0x0)
 // CHECK:   ]
-// CHECK:   Symbols                  = [
-// CHECK:     0 = {
-// CHECK:       Name                     = .text
-// CHECK:       Value                    = 0
-// CHECK:       SectionNumber            = 1
-// CHECK:       SimpleType               = IMAGE_SYM_TYPE_NULL (0)
-// CHECK:       ComplexType              = IMAGE_SYM_DTYPE_NULL (0)
-// CHECK:       StorageClass             = IMAGE_SYM_CLASS_STATIC (3)
-// CHECK:       NumberOfAuxSymbols       = 1
-// CHECK:       AuxillaryData            =
-// CHECK:         2D 00 00 00 06 00 00 00 - 00 00 00 00 01 00 00 00 |-...............|
-// CHECK:         00 00                                             |..|
-
-// CHECK:     }
-// CHECK:     2 = {
-// CHECK:       Name                     = .data
-// CHECK:       Value                    = 0
-// CHECK:       SectionNumber            = 2
-// CHECK:       SimpleType               = IMAGE_SYM_TYPE_NULL (0)
-// CHECK:       ComplexType              = IMAGE_SYM_DTYPE_NULL (0)
-// CHECK:       StorageClass             = IMAGE_SYM_CLASS_STATIC (3)
-// CHECK:       NumberOfAuxSymbols       = 1
-// CHECK:       AuxillaryData            =
-// CHECK:         23 00 00 00 00 00 00 00 - 00 00 00 00 02 00 00 00 |#...............|
-// CHECK:         00 00                                             |..|
-
-// CHECK:     }
-// CHECK:     4 = {
-// CHECK:       Name                     = _main
-// CHECK:       Value                    = 0
-// CHECK:       SectionNumber            = 1
-// CHECK:       SimpleType               = IMAGE_SYM_TYPE_NULL (0)
-// CHECK:       ComplexType              = IMAGE_SYM_DTYPE_FUNCTION (2)
-// CHECK:       StorageClass             = IMAGE_SYM_CLASS_EXTERNAL (2)
-// CHECK:       NumberOfAuxSymbols       = 0
-// CHECK:       AuxillaryData            =
-
-// CHECK:     5 = {
-// CHECK:       Name                     = _printf
-// CHECK:       Value                    = 0
-// CHECK:       SectionNumber            = 0
-// CHECK:       SimpleType               = IMAGE_SYM_TYPE_NULL (0)
-// CHECK:       ComplexType              = IMAGE_SYM_DTYPE_NULL (0)
-// CHECK:       StorageClass             = IMAGE_SYM_CLASS_EXTERNAL (2)
-// CHECK:       NumberOfAuxSymbols       = 0
-// CHECK:       AuxillaryData            =
-
+// CHECK: }
+// CHECK: Sections [
+// CHECK:   Section {
+// CHECK:     Number:                    1
+// CHECK:     Name:                      .text
+// CHECK:     VirtualSize:               0
+// CHECK:     VirtualAddress:            0
+// CHECK:     RawDataSize:               {{[0-9]+}}
+// CHECK:     PointerToRawData:          0x{{[0-9A-F]+}}
+// CHECK:     PointerToRelocations:      0x{{[0-9A-F]+}}
+// CHECK:     PointerToLineNumbers:      0x0
+// CHECK:     RelocationCount:           6
+// CHECK:     LineNumberCount:           0
+// CHECK:     Characteristics [ (0x60500020)
+// CHECK:       IMAGE_SCN_ALIGN_16BYTES
+// CHECK:       IMAGE_SCN_CNT_CODE
+// CHECK:       IMAGE_SCN_MEM_EXECUTE
+// CHECK:       IMAGE_SCN_MEM_READ
+// CHECK:     ]
+// CHECK:     Relocations [
+// CHECK:       0x6  IMAGE_REL_I386_DIR32 .data
+// CHECK:       0xB  IMAGE_REL_I386_REL32 _printf
+// CHECK:       0x12 IMAGE_REL_I386_DIR32 .data
+// CHECK:       0x17 IMAGE_REL_I386_REL32 _puts
+// CHECK:       0x1E IMAGE_REL_I386_DIR32 .data
+// CHECK:       0x23 IMAGE_REL_I386_REL32 _puts
+// CHECK:     ]
+// CHECK:     SectionData (
+// CHECK:       0000: 83EC04C7 04240000 0000E800 000000C7 |.....$..........|
+// CHECK:       0010: 04240700 0000E800 000000C7 04241000 |.$...........$..|
+// CHECK:       0020: 0000E800 00000031 C083C404 C3       |.......1.....|
+// CHECK:     )
+// CHECK:   }
+// CHECK:   Section {
+// CHECK:     Number:                    2
+// CHECK:     Name:                      .data
+// CHECK:     VirtualSize:               0
+// CHECK:     VirtualAddress:            0
+// CHECK:     RawDataSize:               {{[0-9]+}}
+// CHECK:     PointerToRawData:          0x{{[0-9A-F]+}}
+// CHECK:     PointerToRelocations:      0x0
+// CHECK:     PointerToLineNumbers:      0x0
+// CHECK:     RelocationCount:           0
+// CHECK:     LineNumberCount:           0
+// CHECK:     Characteristics [ (0xC0500040)
+// CHECK:       IMAGE_SCN_ALIGN_16BYTES
+// CHECK:       IMAGE_SCN_CNT_INITIALIZED_DATA
+// CHECK:       IMAGE_SCN_MEM_READ
+// CHECK:       IMAGE_SCN_MEM_WRITE
+// CHECK:     Relocations [
+// CHECK:     ]
+// CHECK:     SectionData (
+// CHECK:       0000: 48656C6C 6F200057 6F726C64 21000000 |Hello .World!...|
+// CHECK:       0010: 49276D20 54686520 4C617374 204C696E |I'm The Last Lin|
+// CHECK:       0020: 652E00                              |e..|
+// CHECK:     )
+// CHECK:   }
+// CHECK: ]
+// CHECK: Symbols [
+// CHECK:   Symbol {
+// CHECK:     Name:                      .text
+// CHECK:     Value:                     0
+// CHECK:     Section:                   .text
+// CHECK:     BaseType:                  Null
+// CHECK:     ComplexType:               Null
+// CHECK:     StorageClass:              Static
+// CHECK:     AuxSymbolCount:            1
+// CHECK:     AuxSectionDef {
+// CHECK:       Length: 45
+// CHECK:       RelocationCount: 6
+// CHECK:       LineNumberCount: 0
+// CHECK:       Checksum: 0x0
+// CHECK:       Number: 1
+// CHECK:       Selection: 0x0
+// CHECK:       Unused: (00 00 00)
 // CHECK:     }
-// CHECK:     6 = {
-// CHECK:       Name                     = _puts
-// CHECK:       Value                    = 0
-// CHECK:       SectionNumber            = 0
-// CHECK:       SimpleType               = IMAGE_SYM_TYPE_NULL (0)
-// CHECK:       ComplexType              = IMAGE_SYM_DTYPE_NULL (0)
-// CHECK:       StorageClass             = IMAGE_SYM_CLASS_EXTERNAL (2)
-// CHECK:       NumberOfAuxSymbols       = 0
-// CHECK:       AuxillaryData            =
-
+// CHECK:   }
+// CHECK:   Symbol {
+// CHECK:     Name:                      .data
+// CHECK:     Value:                     0
+// CHECK:     Section:                   .data
+// CHECK:     BaseType:                  Null
+// CHECK:     ComplexType:               Null
+// CHECK:     StorageClass:              Static
+// CHECK:     AuxSymbolCount:            1
+// CHECK:     AuxSectionDef {
+// CHECK:       Length: 35
+// CHECK:       RelocationCount: 0
+// CHECK:       LineNumberCount: 0
+// CHECK:       Checksum: 0x0
+// CHECK:       Number: 2
+// CHECK:       Selection: 0x0
+// CHECK:       Unused: (00 00 00)
 // CHECK:     }
-// CHECK:   ]
-// CHECK: }
+// CHECK:   }
+// CHECK:   Symbol {
+// CHECK:     Name:                      _main
+// CHECK:     Value:                     0
+// CHECK:     Section:                   .text
+// CHECK:     BaseType:                  Null
+// CHECK:     ComplexType:               Function
+// CHECK:     StorageClass:              External
+// CHECK:     AuxSymbolCount:            0
+// CHECK:   }
+// CHECK:   Symbol {
+// CHECK:     Name:                      _printf
+// CHECK:     Value:                     0
+// CHECK:     Section:                   (0)
+// CHECK:     BaseType:                  Null
+// CHECK:     ComplexType:               Null
+// CHECK:     StorageClass:              External
+// CHECK:     AuxSymbolCount:            0
+// CHECK:   }
+// CHECK:   Symbol {
+// CHECK:     Name:                      _puts
+// CHECK:     Value:                     0
+// CHECK:     Section:                   (0)
+// CHECK:     BaseType:                  Null
+// CHECK:     ComplexType:               Null
+// CHECK:     StorageClass:              External
+// CHECK:     AuxSymbolCount:            0
+// CHECK:   }
+// CHECK: ]
diff --git a/test/MC/COFF/weak-symbol-section-specification.ll b/test/MC/COFF/weak-symbol-section-specification.ll
index 5049372..4772c92 100644
--- a/test/MC/COFF/weak-symbol-section-specification.ll
+++ b/test/MC/COFF/weak-symbol-section-specification.ll
@@ -1,23 +1,25 @@
 ; The purpose of this test is to verify that weak linkage type is not ignored by backend,
 ; if section was specialized.
 
-; RUN: llc -filetype=obj -mtriple i686-pc-win32 %s -o - | coff-dump.py | FileCheck %s
+; RUN: llc -filetype=obj -mtriple i686-pc-win32 %s -o - | llvm-readobj -s -sd | FileCheck %s
 
 @a = weak unnamed_addr constant { i32, i32, i32 } { i32 0, i32 0, i32 0}, section ".data"
 
-; CHECK:           Name                     = .data$a
-; CHECK-NEXT:      VirtualSize              = 0
-; CHECK-NEXT:      VirtualAddress           = 0
-; CHECK-NEXT:      SizeOfRawData            = {{[0-9]+}}
-; CHECK-NEXT:      PointerToRawData         = 0x{{[0-9A-F]+}}
-; CHECK-NEXT:      PointerToRelocations     = 0x0
-; CHECK-NEXT:      PointerToLineNumbers     = 0x0
-; CHECK-NEXT:      NumberOfRelocations      = 0
-; CHECK-NEXT:      NumberOfLineNumbers      = 0
-; CHECK-NEXT:      Charateristics           = 0x40401040
+; CHECK:           Name:                      .data$a
+; CHECK-NEXT:      VirtualSize:               0
+; CHECK-NEXT:      VirtualAddress:            0
+; CHECK-NEXT:      RawDataSize:               {{[0-9]+}}
+; CHECK-NEXT:      PointerToRawData:          0x{{[0-9A-F]+}}
+; CHECK-NEXT:      PointerToRelocations:      0x0
+; CHECK-NEXT:      PointerToLineNumbers:      0x0
+; CHECK-NEXT:      RelocationCount:           0
+; CHECK-NEXT:      LineNumberCount:           0
+; CHECK-NEXT:      Characteristics [ (0x40401040)
+; CHECK-NEXT:        IMAGE_SCN_ALIGN_8BYTES
 ; CHECK-NEXT:        IMAGE_SCN_CNT_INITIALIZED_DATA
 ; CHECK-NEXT:        IMAGE_SCN_LNK_COMDAT
-; CHECK-NEXT:        IMAGE_SCN_ALIGN_8BYTES
 ; CHECK-NEXT:        IMAGE_SCN_MEM_READ
-; CHECK-NEXT:      SectionData              = 
-; CHECK-NEXT:        00 00 00 00 00 00 00 00 - 00 00 00 00 
+; CHECK-NEXT:      ]
+; CHECK-NEXT:      SectionData (
+; CHECK-NEXT:        0000: 00000000 00000000 00000000
+; CHECK-NEXT:      )
diff --git a/test/MC/COFF/weak.s b/test/MC/COFF/weak.s
index 0f99313..b9df0f1 100644
--- a/test/MC/COFF/weak.s
+++ b/test/MC/COFF/weak.s
@@ -1,7 +1,8 @@
 // This tests that default-null weak symbols (a GNU extension) are created
 // properly via the .weak directive.
 
-// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 < %s | coff-dump.py | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s | llvm-readobj -t | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-win32 %s | llvm-readobj -t | FileCheck %s
 
     .def    _main;
     .scl    2;
@@ -17,7 +18,7 @@ _main:                                  # @main
     testl   %eax, %eax
     je      LBB0_2
 # BB#1:                                 # %if.then
-    calll   _test_weak
+    call    _test_weak
     movl    $1, %eax
     addl    $4, %esp
     ret
@@ -28,24 +29,47 @@ LBB0_2:                                 # %return
 
     .weak   _test_weak
 
-// CHECK: Symbols = [
-
-// CHECK:      Name               = _test_weak
-// CHECK-NEXT: Value              = 0
-// CHECK-NEXT: SectionNumber      = 0
-// CHECK-NEXT: SimpleType         = IMAGE_SYM_TYPE_NULL (0)
-// CHECK-NEXT: ComplexType        = IMAGE_SYM_DTYPE_NULL (0)
-// CHECK-NEXT: StorageClass       = IMAGE_SYM_CLASS_WEAK_EXTERNAL (105)
-// CHECK-NEXT: NumberOfAuxSymbols = 1
-// CHECK-NEXT: AuxillaryData      =
-// CHECK-NEXT: 05 00 00 00 02 00 00 00 - 00 00 00 00 00 00 00 00 |................|
-// CHECK-NEXT: 00 00                                             |..|
-
-// CHECK:      Name               = .weak._test_weak.default
-// CHECK-NEXT: Value              = 0
-// CHECK-NEXT: SectionNumber      = 65535
-// CHECK-NEXT: SimpleType         = IMAGE_SYM_TYPE_NULL (0)
-// CHECK-NEXT: ComplexType        = IMAGE_SYM_DTYPE_NULL (0)
-// CHECK-NEXT: StorageClass       = IMAGE_SYM_CLASS_EXTERNAL (2)
-// CHECK-NEXT: NumberOfAuxSymbols = 0
-// CHECK-NEXT: AuxillaryData      =
+    .weak   _test_weak_alias
+    _test_weak_alias=_main
+
+// CHECK: Symbols [
+
+// CHECK:      Symbol {
+// CHECK:        Name:           _test_weak
+// CHECK-NEXT:   Value:          0
+// CHECK-NEXT:   Section:        (0)
+// CHECK-NEXT:   BaseType:       Null
+// CHECK-NEXT:   ComplexType:    Null
+// CHECK-NEXT:   StorageClass:   WeakExternal
+// CHECK-NEXT:   AuxSymbolCount: 1
+// CHECK-NEXT:   AuxWeakExternal {
+// CHECK-NEXT:     Linked: .weak._test_weak.default
+// CHECK-NEXT:      Search: Library
+// CHECK-NEXT:      Unused: (00 00 00 00 00 00 00 00 00 00)
+// CHECK-NEXT:   }
+// CHECK-NEXT: }
+
+// CHECK:      Symbol {
+// CHECK:        Name:                .weak._test_weak.default
+// CHECK-NEXT:   Value:               0
+// CHECK-NEXT:   Section:             (-1)
+// CHECK-NEXT:   BaseType:            Null
+// CHECK-NEXT:   ComplexType:         Null
+// CHECK-NEXT:   StorageClass:        External
+// CHECK-NEXT:   AuxSymbolCount:      0
+// CHECK-NEXT: }
+
+// CHECK:      Symbol {
+// CHECK:        Name:           _test_weak_alias
+// CHECK-NEXT:   Value:          0
+// CHECK-NEXT:   Section:        (0)
+// CHECK-NEXT:   BaseType:       Null
+// CHECK-NEXT:   ComplexType:    Null
+// CHECK-NEXT:   StorageClass:   WeakExternal
+// CHECK-NEXT:   AuxSymbolCount: 1
+// CHECK-NEXT:   AuxWeakExternal {
+// CHECK-NEXT:     Linked: _main
+// CHECK-NEXT:      Search: Library
+// CHECK-NEXT:      Unused: (00 00 00 00 00 00 00 00 00 00)
+// CHECK-NEXT:   }
+// CHECK-NEXT: }
diff --git a/test/MC/Disassembler/ARM/arm-tests.txt b/test/MC/Disassembler/ARM/arm-tests.txt
index 0c9aaab..98daaa7 100644
--- a/test/MC/Disassembler/ARM/arm-tests.txt
+++ b/test/MC/Disassembler/ARM/arm-tests.txt
@@ -51,24 +51,48 @@
 # CHECKx:	ldclvc	p5, cr15, [r8], #-0
 #0x00 0xf5 0x78 0x7c
 
+# CHECK:        ldc     p13, c9, [r2, #0]!
+0x00 0x9d 0xb2 0xed
+
+# CHECK:        ldcl    p1, c9, [r3, #0]!
+0x00 0x91 0xf3 0xed
+
 # CHECK:	ldr	r0, [r2], #15
 0x0f 0x00 0x92 0xe4
 
 # CHECK:	ldr	r5, [r7, -r10, lsl #2]
 0x0a 0x51 0x17 0xe7
 
+# CHECK:        ldr     r4, [r5, #0]!
+0x00 0x40 0xb5 0xe5
+
+# CHECK:        ldrb    lr, [r10, #0]!
+0x00 0xe0 0xfa 0xe5
+
+# CHECK:	ldrd	r4, r5, [r0, #0]!
+0xd0 0x40 0xe0 0xe1
+
 # CHECK:	ldrh	r0, [r2], #0
 0xb0 0x00 0xd2 0xe0
 
 # CHECK:	ldrh	r0, [r2]
 0xb0 0x00 0xd2 0xe1
 
+# CHECK:	ldrh    lr, [sp, #0]!
+0xb0 0xe0 0xfd 0xe1
+
 # CHECK:	ldrht	r0, [r2], #15
 0xbf 0x00 0xf2 0xe0
 
+# CHECK:        ldrsb   r1, [lr, #0]!
+0xd0 0x10 0xfe 0xe1
+
 # CHECK:	ldrsbtvs	lr, [r2], -r9
 0xd9 0xe0 0x32 0x60
 
+# CHECK:        ldrsh   r9, [r1, #0]!
+0xf0 0x90 0xf1 0xe1
+
 # CHECK:	lsls	r0, r2, #31
 0x82 0x0f 0xb0 0xe1
 
@@ -245,9 +269,27 @@
 # CHECK:	stc	p2, c4, [r9], {157}
 0x9d 0x42 0x89 0xec
 
+# CHECK:        stc     p15, c0, [r3, #0]!
+0x00 0x0f 0xa3 0xed
+
 # CHECK:	stc2	p2, c4, [r9], {157}
 0x9d 0x42 0x89 0xfc
 
+# CHECK:	stcl    p13, c12, [r9, #0]!
+0x00 0xcd 0xe9 0xed
+
+# CHECK:        str     pc, [r11, #0]!
+0x00 0xf0 0xab 0xe5
+
+# CHECK:        strb    r9, [r10, #0]!
+0x00 0x90 0xea 0xe5
+
+# CHECK:        strd    r12, sp, [r6, #0]!
+0xf0 0xc0 0xe6 0xe1
+
+# CHECK:        strh    r7, [r9, #0]!
+0xb0 0x70 0xe9 0xe1
+
 # CHECK:	bne #-24
 0xfa 0xff 0xff 0x1a
 
diff --git a/test/MC/Disassembler/ARM/arm-thumb-trustzone.txt b/test/MC/Disassembler/ARM/arm-thumb-trustzone.txt
new file mode 100644
index 0000000..d6b7cf1
--- /dev/null
+++ b/test/MC/Disassembler/ARM/arm-thumb-trustzone.txt
@@ -0,0 +1,17 @@
+# RUN: llvm-mc -triple=thumbv7-apple-darwin -mcpu=cortex-a8 -disassemble -mattr=-trustzone < %s | FileCheck %s -check-prefix=NOTZ
+# RUN: llvm-mc -triple=thumbv7-apple-darwin -mcpu=cortex-a8 -disassemble -mattr=trustzone < %s | FileCheck %s -check-prefix=TZ
+
+
+#------------------------------------------------------------------------------
+# SMC
+#------------------------------------------------------------------------------
+
+0xff 0xf7 0x00 0x80
+0x0c 0xbf
+0xf0 0xf7 0x00 0x80
+
+# NOTZ-NOT: smc #15
+# NOTZ-NOT: smceq #0
+# TZ: smc #15
+# TZ: ite eq
+# TZ: smceq #0
diff --git a/test/MC/Disassembler/ARM/arm-trustzone.txt b/test/MC/Disassembler/ARM/arm-trustzone.txt
new file mode 100644
index 0000000..92d5d6b
--- /dev/null
+++ b/test/MC/Disassembler/ARM/arm-trustzone.txt
@@ -0,0 +1,16 @@
+# RUN: llvm-mc -triple=armv7-apple-darwin -mcpu=cortex-a8 -disassemble -mattr=-trustzone < %s | FileCheck %s -check-prefix=NOTZ
+# RUN: llvm-mc -triple=armv7-apple-darwin -mcpu=cortex-a8 -disassemble -mattr=trustzone < %s | FileCheck %s -check-prefix=TZ
+
+
+#------------------------------------------------------------------------------
+# SMC
+#------------------------------------------------------------------------------
+
+0x7f 0x00 0x60 0xe1
+0x70 0x00 0x60 0x01
+
+# NOTZ-NOT: smc #15
+# NOTZ-NOT: smceq #0
+# TZ: smc #15
+# TZ: smceq #0
+
diff --git a/test/MC/Disassembler/ARM/basic-arm-instructions.txt b/test/MC/Disassembler/ARM/basic-arm-instructions.txt
index 1100ce6..9f63e1e 100644
--- a/test/MC/Disassembler/ARM/basic-arm-instructions.txt
+++ b/test/MC/Disassembler/ARM/basic-arm-instructions.txt
@@ -707,8 +707,10 @@
 # CHECK: mov r3, #7
 # CHECK: mov r4, #4080
 # CHECK: mov r5, #16711680
+# CHECK: mov sp, #35
 # CHECK: movw r6, #65535
 # CHECK: movw r9, #65535
+# CHECK: movw sp, #1193
 # CHECK: movs r3, #7
 # CHECK: moveq r4, #4080
 # CHECK: movseq r5, #16711680
@@ -716,8 +718,10 @@
 0x07 0x30 0xa0 0xe3
 0xff 0x4e 0xa0 0xe3
 0xff 0x58 0xa0 0xe3
+0x23 0xd0 0xa0 0xe3
 0xff 0x6f 0x0f 0xe3
 0xff 0x9f 0x0f 0xe3
+0xa9 0xd4 0x00 0xe3
 0x07 0x30 0xb0 0xe3
 0xff 0x4e 0xa0 0x03
 0xff 0x58 0xb0 0x03
@@ -740,10 +744,12 @@
 #------------------------------------------------------------------------------
 # CHECK: movt r3, #7
 # CHECK: movt r6, #65535
+# CHECK: movt sp, #3397
 # CHECK: movteq r4, #4080
 
 0x07 0x30 0x40 0xe3
 0xff 0x6f 0x4f 0xe3
+0x45 0xdd 0x40 0xe3
 0xf0 0x4f 0x40 0x03
 
 
@@ -1442,15 +1448,6 @@
 0xf2 0x4f 0x38 0xc6
 
 #------------------------------------------------------------------------------
-# SMC
-#------------------------------------------------------------------------------
-# CHECK: smc #15
-# CHECK: smceq #0
-
-0x7f 0x00 0x60 0xe1
-0x70 0x00 0x60 0x01
-
-#------------------------------------------------------------------------------
 # SMLABB/SMLABT/SMLATB/SMLATT
 #------------------------------------------------------------------------------
 # CHECK: smlabb r3, r1, r9, r0
@@ -1826,12 +1823,13 @@
 # CHECK: strexh  r4, r2, [r5
 # CHECK: strex  r2, r1, [r7
 # CHECK: strexd  r6, r2, r3, [r8
+# CHECK: strexd  sp, r0, r1, [r0]
 
 0x93 0x1f 0xc4 0xe1
 0x92 0x4f 0xe5 0xe1
 0x91 0x2f 0x87 0xe1
 0x92 0x6f 0xa8 0xe1
-
+0x90 0xdf 0xa0 0xe1
 
 #------------------------------------------------------------------------------
 # SUB
diff --git a/test/MC/Disassembler/ARM/invalid-LDR_POST-arm.txt b/test/MC/Disassembler/ARM/invalid-LDR_POST-arm.txt
index 0cff28a..ecab5a5 100644
--- a/test/MC/Disassembler/ARM/invalid-LDR_POST-arm.txt
+++ b/test/MC/Disassembler/ARM/invalid-LDR_POST-arm.txt
@@ -1,5 +1,4 @@
 # RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | grep "invalid instruction encoding"
-# XFAIL: *
 
 # LDR_PRE/POST has encoding Inst{4} = 0.
 0xde 0x69 0x18 0x46
diff --git a/test/MC/Disassembler/ARM/invalid-hint-arm.txt b/test/MC/Disassembler/ARM/invalid-hint-arm.txt
new file mode 100644
index 0000000..7da96d8
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-hint-arm.txt
@@ -0,0 +1,13 @@
+# RUN: llvm-mc -triple=armv7-apple-darwin -mcpu=cortex-a8 -disassemble < %s 2>&1 | FileCheck %s
+
+#------------------------------------------------------------------------------
+# Undefined encoding space for hint instructions
+#------------------------------------------------------------------------------
+
+0x05 0xf0 0x20 0xe3
+# CHECK: invalid instruction encoding
+0x41 0xf0 0x20 0xe3
+# CHECK: invalid instruction encoding
+0xfe 0xf0 0x20 0xe3
+# CHECK: invalid instruction encoding
+
diff --git a/test/MC/Disassembler/ARM/invalid-hint-thumb.txt b/test/MC/Disassembler/ARM/invalid-hint-thumb.txt
new file mode 100644
index 0000000..1e41336
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-hint-thumb.txt
@@ -0,0 +1,8 @@
+# RUN: llvm-mc -triple=thumbv7 -disassemble -show-encoding < %s 2>&1 | FileCheck %s
+
+#------------------------------------------------------------------------------
+# Undefined encoding space for hint instructions
+#------------------------------------------------------------------------------
+
+0xaf 0xf3 0x05 0x80
+# CHECK: invalid instruction encoding
diff --git a/test/MC/Disassembler/Mips/mips-dsp.txt b/test/MC/Disassembler/Mips/mips-dsp.txt
new file mode 100644
index 0000000..d10e62c
--- /dev/null
+++ b/test/MC/Disassembler/Mips/mips-dsp.txt
@@ -0,0 +1,13 @@
+# RUN: llvm-mc -triple=mipsel-unknown-linux -mattr=+dsp -disassemble < %s | FileCheck %s
+
+# CHECK: mfhi $21, $ac3
+0x10 0xa8 0x60 0x00
+
+# CHECK: mflo $21, $ac3
+0x12 0xa8 0x60 0x00
+
+# CHECK: mthi $21, $ac3
+0x11 0x18 0xa0 0x02
+
+# CHECK: mtlo $21, $ac3
+0x13 0x18 0xa0 0x02
diff --git a/test/MC/Disassembler/Mips/mips32.txt b/test/MC/Disassembler/Mips/mips32.txt
index 7022486..ef8bf71 100644
--- a/test/MC/Disassembler/Mips/mips32.txt
+++ b/test/MC/Disassembler/Mips/mips32.txt
@@ -1,5 +1,4 @@
 # RUN: llvm-mc --disassemble %s -triple=mips-unknown-linux | FileCheck %s
-# CHECK: .section        __TEXT,__text,regular,pure_instructions
 # CHECK: abs.d $f12, $f14
 0x46 0x20 0x73 0x05
 
diff --git a/test/MC/Disassembler/Mips/mips32_le.txt b/test/MC/Disassembler/Mips/mips32_le.txt
index 48fa8e2..a0885a4 100644
--- a/test/MC/Disassembler/Mips/mips32_le.txt
+++ b/test/MC/Disassembler/Mips/mips32_le.txt
@@ -1,5 +1,4 @@
 # RUN: llvm-mc --disassemble %s -triple=mipsel-unknown-linux | FileCheck %s
-# CHECK: .section        __TEXT,__text,regular,pure_instructions
 # CHECK: abs.d $f12, $f14
 0x05 0x73 0x20 0x46
 
diff --git a/test/MC/Disassembler/Mips/mips32r2.txt b/test/MC/Disassembler/Mips/mips32r2.txt
index 3b70db3..991eaa6 100644
--- a/test/MC/Disassembler/Mips/mips32r2.txt
+++ b/test/MC/Disassembler/Mips/mips32r2.txt
@@ -1,5 +1,4 @@
 # RUN: llvm-mc --disassemble %s -triple=mips-unknown-linux -mcpu=mips32r2 | FileCheck %s
-# CHECK: .section        __TEXT,__text,regular,pure_instructions
 # CHECK: abs.d $f12, $f14
 0x46 0x20 0x73 0x05
 
diff --git a/test/MC/Disassembler/Mips/mips32r2_le.txt b/test/MC/Disassembler/Mips/mips32r2_le.txt
index ecfde7a..10c2938 100644
--- a/test/MC/Disassembler/Mips/mips32r2_le.txt
+++ b/test/MC/Disassembler/Mips/mips32r2_le.txt
@@ -1,5 +1,4 @@
 # RUN: llvm-mc --disassemble %s -triple=mipsel-unknown-linux -mcpu=mips32r2 | FileCheck %s
-# CHECK: .section        __TEXT,__text,regular,pure_instructions
 # CHECK: abs.d $f12, $f14
 0x05 0x73 0x20 0x46
 
diff --git a/test/MC/Disassembler/Mips/mips64.txt b/test/MC/Disassembler/Mips/mips64.txt
index 38b1377..b887473 100644
--- a/test/MC/Disassembler/Mips/mips64.txt
+++ b/test/MC/Disassembler/Mips/mips64.txt
@@ -1,5 +1,4 @@
 # RUN: llvm-mc --disassemble %s -triple=mips64-unknown-linux | FileCheck %s
-# CHECK: .section	 __TEXT,__text,regular,pure_instructions
 # CHECK: daddiu $11, $26, 31949
 0x67 0x4b 0x7c 0xcd
 
diff --git a/test/MC/Disassembler/Mips/mips64_le.txt b/test/MC/Disassembler/Mips/mips64_le.txt
index a7ef0e4..ddc3c2b 100644
--- a/test/MC/Disassembler/Mips/mips64_le.txt
+++ b/test/MC/Disassembler/Mips/mips64_le.txt
@@ -1,5 +1,4 @@
 # RUN: llvm-mc --disassemble %s -triple=mips64el-unknown-linux | FileCheck %s
-# CHECK: .section	 __TEXT,__text,regular,pure_instructions
 # CHECK: daddiu $11, $26, 31949
 0xcd 0x7c 0x4b 0x67
 
diff --git a/test/MC/Disassembler/Mips/mips64r2.txt b/test/MC/Disassembler/Mips/mips64r2.txt
index 0b421fc..cee6f3c 100644
--- a/test/MC/Disassembler/Mips/mips64r2.txt
+++ b/test/MC/Disassembler/Mips/mips64r2.txt
@@ -1,5 +1,4 @@
 # RUN: llvm-mc --disassemble %s -triple=mips64-unknown-linux -mattr +mips64r2 | FileCheck %s
-# CHECK: .section	 __TEXT,__text,regular,pure_instructions
 # CHECK: daddiu $11, $26, 31949
 0x67 0x4b 0x7c 0xcd
 
diff --git a/test/MC/Disassembler/Mips/mips64r2_le.txt b/test/MC/Disassembler/Mips/mips64r2_le.txt
index c1d326f..82e4d6a 100644
--- a/test/MC/Disassembler/Mips/mips64r2_le.txt
+++ b/test/MC/Disassembler/Mips/mips64r2_le.txt
@@ -1,5 +1,4 @@
 # RUN: llvm-mc --disassemble %s -triple=mips64el-unknown-linux -mattr +mips64r2 | FileCheck %s
-# CHECK: .section	 __TEXT,__text,regular,pure_instructions
 # CHECK: daddiu $11, $26, 31949
 0xcd 0x7c 0x4b 0x67
 
diff --git a/test/MC/Disassembler/X86/intel-syntax.txt b/test/MC/Disassembler/X86/intel-syntax.txt
index 27694cd..57e602f 100644
--- a/test/MC/Disassembler/X86/intel-syntax.txt
+++ b/test/MC/Disassembler/X86/intel-syntax.txt
@@ -110,3 +110,12 @@
 
 # CHECK: vpgatherdd XMM10, DWORD PTR [R15 + 2*XMM9], XMM8
 0xc4 0x02 0x39 0x90 0x14 0x4f
+
+# CHECK: xsave64 OPAQUE PTR [RAX]
+0x48 0x0f 0xae 0x20
+
+# CHECK: xrstor64 OPAQUE PTR [RAX]
+0x48 0x0f 0xae 0x28
+
+# CHECK: xsaveopt64 OPAQUE PTR [RAX]
+0x48 0x0f 0xae 0x30
diff --git a/test/MC/Disassembler/X86/x86-64.txt b/test/MC/Disassembler/X86/x86-64.txt
index 5de1d59..c285af7 100644
--- a/test/MC/Disassembler/X86/x86-64.txt
+++ b/test/MC/Disassembler/X86/x86-64.txt
@@ -112,3 +112,18 @@
 
 # CHECK: xabort $13
 0xc6 0xf8 0x0d
+
+# CHECK: xsaveq (%rax)
+0x48 0x0f 0xae 0x20
+
+# CHECK: xrstorq (%rax)
+0x48 0x0f 0xae 0x28
+
+# CHECK: xsaveoptq (%rax)
+0x48 0x0f 0xae 0x30
+
+# CHECK: clac
+0x0f 0x01 0xca
+
+# CHECK: stac
+0x0f 0x01 0xcb
diff --git a/test/MC/Disassembler/XCore/xcore.txt b/test/MC/Disassembler/XCore/xcore.txt
index 99e54e9..1164330 100644
--- a/test/MC/Disassembler/XCore/xcore.txt
+++ b/test/MC/Disassembler/XCore/xcore.txt
@@ -1,5 +1,4 @@
 # RUN: llvm-mc --disassemble %s -triple=xcore-xmos-elf | FileCheck %s
-# CHECK: .section        __TEXT,__text,regular,pure_instructions
 
 # 0r instructions
 
@@ -649,12 +648,24 @@
 # CHECK: ldap r11, 53112
 0x33 0xf0 0x78 0xdb
 
+# CHECK: ldap r11, -22
+0x16 0xdc
+
+# CHECK: ldap r11, -9999
+0x09 0xf0 0x0f 0xdf
+
 # CHECK: bl 8
 0x08 0xd0
 
 # CHECK: bl 38631
 0x25 0xf0 0xe7 0xd2
 
+# CHECK: bl -222
+0xde 0xd4
+
+# CHECK: bl -55132
+0x35 0xf0 0x5c 0xd7
+
 # CHECK: bla cp[500]
 0xf4 0xe1
 
diff --git a/test/MC/ELF/abs.s b/test/MC/ELF/abs.s
index 48dbe3d..1836f40 100644
--- a/test/MC/ELF/abs.s
+++ b/test/MC/ELF/abs.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -t | FileCheck %s
 
 // Test that zed will be an ABS symbol
 
@@ -6,11 +6,12 @@
 .Lbar:
         zed = .Lfoo - .Lbar
 
-// CHECK:      # Symbol 1
-// CHECK-NEXT: (('st_name', 0x00000001) # 'zed'
-// CHECK-NEXT:  ('st_bind', 0x0)
-// CHECK-NEXT:  ('st_type', 0x0)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0xfff1)
-// CHECK-NEXT:  ('st_value', 0x0000000000000000)
-// CHECK-NEXT:  ('st_size', 0x0000000000000000)
+// CHECK:        Symbol {
+// CHECK:          Name: zed
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0xFFF1)
+// CHECK-NEXT:   }
diff --git a/test/MC/ELF/alias-reloc.s b/test/MC/ELF/alias-reloc.s
index f0db815..c25c259 100644
--- a/test/MC/ELF/alias-reloc.s
+++ b/test/MC/ELF/alias-reloc.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -r -t | FileCheck %s
 
 // Test that this produces a R_X86_64_PLT32 with bar.
 
@@ -17,36 +17,30 @@ foo2:
     .set    bar2,foo2
     .quad    bar2
 
-// CHECK:       # Relocation 0
-// CHECK-NEXT:  (('r_offset', 0x0000000000000001)
-// CHECK-NEXT:   ('r_sym', 0x00000001)
-// CHECK-NEXT:   ('r_type', 0x00000004)
-// CHECK-NEXT:   ('r_addend', 0xfffffffffffffffc)
-// CHECK-NEXT:  ),
-
-// CHECK:      # Relocation 1
-// CHECK-NEXT: (('r_offset', 0x0000000000000005)
-// CHECK-NEXT:  ('r_sym', 0x00000006)
-// CHECK-NEXT:  ('r_type', 0x00000001)
-// CHECK-NEXT:  ('r_addend', 0x0000000000000000)
-// CHECK-NEXT: ),
-
-// CHECK:       # Symbol 1
-// CHECK-NEXT:  (('st_name', 0x00000005) # 'bar'
-// CHECK-NEXT:   ('st_bind', 0x0)
-// CHECK-NEXT:   ('st_type', 0x0)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0001)
-// CHECK-NEXT:   ('st_value', 0x0000000000000000)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
-// CHECK-NEXT:  ),
-
-// CHECK:      # Symbol 6
-// CHECK-NEXT: (('st_name', 0x0000000e) # 'bar2'
-// CHECK-NEXT:  ('st_bind', 0x2)
-// CHECK-NEXT:  ('st_type', 0x0)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0004)
-// CHECK-NEXT:  ('st_value', 0x0000000000000005)
-// CHECK-NEXT:  ('st_size', 0x0000000000000000)
-// CHECK-NEXT: ),
+// CHECK:      Relocations [
+// CHECK-NEXT:   Section ({{[0-9]+}}) zed {
+// CHECK-NEXT:     0x1 R_X86_64_PLT32 bar 0xFFFFFFFFFFFFFFFC
+// CHECK-NEXT:     0x5 R_X86_64_64 bar2 0x0
+// CHECK-NEXT:   }
+// CHECK-NEXT: ]
+
+// CHECK:      Symbols [
+// CHECK:        Symbol {
+// CHECK-NEXT:     Name: bar
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text
+// CHECK-NEXT:   }
+
+// CHECK:        Symbol {
+// CHECK:          Name: bar2
+// CHECK-NEXT:     Value: 0x5
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Weak
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: zed
+// CHECK-NEXT:   }
diff --git a/test/MC/ELF/alias.s b/test/MC/ELF/alias.s
index f382628..0575f41 100644
--- a/test/MC/ELF/alias.s
+++ b/test/MC/ELF/alias.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -t | FileCheck %s
 
 foo:
 bar = foo
@@ -16,70 +16,78 @@ foo4:
 bar4 = foo4
 
         .long foo2
-// CHECK:       # Symbol 1
-// CHECK-NEXT:  (('st_name', 0x00000005) # 'bar'
-// CHECK-NEXT:   ('st_bind', 0x0)
-// CHECK-NEXT:   ('st_type', 0x0)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0001)
-// CHECK-NEXT:   ('st_value', 0x0000000000000000)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 2
-// CHECK-NEXT: (('st_name', 0x0000001d) # 'bar4'
-// CHECK-NEXT:  ('st_bind', 0x0)
-// CHECK-NEXT:  ('st_type', 0x2)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0001)
-// CHECK-NEXT:  ('st_value', 0x0000000000000000)
-// CHECK-NEXT:  ('st_size', 0x0000000000000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT:  # Symbol 3
-// CHECK-NEXT:  (('st_name', 0x00000001) # 'foo'
-// CHECK-NEXT:   ('st_bind', 0x0)
-// CHECK-NEXT:   ('st_type', 0x0)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0001)
-// CHECK-NEXT:   ('st_value', 0x0000000000000000)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT:  # Symbol 4
-// CHECK-NEXT:  (('st_name', 0x0000000e) # 'foo3'
-// CHECK-NEXT:   ('st_bind', 0x0)
-// CHECK-NEXT:   ('st_type', 0x0)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0001)
-// CHECK-NEXT:   ('st_value', 0x0000000000000000)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Symbol 5
-// CHECK-NEXT: (('st_name', 0x00000018) # 'foo4'
-// CHECK-NEXT:  ('st_bind', 0x0)
-// CHECK-NEXT:  ('st_type', 0x2)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0001)
-// CHECK-NEXT:  ('st_value', 0x0000000000000000)
-// CHECK-NEXT:  ('st_size', 0x0000000000000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Symbol 6
-// CHECK-NEXT: (('st_name', 0x00000000) # ''
-// CHECK:       # Symbol 7
-// CHECK-NEXT:  (('st_name', 0x00000000) # ''
-// CHECK:       # Symbol 8
-// CHECK-NEXT:  (('st_name', 0x00000000) # ''
-// CHECK:       # Symbol 9
-// CHECK-NEXT:  (('st_name', 0x00000013) # 'bar3'
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x0)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0001)
-// CHECK-NEXT:   ('st_value', 0x0000000000000000)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
-// CHECK:       # Symbol 10
-// CHECK-NEXT:  (('st_name', 0x00000009) # 'bar2'
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x0)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0000)
-// CHECK-NEXT:   ('st_value', 0x0000000000000000)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
+
+// CHECK:      Symbols [
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: bar
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: bar4
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: Function
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: foo
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: foo3
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: foo4
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: Function
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: .text (0)
+// CHECK:        }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: .data (0)
+// CHECK:        }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: .bss (0)
+// CHECK:        }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: bar3
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: bar2
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
+// CHECK-NEXT: ]
diff --git a/test/MC/ELF/align-bss.s b/test/MC/ELF/align-bss.s
index a59232b..776eef3 100644
--- a/test/MC/ELF/align-bss.s
+++ b/test/MC/ELF/align-bss.s
@@ -1,17 +1,22 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s | FileCheck %s
 
 // Test that the bss section is correctly aligned
 
 	.local	foo
 	.comm	foo,2048,16
 
-// CHECK:        ('sh_name', 0x00000007) # '.bss'
-// CHECK-NEXT:   ('sh_type', 0x00000008)
-// CHECK-NEXT:   ('sh_flags', 0x0000000000000003)
-// CHECK-NEXT:   ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_offset', 0x0000000000000040)
-// CHECK-NEXT:   ('sh_size', 0x0000000000000800)
-// CHECK-NEXT:   ('sh_link', 0x00000000)
-// CHECK-NEXT:   ('sh_info', 0x00000000)
-// CHECK-NEXT:   ('sh_addralign', 0x0000000000000010)
-// CHECK-NEXT:   ('sh_entsize', 0x0000000000000000)
+// CHECK:        Section {
+// CHECK:          Name: .bss
+// CHECK-NEXT:     Type: SHT_NOBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:       SHF_WRITE
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x40
+// CHECK-NEXT:     Size: 2048
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 16
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:   }
diff --git a/test/MC/ELF/align-nops.s b/test/MC/ELF/align-nops.s
index 3bf96e9..5e33868 100644
--- a/test/MC/ELF/align-nops.s
+++ b/test/MC/ELF/align-nops.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump --dump-section-data | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sd | FileCheck %s
 
 // Test that we get optimal nops in text
     .text
@@ -15,26 +15,40 @@ f0:
     .long 0
     .align  8
 
-// CHECK: (('sh_name', 0x00000001) # '.text'
-// CHECK-NEXT:  ('sh_type', 0x00000001)
-// CHECK-NEXT:  ('sh_flags', 0x0000000000000006)
-// CHECK-NEXT:  ('sh_addr',
-// CHECK-NEXT:  ('sh_offset',
-// CHECK-NEXT:  ('sh_size', 0x0000000000000010)
-// CHECK-NEXT:  ('sh_link', 0x00000000)
-// CHECK-NEXT:  ('sh_info', 0x00000000)
-// CHECK-NEXT:  ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:  ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT:  ('_section_data', '00000000 0f1f4000 00000000 0f1f4000')
+// CHECK:        Section {
+// CHECK:          Name: .text
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:       SHF_EXECINSTR
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address:
+// CHECK-NEXT:     Offset:
+// CHECK-NEXT:     Size: 16
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:     SectionData (
+// CHECK-NEXT:       0000: 00000000 0F1F4000 00000000 0F1F4000
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
 
-// CHECK: (('sh_name', 0x00000026) # '.data'
-// CHECK-NEXT:  ('sh_type', 0x00000001)
-// CHECK-NEXT:  ('sh_flags', 0x0000000000000003)
-// CHECK-NEXT:  ('sh_addr',
-// CHECK-NEXT:  ('sh_offset',
-// CHECK-NEXT:  ('sh_size', 0x0000000000000010)
-// CHECK-NEXT:  ('sh_link', 0x00000000)
-// CHECK-NEXT:  ('sh_info', 0x00000000)
-// CHECK-NEXT:  ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:  ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT:  ('_section_data', '00000000 90909090 00000000 00000000')
+// CHECK:        Section {
+// CHECK:          Name: .data
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:       SHF_WRITE
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address:
+// CHECK-NEXT:     Offset:
+// CHECK-NEXT:     Size: 16
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:     SectionData (
+// CHECK-NEXT:       0000: 00000000 90909090 00000000 00000000
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
diff --git a/test/MC/ELF/align-size.s b/test/MC/ELF/align-size.s
index f628291..84a6e99 100644
--- a/test/MC/ELF/align-size.s
+++ b/test/MC/ELF/align-size.s
@@ -1,13 +1,18 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s | FileCheck %s
 
 // Test that the alignment does contribute to the size of the section.
 
 	.zero 4
 	.align	8
 
-// CHECK:      (('sh_name', 0x00000001) # '.text'
-// CHECK-NEXT:  ('sh_type', 0x00000001)
-// CHECK-NEXT:  ('sh_flags', 0x0000000000000006)
-// CHECK-NEXT:  ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_offset', 0x0000000000000040)
-// CHECK-NEXT:  ('sh_size', 0x0000000000000008)
+// CHECK:        Section {
+// CHECK:          Name: .text
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:       SHF_EXECINSTR
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x40
+// CHECK-NEXT:     Size: 8
+// CHECK:        }
diff --git a/test/MC/ELF/align-text.s b/test/MC/ELF/align-text.s
index 2fd3cba..b00af4a 100644
--- a/test/MC/ELF/align-text.s
+++ b/test/MC/ELF/align-text.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s | FileCheck %s
 
 // Test that the .text directive doesn't cause alignment.
 
@@ -6,14 +6,18 @@
         .text
         .zero 1
 
-// CHECK:      (('sh_name', 0x00000001) # '.text'
-// CHECK-NEXT:  ('sh_type', 0x00000001)
-// CHECK-NEXT:  ('sh_flags', 0x0000000000000006)
-// CHECK-NEXT:   ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_offset', 0x0000000000000040)
-// CHECK-NEXT:   ('sh_size', 0x0000000000000002)
-// CHECK-NEXT:   ('sh_link', 0x00000000)
-// CHECK-NEXT:   ('sh_info', 0x00000000)
-// CHECK-NEXT:   ('sh_addralign', 0x0000000000000004)
-// CHECK-NEXT:   ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT:  ),
+// CHECK:        Section {
+// CHECK:          Name: .text
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:       SHF_EXECINSTR
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x40
+// CHECK-NEXT:     Size: 2
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 4
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:   }
diff --git a/test/MC/ELF/align.s b/test/MC/ELF/align.s
index 3142ffb..46be3df 100644
--- a/test/MC/ELF/align.s
+++ b/test/MC/ELF/align.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s | FileCheck %s
 
 // Test that the alignment of rodata doesn't force a alignment of the
 // previous section (.bss)
@@ -7,26 +7,33 @@
 	.section	.rodata,"a",@progbits
 	.align	8
 
-// CHECK: # Section 3
-// CHECK-NEXT:  (('sh_name', 0x00000007) # '.bss'
-// CHECK-NEXT:   ('sh_type', 0x00000008)
-// CHECK-NEXT:   ('sh_flags', 0x0000000000000003)
-// CHECK-NEXT:   ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_offset', 0x0000000000000044)
-// CHECK-NEXT:   ('sh_size', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_link', 0x00000000)
-// CHECK-NEXT:   ('sh_info', 0x00000000)
-// CHECK-NEXT:   ('sh_addralign', 0x0000000000000004)
-// CHECK-NEXT:   ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Section 4
-// CHECK-NEXT:  (('sh_name', 0x00000026) # '.rodata'
-// CHECK-NEXT:   ('sh_type', 0x00000001)
-// CHECK-NEXT:   ('sh_flags', 0x0000000000000002)
-// CHECK-NEXT:   ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_offset', 0x0000000000000048)
-// CHECK-NEXT:   ('sh_size', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_link', 0x00000000)
-// CHECK-NEXT:   ('sh_info', 0x00000000)
-// CHECK-NEXT:   ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:   ('sh_entsize', 0x0000000000000000)
+// CHECK:        Section {
+// CHECK:          Name: .bss
+// CHECK-NEXT:     Type: SHT_NOBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:       SHF_WRITE
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x44
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 4
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Section {
+// CHECK-NEXT:     Index: 4
+// CHECK-NEXT:     Name: .rodata
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x48
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:   }
diff --git a/test/MC/ELF/basic-elf-32.s b/test/MC/ELF/basic-elf-32.s
index 2c6a984..3ddb539 100644
--- a/test/MC/ELF/basic-elf-32.s
+++ b/test/MC/ELF/basic-elf-32.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple i686-pc-linux-gnu %s -o - | elf-dump | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple i686-pc-linux-gnu %s -o - | llvm-readobj -h -s -r -t | FileCheck %s
 
 	.text
 	.globl	main
@@ -30,49 +30,53 @@ main:                                   # @main
 
 	.section	.note.GNU-stack,"",@progbits
 
-// CHECK: ('e_indent[EI_CLASS]', 0x01)
-// CHECK: ('e_indent[EI_DATA]', 0x01)
-// CHECK: ('e_indent[EI_VERSION]', 0x01)
-// CHECK: ('_sections', [
-// CHECK:   # Section 0
-// CHECK:   (('sh_name', 0x00000000) # ''
+// CHECK: ElfHeader {
+// CHECK:   Class: 32-bit
+// CHECK:   DataEncoding: LittleEndian
+// CHECK:   FileVersion: 1
+// CHECK: }
+// CHECK: Sections [
+// CHECK:   Section {
+// CHECK:     Index: 0
+// CHECK:     Name: (0)
 
-// CHECK:   # '.text'
+// CHECK:     Name: .text
 
-// CHECK:   # '.rel.text'
+// CHECK:     Name: .rel.text
 
-// CHECK:   ('_relocations', [
-// CHECK:     # Relocation 0
-// CHECK:     (('r_offset', 0x00000006)
-// CHECK:      ('r_type', 0x01)
-// CHECK:     ),
-// CHECK:     # Relocation 1
-// CHECK:     (('r_offset', 0x0000000b)
-// CHECK:      ('r_type', 0x02)
-// CHECK:     ),
-// CHECK:     # Relocation 2
-// CHECK:     (('r_offset', 0x00000012)
-// CHECK:      ('r_type', 0x01)
-// CHECK:     ),
-// CHECK:     # Relocation 3
-// CHECK:     (('r_offset', 0x00000017)
-// CHECK:      ('r_type', 0x02)
-// CHECK:     ),
-// CHECK:   ])
+// CHECK: Relocations [
+// CHECK:   Section (1) .text {
+// CHECK:     0x6  R_386_32   .rodata.str1.1
+// CHECK:     0xB  R_386_PC32 puts
+// CHECK:     0x12 R_386_32   .rodata.str1.1
+// CHECK:     0x17 R_386_PC32 puts
+// CHECK:   }
+// CHECK: ]
 
-// CHECK: ('st_bind', 0x0)
-// CHECK: ('st_type', 0x3)
+// CHECK: Symbols [
+// CHECK:   Symbol {
+// CHECK:     Binding: Local
+// CHECK:     Type: Section
+// CHECK:   }
 
-// CHECK: ('st_bind', 0x0)
-// CHECK: ('st_type', 0x3)
+// CHECK:   Symbol {
+// CHECK:     Binding: Local
+// CHECK:     Type: Section
+// CHECK:   }
 
-// CHECK: ('st_bind', 0x0)
-// CHECK: ('st_type', 0x3)
+// CHECK:   Symbol {
+// CHECK:     Binding: Local
+// CHECK:     Type: Section
+// CHECK:   }
 
-// CHECK:   # 'main'
-// CHECK:   ('st_bind', 0x1)
-// CHECK-NEXT: ('st_type', 0x2)
+// CHECK:   Symbol {
+// CHECK:     Name: main
+// CHECK:     Binding: Global
+// CHECK:     Type: Function
+// CHECK:   }
 
-// CHECK:   # 'puts'
-// CHECK:   ('st_bind', 0x1)
-// CHECK-NEXT: ('st_type', 0x0)
+// CHECK:   Symbol {
+// CHECK:     Name: puts
+// CHECK:     Binding: Global
+// CHECK:     Type: None
+// CHECK:   }
diff --git a/test/MC/ELF/basic-elf-64.s b/test/MC/ELF/basic-elf-64.s
index 38ffaa7..f98623a 100644
--- a/test/MC/ELF/basic-elf-64.s
+++ b/test/MC/ELF/basic-elf-64.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -h -s -r -t | FileCheck %s
 
         .text
 	.globl	main
@@ -30,53 +30,51 @@ main:                                   # @main
 
 	.section	.note.GNU-stack,"",@progbits
 
-// CHECK: ('e_indent[EI_CLASS]', 0x02)
-// CHECK: ('e_indent[EI_DATA]', 0x01)
-// CHECK: ('e_indent[EI_VERSION]', 0x01)
-// CHECK: ('_sections', [
-// CHECK:   # Section 0
-// CHECK:   (('sh_name', 0x00000000) # ''
+// CHECK: ElfHeader {
+// CHECK:   Class: 64-bit
+// CHECK:   DataEncoding: LittleEndian
+// CHECK:   FileVersion: 1
+// CHECK: }
+// CHECK: Sections [
+// CHECK:   Section {
+// CHECK:     Index: 0
+// CHECK:     Name: (0)
 
-// CHECK:   # '.text'
+// CHECK:     Name: .text
 
-// CHECK:   # '.rela.text'
+// CHECK:     Name: .rela.text
 
-// CHECK:   ('_relocations', [
-// CHECK:     # Relocation 0
-// CHECK:     (('r_offset', 0x0000000000000005)
-// CHECK:      ('r_type', 0x0000000a)
-// CHECK:      ('r_addend', 0x0000000000000000)
-// CHECK:     ),
-// CHECK:     # Relocation 1
-// CHECK:     (('r_offset', 0x000000000000000a)
-// CHECK:      ('r_type', 0x00000002)
-// CHECK:      ('r_addend', 0xfffffffffffffffc)
-// CHECK:     ),
-// CHECK:     # Relocation 2
-// CHECK:     (('r_offset', 0x000000000000000f)
-// CHECK:      ('r_type', 0x0000000a)
-// CHECK:      ('r_addend', 0x0000000000000006)
-// CHECK:     ),
-// CHECK:     # Relocation 3
-// CHECK:     (('r_offset', 0x0000000000000014)
-// CHECK:      ('r_type', 0x00000002)
-// CHECK:      ('r_addend', 0xfffffffffffffffc)
-// CHECK:     ),
-// CHECK:   ])
+// CHECK: Relocations [
+// CHECK:   Section (1) .text {
+// CHECK:     0x5  R_X86_64_32   .rodata.str1.1 0x0
+// CHECK:     0xA  R_X86_64_PC32 puts           0xFFFFFFFFFFFFFFFC
+// CHECK:     0xF  R_X86_64_32   .rodata.str1.1 0x6
+// CHECK:     0x14 R_X86_64_PC32 puts           0xFFFFFFFFFFFFFFFC
+// CHECK:   }
+// CHECK: ]
 
-// CHECK: ('st_bind', 0x0)
-// CHECK: ('st_type', 0x3)
+// CHECK:   Symbol {
+// CHECK:     Binding: Local
+// CHECK:     Type: Section
 
-// CHECK: ('st_bind', 0x0)
-// CHECK: ('st_type', 0x3)
+// CHECK:   Symbol {
+// CHECK:     Binding: Local
+// CHECK:     Type: Section
+// CHECK:   }
 
-// CHECK: ('st_bind', 0x0)
-// CHECK: ('st_type', 0x3)
+// CHECK:   Symbol {
+// CHECK:     Binding: Local
+// CHECK:     Type: Section
+// CHECK:   }
 
-// CHECK:   # 'main'
-// CHECK-NEXT: ('st_bind', 0x1)
-// CHECK-NEXT: ('st_type', 0x2)
+// CHECK:   Symbol {
+// CHECK:     Name: main
+// CHECK:     Binding: Global
+// CHECK:     Type: Function
+// CHECK:  }
 
-// CHECK:   # 'puts'
-// CHECK-NEXT: ('st_bind', 0x1)
-// CHECK-NEXT: ('st_type', 0x0)
+// CHECK:   Symbol {
+// CHECK:     Name: puts
+// CHECK:     Binding: Global
+// CHECK:     Type: None
+// CHECK:  }
diff --git a/test/MC/ELF/call-abs.s b/test/MC/ELF/call-abs.s
index 795a659..81265a1 100644
--- a/test/MC/ELF/call-abs.s
+++ b/test/MC/ELF/call-abs.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple i686-pc-linux-gnu %s -o - | elf-dump | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple i686-pc-linux-gnu %s -o - | llvm-readobj -r | FileCheck %s
 
 	.text
 	.globl	f
@@ -15,10 +15,8 @@ f:                                      # @f
 
 	.section	.note.GNU-stack,"",@progbits
 
-// CHECK:      ('_relocations', [
-// CHECK-NEXT:  # Relocation 0
-// CHECK-NEXT:  (('r_offset', 0x00000004)
-// CHECK-NEXT:   ('r_sym', 0x000000)
-// CHECK-NEXT:   ('r_type', 0x02)
-// CHECK-NEXT:  ),
-// CHECK-NEXT: ])
+// CHECK:      Relocations [
+// CHECK:        Section ({{[^ ]+}}) {{[^ ]+}} {
+// CHECK-NEXT:     0x4 R_386_PC32 -
+// CHECK-NEXT:   }
+// CHECK-NEXT: ]
diff --git a/test/MC/ELF/cfi-adjust-cfa-offset.s b/test/MC/ELF/cfi-adjust-cfa-offset.s
index f0d9c5f..137b8b6 100644
--- a/test/MC/ELF/cfi-adjust-cfa-offset.s
+++ b/test/MC/ELF/cfi-adjust-cfa-offset.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sr -sd | FileCheck %s
 
 f:
 	.cfi_startproc
@@ -11,36 +11,43 @@ f:
 	ret
 	.cfi_endproc
 
-// CHECK:       # Section 4
-// CHECK-NEXT:  (('sh_name', 0x00000011) # '.eh_frame'
-// CHECK-NEXT:   ('sh_type', 0x00000001)
-// CHECK-NEXT:   ('sh_flags', 0x0000000000000002)
-// CHECK-NEXT:   ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_offset', 0x0000000000000050)
-// CHECK-NEXT:   ('sh_size', 0x0000000000000038)
-// CHECK-NEXT:   ('sh_link', 0x00000000)
-// CHECK-NEXT:   ('sh_info', 0x00000000)
-// CHECK-NEXT:   ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:   ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT:   ('_section_data', '14000000 00000000 017a5200 01781001 1b0c0708 90010000 1c000000 1c000000 00000000 0a000000 00440e10 410e1444 0e080000 00000000')
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Section 5
-// CHECK-NEXT:  (('sh_name', 0x0000000c) # '.rela.eh_frame'
-// CHECK-NEXT:   ('sh_type', 0x00000004)
-// CHECK-NEXT:   ('sh_flags', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_offset', 0x00000000000003a0)
-// CHECK-NEXT:   ('sh_size', 0x0000000000000018)
-// CHECK-NEXT:   ('sh_link', 0x00000007)
-// CHECK-NEXT:   ('sh_info', 0x00000004)
-// CHECK-NEXT:   ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:   ('sh_entsize', 0x0000000000000018)
-// CHECK-NEXT:   ('_relocations', [
-// CHECK-NEXT:    # Relocation 0
-// CHECK-NEXT:    (('r_offset', 0x0000000000000020)
-// CHECK-NEXT:     ('r_sym', 0x00000002)
-// CHECK-NEXT:     ('r_type', 0x00000002)
-// CHECK-NEXT:     ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:    ),
-// CHECK-NEXT:   ])
-// CHECK-NEXT:  ),
+// CHECK:        Section {
+// CHECK:          Index: 4
+// CHECK-NEXT:     Name: .eh_frame
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x50
+// CHECK-NEXT:     Size: 56
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:       0x20 R_X86_64_PC32 .text 0x0
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     SectionData (
+// CHECK-NEXT:       0000: 14000000 00000000 017A5200 01781001
+// CHECK-NEXT:       0010: 1B0C0708 90010000 1C000000 1C000000
+// CHECK-NEXT:       0020: 00000000 0A000000 00440E10 410E1444
+// CHECK-NEXT:       0030: 0E080000 00000000
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Section {
+// CHECK-NEXT:     Index: 5
+// CHECK-NEXT:     Name: .rela.eh_frame
+// CHECK-NEXT:     Type: SHT_RELA
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x3A0
+// CHECK-NEXT:     Size: 24
+// CHECK-NEXT:     Link: 7
+// CHECK-NEXT:     Info: 4
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 24
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:     ]
+// CHECK:        }
diff --git a/test/MC/ELF/cfi-advance-loc2.s b/test/MC/ELF/cfi-advance-loc2.s
index b3c08e0..1cad325 100644
--- a/test/MC/ELF/cfi-advance-loc2.s
+++ b/test/MC/ELF/cfi-advance-loc2.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sr -sd | FileCheck %s
 
 // test that this produces a correctly encoded cfi_advance_loc2
 
@@ -10,36 +10,41 @@ f:
         nop
 	.cfi_endproc
 
-// CHECK:      (('sh_name', 0x00000011) # '.eh_frame'
-// CHECK-NEXT:  ('sh_type', 0x00000001)
-// CHECK-NEXT:  ('sh_flags', 0x0000000000000002)
-// CHECK-NEXT:  ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_offset', 0x0000000000000148)
-// CHECK-NEXT:  ('sh_size', 0x0000000000000030)
-// CHECK-NEXT:  ('sh_link', 0x00000000)
-// CHECK-NEXT:  ('sh_info', 0x00000000)
-// CHECK-NEXT:  ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:  ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT:  ('_section_data', '14000000 00000000 017a5200 01781001 1b0c0708 90010000 14000000 1c000000 00000000 01010000 00030001 0e080000')
-// CHECK-NEXT: ),
+// CHECK:        Section {
+// CHECK:          Name: .eh_frame
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x148
+// CHECK-NEXT:     Size: 48
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:       0x20 R_X86_64_PC32 .text 0x0
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     SectionData (
+// CHECK-NEXT:       0000: 14000000 00000000 017A5200 01781001
+// CHECK-NEXT:       0010: 1B0C0708 90010000 14000000 1C000000
+// CHECK-NEXT:       0020: 00000000 01010000 00030001 0E080000
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
 
-
-// CHECK:      (('sh_name', 0x0000000c) # '.rela.eh_frame'
-// CHECK-NEXT:  ('sh_type', 0x00000004)
-// CHECK-NEXT:  ('sh_flags', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_offset', 0x0000000000000490)
-// CHECK-NEXT:  ('sh_size', 0x0000000000000018)
-// CHECK-NEXT:  ('sh_link', 0x00000007)
-// CHECK-NEXT:  ('sh_info', 0x00000004)
-// CHECK-NEXT:  ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:  ('sh_entsize', 0x0000000000000018)
-// CHECK-NEXT:  ('_relocations', [
-// CHECK-NEXT:   # Relocation 0
-// CHECK-NEXT:   (('r_offset', 0x0000000000000020)
-// CHECK-NEXT:    ('r_sym', 0x00000002)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:  ])
-// CHECK-NEXT: ),
+// CHECK:        Section {
+// CHECK:          Name: .rela.eh_frame
+// CHECK-NEXT:     Type: SHT_RELA
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x490
+// CHECK-NEXT:     Size: 24
+// CHECK-NEXT:     Link: 7
+// CHECK-NEXT:     Info: 4
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 24
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:     ]
+// CHECK:        }
diff --git a/test/MC/ELF/cfi-def-cfa-offset.s b/test/MC/ELF/cfi-def-cfa-offset.s
index 0ed2be0..f1a54a8 100644
--- a/test/MC/ELF/cfi-def-cfa-offset.s
+++ b/test/MC/ELF/cfi-def-cfa-offset.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sr -sd | FileCheck %s
 
 f:
 	.cfi_startproc
@@ -10,37 +10,43 @@ f:
 	ret
 	.cfi_endproc
 
-// CHECK:      # Section 4
-// CHECK-NEXT: (('sh_name', 0x00000011) # '.eh_frame'
-// CHECK-NEXT:  ('sh_type', 0x00000001)
-// CHECK-NEXT:  ('sh_flags', 0x0000000000000002)
-// CHECK-NEXT:  ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_offset', 0x0000000000000050)
-// CHECK-NEXT:  ('sh_size', 0x0000000000000030)
-// CHECK-NEXT:  ('sh_link', 0x00000000)
-// CHECK-NEXT:  ('sh_info', 0x00000000)
-// CHECK-NEXT:  ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:  ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT:  ('_section_data', '14000000 00000000 017a5200 01781001 1b0c0708 90010000 14000000 1c000000 00000000 0a000000 00440e10 450e0800')
-// CHECK-NEXT: ),
+// CHECK:        Section {
+// CHECK:          Index: 4
+// CHECK-NEXT:     Name: .eh_frame
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x50
+// CHECK-NEXT:     Size: 48
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:       0x20 R_X86_64_PC32 .text 0x0
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     SectionData (
+// CHECK-NEXT:       0000: 14000000 00000000 017A5200 01781001
+// CHECK-NEXT:       0010: 1B0C0708 90010000 14000000 1C000000
+// CHECK-NEXT:       0020: 00000000 0A000000 00440E10 450E0800
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
 
-// CHECK:       # Section 5
-// CHECK-NEXT: (('sh_name', 0x0000000c) # '.rela.eh_frame'
-// CHECK-NEXT:  ('sh_type', 0x00000004)
-// CHECK-NEXT:  ('sh_flags', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_offset', 0x0000000000000398)
-// CHECK-NEXT:  ('sh_size', 0x0000000000000018)
-// CHECK-NEXT:  ('sh_link', 0x00000007)
-// CHECK-NEXT:  ('sh_info', 0x00000004)
-// CHECK-NEXT:  ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:  ('sh_entsize', 0x0000000000000018)
-// CHECK-NEXT:  ('_relocations', [
-// CHECK-NEXT:   # Relocation 0
-// CHECK-NEXT:   (('r_offset', 0x0000000000000020)
-// CHECK-NEXT:    ('r_sym', 0x00000002)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:  ])
-// CHECK-NEXT: ),
+// CHECK:        Section {
+// CHECK:          Index: 5
+// CHECK-NEXT:     Name: .rela.eh_frame
+// CHECK-NEXT:     Type: SHT_RELA
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x398
+// CHECK-NEXT:     Size: 24
+// CHECK-NEXT:     Link: 7
+// CHECK-NEXT:     Info: 4
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 24
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:     ]
+// CHECK:        }
diff --git a/test/MC/ELF/cfi-def-cfa-register.s b/test/MC/ELF/cfi-def-cfa-register.s
index e87b4f6..b1e74ea 100644
--- a/test/MC/ELF/cfi-def-cfa-register.s
+++ b/test/MC/ELF/cfi-def-cfa-register.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sr -sd | FileCheck %s
 
 f:
 	.cfi_startproc
@@ -7,35 +7,41 @@ f:
         nop
 	.cfi_endproc
 
-// CHECK:      (('sh_name', 0x00000011) # '.eh_frame'
-// CHECK-NEXT:  ('sh_type', 0x00000001)
-// CHECK-NEXT:  ('sh_flags', 0x0000000000000002)
-// CHECK-NEXT:  ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_offset', 0x0000000000000048)
-// CHECK-NEXT:  ('sh_size', 0x0000000000000030)
-// CHECK-NEXT:  ('sh_link', 0x00000000)
-// CHECK-NEXT:  ('sh_info', 0x00000000)
-// CHECK-NEXT:  ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:  ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT:  ('_section_data', '14000000 00000000 017a5200 01781001 1b0c0708 90010000 14000000 1c000000 00000000 02000000 00410d06 00000000')
-// CHECK-NEXT: ),
+// CHECK:        Section {
+// CHECK:          Name: .eh_frame
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x48
+// CHECK-NEXT:     Size: 48
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:       0x20 R_X86_64_PC32 .text 0x0
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     SectionData (
+// CHECK-NEXT:       0000: 14000000 00000000 017A5200 01781001
+// CHECK-NEXT:       0010: 1B0C0708 90010000 14000000 1C000000
+// CHECK-NEXT:       0020: 00000000 02000000 00410D06 00000000
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
 
-// CHECK:      (('sh_name', 0x0000000c) # '.rela.eh_frame'
-// CHECK-NEXT:  ('sh_type', 0x00000004)
-// CHECK-NEXT:  ('sh_flags', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_offset', 0x0000000000000390)
-// CHECK-NEXT:  ('sh_size', 0x0000000000000018)
-// CHECK-NEXT:  ('sh_link', 0x00000007)
-// CHECK-NEXT:  ('sh_info', 0x00000004)
-// CHECK-NEXT:  ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:  ('sh_entsize', 0x0000000000000018)
-// CHECK-NEXT:  ('_relocations', [
-// CHECK-NEXT:   # Relocation 0
-// CHECK-NEXT:   (('r_offset', 0x0000000000000020)
-// CHECK-NEXT:    ('r_sym', 0x00000002)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:  ])
-// CHECK-NEXT: ),
+// CHECK:        Section {
+// CHECK:          Name: .rela.eh_frame
+// CHECK-NEXT:     Type: SHT_RELA
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x390
+// CHECK-NEXT:     Size: 24
+// CHECK-NEXT:     Link: 7
+// CHECK-NEXT:     Info: 4
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 24
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:     ]
+// CHECK:        }
diff --git a/test/MC/ELF/cfi-def-cfa.s b/test/MC/ELF/cfi-def-cfa.s
index e25bf5c..abde0de 100644
--- a/test/MC/ELF/cfi-def-cfa.s
+++ b/test/MC/ELF/cfi-def-cfa.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sr -sd | FileCheck %s
 
 f:
 	.cfi_startproc
@@ -7,36 +7,41 @@ f:
         nop
 	.cfi_endproc
 
-// CHECK:      (('sh_name', 0x00000011) # '.eh_frame'
-// CHECK-NEXT:  ('sh_type', 0x00000001)
-// CHECK-NEXT:  ('sh_flags', 0x0000000000000002)
-// CHECK-NEXT:  ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_offset', 0x0000000000000048)
-// CHECK-NEXT:  ('sh_size', 0x0000000000000030)
-// CHECK-NEXT:  ('sh_link', 0x00000000)
-// CHECK-NEXT:  ('sh_info', 0x00000000)
-// CHECK-NEXT:  ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:  ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT:  ('_section_data', '14000000 00000000 017a5200 01781001 1b0c0708 90010000 14000000 1c000000 00000000 02000000 00410c07 08000000')
-// CHECK-NEXT: ),
+// CHECK:        Section {
+// CHECK:          Name: .eh_frame
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x48
+// CHECK-NEXT:     Size: 48
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:       0x20 R_X86_64_PC32 .text 0x0
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     SectionData (
+// CHECK-NEXT:       0000: 14000000 00000000 017A5200 01781001
+// CHECK-NEXT:       0010: 1B0C0708 90010000 14000000 1C000000
+// CHECK-NEXT:       0020: 00000000 02000000 00410C07 08000000
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
 
-
-// CHECK:      (('sh_name', 0x0000000c) # '.rela.eh_frame'
-// CHECK-NEXT:  ('sh_type', 0x00000004)
-// CHECK-NEXT:  ('sh_flags', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_offset', 0x0000000000000390)
-// CHECK-NEXT:  ('sh_size', 0x0000000000000018)
-// CHECK-NEXT:  ('sh_link', 0x00000007)
-// CHECK-NEXT:  ('sh_info', 0x00000004)
-// CHECK-NEXT:  ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:  ('sh_entsize', 0x0000000000000018)
-// CHECK-NEXT:  ('_relocations', [
-// CHECK-NEXT:   # Relocation 0
-// CHECK-NEXT:   (('r_offset', 0x0000000000000020)
-// CHECK-NEXT:    ('r_sym', 0x00000002)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:  ])
-// CHECK-NEXT: ),
+// CHECK:        Section {
+// CHECK:          Name: .rela.eh_frame
+// CHECK-NEXT:     Type: SHT_RELA
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x390
+// CHECK-NEXT:     Size: 24
+// CHECK-NEXT:     Link: 7
+// CHECK-NEXT:     Info: 4
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 24
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:     ]
+// CHECK:        }
diff --git a/test/MC/ELF/cfi-escape.s b/test/MC/ELF/cfi-escape.s
index 3a5af00..a910fab 100644
--- a/test/MC/ELF/cfi-escape.s
+++ b/test/MC/ELF/cfi-escape.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sr -sd | FileCheck %s
 
 f:
 	.cfi_startproc
@@ -7,36 +7,42 @@ f:
         nop
 	.cfi_endproc
 
-// CHECK:       # Section 4
-// CHECK-NEXT:  (('sh_name', 0x00000011) # '.eh_frame'
-// CHECK-NEXT:   ('sh_type', 0x00000001)
-// CHECK-NEXT:   ('sh_flags', 0x0000000000000002)
-// CHECK-NEXT:   ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_offset', 0x0000000000000048)
-// CHECK-NEXT:   ('sh_size', 0x0000000000000030)
-// CHECK-NEXT:   ('sh_link', 0x00000000)
-// CHECK-NEXT:   ('sh_info', 0x00000000)
-// CHECK-NEXT:   ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:   ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT:   ('_section_data', '14000000 00000000 017a5200 01781001 1b0c0708 90010000 14000000 1c000000 00000000 02000000 00411507 7f000000')
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Section 5
-// CHECK-NEXT:  (('sh_name', 0x0000000c) # '.rela.eh_frame'
-// CHECK-NEXT:   ('sh_type', 0x00000004)
-// CHECK-NEXT:   ('sh_flags', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_offset', 0x0000000000000390)
-// CHECK-NEXT:   ('sh_size', 0x0000000000000018)
-// CHECK-NEXT:   ('sh_link', 0x00000007)
-// CHECK-NEXT:   ('sh_info', 0x00000004)
-// CHECK-NEXT:   ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:   ('sh_entsize', 0x0000000000000018)
-// CHECK-NEXT:   ('_relocations', [
-// CHECK-NEXT:    # Relocation 0
-// CHECK-NEXT:    (('r_offset', 0x0000000000000020)
-// CHECK-NEXT:     ('r_sym', 0x00000002)
-// CHECK-NEXT:     ('r_type', 0x00000002)
-// CHECK-NEXT:     ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:    ),
-// CHECK-NEXT:   ])
-// CHECK-NEXT:  ),
+// CHECK:        Section {
+// CHECK:          Index: 4
+// CHECK-NEXT:     Name: .eh_frame
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x48
+// CHECK-NEXT:     Size: 48
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:       0x20 R_X86_64_PC32 .text 0x0
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     SectionData (
+// CHECK-NEXT:       0000: 14000000 00000000 017A5200 01781001
+// CHECK-NEXT:       0010: 1B0C0708 90010000 14000000 1C000000
+// CHECK-NEXT:       0020: 00000000 02000000 00411507 7F000000
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Section {
+// CHECK-NEXT:     Index: 5
+// CHECK-NEXT:     Name: .rela.eh_frame
+// CHECK-NEXT:     Type: SHT_RELA
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x390
+// CHECK-NEXT:     Size: 24
+// CHECK-NEXT:     Link: 7
+// CHECK-NEXT:     Info: 4
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 24
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:     ]
+// CHECK:        }
diff --git a/test/MC/ELF/cfi-offset.s b/test/MC/ELF/cfi-offset.s
index 9acb76c..f7f95fb 100644
--- a/test/MC/ELF/cfi-offset.s
+++ b/test/MC/ELF/cfi-offset.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sr -sd | FileCheck %s
 
 f:
 	.cfi_startproc
@@ -7,36 +7,41 @@ f:
         nop
 	.cfi_endproc
 
-// CHECK:      (('sh_name', 0x00000011) # '.eh_frame'
-// CHECK-NEXT:  ('sh_type', 0x00000001)
-// CHECK-NEXT:  ('sh_flags', 0x0000000000000002)
-// CHECK-NEXT:  ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_offset', 0x0000000000000048)
-// CHECK-NEXT:  ('sh_size', 0x0000000000000030)
-// CHECK-NEXT:  ('sh_link', 0x00000000)
-// CHECK-NEXT:  ('sh_info', 0x00000000)
-// CHECK-NEXT:  ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:  ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT:  ('_section_data', '14000000 00000000 017a5200 01781001 1b0c0708 90010000 14000000 1c000000 00000000 02000000 00418602 00000000')
-// CHECK-NEXT: ),
+// CHECK:        Section {
+// CHECK:          Name: .eh_frame
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x48
+// CHECK-NEXT:     Size: 48
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:       0x20 R_X86_64_PC32 .text 0x0
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     SectionData (
+// CHECK-NEXT:       0000: 14000000 00000000 017A5200 01781001
+// CHECK-NEXT:       0010: 1B0C0708 90010000 14000000 1C000000
+// CHECK-NEXT:       0020: 00000000 02000000 00418602 00000000
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
 
-
-// CHECK:      (('sh_name', 0x0000000c) # '.rela.eh_frame'
-// CHECK-NEXT:  ('sh_type', 0x00000004)
-// CHECK-NEXT:  ('sh_flags', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_offset', 0x0000000000000390)
-// CHECK-NEXT:  ('sh_size', 0x0000000000000018)
-// CHECK-NEXT:  ('sh_link', 0x00000007)
-// CHECK-NEXT:  ('sh_info', 0x00000004)
-// CHECK-NEXT:  ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:  ('sh_entsize', 0x0000000000000018)
-// CHECK-NEXT:  ('_relocations', [
-// CHECK-NEXT:   # Relocation 0
-// CHECK-NEXT:   (('r_offset', 0x0000000000000020)
-// CHECK-NEXT:    ('r_sym', 0x00000002)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:  ])
-// CHECK-NEXT: ),
+// CHECK:        Section {
+// CHECK:          Name: .rela.eh_frame
+// CHECK-NEXT:     Type: SHT_RELA
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x390
+// CHECK-NEXT:     Size: 24
+// CHECK-NEXT:     Link: 7
+// CHECK-NEXT:     Info: 4
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 24
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:     ]
+// CHECK:        }
diff --git a/test/MC/ELF/cfi-register.s b/test/MC/ELF/cfi-register.s
index 3772309..f7a07e4 100644
--- a/test/MC/ELF/cfi-register.s
+++ b/test/MC/ELF/cfi-register.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sr -sd | FileCheck %s
 
 f:
 	.cfi_startproc
@@ -7,36 +7,42 @@ f:
         nop
 	.cfi_endproc
 
-// CHECK:        # Section 4
-// CHECK-NEXT:  (('sh_name', 0x00000011) # '.eh_frame'
-// CHECK-NEXT:   ('sh_type', 0x00000001)
-// CHECK-NEXT:   ('sh_flags', 0x0000000000000002)
-// CHECK-NEXT:   ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_offset', 0x0000000000000048)
-// CHECK-NEXT:   ('sh_size', 0x0000000000000030)
-// CHECK-NEXT:   ('sh_link', 0x00000000)
-// CHECK-NEXT:   ('sh_info', 0x00000000)
-// CHECK-NEXT:   ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:   ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT:   ('_section_data', '14000000 00000000 017a5200 01781001 1b0c0708 90010000 14000000 1c000000 00000000 02000000 00410906 00000000')
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Section 5
-// CHECK-NEXT:  (('sh_name', 0x0000000c) # '.rela.eh_frame'
-// CHECK-NEXT:   ('sh_type', 0x00000004)
-// CHECK-NEXT:   ('sh_flags', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_offset', 0x0000000000000390)
-// CHECK-NEXT:   ('sh_size', 0x0000000000000018)
-// CHECK-NEXT:   ('sh_link', 0x00000007)
-// CHECK-NEXT:   ('sh_info', 0x00000004)
-// CHECK-NEXT:   ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:   ('sh_entsize', 0x0000000000000018)
-// CHECK-NEXT:   ('_relocations', [
-// CHECK-NEXT:    # Relocation 0
-// CHECK-NEXT:    (('r_offset', 0x0000000000000020)
-// CHECK-NEXT:     ('r_sym', 0x00000002)
-// CHECK-NEXT:     ('r_type', 0x00000002)
-// CHECK-NEXT:     ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:    ),
-// CHECK-NEXT:   ])
-// CHECK-NEXT:  ),
+// CHECK:        Section {
+// CHECK:          Index: 4
+// CHECK-NEXT:     Name: .eh_frame
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x48
+// CHECK-NEXT:     Size: 48
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:       0x20 R_X86_64_PC32 .text 0x0
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     SectionData (
+// CHECK-NEXT:       0000: 14000000 00000000 017A5200 01781001
+// CHECK-NEXT:       0010: 1B0C0708 90010000 14000000 1C000000
+// CHECK-NEXT:       0020: 00000000 02000000 00410906 00000000
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Section {
+// CHECK-NEXT:     Index: 5
+// CHECK-NEXT:     Name: .rela.eh_frame
+// CHECK-NEXT:     Type: SHT_RELA
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x390
+// CHECK-NEXT:     Size: 24
+// CHECK-NEXT:     Link: 7
+// CHECK-NEXT:     Info: 4
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 24
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:     ]
+// CHECK:        }
diff --git a/test/MC/ELF/cfi-rel-offset.s b/test/MC/ELF/cfi-rel-offset.s
index 82bbd8d..35a73ef 100644
--- a/test/MC/ELF/cfi-rel-offset.s
+++ b/test/MC/ELF/cfi-rel-offset.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sr -sd | FileCheck %s
 
 f:
 	.cfi_startproc
@@ -14,36 +14,43 @@ f:
         .cfi_rel_offset 6,0
 	.cfi_endproc
 
-// CHECK:       # Section 4
-// CHECK-NEXT:  (('sh_name', 0x00000011) # '.eh_frame'
-// CHECK-NEXT:   ('sh_type', 0x00000001)
-// CHECK-NEXT:   ('sh_flags', 0x0000000000000002)
-// CHECK-NEXT:   ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_offset', 0x0000000000000048)
-// CHECK-NEXT:   ('sh_size', 0x0000000000000040)
-// CHECK-NEXT:   ('sh_link', 0x00000000)
-// CHECK-NEXT:   ('sh_info', 0x00000000)
-// CHECK-NEXT:   ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:   ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT:   ('_section_data', '14000000 00000000 017a5200 01781001 1b0c0708 90010000 24000000 1c000000 00000000 05000000 00410e08 410d0641 11067f41 0e104186 02000000 00000000')
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Section 5
-// CHECK-NEXT:  (('sh_name', 0x0000000c) # '.rela.eh_frame'
-// CHECK-NEXT:   ('sh_type', 0x00000004)
-// CHECK-NEXT:   ('sh_flags', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_offset', 0x00000000000003a0)
-// CHECK-NEXT:   ('sh_size', 0x0000000000000018)
-// CHECK-NEXT:   ('sh_link', 0x00000007)
-// CHECK-NEXT:   ('sh_info', 0x00000004)
-// CHECK-NEXT:   ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:   ('sh_entsize', 0x0000000000000018)
-// CHECK-NEXT:   ('_relocations', [
-// CHECK-NEXT:    # Relocation 0
-// CHECK-NEXT:    (('r_offset', 0x0000000000000020)
-// CHECK-NEXT:     ('r_sym', 0x00000002)
-// CHECK-NEXT:     ('r_type', 0x00000002)
-// CHECK-NEXT:     ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:    ),
-// CHECK-NEXT:   ])
-// CHECK-NEXT:  ),
+// CHECK:        Section {
+// CHECK:          Index: 4
+// CHECK-NEXT:     Name: .eh_frame
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x48
+// CHECK-NEXT:     Size: 64
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:       0x20 R_X86_64_PC32 .text 0x0
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     SectionData (
+// CHECK-NEXT:       0000: 14000000 00000000 017A5200 01781001
+// CHECK-NEXT:       0010: 1B0C0708 90010000 24000000 1C000000
+// CHECK-NEXT:       0020: 00000000 05000000 00410E08 410D0641
+// CHECK-NEXT:       0030: 11067F41 0E104186 02000000 00000000
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Section {
+// CHECK-NEXT:     Index: 5
+// CHECK-NEXT:     Name: .rela.eh_frame
+// CHECK-NEXT:     Type: SHT_RELA
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x3A0
+// CHECK-NEXT:     Size: 24
+// CHECK-NEXT:     Link: 7
+// CHECK-NEXT:     Info: 4
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 24
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:     ]
+// CHECK:        }
diff --git a/test/MC/ELF/cfi-rel-offset2.s b/test/MC/ELF/cfi-rel-offset2.s
index 7726adb..5817d1f 100644
--- a/test/MC/ELF/cfi-rel-offset2.s
+++ b/test/MC/ELF/cfi-rel-offset2.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sr -sd | FileCheck %s
 
 f:
 	.cfi_startproc
@@ -6,36 +6,42 @@ f:
         .cfi_rel_offset 6,16
 	.cfi_endproc
 
-// CHECK:       # Section 4
-// CHECK-NEXT:  (('sh_name', 0x00000011) # '.eh_frame'
-// CHECK-NEXT:   ('sh_type', 0x00000001)
-// CHECK-NEXT:   ('sh_flags', 0x0000000000000002)
-// CHECK-NEXT:   ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_offset', 0x0000000000000048)
-// CHECK-NEXT:   ('sh_size', 0x0000000000000030)
-// CHECK-NEXT:   ('sh_link', 0x00000000)
-// CHECK-NEXT:   ('sh_info', 0x00000000)
-// CHECK-NEXT:   ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:   ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT:   ('_section_data', '14000000 00000000 017a5200 01781001 1b0c0708 90010000 14000000 1c000000 00000000 01000000 00411106 7f000000')
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Section 5
-// CHECK-NEXT:  (('sh_name', 0x0000000c) # '.rela.eh_frame'
-// CHECK-NEXT:   ('sh_type', 0x00000004)
-// CHECK-NEXT:   ('sh_flags', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_offset', 0x0000000000000390)
-// CHECK-NEXT:   ('sh_size', 0x0000000000000018)
-// CHECK-NEXT:   ('sh_link', 0x00000007)
-// CHECK-NEXT:   ('sh_info', 0x00000004)
-// CHECK-NEXT:   ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:   ('sh_entsize', 0x0000000000000018)
-// CHECK-NEXT:   ('_relocations', [
-// CHECK-NEXT:    # Relocation 0
-// CHECK-NEXT:    (('r_offset', 0x0000000000000020)
-// CHECK-NEXT:     ('r_sym', 0x00000002)
-// CHECK-NEXT:     ('r_type', 0x00000002)
-// CHECK-NEXT:     ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:    ),
-// CHECK-NEXT:   ])
-// CHECK-NEXT:  ),
+// CHECK:        Section {
+// CHECK:          Index: 4
+// CHECK-NEXT:     Name: .eh_frame
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x48
+// CHECK-NEXT:     Size: 48
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:       0x20 R_X86_64_PC32 .text 0x0
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     SectionData (
+// CHECK-NEXT:       0000: 14000000 00000000 017A5200 01781001
+// CHECK-NEXT:       0010: 1B0C0708 90010000 14000000 1C000000
+// CHECK-NEXT:       0020: 00000000 01000000 00411106 7F000000
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Section {
+// CHECK-NEXT:     Index: 5
+// CHECK-NEXT:     Name: .rela.eh_frame
+// CHECK-NEXT:     Type: SHT_RELA
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x390
+// CHECK-NEXT:     Size: 24
+// CHECK-NEXT:     Link: 7
+// CHECK-NEXT:     Info: 4
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 24
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:     ]
+// CHECK:        }
diff --git a/test/MC/ELF/cfi-remember.s b/test/MC/ELF/cfi-remember.s
index 1717662..932a182 100644
--- a/test/MC/ELF/cfi-remember.s
+++ b/test/MC/ELF/cfi-remember.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sr -sd | FileCheck %s
 
 f:
 	.cfi_startproc
@@ -9,37 +9,42 @@ f:
         nop
 	.cfi_endproc
 
-// CHECK:      # Section 4
-// CHECK-NEXT: (('sh_name', 0x00000011) # '.eh_frame'
-// CHECK-NEXT:  ('sh_type', 0x00000001)
-// CHECK-NEXT:  ('sh_flags', 0x0000000000000002)
-// CHECK-NEXT:  ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_offset', 0x0000000000000048)
-// CHECK-NEXT:  ('sh_size', 0x0000000000000030)
-// CHECK-NEXT:  ('sh_link', 0x00000000)
-// CHECK-NEXT:  ('sh_info', 0x00000000)
-// CHECK-NEXT:  ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:  ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT:  ('_section_data', '14000000 00000000 017a5200 01781001 1b0c0708 90010000 14000000 1c000000 00000000 03000000 00410a41 0b000000')
-// CHECK-NEXT: ),
-
-// CHECK:      # Section 5
-// CHECK-NEXT: (('sh_name', 0x0000000c) # '.rela.eh_frame'
-// CHECK-NEXT:  ('sh_type', 0x00000004)
-// CHECK-NEXT:  ('sh_flags', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_offset', 0x0000000000000390)
-// CHECK-NEXT:  ('sh_size', 0x0000000000000018)
-// CHECK-NEXT:  ('sh_link', 0x00000007)
-// CHECK-NEXT:  ('sh_info', 0x00000004)
-// CHECK-NEXT:  ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:  ('sh_entsize', 0x0000000000000018)
-// CHECK-NEXT:  ('_relocations', [
-// CHECK-NEXT:   # Relocation 0
-// CHECK-NEXT:   (('r_offset', 0x0000000000000020)
-// CHECK-NEXT:    ('r_sym', 0x00000002)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:  ])
-// CHECK-NEXT: ),
+// CHECK:        Section {
+// CHECK:          Index: 4
+// CHECK-NEXT:     Name: .eh_frame
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x48
+// CHECK-NEXT:     Size: 48
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:       0x20 R_X86_64_PC32 .text 0x0
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     SectionData (
+// CHECK-NEXT:       0000: 14000000 00000000 017A5200 01781001
+// CHECK-NEXT:       0010: 1B0C0708 90010000 14000000 1C000000
+// CHECK-NEXT:       0020: 00000000 03000000 00410A41 0B000000
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
+// CHECK:        Section {
+// CHECK:          Index: 5
+// CHECK-NEXT:     Name: .rela.eh_frame
+// CHECK-NEXT:     Type: SHT_RELA
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x390
+// CHECK-NEXT:     Size: 24
+// CHECK-NEXT:     Link: 7
+// CHECK-NEXT:     Info: 4
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 24
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:     ]
+// CHECK:        }
diff --git a/test/MC/ELF/cfi-restore.s b/test/MC/ELF/cfi-restore.s
index 0fc3129..6c25d5b 100644
--- a/test/MC/ELF/cfi-restore.s
+++ b/test/MC/ELF/cfi-restore.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sr -sd | FileCheck %s
 
 f:
 	.cfi_startproc
@@ -7,36 +7,42 @@ f:
         nop
 	.cfi_endproc
 
-// CHECK:       # Section 4
-// CHECK-NEXT:  (('sh_name', 0x00000011) # '.eh_frame'
-// CHECK-NEXT:   ('sh_type', 0x00000001)
-// CHECK-NEXT:   ('sh_flags', 0x0000000000000002)
-// CHECK-NEXT:   ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_offset', 0x0000000000000048)
-// CHECK-NEXT:   ('sh_size', 0x0000000000000030)
-// CHECK-NEXT:   ('sh_link', 0x00000000)
-// CHECK-NEXT:   ('sh_info', 0x00000000)
-// CHECK-NEXT:   ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:   ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT:   ('_section_data', '14000000 00000000 017a5200 01781001 1b0c0708 90010000 14000000 1c000000 00000000 02000000 0041c600 00000000')
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Section 5
-// CHECK-NEXT:  (('sh_name', 0x0000000c) # '.rela.eh_frame'
-// CHECK-NEXT:   ('sh_type', 0x00000004)
-// CHECK-NEXT:   ('sh_flags', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_offset', 0x0000000000000390)
-// CHECK-NEXT:   ('sh_size', 0x0000000000000018)
-// CHECK-NEXT:   ('sh_link', 0x00000007)
-// CHECK-NEXT:   ('sh_info', 0x00000004)
-// CHECK-NEXT:   ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:   ('sh_entsize', 0x0000000000000018)
-// CHECK-NEXT:   ('_relocations', [
-// CHECK-NEXT:    # Relocation 0
-// CHECK-NEXT:    (('r_offset', 0x0000000000000020)
-// CHECK-NEXT:     ('r_sym', 0x00000002)
-// CHECK-NEXT:     ('r_type', 0x00000002)
-// CHECK-NEXT:     ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:    ),
-// CHECK-NEXT:   ])
-// CHECK-NEXT:  ),
+// CHECK:        Section {
+// CHECK:          Index: 4
+// CHECK-NEXT:     Name: .eh_frame
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x48
+// CHECK-NEXT:     Size: 48
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:       0x20 R_X86_64_PC32 .text 0x0
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     SectionData (
+// CHECK-NEXT:       0000: 14000000 00000000 017A5200 01781001
+// CHECK-NEXT:       0010: 1B0C0708 90010000 14000000 1C000000
+// CHECK-NEXT:       0020: 00000000 02000000 0041C600 00000000
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Section {
+// CHECK-NEXT:     Index: 5
+// CHECK-NEXT:     Name: .rela.eh_frame
+// CHECK-NEXT:     Type: SHT_RELA
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x390
+// CHECK-NEXT:     Size: 24
+// CHECK-NEXT:     Link: 7
+// CHECK-NEXT:     Info: 4
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 24
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:     ]
+// CHECK:        }
diff --git a/test/MC/ELF/cfi-same-value.s b/test/MC/ELF/cfi-same-value.s
index 4c80a0a..075c6b9 100644
--- a/test/MC/ELF/cfi-same-value.s
+++ b/test/MC/ELF/cfi-same-value.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sr -sd | FileCheck %s
 
 f:
 	.cfi_startproc
@@ -7,36 +7,42 @@ f:
         nop
 	.cfi_endproc
 
-// CHECK:       # Section 4
-// CHECK-NEXT:  (('sh_name', 0x00000011) # '.eh_frame'
-// CHECK-NEXT:   ('sh_type', 0x00000001)
-// CHECK-NEXT:   ('sh_flags', 0x0000000000000002)
-// CHECK-NEXT:   ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_offset', 0x0000000000000048)
-// CHECK-NEXT:   ('sh_size', 0x0000000000000030)
-// CHECK-NEXT:   ('sh_link', 0x00000000)
-// CHECK-NEXT:   ('sh_info', 0x00000000)
-// CHECK-NEXT:   ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:   ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT:   ('_section_data', '14000000 00000000 017a5200 01781001 1b0c0708 90010000 14000000 1c000000 00000000 02000000 00410806 00000000')
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Section 5
-// CHECK-NEXT:  (('sh_name', 0x0000000c) # '.rela.eh_frame'
-// CHECK-NEXT:   ('sh_type', 0x00000004)
-// CHECK-NEXT:   ('sh_flags', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_offset', 0x0000000000000390)
-// CHECK-NEXT:   ('sh_size', 0x0000000000000018)
-// CHECK-NEXT:   ('sh_link', 0x00000007)
-// CHECK-NEXT:   ('sh_info', 0x00000004)
-// CHECK-NEXT:   ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:   ('sh_entsize', 0x0000000000000018)
-// CHECK-NEXT:   ('_relocations', [
-// CHECK-NEXT:    # Relocation 0
-// CHECK-NEXT:    (('r_offset', 0x0000000000000020)
-// CHECK-NEXT:     ('r_sym', 0x00000002)
-// CHECK-NEXT:     ('r_type', 0x00000002)
-// CHECK-NEXT:     ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:    ),
-// CHECK-NEXT:   ])
-// CHECK-NEXT:  ),
+// CHECK:        Section {
+// CHECK:          Index: 4
+// CHECK-NEXT:     Name: .eh_frame
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x48
+// CHECK-NEXT:     Size: 48
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:       0x20 R_X86_64_PC32 .text 0x0
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     SectionData (
+// CHECK-NEXT:       0000: 14000000 00000000 017A5200 01781001
+// CHECK-NEXT:       0010: 1B0C0708 90010000 14000000 1C000000
+// CHECK-NEXT:       0020: 00000000 02000000 00410806 00000000
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Section {
+// CHECK-NEXT:     Index: 5
+// CHECK-NEXT:     Name: .rela.eh_frame
+// CHECK-NEXT:     Type: SHT_RELA
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x390
+// CHECK-NEXT:     Size: 24
+// CHECK-NEXT:     Link: 7
+// CHECK-NEXT:     Info: 4
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 24
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:     ]
+// CHECK:        }
diff --git a/test/MC/ELF/cfi-sections.s b/test/MC/ELF/cfi-sections.s
index b256bbf..15a79e5 100644
--- a/test/MC/ELF/cfi-sections.s
+++ b/test/MC/ELF/cfi-sections.s
@@ -1,5 +1,5 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck -check-prefix=ELF_64 %s
-// RUN: llvm-mc -filetype=obj -triple i686-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck -check-prefix=ELF_32 %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sd | FileCheck -check-prefix=ELF_64 %s
+// RUN: llvm-mc -filetype=obj -triple i686-pc-linux-gnu %s -o - | llvm-readobj -s -sd | FileCheck -check-prefix=ELF_32 %s
 
 .cfi_sections .debug_frame
 
@@ -13,26 +13,43 @@ f2:
         nop
         .cfi_endproc
 
-// ELF_64:      (('sh_name', 0x00000011) # '.debug_frame'
-// ELF_64-NEXT:  ('sh_type', 0x00000001)
-// ELF_64-NEXT:  ('sh_flags', 0x0000000000000000)
-// ELF_64-NEXT:  ('sh_addr', 0x0000000000000000)
-// ELF_64-NEXT:  ('sh_offset', 0x0000000000000048)
-// ELF_64-NEXT:  ('sh_size', 0x0000000000000048)
-// ELF_64-NEXT:  ('sh_link', 0x00000000)
-// ELF_64-NEXT:  ('sh_info', 0x00000000)
-// ELF_64-NEXT:  ('sh_addralign', 0x0000000000000008)
-// ELF_64-NEXT:  ('sh_entsize', 0x0000000000000000)
-// ELF_64-NEXT:  ('_section_data', '14000000 ffffffff 01000178 100c0708 90010000 00000000 14000000 00000000 00000000 00000000 01000000 00000000 14000000 00000000 00000000 00000000 01000000 00000000')
+// ELF_64:        Section {
+// ELF_64:          Name: .debug_frame
+// ELF_64-NEXT:     Type: SHT_PROGBITS
+// ELF_64-NEXT:     Flags [
+// ELF_64-NEXT:     ]
+// ELF_64-NEXT:     Address: 0x0
+// ELF_64-NEXT:     Offset: 0x48
+// ELF_64-NEXT:     Size: 72
+// ELF_64-NEXT:     Link: 0
+// ELF_64-NEXT:     Info: 0
+// ELF_64-NEXT:     AddressAlignment: 8
+// ELF_64-NEXT:     EntrySize: 0
+// ELF_64-NEXT:     SectionData (
+// ELF_64-NEXT:       0000: 14000000 FFFFFFFF 01000178 100C0708
+// ELF_64-NEXT:       0010: 90010000 00000000 14000000 00000000
+// ELF_64-NEXT:       0020: 00000000 00000000 01000000 00000000
+// ELF_64-NEXT:       0030: 14000000 00000000 00000000 00000000
+// ELF_64-NEXT:       0040: 01000000 00000000
+// ELF_64-NEXT:     )
+// ELF_64-NEXT:   }
 
-// ELF_32:      (('sh_name', 0x00000010) # '.debug_frame'
-// ELF_32-NEXT:  ('sh_type', 0x00000001)
-// ELF_32-NEXT:  ('sh_flags', 0x00000000)
-// ELF_32-NEXT:  ('sh_addr', 0x00000000)
-// ELF_32-NEXT:  ('sh_offset', 0x00000038)
-// ELF_32-NEXT:  ('sh_size', 0x00000034)
-// ELF_32-NEXT:  ('sh_link', 0x00000000)
-// ELF_32-NEXT:  ('sh_info', 0x00000000)
-// ELF_32-NEXT:  ('sh_addralign', 0x00000004)
-// ELF_32-NEXT:  ('sh_entsize', 0x00000000)
-// ELF_32-NEXT:  ('_section_data', '10000000 ffffffff 0100017c 080c0404 88010000 0c000000 00000000 00000000 01000000 0c000000 00000000 01000000 01000000')
+// ELF_32:        Section {
+// ELF_32:          Name: .debug_frame
+// ELF_32-NEXT:     Type: SHT_PROGBITS
+// ELF_32-NEXT:     Flags [
+// ELF_32-NEXT:     ]
+// ELF_32-NEXT:     Address: 0x0
+// ELF_32-NEXT:     Offset: 0x38
+// ELF_32-NEXT:     Size: 52
+// ELF_32-NEXT:     Link: 0
+// ELF_32-NEXT:     Info: 0
+// ELF_32-NEXT:     AddressAlignment: 4
+// ELF_32-NEXT:     EntrySize: 0
+// ELF_32-NEXT:     SectionData (
+// ELF_32-NEXT:       0000: 10000000 FFFFFFFF 0100017C 080C0404
+// ELF_32-NEXT:       0010: 88010000 0C000000 00000000 00000000
+// ELF_32-NEXT:       0020: 01000000 0C000000 00000000 01000000
+// ELF_32-NEXT:       0030: 01000000
+// ELF_32-NEXT:     )
+// ELF_32-NEXT:   }
diff --git a/test/MC/ELF/cfi-signal-frame.s b/test/MC/ELF/cfi-signal-frame.s
index cf6d160..0233119 100644
--- a/test/MC/ELF/cfi-signal-frame.s
+++ b/test/MC/ELF/cfi-signal-frame.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sd | FileCheck %s
 
 f:
         .cfi_startproc
@@ -9,15 +9,25 @@ g:
         .cfi_startproc
         .cfi_endproc
 
-// CHECK:      (('sh_name', 0x00000011) # '.eh_frame'
-// CHECK-NEXT:  ('sh_type', 0x00000001)
-// CHECK-NEXT:  ('sh_flags', 0x0000000000000002)
-// CHECK-NEXT:  ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_offset', 0x0000000000000040)
-// CHECK-NEXT:  ('sh_size', 0x0000000000000058)
-// CHECK-NEXT:  ('sh_link', 0x00000000)
-// CHECK-NEXT:  ('sh_info', 0x00000000)
-// CHECK-NEXT:  ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:  ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT:  ('_section_data', '14000000 00000000 017a5253 00017810 011b0c07 08900100 10000000 1c000000 00000000 00000000 00000000 14000000 00000000 017a5200 01781001 1b0c0708 90010000 10000000 1c000000 00000000 00000000 00000000')
-// CHECK-NEXT: ),
+// CHECK:        Section {
+// CHECK:          Name: .eh_frame
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x40
+// CHECK-NEXT:     Size: 88
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:     SectionData (
+// CHECK-NEXT:       0000: 14000000 00000000 017A5253 00017810
+// CHECK-NEXT:       0010: 011B0C07 08900100 10000000 1C000000
+// CHECK-NEXT:       0020: 00000000 00000000 00000000 14000000
+// CHECK-NEXT:       0030: 00000000 017A5200 01781001 1B0C0708
+// CHECK-NEXT:       0040: 90010000 10000000 1C000000 00000000
+// CHECK-NEXT:       0050: 00000000 00000000
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
diff --git a/test/MC/ELF/cfi-undefined.s b/test/MC/ELF/cfi-undefined.s
index 28049fa..c83b47c 100644
--- a/test/MC/ELF/cfi-undefined.s
+++ b/test/MC/ELF/cfi-undefined.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sr -sd | FileCheck %s
 
 f:
 	.cfi_startproc
@@ -6,36 +6,43 @@ f:
 	.cfi_undefined %rbp
         nop
 	.cfi_endproc
-// CHECK:  # Section 4
-// CHECK-NEXT:  (('sh_name', 0x00000011) # '.eh_frame'
-// CHECK-NEXT:   ('sh_type', 0x00000001)
-// CHECK-NEXT:   ('sh_flags', 0x0000000000000002)
-// CHECK-NEXT:   ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_offset', 0x0000000000000048)
-// CHECK-NEXT:   ('sh_size', 0x0000000000000030)
-// CHECK-NEXT:   ('sh_link', 0x00000000)
-// CHECK-NEXT:   ('sh_info', 0x00000000)
-// CHECK-NEXT:   ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:   ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT:   ('_section_data', '14000000 00000000 017a5200 01781001 1b0c0708 90010000 14000000 1c000000 00000000 02000000 00410706 00000000')
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Section 5
-// CHECK-NEXT:  (('sh_name', 0x0000000c) # '.rela.eh_frame'
-// CHECK-NEXT:   ('sh_type', 0x00000004)
-// CHECK-NEXT:   ('sh_flags', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_offset', 0x0000000000000390)
-// CHECK-NEXT:   ('sh_size', 0x0000000000000018)
-// CHECK-NEXT:   ('sh_link', 0x00000007)
-// CHECK-NEXT:   ('sh_info', 0x00000004)
-// CHECK-NEXT:   ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:   ('sh_entsize', 0x0000000000000018)
-// CHECK-NEXT:   ('_relocations', [
-// CHECK-NEXT:    # Relocation 0
-// CHECK-NEXT:    (('r_offset', 0x0000000000000020)
-// CHECK-NEXT:     ('r_sym', 0x00000002)
-// CHECK-NEXT:     ('r_type', 0x00000002)
-// CHECK-NEXT:     ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:    ),
-// CHECK-NEXT:   ])
-// CHECK-NEXT:  ),
+
+// CHECK:        Section {
+// CHECK:          Index: 4
+// CHECK-NEXT:     Name: .eh_frame
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x48
+// CHECK-NEXT:     Size: 48
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:       0x20 R_X86_64_PC32 .text 0x0
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     SectionData (
+// CHECK-NEXT:       0000: 14000000 00000000 017A5200 01781001
+// CHECK-NEXT:       0010: 1B0C0708 90010000 14000000 1C000000
+// CHECK-NEXT:       0020: 00000000 02000000 00410706 00000000
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Section {
+// CHECK-NEXT:     Index: 5
+// CHECK-NEXT:     Name: .rela.eh_frame
+// CHECK-NEXT:     Type: SHT_RELA
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x390
+// CHECK-NEXT:     Size: 24
+// CHECK-NEXT:     Link: 7
+// CHECK-NEXT:     Info: 4
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 24
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:     ]
+// CHECK:        }
diff --git a/test/MC/ELF/cfi-zero-addr-delta.s b/test/MC/ELF/cfi-zero-addr-delta.s
index 9e818e6..4ac0e34 100644
--- a/test/MC/ELF/cfi-zero-addr-delta.s
+++ b/test/MC/ELF/cfi-zero-addr-delta.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sr -sd | FileCheck %s
 
 // Test that we don't produce a DW_CFA_advance_loc 0
 
@@ -14,35 +14,41 @@ f:
         nop
 	.cfi_endproc
 
-// CHECK:      (('sh_name', 0x00000011) # '.eh_frame'
-// CHECK-NEXT:  ('sh_type', 0x00000001)
-// CHECK-NEXT:  ('sh_flags', 0x0000000000000002)
-// CHECK-NEXT:  ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_offset', 0x0000000000000048)
-// CHECK-NEXT:  ('sh_size', 0x0000000000000038)
-// CHECK-NEXT:  ('sh_link', 0x00000000)
-// CHECK-NEXT:  ('sh_info', 0x00000000)
-// CHECK-NEXT:  ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:  ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT:  ('_section_data', '14000000 00000000 017a5200 01781001 1b0c0708 90010000 1c000000 1c000000 00000000 04000000 00410e10 410a0e08 410b0000 00000000')
-// CHECK-NEXT: ),
+// CHECK:        Section {
+// CHECK:          Name: .eh_frame
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x48
+// CHECK-NEXT:     Size: 56
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:       0x20 R_X86_64_PC32 .text 0x0
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     SectionData (
+// CHECK-NEXT:       0000: 14000000 00000000 017A5200 01781001
+// CHECK-NEXT:       0010: 1B0C0708 90010000 1C000000 1C000000
+// CHECK-NEXT:       0020: 00000000 04000000 00410E10 410A0E08
+// CHECK-NEXT:       0030: 410B0000 00000000
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
 
-// CHECK:      (('sh_name', 0x0000000c) # '.rela.eh_frame'
-// CHECK-NEXT:  ('sh_type', 0x00000004)
-// CHECK-NEXT:  ('sh_flags', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_offset', 0x0000000000000398)
-// CHECK-NEXT:  ('sh_size', 0x0000000000000018)
-// CHECK-NEXT:  ('sh_link', 0x00000007)
-// CHECK-NEXT:  ('sh_info', 0x00000004)
-// CHECK-NEXT:  ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:  ('sh_entsize', 0x0000000000000018)
-// CHECK-NEXT:  ('_relocations', [
-// CHECK-NEXT:   # Relocation 0
-// CHECK-NEXT:   (('r_offset', 0x0000000000000020)
-// CHECK-NEXT:    ('r_sym', 0x00000002)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:  ])
-// CHECK-NEXT: ),
+// CHECK:        Section {
+// CHECK:          Name: .rela.eh_frame
+// CHECK-NEXT:     Type: SHT_RELA
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x398
+// CHECK-NEXT:     Size: 24
+// CHECK-NEXT:     Link: 7
+// CHECK-NEXT:     Info: 4
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 24
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:     ]
diff --git a/test/MC/ELF/cfi.s b/test/MC/ELF/cfi.s
index 9320894..98f4fa9 100644
--- a/test/MC/ELF/cfi.s
+++ b/test/MC/ELF/cfi.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sr -sd | FileCheck %s
 
 f1:
         .cfi_startproc
@@ -212,463 +212,220 @@ f36:
         nop
         .cfi_endproc
 
-// CHECK:      # Section 4
-// CHECK-NEXT: (('sh_name', 0x00000011) # '.eh_frame'
-// CHECK-NEXT:  ('sh_type', 0x00000001)
-// CHECK-NEXT:  ('sh_flags', 0x0000000000000002)
-// CHECK-NEXT:  ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_offset', 0x0000000000000068)
-// CHECK-NEXT:  ('sh_size', 0x00000000000006c8)
-// CHECK-NEXT:  ('sh_link', 0x00000000)
-// CHECK-NEXT:  ('sh_info', 0x00000000)
-// CHECK-NEXT:  ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:  ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT:  ('_section_data', '14000000 00000000 017a4c52 00017810 02031b0c 07089001 14000000 1c000000 00000000 01000000 04000000 00000000 20000000 00000000 017a504c 52000178 100b0000 00000000 00000003 1b0c0708 90010000 14000000 28000000 00000000 01000000 04000000 00000000 14000000 70000000 00000000 01000000 04000000 00000000 20000000 00000000 017a504c 52000178 100b0000 00000000 00000002 1b0c0708 90010000 10000000 28000000 00000000 01000000 02000000 18000000 00000000 017a5052 00017810 04020000 1b0c0708 90010000 10000000 20000000 00000000 01000000 00000000 18000000 00000000 017a5052 00017810 06030000 00001b0c 07089001 10000000 20000000 00000000 01000000 00000000 1c000000 00000000 017a5052 00017810 0a040000 00000000 00001b0c 07089001 10000000 24000000 00000000 01000000 00000000 18000000 00000000 017a5052 00017810 040a0000 1b0c0708 90010000 10000000 20000000 00000000 01000000 00000000 18000000 00000000 017a5052 00017810 060b0000 00001b0c 07089001 10000000 20000000 00000000 01000000 00000000 1c000000 00000000 017a5052 00017810 0a0c0000 00000000 00001b0c 07089001 10000000 24000000 00000000 01000000 00000000 1c000000 00000000 017a5052 00017810 0a080000 00000000 00001b0c 07089001 10000000 24000000 00000000 01000000 00000000 1c000000 00000000 017a5052 00017810 0a100000 00000000 00001b0c 07089001 10000000 24000000 00000000 01000000 00000000 18000000 00000000 017a5052 00017810 04120000 1b0c0708 90010000 10000000 20000000 00000000 01000000 00000000 18000000 00000000 017a5052 00017810 06130000 00001b0c 07089001 10000000 20000000 00000000 01000000 00000000 1c000000 00000000 017a5052 00017810 0a140000 00000000 00001b0c 07089001 10000000 24000000 00000000 01000000 00000000 18000000 00000000 017a5052 00017810 041a0000 1b0c0708 90010000 10000000 20000000 00000000 01000000 00000000 18000000 00000000 017a5052 00017810 061b0000 00001b0c 07089001 10000000 20000000 00000000 01000000 00000000 1c000000 00000000 017a5052 00017810 0a1c0000 00000000 00001b0c 07089001 10000000 24000000 
00000000 01000000 00000000 1c000000 00000000 017a5052 00017810 0a180000 00000000 00001b0c 07089001 10000000 24000000 00000000 01000000 00000000 1c000000 00000000 017a5052 00017810 0a800000 00000000 00001b0c 07089001 10000000 24000000 00000000 01000000 00000000 18000000 00000000 017a5052 00017810 04820000 1b0c0708 90010000 10000000 20000000 00000000 01000000 00000000 18000000 00000000 017a5052 00017810 06830000 00001b0c 07089001 10000000 20000000 00000000 01000000 00000000 1c000000 00000000 017a5052 00017810 0a840000 00000000 00001b0c 07089001 10000000 24000000 00000000 01000000 00000000 18000000 00000000 017a5052 00017810 048a0000 1b0c0708 90010000 10000000 20000000 00000000 01000000 00000000 18000000 00000000 017a5052 00017810 068b0000 00001b0c 07089001 10000000 20000000 00000000 01000000 00000000 1c000000 00000000 017a5052 00017810 0a8c0000 00000000 00001b0c 07089001 10000000 24000000 00000000 01000000 00000000 1c000000 00000000 017a5052 00017810 0a880000 00000000 00001b0c 07089001 10000000 24000000 00000000 01000000 00000000 1c000000 00000000 017a5052 00017810 0a900000 00000000 00001b0c 07089001 10000000 24000000 00000000 01000000 00000000 18000000 00000000 017a5052 00017810 04920000 1b0c0708 90010000 10000000 20000000 00000000 01000000 00000000 18000000 00000000 017a5052 00017810 06930000 00001b0c 07089001 10000000 20000000 00000000 01000000 00000000 1c000000 00000000 017a5052 00017810 0a940000 00000000 00001b0c 07089001 10000000 24000000 00000000 01000000 00000000 18000000 00000000 017a5052 00017810 049a0000 1b0c0708 90010000 10000000 20000000 00000000 01000000 00000000 18000000 00000000 017a5052 00017810 069b0000 00001b0c 07089001 10000000 20000000 00000000 01000000 00000000 1c000000 00000000 017a5052 00017810 0a9c0000 00000000 00001b0c 07089001 10000000 24000000 00000000 01000000 00000000 1c000000 00000000 017a5052 00017810 0a980000 00000000 00001b0c 07089001 10000000 24000000 00000000 01000000 00000000')
-// CHECK-NEXT: ),
-
-// CHECK:        # Section 5
-// CHECK-NEXT: (('sh_name', 0x0000000c) # '.rela.eh_frame'
-// CHECK-NEXT:  ('sh_type', 0x00000004)
-// CHECK-NEXT:  ('sh_flags', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_offset', 0x0000000000000e30)
-// CHECK-NEXT:  ('sh_size', 0x00000000000006c0)
-// CHECK-NEXT:  ('sh_link', 0x00000007)
-// CHECK-NEXT:  ('sh_info', 0x00000004)
-// CHECK-NEXT:  ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:  ('sh_entsize', 0x0000000000000018)
-// CHECK-NEXT:  ('_relocations', [
-// CHECK-NEXT:   # Relocation 0
-// CHECK-NEXT:   (('r_offset', 0x0000000000000020)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 1
-// CHECK-NEXT:   (('r_offset', 0x0000000000000029)
-// CHECK-NEXT:    ('r_sym', 0x00000028)
-// CHECK-NEXT:    ('r_type', 0x0000000a)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 2
-// CHECK-NEXT:   (('r_offset', 0x0000000000000043)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x00000001)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 3
-// CHECK-NEXT:   (('r_offset', 0x000000000000005c)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000001)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 4
-// CHECK-NEXT:   (('r_offset', 0x0000000000000065)
-// CHECK-NEXT:    ('r_sym', 0x00000028)
-// CHECK-NEXT:    ('r_type', 0x0000000a)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 5
-// CHECK-NEXT:   (('r_offset', 0x0000000000000074)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000002)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 6
-// CHECK-NEXT:   (('r_offset', 0x000000000000007d)
-// CHECK-NEXT:    ('r_sym', 0x00000028)
-// CHECK-NEXT:    ('r_type', 0x0000000a)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 7
-// CHECK-NEXT:   (('r_offset', 0x0000000000000097)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x00000001)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 8
-// CHECK-NEXT:   (('r_offset', 0x00000000000000b0)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000003)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 9
-// CHECK-NEXT:   (('r_offset', 0x00000000000000b9)
-// CHECK-NEXT:    ('r_sym', 0x00000028)
-// CHECK-NEXT:    ('r_type', 0x0000000c)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 10
-// CHECK-NEXT:   (('r_offset', 0x00000000000000ce)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x0000000c)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 11
-// CHECK-NEXT:   (('r_offset', 0x00000000000000e0)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000004)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 12
-// CHECK-NEXT:   (('r_offset', 0x00000000000000fe)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x0000000a)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 13
-// CHECK-NEXT:   (('r_offset', 0x0000000000000110)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000005)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 14
-// CHECK-NEXT:   (('r_offset', 0x000000000000012e)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x00000001)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 15
-// CHECK-NEXT:   (('r_offset', 0x0000000000000144)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000006)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 16
-// CHECK-NEXT:   (('r_offset', 0x0000000000000162)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x0000000c)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 17
-// CHECK-NEXT:   (('r_offset', 0x0000000000000174)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000007)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 18
-// CHECK-NEXT:   (('r_offset', 0x0000000000000192)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x0000000a)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 19
-// CHECK-NEXT:   (('r_offset', 0x00000000000001a4)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000008)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 20
-// CHECK-NEXT:   (('r_offset', 0x00000000000001c2)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x00000001)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 21
-// CHECK-NEXT:   (('r_offset', 0x00000000000001d8)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000009)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 22
-// CHECK-NEXT:   (('r_offset', 0x00000000000001f6)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x00000001)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 23
-// CHECK-NEXT:   (('r_offset', 0x000000000000020c)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x000000000000000a)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 24
-// CHECK-NEXT:   (('r_offset', 0x000000000000022a)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x00000018)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 25
-// CHECK-NEXT:   (('r_offset', 0x0000000000000240)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x000000000000000b)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 26
-// CHECK-NEXT:   (('r_offset', 0x000000000000025e)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x0000000d)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 27
-// CHECK-NEXT:   (('r_offset', 0x0000000000000270)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x000000000000000c)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 28
-// CHECK-NEXT:   (('r_offset', 0x000000000000028e)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 29
-// CHECK-NEXT:   (('r_offset', 0x00000000000002a0)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x000000000000000d)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 30
-// CHECK-NEXT:   (('r_offset', 0x00000000000002be)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x00000018)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 31
-// CHECK-NEXT:   (('r_offset', 0x00000000000002d4)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x000000000000000e)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 32
-// CHECK-NEXT:   (('r_offset', 0x00000000000002f2)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x0000000d)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 33
-// CHECK-NEXT:   (('r_offset', 0x0000000000000304)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x000000000000000f)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 34
-// CHECK-NEXT:   (('r_offset', 0x0000000000000322)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 35
-// CHECK-NEXT:   (('r_offset', 0x0000000000000334)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000010)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 36
-// CHECK-NEXT:   (('r_offset', 0x0000000000000352)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x00000018)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 37
-// CHECK-NEXT:   (('r_offset', 0x0000000000000368)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000011)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 38
-// CHECK-NEXT:   (('r_offset', 0x0000000000000386)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x00000018)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 39
-// CHECK-NEXT:   (('r_offset', 0x000000000000039c)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000012)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 40
-// CHECK-NEXT:   (('r_offset', 0x00000000000003ba)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x00000001)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 41
-// CHECK-NEXT:   (('r_offset', 0x00000000000003d0)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000013)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 42
-// CHECK-NEXT:   (('r_offset', 0x00000000000003ee)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x0000000c)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 43
-// CHECK-NEXT:   (('r_offset', 0x0000000000000400)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000014)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 44
-// CHECK-NEXT:   (('r_offset', 0x000000000000041e)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x0000000a)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 45
-// CHECK-NEXT:   (('r_offset', 0x0000000000000430)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000015)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 46
-// CHECK-NEXT:   (('r_offset', 0x000000000000044e)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x00000001)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 47
-// CHECK-NEXT:   (('r_offset', 0x0000000000000464)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000016)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 48
-// CHECK-NEXT:   (('r_offset', 0x0000000000000482)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x0000000c)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 49
-// CHECK-NEXT:   (('r_offset', 0x0000000000000494)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000017)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 50
-// CHECK-NEXT:   (('r_offset', 0x00000000000004b2)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x0000000a)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 51
-// CHECK-NEXT:   (('r_offset', 0x00000000000004c4)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000018)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 52
-// CHECK-NEXT:   (('r_offset', 0x00000000000004e2)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x00000001)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 53
-// CHECK-NEXT:   (('r_offset', 0x00000000000004f8)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000019)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 54
-// CHECK-NEXT:   (('r_offset', 0x0000000000000516)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x00000001)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 55
-// CHECK-NEXT:   (('r_offset', 0x000000000000052c)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x000000000000001a)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 56
-// CHECK-NEXT:   (('r_offset', 0x000000000000054a)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x00000018)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 57
-// CHECK-NEXT:   (('r_offset', 0x0000000000000560)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x000000000000001b)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 58
-// CHECK-NEXT:   (('r_offset', 0x000000000000057e)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x0000000d)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 59
-// CHECK-NEXT:   (('r_offset', 0x0000000000000590)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x000000000000001c)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 60
-// CHECK-NEXT:   (('r_offset', 0x00000000000005ae)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 61
-// CHECK-NEXT:   (('r_offset', 0x00000000000005c0)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x000000000000001d)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 62
-// CHECK-NEXT:   (('r_offset', 0x00000000000005de)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x00000018)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 63
-// CHECK-NEXT:   (('r_offset', 0x00000000000005f4)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x000000000000001e)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 64
-// CHECK-NEXT:   (('r_offset', 0x0000000000000612)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x0000000d)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 65
-// CHECK-NEXT:   (('r_offset', 0x0000000000000624)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x000000000000001f)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 66
-// CHECK-NEXT:   (('r_offset', 0x0000000000000642)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 67
-// CHECK-NEXT:   (('r_offset', 0x0000000000000654)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000020)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 68
-// CHECK-NEXT:   (('r_offset', 0x0000000000000672)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x00000018)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 69
-// CHECK-NEXT:   (('r_offset', 0x0000000000000688)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000021)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 70
-// CHECK-NEXT:   (('r_offset', 0x00000000000006a6)
-// CHECK-NEXT:    ('r_sym', 0x00000029)
-// CHECK-NEXT:    ('r_type', 0x00000018)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 71
-// CHECK-NEXT:   (('r_offset', 0x00000000000006bc)
-// CHECK-NEXT:    ('r_sym', 0x00000024)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000022)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:  ])
-// CHECK-NEXT: ),
+// CHECK:        Section {
+// CHECK:          Index: 4
+// CHECK-NEXT:     Name: .eh_frame
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x68
+// CHECK-NEXT:     Size: 1736
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:       0x20  R_X86_64_PC32 .text 0x0
+// CHECK-NEXT:       0x29  R_X86_64_32   bar   0x0
+// CHECK-NEXT:       0x43  R_X86_64_64   foo   0x0
+// CHECK-NEXT:       0x5C  R_X86_64_PC32 .text 0x1
+// CHECK-NEXT:       0x65  R_X86_64_32   bar   0x0
+// CHECK-NEXT:       0x74  R_X86_64_PC32 .text 0x2
+// CHECK-NEXT:       0x7D  R_X86_64_32   bar   0x0
+// CHECK-NEXT:       0x97  R_X86_64_64   foo   0x0
+// CHECK-NEXT:       0xB0  R_X86_64_PC32 .text 0x3
+// CHECK-NEXT:       0xB9  R_X86_64_16   bar   0x0
+// CHECK-NEXT:       0xCE  R_X86_64_16   foo   0x0
+// CHECK-NEXT:       0xE0  R_X86_64_PC32 .text 0x4
+// CHECK-NEXT:       0xFE  R_X86_64_32   foo   0x0
+// CHECK-NEXT:       0x110 R_X86_64_PC32 .text 0x5
+// CHECK-NEXT:       0x12E R_X86_64_64   foo   0x0
+// CHECK-NEXT:       0x144 R_X86_64_PC32 .text 0x6
+// CHECK-NEXT:       0x162 R_X86_64_16   foo   0x0
+// CHECK-NEXT:       0x174 R_X86_64_PC32 .text 0x7
+// CHECK-NEXT:       0x192 R_X86_64_32   foo   0x0
+// CHECK-NEXT:       0x1A4 R_X86_64_PC32 .text 0x8
+// CHECK-NEXT:       0x1C2 R_X86_64_64   foo   0x0
+// CHECK-NEXT:       0x1D8 R_X86_64_PC32 .text 0x9
+// CHECK-NEXT:       0x1F6 R_X86_64_64   foo   0x0
+// CHECK-NEXT:       0x20C R_X86_64_PC32 .text 0xA
+// CHECK-NEXT:       0x22A R_X86_64_PC64 foo   0x0
+// CHECK-NEXT:       0x240 R_X86_64_PC32 .text 0xB
+// CHECK-NEXT:       0x25E R_X86_64_PC16 foo   0x0
+// CHECK-NEXT:       0x270 R_X86_64_PC32 .text 0xC
+// CHECK-NEXT:       0x28E R_X86_64_PC32 foo   0x0
+// CHECK-NEXT:       0x2A0 R_X86_64_PC32 .text 0xD
+// CHECK-NEXT:       0x2BE R_X86_64_PC64 foo   0x0
+// CHECK-NEXT:       0x2D4 R_X86_64_PC32 .text 0xE
+// CHECK-NEXT:       0x2F2 R_X86_64_PC16 foo   0x0
+// CHECK-NEXT:       0x304 R_X86_64_PC32 .text 0xF
+// CHECK-NEXT:       0x322 R_X86_64_PC32 foo   0x0
+// CHECK-NEXT:       0x334 R_X86_64_PC32 .text 0x10
+// CHECK-NEXT:       0x352 R_X86_64_PC64 foo   0x0
+// CHECK-NEXT:       0x368 R_X86_64_PC32 .text 0x11
+// CHECK-NEXT:       0x386 R_X86_64_PC64 foo   0x0
+// CHECK-NEXT:       0x39C R_X86_64_PC32 .text 0x12
+// CHECK-NEXT:       0x3BA R_X86_64_64   foo   0x0
+// CHECK-NEXT:       0x3D0 R_X86_64_PC32 .text 0x13
+// CHECK-NEXT:       0x3EE R_X86_64_16   foo   0x0
+// CHECK-NEXT:       0x400 R_X86_64_PC32 .text 0x14
+// CHECK-NEXT:       0x41E R_X86_64_32   foo   0x0
+// CHECK-NEXT:       0x430 R_X86_64_PC32 .text 0x15
+// CHECK-NEXT:       0x44E R_X86_64_64   foo   0x0
+// CHECK-NEXT:       0x464 R_X86_64_PC32 .text 0x16
+// CHECK-NEXT:       0x482 R_X86_64_16   foo   0x0
+// CHECK-NEXT:       0x494 R_X86_64_PC32 .text 0x17
+// CHECK-NEXT:       0x4B2 R_X86_64_32   foo   0x0
+// CHECK-NEXT:       0x4C4 R_X86_64_PC32 .text 0x18
+// CHECK-NEXT:       0x4E2 R_X86_64_64   foo   0x0
+// CHECK-NEXT:       0x4F8 R_X86_64_PC32 .text 0x19
+// CHECK-NEXT:       0x516 R_X86_64_64   foo   0x0
+// CHECK-NEXT:       0x52C R_X86_64_PC32 .text 0x1A
+// CHECK-NEXT:       0x54A R_X86_64_PC64 foo   0x0
+// CHECK-NEXT:       0x560 R_X86_64_PC32 .text 0x1B
+// CHECK-NEXT:       0x57E R_X86_64_PC16 foo   0x0
+// CHECK-NEXT:       0x590 R_X86_64_PC32 .text 0x1C
+// CHECK-NEXT:       0x5AE R_X86_64_PC32 foo   0x0
+// CHECK-NEXT:       0x5C0 R_X86_64_PC32 .text 0x1D
+// CHECK-NEXT:       0x5DE R_X86_64_PC64 foo   0x0
+// CHECK-NEXT:       0x5F4 R_X86_64_PC32 .text 0x1E
+// CHECK-NEXT:       0x612 R_X86_64_PC16 foo   0x0
+// CHECK-NEXT:       0x624 R_X86_64_PC32 .text 0x1F
+// CHECK-NEXT:       0x642 R_X86_64_PC32 foo   0x0
+// CHECK-NEXT:       0x654 R_X86_64_PC32 .text 0x20
+// CHECK-NEXT:       0x672 R_X86_64_PC64 foo   0x0
+// CHECK-NEXT:       0x688 R_X86_64_PC32 .text 0x21
+// CHECK-NEXT:       0x6A6 R_X86_64_PC64 foo   0x0
+// CHECK-NEXT:       0x6BC R_X86_64_PC32 .text 0x22
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     SectionData (
+// CHECK-NEXT:       0000: 14000000 00000000 017A4C52 00017810
+// CHECK-NEXT:       0010: 02031B0C 07089001 14000000 1C000000
+// CHECK-NEXT:       0020: 00000000 01000000 04000000 00000000
+// CHECK-NEXT:       0030: 20000000 00000000 017A504C 52000178
+// CHECK-NEXT:       0040: 100B0000 00000000 00000003 1B0C0708
+// CHECK-NEXT:       0050: 90010000 14000000 28000000 00000000
+// CHECK-NEXT:       0060: 01000000 04000000 00000000 14000000
+// CHECK-NEXT:       0070: 70000000 00000000 01000000 04000000
+// CHECK-NEXT:       0080: 00000000 20000000 00000000 017A504C
+// CHECK-NEXT:       0090: 52000178 100B0000 00000000 00000002
+// CHECK-NEXT:       00A0: 1B0C0708 90010000 10000000 28000000
+// CHECK-NEXT:       00B0: 00000000 01000000 02000000 18000000
+// CHECK-NEXT:       00C0: 00000000 017A5052 00017810 04020000
+// CHECK-NEXT:       00D0: 1B0C0708 90010000 10000000 20000000
+// CHECK-NEXT:       00E0: 00000000 01000000 00000000 18000000
+// CHECK-NEXT:       00F0: 00000000 017A5052 00017810 06030000
+// CHECK-NEXT:       0100: 00001B0C 07089001 10000000 20000000
+// CHECK-NEXT:       0110: 00000000 01000000 00000000 1C000000
+// CHECK-NEXT:       0120: 00000000 017A5052 00017810 0A040000
+// CHECK-NEXT:       0130: 00000000 00001B0C 07089001 10000000
+// CHECK-NEXT:       0140: 24000000 00000000 01000000 00000000
+// CHECK-NEXT:       0150: 18000000 00000000 017A5052 00017810
+// CHECK-NEXT:       0160: 040A0000 1B0C0708 90010000 10000000
+// CHECK-NEXT:       0170: 20000000 00000000 01000000 00000000
+// CHECK-NEXT:       0180: 18000000 00000000 017A5052 00017810
+// CHECK-NEXT:       0190: 060B0000 00001B0C 07089001 10000000
+// CHECK-NEXT:       01A0: 20000000 00000000 01000000 00000000
+// CHECK-NEXT:       01B0: 1C000000 00000000 017A5052 00017810
+// CHECK-NEXT:       01C0: 0A0C0000 00000000 00001B0C 07089001
+// CHECK-NEXT:       01D0: 10000000 24000000 00000000 01000000
+// CHECK-NEXT:       01E0: 00000000 1C000000 00000000 017A5052
+// CHECK-NEXT:       01F0: 00017810 0A080000 00000000 00001B0C
+// CHECK-NEXT:       0200: 07089001 10000000 24000000 00000000
+// CHECK-NEXT:       0210: 01000000 00000000 1C000000 00000000
+// CHECK-NEXT:       0220: 017A5052 00017810 0A100000 00000000
+// CHECK-NEXT:       0230: 00001B0C 07089001 10000000 24000000
+// CHECK-NEXT:       0240: 00000000 01000000 00000000 18000000
+// CHECK-NEXT:       0250: 00000000 017A5052 00017810 04120000
+// CHECK-NEXT:       0260: 1B0C0708 90010000 10000000 20000000
+// CHECK-NEXT:       0270: 00000000 01000000 00000000 18000000
+// CHECK-NEXT:       0280: 00000000 017A5052 00017810 06130000
+// CHECK-NEXT:       0290: 00001B0C 07089001 10000000 20000000
+// CHECK-NEXT:       02A0: 00000000 01000000 00000000 1C000000
+// CHECK-NEXT:       02B0: 00000000 017A5052 00017810 0A140000
+// CHECK-NEXT:       02C0: 00000000 00001B0C 07089001 10000000
+// CHECK-NEXT:       02D0: 24000000 00000000 01000000 00000000
+// CHECK-NEXT:       02E0: 18000000 00000000 017A5052 00017810
+// CHECK-NEXT:       02F0: 041A0000 1B0C0708 90010000 10000000
+// CHECK-NEXT:       0300: 20000000 00000000 01000000 00000000
+// CHECK-NEXT:       0310: 18000000 00000000 017A5052 00017810
+// CHECK-NEXT:       0320: 061B0000 00001B0C 07089001 10000000
+// CHECK-NEXT:       0330: 20000000 00000000 01000000 00000000
+// CHECK-NEXT:       0340: 1C000000 00000000 017A5052 00017810
+// CHECK-NEXT:       0350: 0A1C0000 00000000 00001B0C 07089001
+// CHECK-NEXT:       0360: 10000000 24000000 00000000 01000000
+// CHECK-NEXT:       0370: 00000000 1C000000 00000000 017A5052
+// CHECK-NEXT:       0380: 00017810 0A180000 00000000 00001B0C
+// CHECK-NEXT:       0390: 07089001 10000000 24000000 00000000
+// CHECK-NEXT:       03A0: 01000000 00000000 1C000000 00000000
+// CHECK-NEXT:       03B0: 017A5052 00017810 0A800000 00000000
+// CHECK-NEXT:       03C0: 00001B0C 07089001 10000000 24000000
+// CHECK-NEXT:       03D0: 00000000 01000000 00000000 18000000
+// CHECK-NEXT:       03E0: 00000000 017A5052 00017810 04820000
+// CHECK-NEXT:       03F0: 1B0C0708 90010000 10000000 20000000
+// CHECK-NEXT:       0400: 00000000 01000000 00000000 18000000
+// CHECK-NEXT:       0410: 00000000 017A5052 00017810 06830000
+// CHECK-NEXT:       0420: 00001B0C 07089001 10000000 20000000
+// CHECK-NEXT:       0430: 00000000 01000000 00000000 1C000000
+// CHECK-NEXT:       0440: 00000000 017A5052 00017810 0A840000
+// CHECK-NEXT:       0450: 00000000 00001B0C 07089001 10000000
+// CHECK-NEXT:       0460: 24000000 00000000 01000000 00000000
+// CHECK-NEXT:       0470: 18000000 00000000 017A5052 00017810
+// CHECK-NEXT:       0480: 048A0000 1B0C0708 90010000 10000000
+// CHECK-NEXT:       0490: 20000000 00000000 01000000 00000000
+// CHECK-NEXT:       04A0: 18000000 00000000 017A5052 00017810
+// CHECK-NEXT:       04B0: 068B0000 00001B0C 07089001 10000000
+// CHECK-NEXT:       04C0: 20000000 00000000 01000000 00000000
+// CHECK-NEXT:       04D0: 1C000000 00000000 017A5052 00017810
+// CHECK-NEXT:       04E0: 0A8C0000 00000000 00001B0C 07089001
+// CHECK-NEXT:       04F0: 10000000 24000000 00000000 01000000
+// CHECK-NEXT:       0500: 00000000 1C000000 00000000 017A5052
+// CHECK-NEXT:       0510: 00017810 0A880000 00000000 00001B0C
+// CHECK-NEXT:       0520: 07089001 10000000 24000000 00000000
+// CHECK-NEXT:       0530: 01000000 00000000 1C000000 00000000
+// CHECK-NEXT:       0540: 017A5052 00017810 0A900000 00000000
+// CHECK-NEXT:       0550: 00001B0C 07089001 10000000 24000000
+// CHECK-NEXT:       0560: 00000000 01000000 00000000 18000000
+// CHECK-NEXT:       0570: 00000000 017A5052 00017810 04920000
+// CHECK-NEXT:       0580: 1B0C0708 90010000 10000000 20000000
+// CHECK-NEXT:       0590: 00000000 01000000 00000000 18000000
+// CHECK-NEXT:       05A0: 00000000 017A5052 00017810 06930000
+// CHECK-NEXT:       05B0: 00001B0C 07089001 10000000 20000000
+// CHECK-NEXT:       05C0: 00000000 01000000 00000000 1C000000
+// CHECK-NEXT:       05D0: 00000000 017A5052 00017810 0A940000
+// CHECK-NEXT:       05E0: 00000000 00001B0C 07089001 10000000
+// CHECK-NEXT:       05F0: 24000000 00000000 01000000 00000000
+// CHECK-NEXT:       0600: 18000000 00000000 017A5052 00017810
+// CHECK-NEXT:       0610: 049A0000 1B0C0708 90010000 10000000
+// CHECK-NEXT:       0620: 20000000 00000000 01000000 00000000
+// CHECK-NEXT:       0630: 18000000 00000000 017A5052 00017810
+// CHECK-NEXT:       0640: 069B0000 00001B0C 07089001 10000000
+// CHECK-NEXT:       0650: 20000000 00000000 01000000 00000000
+// CHECK-NEXT:       0660: 1C000000 00000000 017A5052 00017810
+// CHECK-NEXT:       0670: 0A9C0000 00000000 00001B0C 07089001
+// CHECK-NEXT:       0680: 10000000 24000000 00000000 01000000
+// CHECK-NEXT:       0690: 00000000 1C000000 00000000 017A5052
+// CHECK-NEXT:       06A0: 00017810 0A980000 00000000 00001B0C
+// CHECK-NEXT:       06B0: 07089001 10000000 24000000 00000000
+// CHECK-NEXT:       06C0: 01000000 00000000
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
+
+// CHECK:        Section {
+// CHECK:          Index: 5
+// CHECK-NEXT:     Name: .rela.eh_frame
+// CHECK-NEXT:     Type: SHT_RELA
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0xE30
+// CHECK-NEXT:     Size: 1728
+// CHECK-NEXT:     Link: 7
+// CHECK-NEXT:     Info: 4
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 24
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:     ]
+// CHECK:        }
diff --git a/test/MC/ELF/comdat.s b/test/MC/ELF/comdat.s
index d7acea6..f9469df 100644
--- a/test/MC/ELF/comdat.s
+++ b/test/MC/ELF/comdat.s
@@ -1,75 +1,81 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump   | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -t | FileCheck %s
 
 // Test that we produce the group sections and that they are a the beginning
 // of the file.
 
-// CHECK:       # Section 1
-// CHECK-NEXT:  (('sh_name', 0x0000001b) # '.group'
-// CHECK-NEXT:   ('sh_type', 0x00000011)
-// CHECK-NEXT:   ('sh_flags', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_offset', 0x0000000000000040)
-// CHECK-NEXT:   ('sh_size', 0x000000000000000c)
-// CHECK-NEXT:   ('sh_link', 0x0000000d)
-// CHECK-NEXT:   ('sh_info', 0x00000001)
-// CHECK-NEXT:   ('sh_addralign', 0x0000000000000004)
-// CHECK-NEXT:   ('sh_entsize', 0x0000000000000004)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Section 2
-// CHECK-NEXT:  (('sh_name', 0x0000001b) # '.group'
-// CHECK-NEXT:   ('sh_type', 0x00000011)
-// CHECK-NEXT:   ('sh_flags', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_offset', 0x000000000000004c)
-// CHECK-NEXT:   ('sh_size', 0x0000000000000008)
-// CHECK-NEXT:   ('sh_link', 0x0000000d)
-// CHECK-NEXT:   ('sh_info', 0x00000002)
-// CHECK-NEXT:   ('sh_addralign', 0x0000000000000004)
-// CHECK-NEXT:   ('sh_entsize', 0x0000000000000004)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Section 3
-// CHECK-NEXT:  (('sh_name', 0x0000001b) # '.group'
-// CHECK-NEXT:   ('sh_type', 0x00000011)
-// CHECK-NEXT:   ('sh_flags', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_offset', 0x0000000000000054)
-// CHECK-NEXT:   ('sh_size', 0x0000000000000008)
-// CHECK-NEXT:   ('sh_link', 0x0000000d)
-// CHECK-NEXT:   ('sh_info', 0x0000000d)
-// CHECK-NEXT:   ('sh_addralign', 0x0000000000000004)
-// CHECK-NEXT:   ('sh_entsize', 0x0000000000000004)
-// CHECK-NEXT:  ),
+// CHECK:        Section {
+// CHECK:          Index: 1
+// CHECK-NEXT:     Name: .group
+// CHECK-NEXT:     Type: SHT_GROUP
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x40
+// CHECK-NEXT:     Size: 12
+// CHECK-NEXT:     Link: 13
+// CHECK-NEXT:     Info: 1
+// CHECK-NEXT:     AddressAlignment: 4
+// CHECK-NEXT:     EntrySize: 4
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Section {
+// CHECK-NEXT:     Index: 2
+// CHECK-NEXT:     Name: .group
+// CHECK-NEXT:     Type: SHT_GROUP
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x4C
+// CHECK-NEXT:     Size: 8
+// CHECK-NEXT:     Link: 13
+// CHECK-NEXT:     Info: 2
+// CHECK-NEXT:     AddressAlignment: 4
+// CHECK-NEXT:     EntrySize: 4
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Section {
+// CHECK-NEXT:     Index: 3
+// CHECK-NEXT:     Name: .group
+// CHECK-NEXT:     Type: SHT_GROUP
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x54
+// CHECK-NEXT:     Size: 8
+// CHECK-NEXT:     Link: 13
+// CHECK-NEXT:     Info: 13
+// CHECK-NEXT:     AddressAlignment: 4
+// CHECK-NEXT:     EntrySize: 4
+// CHECK-NEXT:   }
 
 // Test that g1 and g2 are local, but g3 is an undefined global.
 
-// CHECK:      # Symbol 1
-// CHECK-NEXT: (('st_name', 0x00000001) # 'g1'
-// CHECK-NEXT:  ('st_bind', 0x0)
-// CHECK-NEXT:  ('st_type', 0x0)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0007)
-// CHECK-NEXT:  ('st_value', 0x0000000000000000)
-// CHECK-NEXT:  ('st_size', 0x0000000000000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Symbol 2
-// CHECK-NEXT: (('st_name', 0x00000004) # 'g2'
-// CHECK-NEXT:  ('st_bind', 0x0)
-// CHECK-NEXT:  ('st_type', 0x0)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0002)
-// CHECK-NEXT:  ('st_value', 0x0000000000000000)
-// CHECK-NEXT:  ('st_size', 0x0000000000000000)
-// CHECK-NEXT: ),
+// CHECK:        Symbol {
+// CHECK:          Name: g1 (1)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .foo (0x7)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: g2 (4)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .group (0x2)
+// CHECK-NEXT:   }
 
-// CHECK:      # Symbol 13
-// CHECK-NEXT: (('st_name', 0x00000007) # 'g3'
-// CHECK-NEXT:  ('st_bind', 0x1)
-// CHECK-NEXT:  ('st_type', 0x0)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0000)
-// CHECK-NEXT:  ('st_value', 0x0000000000000000)
-// CHECK-NEXT:  ('st_size', 0x0000000000000000)
-// CHECK-NEXT: ),
+// CHECK:        Symbol {
+// CHECK:          Name: g3 (7)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
 
 
 	.section	.foo,"axG",@progbits,g1,comdat
diff --git a/test/MC/ELF/common.s b/test/MC/ELF/common.s
index 046306e..4fc2154 100644
--- a/test/MC/ELF/common.s
+++ b/test/MC/ELF/common.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -t | FileCheck %s
 
 
 	.text
@@ -8,13 +8,15 @@
 	.local	common1
 	.comm	common1,1,1
 
-// CHECK: ('st_name', 0x00000001) # 'common1'
-// CHECK-NEXT: ('st_bind', 0x0)
-// CHECK-NEXT: ('st_type', 0x1)
-// CHECK-NEXT: ('st_other', 0x00)
-// CHECK-NEXT: ('st_shndx',
-// CHECK-NEXT: ('st_value', 0x0000000000000000)
-// CHECK-NEXT: ('st_size', 0x0000000000000001)
+// CHECK:        Symbol {
+// CHECK:          Name: common1 (1)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 1
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: Object
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section:
+// CHECK-NEXT:   }
 
 
 // Same as common1, but with directives in a different order.
@@ -22,38 +24,44 @@
 	.type	common2,@object
 	.comm	common2,1,1
 
-// CHECK: ('st_name', 0x00000009) # 'common2'
-// CHECK-NEXT: ('st_bind', 0x0)
-// CHECK-NEXT: ('st_type', 0x1)
-// CHECK-NEXT: ('st_other', 0x00)
-// CHECK-NEXT: ('st_shndx',
-// CHECK-NEXT: ('st_value', 0x0000000000000001)
-// CHECK-NEXT: ('st_size', 0x0000000000000001)
+// CHECK:        Symbol {
+// CHECK:          Name: common2 (9)
+// CHECK-NEXT:     Value: 0x1
+// CHECK-NEXT:     Size: 1
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: Object
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section:
+// CHECK-NEXT:   }
+
 
         .local	common6
         .comm	common6,8,16
 
-// CHECK:      # Symbol 3
-// CHECK-NEXT: (('st_name', 0x00000011) # 'common6'
-// CHECK-NEXT:  ('st_bind', 0x0)
-// CHECK-NEXT:  ('st_type', 0x1)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0004)
-// CHECK-NEXT:  ('st_value', 0x0000000000000010)
-// CHECK-NEXT:  ('st_size', 0x0000000000000008)
-// CHECK-NEXT: ),
+// CHECK:        Symbol {
+// CHECK:          Name: common6 (17)
+// CHECK-NEXT:     Value: 0x10
+// CHECK-NEXT:     Size: 8
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: Object
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .bss (0x4)
+// CHECK-NEXT:   }
+
 
 // Test that without an explicit .local we produce a global.
 	.type	common3,@object
 	.comm	common3,4,4
 
-// CHECK: ('st_name', 0x00000019) # 'common3'
-// CHECK-NEXT: ('st_bind', 0x1)
-// CHECK-NEXT: ('st_type', 0x1)
-// CHECK-NEXT: ('st_other', 0x00)
-// CHECK-NEXT: ('st_shndx', 0xfff2)
-// CHECK-NEXT: ('st_value', 0x0000000000000004)
-// CHECK-NEXT: ('st_size', 0x0000000000000004)
+// CHECK:        Symbol {
+// CHECK:          Name: common3 (25)
+// CHECK-NEXT:     Value: 0x4
+// CHECK-NEXT:     Size: 4
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: Object
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0xFFF2)
+// CHECK-NEXT:   }
 
 
 // Test that without an explicit .local we produce a global, even if the first
@@ -67,22 +75,25 @@ foo:
 	.type	common4,@object
 	.comm	common4,40,16
 
-// CHECK: ('st_name', 0x00000025) # 'common4'
-// CHECK-NEXT: ('st_bind', 0x1)
-// CHECK-NEXT: ('st_type', 0x1)
-// CHECK-NEXT: ('st_other', 0x00)
-// CHECK-NEXT: ('st_shndx', 0xfff2)
-// CHECK-NEXT: ('st_value', 0x0000000000000010)
-// CHECK-NEXT: ('st_size', 0x0000000000000028)
+// CHECK:        Symbol {
+// CHECK:          Name: common4 (37)
+// CHECK-NEXT:     Value: 0x10
+// CHECK-NEXT:     Size: 40
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: Object
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0xFFF2)
+// CHECK-NEXT:   }
+
 
         .comm	common5,4,4
 
-// CHECK:      # Symbol 9
-// CHECK-NEXT: (('st_name', 0x0000002d) # 'common5'
-// CHECK-NEXT:  ('st_bind', 0x1)
-// CHECK-NEXT:  ('st_type', 0x1)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0xfff2)
-// CHECK-NEXT:  ('st_value', 0x0000000000000004)
-// CHECK-NEXT:  ('st_size', 0x0000000000000004)
-// CHECK-NEXT: ),
+// CHECK:        Symbol {
+// CHECK:          Name: common5 (45)
+// CHECK-NEXT:     Value: 0x4
+// CHECK-NEXT:     Size: 4
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: Object
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0xFFF2)
+// CHECK-NEXT:   }
diff --git a/test/MC/ELF/common2.s b/test/MC/ELF/common2.s
index b13577d..526ebc2 100644
--- a/test/MC/ELF/common2.s
+++ b/test/MC/ELF/common2.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s | FileCheck %s
 
 // Test that the common symbols are placed at the end of .bss. In this example
 // it causes .bss to have size 9 instead of 8.
@@ -9,13 +9,16 @@
         .zero 1
 	.align	8
 
-// CHECK:      (('sh_name', 0x00000007) # '.bss'
-// CHECK-NEXT:  ('sh_type',
-// CHECK-NEXT:  ('sh_flags'
-// CHECK-NEXT:  ('sh_addr',
-// CHECK-NEXT:  ('sh_offset',
-// CHECK-NEXT:  ('sh_size', 0x0000000000000009)
-// CHECK-NEXT:  ('sh_link',
-// CHECK-NEXT:  ('sh_info',
-// CHECK-NEXT:  ('sh_addralign',
-// CHECK-NEXT:  ('sh_entsize',
+// CHECK:        Section {
+// CHECK:          Name: .bss (7)
+// CHECK-NEXT:     Type:
+// CHECK-NEXT:     Flags [
+// CHECK:          ]
+// CHECK-NEXT:     Address:
+// CHECK-NEXT:     Offset:
+// CHECK-NEXT:     Size: 9
+// CHECK-NEXT:     Link:
+// CHECK-NEXT:     Info:
+// CHECK-NEXT:     AddressAlignment:
+// CHECK-NEXT:     EntrySize:
+// CHECK-NEXT:   }
diff --git a/test/MC/ELF/debug-line.s b/test/MC/ELF/debug-line.s
index fed816a..75e050e 100644
--- a/test/MC/ELF/debug-line.s
+++ b/test/MC/ELF/debug-line.s
@@ -1,18 +1,26 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sd | FileCheck %s
 
 // Test that .debug_line is populated.
 
-// CHECK:     (('sh_name', 0x00000011) # '.debug_line'
-// CHECK-NEXT: ('sh_type', 0x00000001)
-// CHECK-NEXT: ('sh_flags', 0x0000000000000000)
-// CHECK-NEXT: ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT: ('sh_offset', 0x0000000000000044)
-// CHECK-NEXT: ('sh_size', 0x0000000000000037)
-// CHECK-NEXT: ('sh_link', 0x00000000)
-// CHECK-NEXT: ('sh_info', 0x00000000)
-// CHECK-NEXT: ('sh_addralign', 0x0000000000000001)
-// CHECK-NEXT: ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT: ('_section_data', '33000000 02001c00 00000101 fb0e0d00 01010101 00000001 00000100 666f6f2e 63000000 00000009 02000000 00000000 00150204 000101')
+// CHECK:        Section {
+// CHECK:          Name: .debug_line
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x44
+// CHECK-NEXT:     Size: 55
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 1
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:     SectionData (
+// CHECK-NEXT:       0000: 33000000 02001C00 00000101 FB0E0D00
+// CHECK-NEXT:       0010: 01010101 00000001 00000100 666F6F2E
+// CHECK-NEXT:       0020: 63000000 00000009 02000000 00000000
+// CHECK-NEXT:       0030: 00150204 000101
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
 
 	.section	.debug_line,"",@progbits
 	.text
diff --git a/test/MC/ELF/debug-loc.s b/test/MC/ELF/debug-loc.s
index 3eb3797..b24fa16 100644
--- a/test/MC/ELF/debug-loc.s
+++ b/test/MC/ELF/debug-loc.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s | FileCheck %s
 
 // Test that we don't regress on the size of the line info section. We used
 // to handle negative line diffs incorrectly which manifested as very
@@ -7,18 +7,20 @@
 // FIXME: This size is the same as gnu as, but we can probably do a bit better.
 // FIXME2: We need a debug_line dumper so that we can test the actual contents.
 
-// CHECK:      # Section 4
-// CHECK-NEXT: (('sh_name', 0x00000011) # '.debug_line'
-// CHECK-NEXT:  ('sh_type', 0x00000001)
-// CHECK-NEXT:  ('sh_flags', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_offset', 0x0000000000000044)
-// CHECK-NEXT:  ('sh_size', 0x000000000000003d)
-// CHECK-NEXT:  ('sh_link', 0x00000000)
-// CHECK-NEXT:  ('sh_info', 0x00000000)
-// CHECK-NEXT:  ('sh_addralign', 0x0000000000000001)
-// CHECK-NEXT:  ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT: ),
+// CHECK:        Section {
+// CHECK:          Index: 4
+// CHECK-NEXT:     Name: .debug_line
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x44
+// CHECK-NEXT:     Size: 61
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 1
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:   }
 
 	.section	.debug_line,"",@progbits
 	.text
diff --git a/test/MC/ELF/diff.s b/test/MC/ELF/diff.s
index 4214fc7..5436510 100644
--- a/test/MC/ELF/diff.s
+++ b/test/MC/ELF/diff.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -r | FileCheck %s
 
         .global zed
 foo:
@@ -8,8 +8,4 @@ bar:
 zed:
         mov zed+(bar-foo), %eax
 
-// CHECK:       # Relocation 0
-// CHECK-NEXT:  (('r_offset', 0x0000000000000005)
-// CHECK-NEXT:   ('r_sym', 0x00000006)
-// CHECK-NEXT:   ('r_type', 0x0000000b)
-// CHECK-NEXT:   ('r_addend', 0x0000000000000001)
+// CHECK:       0x5 R_X86_64_32S zed 0x1
diff --git a/test/MC/ELF/empty-dwarf-lines.s b/test/MC/ELF/empty-dwarf-lines.s
index 7baedbc..241580b 100644
--- a/test/MC/ELF/empty-dwarf-lines.s
+++ b/test/MC/ELF/empty-dwarf-lines.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s | FileCheck %s
 
 // Test that the dwarf debug_line section contains no line directives.
 
@@ -7,15 +7,17 @@
 c:
         .asciz   "hi\n"
 
-// CHECK:      # Section 4
-// CHECK-NEXT: (('sh_name', 0x0000000c) # '.debug_line'
-// CHECK-NEXT:  ('sh_type', 0x00000001)
-// CHECK-NEXT:  ('sh_flags', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_offset', 0x0000000000000044)
-// CHECK-NEXT:  ('sh_size', 0x0000000000000027)
-// CHECK-NEXT:  ('sh_link', 0x00000000)
-// CHECK-NEXT:  ('sh_info', 0x00000000)
-// CHECK-NEXT:  ('sh_addralign', 0x0000000000000001)
-// CHECK-NEXT:  ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT: ),
+// CHECK:        Section {
+// CHECK:          Index: 4
+// CHECK-NEXT:     Name: .debug_line
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x44
+// CHECK-NEXT:     Size: 39
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 1
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:   }
diff --git a/test/MC/ELF/empty.s b/test/MC/ELF/empty.s
index b38a621..c421fe8 100644
--- a/test/MC/ELF/empty.s
+++ b/test/MC/ELF/empty.s
@@ -1,70 +1,89 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump   | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s | FileCheck %s
 
 // Test that like gnu as we create text, data and bss by default. Also test
 // that shstrtab, symtab and strtab are listed in that order.
 
-// CHECK:      ('sh_name', 0x00000001) # '.text'
-// CHECK-NEXT: ('sh_type', 0x00000001)
-// CHECK-NEXT: ('sh_flags', 0x0000000000000006)
-// CHECK-NEXT: ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT: ('sh_offset', 0x0000000000000040)
-// CHECK-NEXT: ('sh_size', 0x0000000000000000)
-// CHECK-NEXT: ('sh_link', 0x00000000)
-// CHECK-NEXT: ('sh_info', 0x00000000)
-// CHECK-NEXT: ('sh_addralign', 0x0000000000000004)
-// CHECK-NEXT: ('sh_entsize', 0x0000000000000000)
-
-// CHECK:      ('sh_name', 0x00000026) # '.data'
-// CHECK-NEXT: ('sh_type', 0x00000001)
-// CHECK-NEXT: ('sh_flags', 0x0000000000000003)
-// CHECK-NEXT: ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT: ('sh_offset', 0x0000000000000040)
-// CHECK-NEXT: ('sh_size', 0x0000000000000000)
-// CHECK-NEXT: ('sh_link', 0x00000000)
-// CHECK-NEXT: ('sh_info', 0x00000000)
-// CHECK-NEXT: ('sh_addralign', 0x0000000000000004)
-// CHECK-NEXT: ('sh_entsize', 0x0000000000000000)
-
-// CHECK:      ('sh_name', 0x00000007) # '.bss'
-// CHECK-NEXT: ('sh_type', 0x00000008)
-// CHECK-NEXT: ('sh_flags', 0x0000000000000003)
-// CHECK-NEXT: ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT: ('sh_offset', 0x0000000000000040)
-// CHECK-NEXT: ('sh_size', 0x0000000000000000)
-// CHECK-NEXT: ('sh_link', 0x00000000)
-// CHECK-NEXT: ('sh_info', 0x00000000)
-// CHECK-NEXT: ('sh_addralign', 0x0000000000000004)
-// CHECK-NEXT: ('sh_entsize', 0x0000000000000000)
-
-// CHECK:      ('sh_name', 0x0000000c) # '.shstrtab'
-// CHECK-NEXT: ('sh_type', 0x00000003)
-// CHECK-NEXT:    ('sh_flags', 0x0000000000000000)
-// CHECK-NEXT:    ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:    ('sh_offset', 0x0000000000000040)
-// CHECK-NEXT:    ('sh_size', 0x000000000000002c)
-// CHECK-NEXT:    ('sh_link', 0x00000000)
-// CHECK-NEXT:    ('sh_info', 0x00000000)
-// CHECK-NEXT:    ('sh_addralign', 0x0000000000000001)
-// CHECK-NEXT:    ('sh_entsize', 0x0000000000000000)
-
-// CHECK: ('sh_name', 0x0000001e) # '.symtab'
-// CHECK-NEXT:    ('sh_type', 0x00000002)
-// CHECK-NEXT:    ('sh_flags', 0x0000000000000000)
-// CHECK-NEXT:    ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:    ('sh_offset',
-// CHECK-NEXT:    ('sh_size', 0x0000000000000060)
-// CHECK-NEXT:    ('sh_link', 0x00000006)
-// CHECK-NEXT:    ('sh_info', 0x00000004)
-// CHECK-NEXT:    ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:    ('sh_entsize', 0x0000000000000018)
-
-// CHECK: ('sh_name', 0x00000016) # '.strtab'
-// CHECK-NEXT:    ('sh_type', 0x00000003)
-// CHECK-NEXT:    ('sh_flags', 0x0000000000000000)
-// CHECK-NEXT:    ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:    ('sh_offset',
-// CHECK-NEXT:    ('sh_size', 0x0000000000000001)
-// CHECK-NEXT:    ('sh_link', 0x00000000)
-// CHECK-NEXT:    ('sh_info', 0x00000000)
-// CHECK-NEXT:    ('sh_addralign', 0x0000000000000001)
-// CHECK-NEXT:    ('sh_entsize', 0x0000000000000000)
+// CHECK:        Section {
+// CHECK:          Name: .text
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:       SHF_EXECINSTR
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x40
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 4
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:   }
+// CHECK:        Section {
+// CHECK:          Name: .data
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:       SHF_WRITE
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x40
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 4
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:   }
+// CHECK:        Section {
+// CHECK:          Name: .bss
+// CHECK-NEXT:     Type: SHT_NOBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:       SHF_WRITE
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x40
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 4
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:   }
+// CHECK:        Section {
+// CHECK:          Name: .shstrtab
+// CHECK-NEXT:     Type: SHT_STRTAB
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x40
+// CHECK-NEXT:     Size: 44
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 1
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:   }
+// CHECK:        Section {
+// CHECK:          Name: .symtab
+// CHECK-NEXT:     Type: SHT_SYMTAB
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset:
+// CHECK-NEXT:     Size: 96
+// CHECK-NEXT:     Link: 6
+// CHECK-NEXT:     Info: 4
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 24
+// CHECK-NEXT:   }
+// CHECK:        Section {
+// CHECK:          Name: .strtab
+// CHECK-NEXT:     Type: SHT_STRTAB
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset:
+// CHECK-NEXT:     Size: 1
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 1
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:   }
diff --git a/test/MC/ELF/entsize.ll b/test/MC/ELF/entsize.ll
index dce6dba..2bf9fa9 100644
--- a/test/MC/ELF/entsize.ll
+++ b/test/MC/ELF/entsize.ll
@@ -1,4 +1,4 @@
-; RUN: llc -filetype=obj -mtriple x86_64-pc-linux-gnu %s -o - | elf-dump | FileCheck -check-prefix=64 %s
+; RUN: llc -filetype=obj -mtriple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s | FileCheck -check-prefix=64 %s
 
 ; Test that constant mergeable strings have sh_entsize set.
 
@@ -20,25 +20,35 @@ declare void @foo(i64* nocapture) nounwind
 
 ;;;;;
 
-; 64: (('sh_name', 0x0000004e) # '.rodata.str1.1'
-; 64-NEXT:   ('sh_type', 0x00000001)
-; 64-NEXT:   ('sh_flags', 0x0000000000000032)
-; 64-NEXT:   ('sh_addr',
-; 64-NEXT:   ('sh_offset',
-; 64-NEXT:   ('sh_size', 0x000000000000000d)
-; 64-NEXT:   ('sh_link',
-; 64-NEXT:   ('sh_info',
-; 64-NEXT:   ('sh_addralign', 0x0000000000000001)
-; 64-NEXT:   ('sh_entsize', 0x0000000000000001)
-
-; 64: (('sh_name', 0x00000041) # '.rodata.cst8'
-; 64-NEXT:   ('sh_type', 0x00000001)
-; 64-NEXT:   ('sh_flags', 0x0000000000000012)
-; 64-NEXT:   ('sh_addr',
-; 64-NEXT:   ('sh_offset',
-; 64-NEXT:   ('sh_size', 0x0000000000000010)
-; 64-NEXT:   ('sh_link',
-; 64-NEXT:   ('sh_info',
-; 64-NEXT:   ('sh_addralign', 0x0000000000000008)
-; 64-NEXT:   ('sh_entsize', 0x0000000000000008)
-
+; 64:        Section {
+; 64:          Name: .rodata.str1.1
+; 64-NEXT:     Type: SHT_PROGBITS
+; 64-NEXT:     Flags [
+; 64-NEXT:       SHF_ALLOC
+; 64-NEXT:       SHF_MERGE
+; 64-NEXT:       SHF_STRINGS
+; 64-NEXT:     ]
+; 64-NEXT:     Address:
+; 64-NEXT:     Offset:
+; 64-NEXT:     Size: 13
+; 64-NEXT:     Link:
+; 64-NEXT:     Info:
+; 64-NEXT:     AddressAlignment: 1
+; 64-NEXT:     EntrySize: 1
+; 64-NEXT:   }
+
+; 64:        Section {
+; 64:          Name: .rodata.cst8
+; 64-NEXT:     Type: SHT_PROGBITS
+; 64-NEXT:     Flags [
+; 64-NEXT:       SHF_ALLOC
+; 64-NEXT:       SHF_MERGE
+; 64-NEXT:     ]
+; 64-NEXT:     Address:
+; 64-NEXT:     Offset:
+; 64-NEXT:     Size: 16
+; 64-NEXT:     Link:
+; 64-NEXT:     Info:
+; 64-NEXT:     AddressAlignment: 8
+; 64-NEXT:     EntrySize: 8
+; 64-NEXT:   }
diff --git a/test/MC/ELF/entsize.s b/test/MC/ELF/entsize.s
index 4645686..8e084e2 100644
--- a/test/MC/ELF/entsize.s
+++ b/test/MC/ELF/entsize.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck  %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s | FileCheck  %s
 
 // Test that mergeable constants have sh_entsize set.
 
@@ -32,38 +32,53 @@
     .quad 42
     .quad 42
 
-// CHECK: # Section 4
-// CHECK-NEXT:   ('sh_name', 0x00000048) # '.rodata.str1.1'
-// CHECK-NEXT:   ('sh_type', 0x00000001)
-// CHECK-NEXT:   ('sh_flags', 0x0000000000000032)
-// CHECK-NEXT:   ('sh_addr',
-// CHECK-NEXT:   ('sh_offset',
-// CHECK-NEXT:   ('sh_size', 0x000000000000000d)
-// CHECK-NEXT:   ('sh_link',
-// CHECK-NEXT:   ('sh_info',
-// CHECK-NEXT:   ('sh_addralign', 0x0000000000000001)
-// CHECK-NEXT:   ('sh_entsize', 0x0000000000000001)
-
-// CHECK: # Section 5
-// CHECK-NEXT:   ('sh_name', 0x00000039) # '.rodata.str2.1'
-// CHECK-NEXT:   ('sh_type', 0x00000001)
-// CHECK-NEXT:   ('sh_flags', 0x0000000000000032)
-// CHECK-NEXT:   ('sh_addr',
-// CHECK-NEXT:   ('sh_offset',
-// CHECK-NEXT:   ('sh_size', 0x0000000000000010)
-// CHECK-NEXT:   ('sh_link',
-// CHECK-NEXT:   ('sh_info',
-// CHECK-NEXT:   ('sh_addralign', 0x0000000000000001)
-// CHECK-NEXT:   ('sh_entsize', 0x0000000000000002)
-
-// CHECK: # Section 6
-// CHECK-NEXT:   ('sh_name', 0x0000002c) # '.rodata.cst8
-// CHECK-NEXT:   ('sh_type', 0x00000001)
-// CHECK-NEXT:   ('sh_flags', 0x0000000000000012)
-// CHECK-NEXT:   ('sh_addr',
-// CHECK-NEXT:   ('sh_offset',
-// CHECK-NEXT:   ('sh_size', 0x0000000000000010)
-// CHECK-NEXT:   ('sh_link',
-// CHECK-NEXT:   ('sh_info',
-// CHECK-NEXT:   ('sh_addralign', 0x0000000000000001)
-// CHECK-NEXT:   ('sh_entsize', 0x0000000000000008)
+// CHECK:        Section {
+// CHECK:          Index: 4
+// CHECK-NEXT:     Name: .rodata.str1.1
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:       SHF_MERGE
+// CHECK-NEXT:       SHF_STRINGS
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address:
+// CHECK-NEXT:     Offset:
+// CHECK-NEXT:     Size: 13
+// CHECK-NEXT:     Link:
+// CHECK-NEXT:     Info:
+// CHECK-NEXT:     AddressAlignment: 1
+// CHECK-NEXT:     EntrySize: 1
+// CHECK-NEXT:   }
+// CHECK:        Section {
+// CHECK:          Index: 5
+// CHECK-NEXT:     Name: .rodata.str2.1
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:       SHF_MERGE
+// CHECK-NEXT:       SHF_STRINGS
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address:
+// CHECK-NEXT:     Offset:
+// CHECK-NEXT:     Size: 16
+// CHECK-NEXT:     Link:
+// CHECK-NEXT:     Info:
+// CHECK-NEXT:     AddressAlignment: 1
+// CHECK-NEXT:     EntrySize: 2
+// CHECK-NEXT:   }
+// CHECK:        Section {
+// CHECK:          Index: 6
+// CHECK-NEXT:     Name: .rodata.cst8
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:       SHF_MERGE
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address:
+// CHECK-NEXT:     Offset:
+// CHECK-NEXT:     Size: 16
+// CHECK-NEXT:     Link:
+// CHECK-NEXT:     Info:
+// CHECK-NEXT:     AddressAlignment: 1
+// CHECK-NEXT:     EntrySize: 8
+// CHECK-NEXT:   }
diff --git a/test/MC/ELF/file.s b/test/MC/ELF/file.s
index 434fb6e..7e287f7 100644
--- a/test/MC/ELF/file.s
+++ b/test/MC/ELF/file.s
@@ -1,23 +1,25 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -t | FileCheck %s
 
 // Test that the STT_FILE symbol precedes the other local symbols.
 
 .file "foo"
 foa:
-// CHECK:    # Symbol 1
-// CHECK-NEXT:    (('st_name', 0x00000001) # 'foo'
-// CHECK-NEXT:     ('st_bind', 0x0)
-// CHECK-NEXT:     ('st_type', 0x4)
-// CHECK-NEXT:     ('st_other', 0x00)
-// CHECK-NEXT:     ('st_shndx', 0xfff1)
-// CHECK-NEXT:     ('st_value', 0x0000000000000000)
-// CHECK-NEXT:     ('st_size', 0x0000000000000000)
-// CHECK-NEXT:    ),
-// CHECK-NEXT:    # Symbol 2
-// CHECK-NEXT:    (('st_name', 0x00000005) # 'foa'
-// CHECK-NEXT:     ('st_bind', 0x0)
-// CHECK-NEXT:     ('st_type', 0x0)
-// CHECK-NEXT:     ('st_other', 0x00)
-// CHECK-NEXT:     ('st_shndx', 0x0001)
-// CHECK-NEXT:     ('st_value', 0x0000000000000000)
-// CHECK-NEXT:     ('st_size', 0x0000000000000000)
+
+// CHECK:        Symbol {
+// CHECK:          Name: foo (1)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: File
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0xFFF1)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: foa (5)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text (0x1)
+// CHECK-NEXT:   }
diff --git a/test/MC/ELF/gen-dwarf.s b/test/MC/ELF/gen-dwarf.s
index 85e0242..907bf42 100644
--- a/test/MC/ELF/gen-dwarf.s
+++ b/test/MC/ELF/gen-dwarf.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -g -triple  i686-pc-linux-gnu %s -filetype=obj -o - | elf-dump | FileCheck %s
+// RUN: llvm-mc -g -triple  i686-pc-linux-gnu %s -filetype=obj -o - | llvm-readobj -r | FileCheck %s
 
 
 // Test that on ELF:
@@ -14,97 +14,13 @@ foo:
     ret
     .size foo, .-foo
 
-// Section 4 is .debug_line
-// CHECK:       # Section 4
-// CHECK-NEXT:  # '.debug_line'
-
-
-
-// The two relocations, one to symbol 6 and one to 4
-// CHECK:         # '.rel.debug_info'
-// CHECK-NEXT:   ('sh_type',
-// CHECK-NEXT:   ('sh_flags'
-// CHECK-NEXT:   ('sh_addr',
-// CHECK-NEXT:   ('sh_offset',
-// CHECK-NEXT:   ('sh_size',
-// CHECK-NEXT:   ('sh_link',
-// CHECK-NEXT:   ('sh_info',
-// CHECK-NEXT:   ('sh_addralign',
-// CHECK-NEXT:   ('sh_entsize',
-// CHECK-NEXT:   ('_relocations', [
-// CHECK-NEXT:    # Relocation 0
-// CHECK-NEXT:    (('r_offset', 0x00000006)
-// CHECK-NEXT:     ('r_sym', 0x000006)
-// CHECK-NEXT:     ('r_type', 0x01)
-// CHECK-NEXT:    ),
-// CHECK-NEXT:    # Relocation 1
-// CHECK-NEXT:    (('r_offset', 0x0000000c)
-// CHECK-NEXT:     ('r_sym', 0x000004)
-// CHECK-NEXT:     ('r_type', 0x01)
-// CHECK-NEXT:    ),
-
-
-// Section 8 is .debug_abbrev
-// CHECK:       # Section 8
-// CHECK-NEXT:  (('sh_name', 0x00000001) # '.debug_abbrev'
-
-// Section 9 is .debug_aranges
-// CHECK:       # Section 9
-// CHECK-NEXT:  (('sh_name', 0x0000001e) # '.debug_aranges'
-
-// Two relocations in .debug_aranges, one to text and one to debug_info.
-// CHECK:       # '.rel.debug_aranges'
-// CHECK:       # Relocation 0
-// CHECK-NEXT:  (('r_offset', 0x00000006)
-// CHECK-NEXT:   ('r_sym', 0x000005)
-// CHECK-NEXT:   ('r_type', 0x01)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Relocation 1
-// CHECK-NEXT: (('r_offset', 0x00000010)
-// CHECK-NEXT:  ('r_sym', 0x000001)
-// CHECK-NEXT:  ('r_type', 0x01)
-// CHECK-NEXT: ),
-
-// Symbol 1 is section 1 (.text)
-// CHECK:         # Symbol 1
-// CHECK-NEXT:    (('st_name', 0x00000000) # ''
-// CHECK-NEXT:     ('st_value', 0x00000000)
-// CHECK-NEXT:     ('st_size', 0x00000000)
-// CHECK-NEXT:     ('st_bind', 0x0)
-// CHECK-NEXT:     ('st_type', 0x3)
-// CHECK-NEXT:     ('st_other', 0x00)
-// CHECK-NEXT:     ('st_shndx', 0x0001)
-// CHECK-NEXT:    ),
-
-// Symbol 4 is section 4 (.debug_line)
-// CHECK:         # Symbol 4
-// CHECK-NEXT:    (('st_name', 0x00000000) # ''
-// CHECK-NEXT:     ('st_value', 0x00000000)
-// CHECK-NEXT:     ('st_size', 0x00000000)
-// CHECK-NEXT:     ('st_bind', 0x0)
-// CHECK-NEXT:     ('st_type', 0x3)
-// CHECK-NEXT:     ('st_other', 0x00)
-// CHECK-NEXT:     ('st_shndx', 0x0004)
-// CHECK-NEXT:    ),
-
-// Symbol 5 is section 6 (.debug_info)
-// CHECK:         # Symbol 5
-// CHECK-NEXT:    (('st_name', 0x00000000) # ''
-// CHECK-NEXT:     ('st_value', 0x00000000)
-// CHECK-NEXT:     ('st_size', 0x00000000)
-// CHECK-NEXT:     ('st_bind', 0x0)
-// CHECK-NEXT:     ('st_type', 0x3)
-// CHECK-NEXT:     ('st_other', 0x00)
-// CHECK-NEXT:     ('st_shndx', 0x0006)
-// CHECK-NEXT:    ),
-
-// Symbol 6 is section 8 (.debug_abbrev)
-// CHECK:         # Symbol 6
-// CHECK-NEXT:    (('st_name', 0x00000000) # ''
-// CHECK-NEXT:     ('st_value', 0x00000000)
-// CHECK-NEXT:     ('st_size', 0x00000000)
-// CHECK-NEXT:     ('st_bind', 0x0)
-// CHECK-NEXT:     ('st_type', 0x3)
-// CHECK-NEXT:     ('st_other', 0x00)
-// CHECK-NEXT:     ('st_shndx', 0x0008)
-// CHECK-NEXT:    ),
+// CHECK:      Relocations [
+// CHECK:        Section ({{[^ ]+}}) .debug_info {
+// CHECK-NEXT:     0x6 R_386_32 .debug_abbrev 0x0
+// CHECK-NEXT:     0xC R_386_32 .debug_line 0x0
+// CHECK:        }
+// CHECK-NEXT:   Section ({{[^ ]+}}) .debug_aranges {
+// CHECK-NEXT:     0x6 R_386_32 .debug_info 0x0
+// CHECK-NEXT:     0x10 R_386_32 .text 0x0
+// CHECK-NEXT:   }
+// CHECK-NEXT: ]
diff --git a/test/MC/ELF/global-offset.s b/test/MC/ELF/global-offset.s
index 81ae5d7..c688673 100644
--- a/test/MC/ELF/global-offset.s
+++ b/test/MC/ELF/global-offset.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple i386-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck  %s
+// RUN: llvm-mc -filetype=obj -triple i386-pc-linux-gnu %s -o - | llvm-readobj -s -sd | FileCheck  %s
 
 // We test that _GLOBAL_OFFSET_TABLE_ will account for the two bytes at the
 // start of the addl/leal.
@@ -10,14 +10,20 @@
 foo:
         addl    _GLOBAL_OFFSET_TABLE_-foo,%ebx
 
-// CHECK:      ('sh_name', 0x00000005) # '.text'
-// CHECK-NEXT: ('sh_type',
-// CHECK-NEXT: ('sh_flags',
-// CHECK-NEXT: ('sh_addr',
-// CHECK-NEXT: ('sh_offset',
-// CHECK-NEXT: ('sh_size',
-// CHECK-NEXT: ('sh_link',
-// CHECK-NEXT: ('sh_info',
-// CHECK-NEXT: ('sh_addralign',
-// CHECK-NEXT: ('sh_entsize',
-// CHECK-NEXT: ('_section_data', '81c30200 00008d9b 02000000 031d0200 0000')
+// CHECK:        Section {
+// CHECK:          Name: .text
+// CHECK-NEXT:     Type:
+// CHECK-NEXT:     Flags [
+// CHECK:          ]
+// CHECK-NEXT:     Address:
+// CHECK-NEXT:     Offset:
+// CHECK-NEXT:     Size:
+// CHECK-NEXT:     Link:
+// CHECK-NEXT:     Info:
+// CHECK-NEXT:     AddressAlignment:
+// CHECK-NEXT:     EntrySize:
+// CHECK-NEXT:     SectionData (
+// CHECK-NEXT:       0000: 81C30200 00008D9B 02000000 031D0200
+// CHECK-NEXT:       0010: 0000
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
diff --git a/test/MC/ELF/got.s b/test/MC/ELF/got.s
index a849872..60dea6d 100644
--- a/test/MC/ELF/got.s
+++ b/test/MC/ELF/got.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -r -t | FileCheck %s
 
 // Test that this produces a R_X86_64_GOT32 and that we have an undefined
 // reference to _GLOBAL_OFFSET_TABLE_.
@@ -6,20 +6,15 @@
         movl	foo@GOT, %eax
         movl	foo@GOTPCREL(%rip), %eax
 
-// CHECK:      ('_relocations', [
-// CHECK-NEXT:   # Relocation 0
-// CHECK-NEXT:    (('r_offset',
-// CHECK-NEXT:     ('r_sym',
-// CHECK-NEXT:     ('r_type', 0x00000003)
-// CHECK-NEXT:     ('r_addend',
-// CHECK-NEXT:    ),
-// CHECK-NEXT:   # Relocation 1
-// CHECK-NEXT:    (('r_offset',
-// CHECK-NEXT:     ('r_sym',
-// CHECK-NEXT:     ('r_type', 0x00000009)
-// CHECK-NEXT:     ('r_addend',
-// CHECK-NEXT:    ),
-// CHECK-NEXT:   ])
+// CHECK:      Relocations [
+// CHECK:        Section ({{[^ ]+}}) .text {
+// CHECK-NEXT:       0x{{[^ ]+}} R_X86_64_GOT32 foo 0x{{[^ ]+}}
+// CHECK-NEXT:       0x{{[^ ]+}} R_X86_64_GOTPCREL foo 0x{{[^ ]+}}
+// CHECK-NEXT:   }
+// CHECK-NEXT: ]
 
-// CHECK:     (('st_name', 0x00000005) # '_GLOBAL_OFFSET_TABLE_'
-// CHECK-NEXT: ('st_bind', 0x1)
+// CHECK:        Symbol {
+// CHECK:          Name: _GLOBAL_OFFSET_TABLE_
+// CHECK-NEXT:     Value:
+// CHECK-NEXT:     Size:
+// CHECK-NEXT:     Binding: Global
diff --git a/test/MC/ELF/ident.s b/test/MC/ELF/ident.s
index 56af19a..2592205 100644
--- a/test/MC/ELF/ident.s
+++ b/test/MC/ELF/ident.s
@@ -1,16 +1,23 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sd | FileCheck %s
 
-// CHECK:       (('sh_name', 0x00000007) # '.comment'
-// CHECK-NEXT:   ('sh_type', 0x00000001)
-// CHECK-NEXT:   ('sh_flags', 0x0000000000000030)
-// CHECK-NEXT:   ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_offset', 0x0000000000000040)
-// CHECK-NEXT:   ('sh_size', 0x000000000000000d)
-// CHECK-NEXT:   ('sh_link', 0x00000000)
-// CHECK-NEXT:   ('sh_info', 0x00000000)
-// CHECK-NEXT:   ('sh_addralign', 0x0000000000000001)
-// CHECK-NEXT:   ('sh_entsize', 0x0000000000000001)
-// CHECK-NEXT:   ('_section_data', '00666f6f 00626172 007a6564 00')
+// CHECK:        Section {
+// CHECK:          Name: .comment
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_MERGE
+// CHECK-NEXT:       SHF_STRINGS
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x40
+// CHECK-NEXT:     Size: 13
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 1
+// CHECK-NEXT:     EntrySize: 1
+// CHECK-NEXT:     SectionData (
+// CHECK-NEXT:       0000: 00666F6F 00626172 007A6564 00
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
 
         .ident "foo"
         .ident "bar"
diff --git a/test/MC/ELF/lcomm.s b/test/MC/ELF/lcomm.s
index ae8d0ba..430b79b 100644
--- a/test/MC/ELF/lcomm.s
+++ b/test/MC/ELF/lcomm.s
@@ -1,21 +1,23 @@
-// RUN: llvm-mc -triple i386-pc-linux-gnu %s -filetype=obj -o - | elf-dump | FileCheck %s
+// RUN: llvm-mc -triple i386-pc-linux-gnu %s -filetype=obj -o - | llvm-readobj -t | FileCheck %s
 
 .lcomm A, 5
 .lcomm B, 32 << 20
 
-// CHECK: (('st_name', 0x00000001) # 'A'
-// CHECK:  ('st_value', 0x00000000)
-// CHECK:  ('st_size', 0x00000005)
-// CHECK:  ('st_bind', 0x0)
-// CHECK:  ('st_type', 0x1)
-// CHECK:  ('st_other', 0x00)
-// CHECK:  ('st_shndx', 0x0003)
-// CHECK: ),
-// CHECK: (('st_name', 0x00000003) # 'B'
-// CHECK:  ('st_value', 0x00000005)
-// CHECK:  ('st_size', 0x02000000)
-// CHECK:  ('st_bind', 0x0)
-// CHECK:  ('st_type', 0x1)
-// CHECK:  ('st_other', 0x00)
-// CHECK:  ('st_shndx', 0x0003)
-// CHECK: ),
+// CHECK:        Symbol {
+// CHECK:          Name: A (1)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 5
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: Object
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .bss (0x3)
+// CHECK-NEXT:   }
+// CHECK:        Symbol {
+// CHECK:          Name: B (3)
+// CHECK-NEXT:     Value: 0x5
+// CHECK-NEXT:     Size: 33554432
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: Object
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .bss (0x3)
+// CHECK-NEXT:   }
diff --git a/test/MC/ELF/leb128.s b/test/MC/ELF/leb128.s
index f6daac8..84c5b54 100644
--- a/test/MC/ELF/leb128.s
+++ b/test/MC/ELF/leb128.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sd | FileCheck %s
 
         .sleb128 .Lfoo - .Lbar
 .Lfoo:
@@ -6,14 +6,29 @@
         .fill 126, 1, 0x90
 .Lbar:
 
-// CHECK:     (('sh_name', 0x00000001) # '.text'
-// CHECK-NEXT: ('sh_type', 0x00000001)
-// CHECK-NEXT: ('sh_flags', 0x0000000000000006)
-// CHECK-NEXT: ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT: ('sh_offset', 0x0000000000000040)
-// CHECK-NEXT: ('sh_size', 0x0000000000000081)
-// CHECK-NEXT: ('sh_link', 0x00000000)
-// CHECK-NEXT: ('sh_info', 0x00000000)
-// CHECK-NEXT: ('sh_addralign', 0x0000000000000004)
-// CHECK-NEXT: ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT: ('_section_data', '817f7f90 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90909090 90')
+// CHECK:        Section {
+// CHECK:          Name: .text
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:       SHF_EXECINSTR
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x40
+// CHECK-NEXT:     Size: 129
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 4
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:     SectionData (
+// CHECK-NEXT:       0000: 817F7F90 90909090 90909090 90909090
+// CHECK-NEXT:       0010: 90909090 90909090 90909090 90909090
+// CHECK-NEXT:       0020: 90909090 90909090 90909090 90909090
+// CHECK-NEXT:       0030: 90909090 90909090 90909090 90909090
+// CHECK-NEXT:       0040: 90909090 90909090 90909090 90909090
+// CHECK-NEXT:       0050: 90909090 90909090 90909090 90909090
+// CHECK-NEXT:       0060: 90909090 90909090 90909090 90909090
+// CHECK-NEXT:       0070: 90909090 90909090 90909090 90909090
+// CHECK-NEXT:       0080: 90
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
diff --git a/test/MC/ELF/local-reloc.s b/test/MC/ELF/local-reloc.s
index b32a9cc..4241ba5 100644
--- a/test/MC/ELF/local-reloc.s
+++ b/test/MC/ELF/local-reloc.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck  %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -r -t | FileCheck  %s
 
 // Test that relocations with local symbols are represented as relocations
 // with the section. They should be equivalent, but gas behaves like this.
@@ -6,26 +6,8 @@
 	movl	foo, %r14d
 foo:
 
-// Section number 1 is .text
-// CHECK:        # Section 1
-// CHECK-next:  (('sh_name', 0x00000001) # '.text'
-
-// Relocation refers to symbol number 2
-// CHECK:      ('_relocations', [
-// CHECK-NEXT:  # Relocation 0
-// CHECK-NEXT:   (('r_offset',
-// CHECK-NEXT:    ('r_sym', 0x00000002)
-// CHECK-NEXT:    ('r_type',
-// CHECK-NEXT:    ('r_addend',
-// CHECK-NEXT:   ),
-// CHECK-NEXT:  ])
-
-// Symbol number 2 is section number 1
-// CHECK:    # Symbol 2
-// CHECK-NEXT:    (('st_name', 0x00000000) # ''
-// CHECK-NEXT:     ('st_bind', 0x0)
-// CHECK-NEXT:     ('st_type', 0x3)
-// CHECK-NEXT:     ('st_other', 0x00)
-// CHECK-NEXT:     ('st_shndx', 0x0001)
-// CHECK-NEXT:     ('st_value', 0x0000000000000000)
-// CHECK-NEXT:     ('st_size', 0x0000000000000000)
+// CHECK:      Relocations [
+// CHECK:        Section (1) .text {
+// CHECK-NEXT:     0x{{[^ ]+}} R_X86_64_32S .text 0x{{[^ ]+}}
+// CHECK-NEXT:   }
+// CHECK-NEXT: ]
diff --git a/test/MC/ELF/merge.s b/test/MC/ELF/merge.s
index 11a80ad..d34635a 100644
--- a/test/MC/ELF/merge.s
+++ b/test/MC/ELF/merge.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck  %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -r | FileCheck  %s
 
 // Test that PIC relocations with local symbols in a mergeable section are done
 // with a reference to the symbol. Not sure if this is a linker limitation,
@@ -22,76 +22,13 @@ zed:
         .section	bar,"ax",@progbits
 foo:
 
-// Relocation 0 refers to symbol 1
-// CHECK:       ('_relocations', [
-// CHECK-NEXT:   # Relocation 0
-// CHECK-NEXT:   (('r_offset',
-// CHECK-NEXT:    ('r_sym', 0x00000001)
-// CHECK-NEXT:    ('r_type', 0x00000002
-// CHECK-NEXT:    ('r_addend',
-// CHECK-NEXT:   ),
-
-// Relocation 1 refers to symbol 6
-// CHECK-NEXT:  # Relocation 1
-// CHECK-NEXT: (('r_offset',
-// CHECK-NEXT:  ('r_sym', 0x00000006)
-// CHECK-NEXT:  ('r_type', 0x0000000a)
-// CHECK-NEXT:  ('r_addend',
-// CHECK-NEXT: ),
-
-// Relocation 2 refers to symbol 1
-// CHECK-NEXT:   # Relocation 2
-// CHECK-NEXT:   (('r_offset',
-// CHECK-NEXT:    ('r_sym', 0x00000001)
-// CHECK-NEXT:    ('r_type', 0x0000000a
-// CHECK-NEXT:    ('r_addend',
-// CHECK-NEXT:   ),
-
-// Relocation 3 refers to symbol 2
-// CHECK-NEXT:   # Relocation 3
-// CHECK-NEXT:   (('r_offset',
-// CHECK-NEXT:    ('r_sym', 0x00000002)
-// CHECK-NEXT:    ('r_type', 0x00000004
-// CHECK-NEXT:    ('r_addend',
-// CHECK-NEXT:   ),
-
-// Relocation 4 refers to symbol 2
-// CHECK-NEXT:   # Relocation 4
-// CHECK-NEXT:   (('r_offset',
-// CHECK-NEXT:    ('r_sym', 0x00000002)
-// CHECK-NEXT:    ('r_type', 0x00000009
-// CHECK-NEXT:    ('r_addend',
-// CHECK-NEXT:   ),
-
-// Relocation 5 refers to symbol 8
-// CHECK-NEXT:   # Relocation 5
-// CHECK-NEXT:   (('r_offset', 0x0000000000000023)
-// CHECK-NEXT:    ('r_sym', 0x00000008)
-// CHECK-NEXT:    ('r_type', 0x0000000b)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:  ])
-
-// Section 5 is "sec1"
-// CHECK: # Section 5
-// CHECK-NEXT:  (('sh_name', 0x00000035) # '.sec1'
-
-// Symbol number 1 is .Lfoo
-// CHECK:      # Symbol 1
-// CHECK-NEXT: (('st_name', 0x00000001) # '.Lfoo'
-
-// Symbol number 2 is foo
-// CHECK:      # Symbol 2
-// CHECK-NEXT: (('st_name', 0x00000007) # 'foo'
-
-// Symbol number 6 is section 5
-// CHECK:        # Symbol 6
-// CHECK-NEXT:    (('st_name', 0x00000000) # ''
-// CHECK-NEXT:     ('st_bind', 0x0)
-// CHECK-NEXT:     ('st_type', 0x3)
-// CHECK-NEXT:     ('st_other', 0x00)
-// CHECK-NEXT:     ('st_shndx', 0x0005)
-
-// Symbol number 8 is zed
-// CHECK:        # Symbol 8
-// CHECK-NEXT:    (('st_name', 0x0000000b) # 'zed'
+// CHECK:      Relocations [
+// CHECK-NEXT:   Section (1) .text {
+// CHECK-NEXT:     0x{{[^ ]+}} R_X86_64_PC32    .Lfoo 0x{{[^ ]+}}
+// CHECK-NEXT:     0x{{[^ ]+}} R_X86_64_32      .sec1 0x{{[^ ]+}}
+// CHECK-NEXT:     0x{{[^ ]+}} R_X86_64_32      .Lfoo 0x{{[^ ]+}}
+// CHECK-NEXT:     0x{{[^ ]+}} R_X86_64_PLT32    foo  0x{{[^ ]+}}
+// CHECK-NEXT:     0x{{[^ ]+}} R_X86_64_GOTPCREL foo  0x{{[^ ]+}}
+// CHECK-NEXT:     0x{{[^ ]+}} R_X86_64_32S      zed  0x{{[^ ]+}}
+// CHECK-NEXT:   }
+// CHECK-NEXT: ]
diff --git a/test/MC/ELF/n_bytes.s b/test/MC/ELF/n_bytes.s
index de66322..e658de0 100644
--- a/test/MC/ELF/n_bytes.s
+++ b/test/MC/ELF/n_bytes.s
@@ -1,20 +1,30 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck  %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sd | FileCheck  %s
 
         .2byte 42, 1, 2, 3
         .4byte 42, 1, 2, 3
         .8byte 42, 1, 2, 3
         .int 42, 1, 2, 3
 
-// CHECK:      # Section 1
-// CHECK-NEXT: (('sh_name', 0x00000001) # '.text'
-// CHECK-NEXT:  ('sh_type', 0x00000001)
-// CHECK-NEXT:  ('sh_flags', 0x0000000000000006)
-// CHECK-NEXT:  ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_offset', 0x0000000000000040)
-// CHECK-NEXT:  ('sh_size', 0x0000000000000048)
-// CHECK-NEXT:  ('sh_link', 0x00000000)
-// CHECK-NEXT:  ('sh_info', 0x00000000)
-// CHECK-NEXT:  ('sh_addralign', 0x0000000000000004)
-// CHECK-NEXT:  ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT:  ('_section_data', '2a000100 02000300 2a000000 01000000 02000000 03000000 2a000000 00000000 01000000 00000000 02000000 00000000 03000000 00000000 2a000000 01000000 02000000 03000000')
-// CHECK-NEXT: ),
+// CHECK:        Section {
+// CHECK:          Index: 1
+// CHECK-NEXT:     Name: .text
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:       SHF_EXECINSTR
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x40
+// CHECK-NEXT:     Size: 72
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 4
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:     SectionData (
+// CHECK-NEXT:       0000: 2A000100 02000300 2A000000 01000000
+// CHECK-NEXT:       0010: 02000000 03000000 2A000000 00000000
+// CHECK-NEXT:       0020: 01000000 00000000 02000000 00000000
+// CHECK-NEXT:       0030: 03000000 00000000 2A000000 01000000
+// CHECK-NEXT:       0040: 02000000 03000000
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
diff --git a/test/MC/ELF/noexec.s b/test/MC/ELF/noexec.s
index d8b7b32..33cb8ae 100644
--- a/test/MC/ELF/noexec.s
+++ b/test/MC/ELF/noexec.s
@@ -1,24 +1,26 @@
-// RUN: llvm-mc -mc-no-exec-stack -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck  %s
+// RUN: llvm-mc -mc-no-exec-stack -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -t | FileCheck  %s
 
-// CHECK:       # Section 4
-// CHECK-NEXT:  (('sh_name', 0x0000000c) # '.note.GNU-stack'
-// CHECK-NEXT:   ('sh_type', 0x00000001)
-// CHECK-NEXT:   ('sh_flags', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_offset', 0x0000000000000040)
-// CHECK-NEXT:   ('sh_size', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_link', 0x00000000)
-// CHECK-NEXT:   ('sh_info', 0x00000000)
-// CHECK-NEXT:   ('sh_addralign', 0x0000000000000001)
-// CHECK-NEXT:   ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT:  ),
+// CHECK:        Section {
+// CHECK:          Index: 4
+// CHECK-NEXT:     Name: .note.GNU-stack
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x40
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 1
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:   }
 
-// CHECK:       # Symbol 4
-// CHECK-NEXT:  (('st_name', 0x00000000) # ''
-// CHECK-NEXT:   ('st_bind', 0x0)
-// CHECK-NEXT:   ('st_type', 0x3)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0004)
-// CHECK-NEXT:   ('st_value', 0x0000000000000000)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
-// CHECK-NEXT:  ),
+// CHECK:        Symbol {
+// CHECK:          Name: .note.GNU-stack (0)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: Section
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .note.GNU-stack (0x4)
+// CHECK-NEXT:   }
diff --git a/test/MC/ELF/norelocation.s b/test/MC/ELF/norelocation.s
index c639479..1370382 100644
--- a/test/MC/ELF/norelocation.s
+++ b/test/MC/ELF/norelocation.s
@@ -1,18 +1,26 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck  %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sd -sr | FileCheck  %s
 
         call bar
 bar:
 
-// CHECK: ('sh_name', 0x00000001) # '.text'
-// CHECK-NEXT: ('sh_type', 0x00000001)
-// CHECK-NEXT: ('sh_flags', 0x0000000000000006)
-// CHECK-NEXT: ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT: ('sh_offset', 0x0000000000000040)
-// CHECK-NEXT: ('sh_size', 0x0000000000000005)
-// CHECK-NEXT: ('sh_link', 0x00000000)
-// CHECK-NEXT: ('sh_info', 0x00000000)
-// CHECK-NEXT: ('sh_addralign', 0x0000000000000004)
-// CHECK-NEXT: ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT: ('_section_data', 'e8000000 00')
-// CHECK-NOT: .rela.text
+// CHECK:        Section {
+// CHECK:          Name: .text
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [ (0x6)
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:       SHF_EXECINSTR
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x40
+// CHECK-NEXT:     Size: 5
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 4
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     SectionData (
+// CHECK-NEXT:       0000: E8000000 00
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
 // CHECK: shstrtab
diff --git a/test/MC/ELF/org.s b/test/MC/ELF/org.s
index 3afc364..d878fa1a 100644
--- a/test/MC/ELF/org.s
+++ b/test/MC/ELF/org.s
@@ -1,13 +1,15 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump   | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s | FileCheck %s
 
         .zero 4
 foo:
         .zero 4
         .org foo+16
 
-// CHECK:     (('sh_name', 0x00000001) # '.text'
-// CHECK-NEXT: ('sh_type',
-// CHECK-NEXT: ('sh_flags',
-// CHECK-NEXT: ('sh_addr',
-// CHECK-NEXT: ('sh_offset'
-// CHECK-NEXT: ('sh_size', 0x0000000000000014)
+// CHECK:        Section {
+// CHECK:          Name: .text
+// CHECK-NEXT:     Type:
+// CHECK-NEXT:     Flags [
+// CHECK:          ]
+// CHECK-NEXT:     Address:
+// CHECK-NEXT:     Offset:
+// CHECK-NEXT:     Size: 20
diff --git a/test/MC/ELF/pic-diff.s b/test/MC/ELF/pic-diff.s
index 2c68f6c..cffa0dd 100644
--- a/test/MC/ELF/pic-diff.s
+++ b/test/MC/ELF/pic-diff.s
@@ -1,23 +1,20 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -r -t | FileCheck %s
 
-// CHECK:       ('_relocations', [
-// CHECK-NEXT:    # Relocation 0
-// CHECK-NEXT:    (('r_offset', 0x000000000000000c)
-// CHECK-NEXT:     ('r_sym', 0x00000005)
-// CHECK-NEXT:     ('r_type', 0x00000002)
-// CHECK-NEXT:     ('r_addend', 0x0000000000000008)
-// CHECK-NEXT:    ),
-// CHECK-NEXT:   ])
+// CHECK:      Relocations [
+// CHECK-NEXT:   Section ({{[^ ]+}}) {{[^ ]+}} {
+// CHECK-NEXT:     0xC R_X86_64_PC32 baz 0x8
+// CHECK-NEXT:   }
+// CHECK-NEXT: ]
 
-// CHECK:         # Symbol 5
-// CHECK-NEXT:    (('st_name', 0x00000005) # 'baz'
-// CHECK-NEXT:     ('st_bind', 0x1)
-// CHECK-NEXT:     ('st_type', 0x0)
-// CHECK-NEXT:     ('st_other', 0x00)
-// CHECK-NEXT:     ('st_shndx', 0x0000)
-// CHECK-NEXT:     ('st_value', 0x0000000000000000)
-// CHECK-NEXT:     ('st_size', 0x0000000000000000)
-// CHECK-NEXT:    ),
+// CHECK:        Symbol {
+// CHECK:          Name: baz (5)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
 
 .zero 4
 .data
diff --git a/test/MC/ELF/plt.s b/test/MC/ELF/plt.s
index 7d78e23..604a4bf 100644
--- a/test/MC/ELF/plt.s
+++ b/test/MC/ELF/plt.s
@@ -1,14 +1,11 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -r | FileCheck %s
 
 // Test that this produces a R_X86_64_PLT32.
 
 	jmp	foo@PLT
 
-// CHECK:      ('_relocations', [
-// CHECK-NEXT:   # Relocation 0
-// CHECK-NEXT:    (('r_offset',
-// CHECK-NEXT:     ('r_sym',
-// CHECK-NEXT:     ('r_type', 0x00000004)
-// CHECK-NEXT:     ('r_addend',
-// CHECK-NEXT:    ),
-// CHECK-NEXT:   ])
+// CHECK:      Relocations [
+// CHECK-NEXT:   Section ({{[^ ]+}}) {{[^ ]+}} {
+// CHECK-NEXT:     0x{{[^ ]+}} R_X86_64_PLT32 {{[^ ]+}} 0x{{[^ ]+}}
+// CHECK-NEXT:   }
+// CHECK-NEXT: ]
diff --git a/test/MC/ELF/pr9292.s b/test/MC/ELF/pr9292.s
index 05f377f..a6e78dc 100644
--- a/test/MC/ELF/pr9292.s
+++ b/test/MC/ELF/pr9292.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -t | FileCheck %s
 
 // Test that both foo and bar are undefined.
 
@@ -7,20 +7,21 @@
 mov %eax,bar
 
 
-// CHECK:      (('st_name', 0x00000005) # 'bar'
-// CHECK-NEXT:  ('st_bind', 0x1)
-// CHECK-NEXT:  ('st_type', 0x0)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0000)
-// CHECK-NEXT:  ('st_value', 0x0000000000000000)
-// CHECK-NEXT:  ('st_size', 0x0000000000000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Symbol 5
-// CHECK-NEXT: (('st_name', 0x00000001) # 'foo'
-// CHECK-NEXT:  ('st_bind', 0x1)
-// CHECK-NEXT:  ('st_type', 0x0)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0000)
-// CHECK-NEXT:  ('st_value', 0x0000000000000000)
-// CHECK-NEXT:  ('st_size', 0x0000000000000000)
-// CHECK-NEXT: ),
+// CHECK:        Symbol {
+// CHECK:          Name: bar (5)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: foo (1)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
diff --git a/test/MC/ELF/relax-arith.s b/test/MC/ELF/relax-arith.s
index 3236b41..b814556 100644
--- a/test/MC/ELF/relax-arith.s
+++ b/test/MC/ELF/relax-arith.s
@@ -1,11 +1,16 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck  %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sd | FileCheck  %s
 
 // Test that we correctly relax these instructions into versions that use
 // 16 or 32 bit immediate values.
 
 bar:
-// CHECK: 'imul'
-// CHECK: ('_section_data', '6669db00 0066691c 25000000 00000069 db000000 00691c25 00000000 00000000 4869db00 00000048 691c2500 00000000 000000')
+// CHECK:      Name: imul
+// CHECK:      SectionData (
+// CHECK-NEXT:   0000: 6669DB00 0066691C 25000000 00000069
+// CHECK-NEXT:   0010: DB000000 00691C25 00000000 00000000
+// CHECK-NEXT:   0020: 4869DB00 00000048 691C2500 00000000
+// CHECK-NEXT:   0030: 000000
+// CHECK-NEXT: )
         .section imul
         imul $foo, %bx,  %bx
         imul $foo, bar,  %bx
@@ -14,8 +19,14 @@ bar:
         imul $foo, %rbx, %rbx
         imul $foo, bar,  %rbx
 
-// CHECK: and'
-// CHECK:('_section_data', '6681e300 00668124 25000000 00000081 e3000000 00812425 00000000 00000000 4881e300 00000048 81242500 00000000 000000')
+
+// CHECK:      Name: and
+// CHECK:      SectionData (
+// CHECK-NEXT:   0000: 6681E300 00668124 25000000 00000081
+// CHECK-NEXT:   0010: E3000000 00812425 00000000 00000000
+// CHECK-NEXT:   0020: 4881E300 00000048 81242500 00000000
+// CHECK-NEXT:   0030: 000000
+// CHECK-NEXT: )
         .section and
         and  $foo, %bx
         andw $foo, bar
@@ -24,8 +35,13 @@ bar:
         and  $foo, %rbx
         andq $foo, bar
 
-// CHECK: 'or'
-// CHECK: ('_section_data', '6681cb00 0066810c 25000000 00000081 cb000000 00810c25 00000000 00000000 4881cb00 00000048 810c2500 00000000 000000')
+// CHECK:      Name: or
+// CHECK:      SectionData (
+// CHECK-NEXT:   0000: 6681CB00 0066810C 25000000 00000081
+// CHECK-NEXT:   0010: CB000000 00810C25 00000000 00000000
+// CHECK-NEXT:   0020: 4881CB00 00000048 810C2500 00000000
+// CHECK-NEXT:   0030: 000000
+// CHECK-NEXT: )
         .section or
         or  $foo, %bx
         orw $foo, bar
@@ -34,8 +50,13 @@ bar:
         or  $foo, %rbx
         orq $foo, bar
 
-// CHECK: 'xor'
-// CHECK: ('_section_data', '6681f300 00668134 25000000 00000081 f3000000 00813425 00000000 00000000 4881f300 00000048 81342500 00000000 000000')
+// CHECK:      Name: xor
+// CHECK:      SectionData (
+// CHECK-NEXT:   0000: 6681F300 00668134 25000000 00000081
+// CHECK-NEXT:   0010: F3000000 00813425 00000000 00000000
+// CHECK-NEXT:   0020: 4881F300 00000048 81342500 00000000
+// CHECK-NEXT:   0030: 000000
+// CHECK-NEXT: )
         .section xor
         xor  $foo, %bx
         xorw $foo, bar
@@ -44,8 +65,13 @@ bar:
         xor  $foo, %rbx
         xorq $foo, bar
 
-// CHECK: 'add'
-// CHECK: ('_section_data', '6681c300 00668104 25000000 00000081 c3000000 00810425 00000000 00000000 4881c300 00000048 81042500 00000000 000000')
+// CHECK:      Name: add
+// CHECK:      SectionData (
+// CHECK-NEXT:   0000: 6681C300 00668104 25000000 00000081
+// CHECK-NEXT:   0010: C3000000 00810425 00000000 00000000
+// CHECK-NEXT:   0020: 4881C300 00000048 81042500 00000000
+// CHECK-NEXT:   0030: 000000
+// CHECK-NEXT: )
         .section add
         add  $foo, %bx
         addw $foo, bar
@@ -54,8 +80,13 @@ bar:
         add  $foo, %rbx
         addq $foo, bar
 
-// CHECK: 'sub'
-// CHECK: ('_section_data', '6681eb00 0066812c 25000000 00000081 eb000000 00812c25 00000000 00000000 4881eb00 00000048 812c2500 00000000 000000')
+// CHECK:      Name: sub
+// CHECK:      SectionData (
+// CHECK-NEXT:   0000: 6681EB00 0066812C 25000000 00000081
+// CHECK-NEXT:   0010: EB000000 00812C25 00000000 00000000
+// CHECK-NEXT:   0020: 4881EB00 00000048 812C2500 00000000
+// CHECK-NEXT:   0030: 000000
+// CHECK-NEXT: )
         .section sub
         sub  $foo, %bx
         subw $foo, bar
@@ -64,8 +95,13 @@ bar:
         sub  $foo, %rbx
         subq $foo, bar
 
-// CHECK: 'cmp'
-// CHECK: ('_section_data', '6681fb00 0066813c 25000000 00000081 fb000000 00813c25 00000000 00000000 4881fb00 00000048 813c2500 00000000 000000')
+// CHECK:      Name: cmp
+// CHECK:      SectionData (
+// CHECK-NEXT:   0000: 6681FB00 0066813C 25000000 00000081
+// CHECK-NEXT:   0010: FB000000 00813C25 00000000 00000000
+// CHECK-NEXT:   0020: 4881FB00 00000048 813C2500 00000000
+// CHECK-NEXT:   0030: 000000
+// CHECK-NEXT: )
         .section cmp
         cmp  $foo, %bx
         cmpw $foo, bar
diff --git a/test/MC/ELF/relax.s b/test/MC/ELF/relax.s
index 0b5d24f..49ee8e2 100644
--- a/test/MC/ELF/relax.s
+++ b/test/MC/ELF/relax.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck  %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sd -t | FileCheck  %s
 
 // Test that we do not relax these.
 
@@ -11,17 +11,23 @@ foo:
         jmp foo
         jmp zed
 
-// CHECK: ('sh_name', 0x00000001) # '.text'
-// CHECK-NEXT: ('sh_type', 0x00000001)
-// CHECK-NEXT: ('sh_flags', 0x0000000000000006)
-// CHECK-NEXT: ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT: ('sh_offset', 0x0000000000000040)
-// CHECK-NEXT: ('sh_size', 0x0000000000000006)
-// CHECK-NEXT: ('sh_link', 0x00000000)
-// CHECK-NEXT: ('sh_info', 0x00000000)
-// CHECK-NEXT: ('sh_addralign', 0x0000000000000004)
-// CHECK-NEXT: ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT: ('_section_data', 'ebfeebfc ebfa')
-
-// CHECK:       # Symbol 6
-// CHECK-NEXT: (('st_name', 0x00000005) # 'foo'
+// CHECK:        Section {
+// CHECK:          Name: .text
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:       SHF_EXECINSTR
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x40
+// CHECK-NEXT:     Size: 6
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 4
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:     SectionData (
+// CHECK-NEXT:       0000: EBFEEBFC EBFA
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
+// CHECK:        Symbol {
+// CHECK:          Name: foo (5)
diff --git a/test/MC/ELF/relocation-386.s b/test/MC/ELF/relocation-386.s
index 85da2eb..24d0172 100644
--- a/test/MC/ELF/relocation-386.s
+++ b/test/MC/ELF/relocation-386.s
@@ -1,205 +1,86 @@
-// RUN: llvm-mc -filetype=obj -triple i386-pc-linux-gnu %s -o - | elf-dump | FileCheck  %s
+// RUN: llvm-mc -filetype=obj -triple i386-pc-linux-gnu %s -o - | llvm-readobj -r -t | FileCheck  %s
 
 // Test that we produce the correct relocation types and that the relocations
 // correctly point to the section or the symbol.
 
-// CHECK:      # Relocation 0
-// CHECK-NEXT: (('r_offset', 0x00000002)
-// CHECK-NEXT:  ('r_sym', 0x000001)
-// CHECK-NEXT:  ('r_type', 0x09)
-// CHECK-NEXT: ),
-// CHECK-NEXT:  # Relocation 1
-// CHECK-NEXT: (('r_offset',
-// CHECK-NEXT:  ('r_sym',
-// CHECK-NEXT:  ('r_type', 0x04)
-// CHECK-NEXT: ),
-// CHECK-NEXT:  # Relocation 2
-// CHECK-NEXT: (('r_offset',
-// CHECK-NEXT:  ('r_sym',
-// CHECK-NEXT:  ('r_type', 0x0a)
-// CHECK-NEXT: ),
-
+// CHECK:      Relocations [
+// CHECK-NEXT:   Section (1) .text {
+// CHECK-NEXT:     0x2          R_386_GOTOFF     .Lfoo 0x0
+// CHECK-NEXT:     0x{{[^ ]+}}  R_386_PLT32      bar2 0x0
+// CHECK-NEXT:     0x{{[^ ]+}}  R_386_GOTPC      _GLOBAL_OFFSET_TABLE_ 0x0
 // Relocation 3 (bar3@GOTOFF) is done with symbol 7 (bss)
-// CHECK-NEXT:  # Relocation 3
-// CHECK-NEXT: (('r_offset',
-// CHECK-NEXT:  ('r_sym', 0x000007
-// CHECK-NEXT:  ('r_type',
-// CHECK-NEXT: ),
-
+// CHECK-NEXT:     0x{{[^ ]+}}  R_386_GOTOFF     .bss 0x0
 // Relocation 4 (bar2@GOT) is of type R_386_GOT32
-// CHECK-NEXT:  # Relocation 4
-// CHECK-NEXT: (('r_offset',
-// CHECK-NEXT:  ('r_sym',
-// CHECK-NEXT:  ('r_type', 0x03
-// CHECK-NEXT: ),
+// CHECK-NEXT:     0x{{[^ ]+}}  R_386_GOT32      bar2j 0x0
 
 // Relocation 5 (foo@TLSGD) is of type R_386_TLS_GD
-// CHECK-NEXT: # Relocation 5
-// CHECK-NEXT: (('r_offset', 0x00000020)
-// CHECK-NEXT:  ('r_sym', 0x00000d)
-// CHECK-NEXT:  ('r_type', 0x12)
-// CHECK-NEXT: ),
-
+// CHECK-NEXT:     0x20         R_386_TLS_GD     foo 0x0
 // Relocation 6 ($foo@TPOFF) is of type R_386_TLS_LE_32
-// CHECK-NEXT: # Relocation 6
-// CHECK-NEXT: (('r_offset', 0x00000025)
-// CHECK-NEXT:  ('r_sym', 0x00000d)
-// CHECK-NEXT:  ('r_type', 0x22)
-// CHECK-NEXT: ),
-
+// CHECK-NEXT:     0x25         R_386_TLS_LE_32  foo 0x0
 // Relocation 7 (foo@INDNTPOFF) is of type R_386_TLS_IE
-// CHECK-NEXT: # Relocation 7
-// CHECK-NEXT: (('r_offset', 0x0000002b)
-// CHECK-NEXT:  ('r_sym', 0x00000d)
-// CHECK-NEXT:  ('r_type', 0x0f)
-// CHECK-NEXT: ),
-
+// CHECK-NEXT:     0x2B         R_386_TLS_IE     foo 0x0
 // Relocation 8 (foo@NTPOFF) is of type R_386_TLS_LE
-// CHECK-NEXT: # Relocation 8
-// CHECK-NEXT: (('r_offset', 0x00000031)
-// CHECK-NEXT:  ('r_sym', 0x00000d)
-// CHECK-NEXT:  ('r_type', 0x11)
-// CHECK-NEXT: ),
-
+// CHECK-NEXT:     0x31         R_386_TLS_LE     foo 0x0
 // Relocation 9 (foo@GOTNTPOFF) is of type R_386_TLS_GOTIE
-// CHECK-NEXT: # Relocation 9
-// CHECK-NEXT: (('r_offset', 0x00000037)
-// CHECK-NEXT:  ('r_sym', 0x00000d)
-// CHECK-NEXT:  ('r_type', 0x10)
-// CHECK-NEXT: ),
-
+// CHECK-NEXT:     0x37         R_386_TLS_GOTIE  foo 0x0
 // Relocation 10 (foo@TLSLDM) is of type R_386_TLS_LDM
-// CHECK-NEXT: # Relocation 10
-// CHECK-NEXT: (('r_offset', 0x0000003d)
-// CHECK-NEXT:  ('r_sym', 0x00000d)
-// CHECK-NEXT:  ('r_type', 0x13)
-// CHECK-NEXT: ),
-
+// CHECK-NEXT:     0x3D         R_386_TLS_LDM    foo 0x0
 // Relocation 11 (foo@DTPOFF) is of type R_386_TLS_LDO_32
-// CHECK-NEXT: # Relocation 11
-// CHECK-NEXT: (('r_offset', 0x00000043)
-// CHECK-NEXT:  ('r_sym', 0x00000d)
-// CHECK-NEXT:  ('r_type', 0x20)
-// CHECK-NEXT: ),
+// CHECK-NEXT:     0x43         R_386_TLS_LDO_32 foo 0x0
 // Relocation 12 (calll 4096) is of type R_386_PC32
-// CHECK-NEXT: # Relocation 12
-// CHECK-NEXT: (('r_offset', 0x00000048)
-// CHECK-NEXT:  ('r_sym', 0x000000)
-// CHECK-NEXT:  ('r_type', 0x02)
-// CHECK-NEXT: ),
+// CHECK-NEXT:     0x48         R_386_PC32       - 0x0
 // Relocation 13 (zed@GOT) is of type R_386_GOT32 and uses the symbol
-// CHECK-NEXT: # Relocation 13
-// CHECK-NEXT: (('r_offset', 0x0000004e)
-// CHECK-NEXT:  ('r_sym', 0x000004)
-// CHECK-NEXT:  ('r_type', 0x03)
-// CHECK-NEXT: ),
+// CHECK-NEXT:     0x4E         R_386_GOT32      zed 0x0
 // Relocation 14 (zed@GOTOFF) is of type R_386_GOTOFF and uses the symbol
-// CHECK-NEXT: # Relocation 14
-// CHECK-NEXT: (('r_offset', 0x00000054)
-// CHECK-NEXT:  ('r_sym', 0x000004)
-// CHECK-NEXT:  ('r_type', 0x09)
-// CHECK-NEXT: ),
+// CHECK-NEXT:     0x54         R_386_GOTOFF     zed 0x0
 // Relocation 15 (zed@INDNTPOFF) is of type R_386_TLS_IE and uses the symbol
-// CHECK-NEXT: # Relocation 15
-// CHECK-NEXT: (('r_offset', 0x0000005a)
-// CHECK-NEXT:  ('r_sym', 0x000004)
-// CHECK-NEXT:  ('r_type', 0x0f)
-// CHECK-NEXT: ),
+// CHECK-NEXT:     0x5A         R_386_TLS_IE     zed 0x0
 // Relocation 16 (zed@NTPOFF) is of type R_386_TLS_LE and uses the symbol
-// CHECK-NEXT: # Relocation 16
-// CHECK-NEXT: (('r_offset', 0x00000060)
-// CHECK-NEXT:  ('r_sym', 0x000004)
-// CHECK-NEXT:  ('r_type', 0x11)
-// CHECK-NEXT: ),
+// CHECK-NEXT:     0x60         R_386_TLS_LE     zed 0x0
 // Relocation 17 (zed@GOTNTPOFF) is of type R_386_TLS_GOTIE and uses the symbol
-// CHECK-NEXT: # Relocation 17
-// CHECK-NEXT: (('r_offset', 0x00000066)
-// CHECK-NEXT:  ('r_sym', 0x000004)
-// CHECK-NEXT:  ('r_type', 0x10)
-// CHECK-NEXT: ),
+// CHECK-NEXT:     0x66         R_386_TLS_GOTIE  zed 0x0
 // Relocation 18 (zed@PLT) is of type R_386_PLT32 and uses the symbol
-// CHECK-NEXT: # Relocation 18
-// CHECK-NEXT: (('r_offset', 0x0000006b)
-// CHECK-NEXT:  ('r_sym', 0x000004)
-// CHECK-NEXT:  ('r_type', 0x04)
-// CHECK-NEXT: ),
+// CHECK-NEXT:     0x6B         R_386_PLT32      zed 0x0
 // Relocation 19 (zed@TLSGD) is of type R_386_TLS_GD and uses the symbol
-// CHECK-NEXT: # Relocation 19
-// CHECK-NEXT: (('r_offset', 0x00000071)
-// CHECK-NEXT:  ('r_sym', 0x000004)
-// CHECK-NEXT:  ('r_type', 0x12)
-// CHECK-NEXT: ),
+// CHECK-NEXT:     0x71         R_386_TLS_GD     zed 0x0
 // Relocation 20 (zed@TLSLDM) is of type R_386_TLS_LDM and uses the symbol
-// CHECK-NEXT: # Relocation 20
-// CHECK-NEXT: (('r_offset', 0x00000077)
-// CHECK-NEXT:  ('r_sym', 0x000004)
-// CHECK-NEXT:  ('r_type', 0x13)
-// CHECK-NEXT: ),
+// CHECK-NEXT:     0x77         R_386_TLS_LDM    zed 0x0
 // Relocation 21 (zed@TPOFF) is of type R_386_TLS_LE_32 and uses the symbol
-// CHECK-NEXT:# Relocation 21
-// CHECK-NEXT: (('r_offset', 0x0000007d)
-// CHECK-NEXT:  ('r_sym', 0x000004)
-// CHECK-NEXT:  ('r_type', 0x22)
-// CHECK-NEXT: ),
+// CHECK-NEXT:     0x7D         R_386_TLS_LE_32  zed 0x0
 // Relocation 22 (zed@DTPOFF) is of type R_386_TLS_LDO_32 and uses the symbol
-// CHECK-NEXT: Relocation 22
-// CHECK-NEXT: (('r_offset', 0x00000083)
-// CHECK-NEXT:  ('r_sym', 0x000004)
-// CHECK-NEXT:  ('r_type', 0x20)
-// CHECK-NEXT: ),
+// CHECK-NEXT:     0x83         R_386_TLS_LDO_32 zed 0x0
 // Relocation 23 ($bar) is of type R_386_32 and uses the section
-// CHECK-NEXT: Relocation 23
-// CHECK-NEXT: (('r_offset',
-// CHECK-NEXT:  ('r_sym',
-// CHECK-NEXT:  ('r_type', 0x01)
-// CHECK-NEXT: ),
+// CHECK-NEXT:     0x{{[^ ]+}}  R_386_32         .text 0x0
 // Relocation 24 (foo@GOTTPOFF(%edx)) is of type R_386_TLS_IE_32 and uses the
 // symbol
-// CHECK-NEXT: Relocation 24
-// CHECK-NEXT: (('r_offset', 0x0000008e)
-// CHECK-NEXT:  ('r_sym', 0x00000d)
-// CHECK-NEXT:  ('r_type', 0x21)
-// CHECK-NEXT: ),
+// CHECK-NEXT:     0x8E         R_386_TLS_IE_32  foo 0x0
 // Relocation 25 (_GLOBAL_OFFSET_TABLE_-bar2) is of type R_386_GOTPC.
-// CHECK-NEXT: Relocation 25
-// CHECK-NEXT: (('r_offset', 0x00000094)
-// CHECK-NEXT:  ('r_sym', 0x00000b)
-// CHECK-NEXT:  ('r_type', 0x0a)
-// CHECK-NEXT: ),
+// CHECK-NEXT:     0x94         R_386_GOTPC      _GLOBAL_OFFSET_TABLE_ 0x0
 // Relocation 26 (und_symbol-bar2) is of type R_386_PC32
-// CHECK-NEXT: Relocation 26
-// CHECK-NEXT: (('r_offset', 0x0000009a)
-// CHECK-NEXT:  ('r_sym', 0x00000e)
-// CHECK-NEXT:  ('r_type', 0x02)
-// CHECK-NEXT: ),
-
-// Section 4 is bss
-// CHECK:      # Section 4
-// CHECK-NEXT: (('sh_name', 0x0000000b) # '.bss'
-
-// CHECK:      # Symbol 1
-// CHECK-NEXT: (('st_name', 0x00000005) # '.Lfoo'
+// CHECK-NEXT:     0x9A         R_386_PC32       und_symbol 0x0
+// CHECK-NEXT:   }
+// CHECK-NEXT: ]
 
 // Symbol 4 is zed
-// CHECK:      # Symbol 4
-// CHECK-NEXT: (('st_name', 0x00000035) # 'zed'
-// CHECK-NEXT:  ('st_value', 0x00000000)
-// CHECK-NEXT:  ('st_size', 0x00000000)
-// CHECK-NEXT:  ('st_bind', 0x0)
-// CHECK-NEXT:  ('st_type', 0x6)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0005)
-
+// CHECK:        Symbol {
+// CHECK:          Name: zed (53)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: TLS
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: zedsec (0x5)
+// CHECK-NEXT:   }
 // Symbol 7 is section 4
-// CHECK:      # Symbol 7
-// CHECK-NEXT: (('st_name', 0x00000000) # ''
-// CHECK-NEXT:  ('st_value', 0x00000000)
-// CHECK-NEXT:  ('st_size', 0x00000000)
-// CHECK-NEXT:  ('st_bind', 0x0)
-// CHECK-NEXT:  ('st_type', 0x3)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0004)
-
+// CHECK:        Symbol {
+// CHECK:          Name: .bss (0)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: Section
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .bss (0x4)
+// CHECK-NEXT:   }
 
         .text
 bar:
diff --git a/test/MC/ELF/relocation-pc.s b/test/MC/ELF/relocation-pc.s
index b6279c3..551f5ff 100644
--- a/test/MC/ELF/relocation-pc.s
+++ b/test/MC/ELF/relocation-pc.s
@@ -1,33 +1,32 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck  %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sr | FileCheck  %s
 
 // Test that we produce the correct relocation.
 
 	loope	0                 # R_X86_64_PC8
 	jmp	-256              # R_X86_64_PC32
 
-// CHECK:      # Section 2
-// CHECK-NEXT: (('sh_name', 0x00000001) # '.rela.text'
-// CHECK-NEXT:  ('sh_type', 0x00000004)
-// CHECK-NEXT:  ('sh_flags', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_offset', 0x00000000000002e8)
-// CHECK-NEXT:  ('sh_size', 0x0000000000000030)
-// CHECK-NEXT:  ('sh_link', 0x00000006)
-// CHECK-NEXT:  ('sh_info', 0x00000001)
-// CHECK-NEXT:  ('sh_addralign', 0x0000000000000008)
-// CHECK-NEXT:  ('sh_entsize', 0x0000000000000018)
-// CHECK-NEXT:  ('_relocations', [
-// CHECK-NEXT:   # Relocation 0
-// CHECK-NEXT:   (('r_offset', 0x0000000000000001)
-// CHECK-NEXT:    ('r_sym', 0x00000000)
-// CHECK-NEXT:    ('r_type', 0x0000000f)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:   # Relocation 1
-// CHECK-NEXT:   (('r_offset', 0x0000000000000003)
-// CHECK-NEXT:    ('r_sym', 0x00000000)
-// CHECK-NEXT:    ('r_type', 0x00000002)
-// CHECK-NEXT:    ('r_addend', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:  ])
-// CHECK-NEXT: ),
+// CHECK:        Section {
+// CHECK:          Index: 1
+// CHECK-NEXT:     Name: .text
+// CHECK:          Relocations [
+// CHECK-NEXT:       0x1 R_X86_64_PC8 - 0x0
+// CHECK-NEXT:       0x3 R_X86_64_PC32 - 0x0
+// CHECK-NEXT:     ]
+// CHECK-NEXT:   }
+
+// CHECK:        Section {
+// CHECK:          Index: 2
+// CHECK-NEXT:     Name: .rela.text
+// CHECK-NEXT:     Type: SHT_RELA
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x2E8
+// CHECK-NEXT:     Size: 48
+// CHECK-NEXT:     Link: 6
+// CHECK-NEXT:     Info: 1
+// CHECK-NEXT:     AddressAlignment: 8
+// CHECK-NEXT:     EntrySize: 24
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:   }
diff --git a/test/MC/ELF/relocation.s b/test/MC/ELF/relocation.s
index 5db213b..19bcc18 100644
--- a/test/MC/ELF/relocation.s
+++ b/test/MC/ELF/relocation.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck  %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sr -t | FileCheck  %s
 
 // Test that we produce the correct relocation.
 
@@ -20,102 +20,33 @@ bar:
         addq	$bar,%rax         # R_X86_64_32S
 
 
-// CHECK:  # Section 1
-// CHECK: (('sh_name', 0x00000006) # '.text'
-
-// CHECK: # Relocation 0
-// CHECK-NEXT:  (('r_offset', 0x0000000000000001)
-// CHECK-NEXT:   ('r_sym', 0x00000002)
-// CHECK-NEXT:   ('r_type', 0x0000000a)
-// CHECK-NEXT:   ('r_addend',
-
-// CHECK: # Relocation 1
-// CHECK-NEXT:  (('r_offset', 0x0000000000000008)
-// CHECK-NEXT:   ('r_sym', 0x00000002)
-// CHECK-NEXT:   ('r_type', 0x0000000b)
-// CHECK-NEXT:   ('r_addend',
-
-// CHECK: # Relocation 2
-// CHECK-NEXT:  (('r_offset', 0x0000000000000013)
-// CHECK-NEXT:   ('r_sym', 0x00000002)
-// CHECK-NEXT:   ('r_type', 0x0000000b)
-// CHECK-NEXT:   ('r_addend',
-
-// CHECK: # Relocation 3
-// CHECK-NEXT:  (('r_offset', 0x000000000000001a)
-// CHECK-NEXT:   ('r_sym', 0x00000002)
-// CHECK-NEXT:   ('r_type', 0x0000000b)
-// CHECK-NEXT:   ('r_addend',
-
-// CHECK: # Relocation 4
-// CHECK-NEXT:  (('r_offset', 0x0000000000000022)
-// CHECK-NEXT:   ('r_sym', 0x00000002)
-// CHECK-NEXT:   ('r_type', 0x0000000b)
-// CHECK-NEXT:   ('r_addend',
-
-// CHECK: # Relocation 5
-// CHECK-NEXT:  (('r_offset', 0x0000000000000026)
-// CHECK-NEXT:   ('r_sym', 0x00000002)
-// CHECK-NEXT:   ('r_type', 0x0000000a)
-// CHECK-NEXT:   ('r_addend',
-
-// CHECK: # Relocation 6
-// CHECK-NEXT:  (('r_offset', 0x000000000000002d)
-// CHECK-NEXT:   ('r_sym', 0x00000006)
-// CHECK-NEXT:   ('r_type', 0x00000016)
-// CHECK-NEXT:   ('r_addend', 0xfffffffffffffffc)
-
-// CHECK:  # Relocation 7
-// CHECK-NEXT:  (('r_offset', 0x0000000000000034)
-// CHECK-NEXT:   ('r_sym', 0x00000006)
-// CHECK-NEXT:   ('r_type', 0x00000013)
-// CHECK-NEXT:   ('r_addend', 0xfffffffffffffffc)
-
-// CHECK:  # Relocation 8
-// CHECK-NEXT:  (('r_offset', 0x000000000000003b)
-// CHECK-NEXT:   ('r_sym', 0x00000006)
-// CHECK-NEXT:   ('r_type', 0x00000017)
-// CHECK-NEXT:   ('r_addend', 0x0000000000000000)
-
-// CHECK:  # Relocation 9
-// CHECK-NEXT:  (('r_offset', 0x0000000000000042)
-// CHECK-NEXT:   ('r_sym', 0x00000006)
-// CHECK-NEXT:   ('r_type', 0x00000014)
-// CHECK-NEXT:   ('r_addend', 0xfffffffffffffffc)
-
-// CHECK:  # Relocation 10
-// CHECK-NEXT:  (('r_offset', 0x0000000000000049)
-// CHECK-NEXT:   ('r_sym', 0x00000006)
-// CHECK-NEXT:   ('r_type', 0x00000015)
-// CHECK-NEXT:   ('r_addend', 0x0000000000000000)
-
-// CHECK: # Relocation 11
-// CHECK-NEXT:  (('r_offset', 0x000000000000004e)
-// CHECK-NEXT:   ('r_sym', 0x00000002)
-// CHECK-NEXT:   ('r_type', 0x0000000b)
-// CHECK-NEXT:   ('r_addend', 0x0000000000000000)
-
-// CHECK: # Relocation 12
-// CHECK-NEXT: (('r_offset', 0x0000000000000055)
-// CHECK-NEXT:  ('r_sym', 0x00000006)
-// CHECK-NEXT:  ('r_type', 0x00000002)
-// CHECK-NEXT:  ('r_addend', 0xfffffffffffffffc)
-
-// CHECK: # Relocation 13
-// CHECK-NEXT: (('r_offset', 0x000000000000005c)
-// CHECK-NEXT:  ('r_sym', 0x00000006)
-// CHECK-NEXT:  ('r_type', 0x00000002)
-// CHECK-NEXT:  ('r_addend', 0x000000000000005c)
-
-// CHECK: # Relocation 14
-// CHECK-NEXT: (('r_offset', 0x0000000000000063)
-// CHECK-NEXT:  ('r_sym', 0x00000002)
-// CHECK-NEXT:  ('r_type', 0x0000000b)
-// CHECK-NEXT:  ('r_addend', 0x0000000000000000)
-
-// CHECK:   # Symbol 2
-// CHECK: (('st_name', 0x00000000) # ''
-// CHECK:  ('st_bind', 0x0)
-// CHECK:  ('st_type', 0x3)
-// CHECK:  ('st_other', 0x00)
-// CHECK:  ('st_shndx', 0x0001)
+// CHECK:        Section {
+// CHECK:          Name: .text
+// CHECK:          Relocations [
+// CHECK-NEXT:       0x1 R_X86_64_32        .text
+// CHECK-NEXT:       0x8 R_X86_64_32S       .text
+// CHECK-NEXT:       0x13 R_X86_64_32S      .text
+// CHECK-NEXT:       0x1A R_X86_64_32S      .text
+// CHECK-NEXT:       0x22 R_X86_64_32S      .text
+// CHECK-NEXT:       0x26 R_X86_64_32       .text
+// CHECK-NEXT:       0x2D R_X86_64_GOTTPOFF foo 0xFFFFFFFFFFFFFFFC
+// CHECK-NEXT:       0x34 R_X86_64_TLSGD    foo 0xFFFFFFFFFFFFFFFC
+// CHECK-NEXT:       0x3B R_X86_64_TPOFF32  foo 0x0
+// CHECK-NEXT:       0x42 R_X86_64_TLSLD    foo 0xFFFFFFFFFFFFFFFC
+// CHECK-NEXT:       0x49 R_X86_64_DTPOFF32 foo 0x0
+// CHECK-NEXT:       0x4E R_X86_64_32S      .text 0x0
+// CHECK-NEXT:       0x55 R_X86_64_PC32     foo 0xFFFFFFFFFFFFFFFC
+// CHECK-NEXT:       0x5C R_X86_64_PC32     foo 0x5C
+// CHECK-NEXT:       0x63 R_X86_64_32S      .text 0x0
+// CHECK-NEXT:     ]
+// CHECK-NEXT:   }
+
+// CHECK:        Symbol {
+// CHECK:          Name: .text (0)
+// CHECK-NEXT:     Value:
+// CHECK-NEXT:     Size:
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: Section
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text (0x1)
+// CHECK-NEXT:   }
diff --git a/test/MC/ELF/rename.s b/test/MC/ELF/rename.s
index 241aa05..c50910b 100644
--- a/test/MC/ELF/rename.s
+++ b/test/MC/ELF/rename.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sr -t | FileCheck %s
 
 // When doing a rename, all the checks for where the relocation should go
 // should be performed with the original symbol. Only if we decide to relocate
@@ -16,31 +16,33 @@ defined3:
         .global defined1
 
 // Section 1 is .text
-// CHECK:      # Section 1
-// CHECK-NEXT: (('sh_name', 0x00000006) # '.text'
-// CHECK-NEXT:  ('sh_type', 0x00000001)
-// CHECK-NEXT:  ('sh_flags', 0x0000000000000006)
-// CHECK-NEXT:  ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_offset', 0x0000000000000040)
-// CHECK-NEXT:  ('sh_size', 0x0000000000000004)
-// CHECK-NEXT:  ('sh_link', 0x00000000)
-// CHECK-NEXT:  ('sh_info', 0x00000000)
-// CHECK-NEXT:  ('sh_addralign', 0x0000000000000004)
-// CHECK-NEXT:  ('sh_entsize', 0x0000000000000000)
-
-// The relocation uses symbol 2
-// CHECK:      # Relocation 0
-// CHECK-NEXT: (('r_offset', 0x0000000000000000)
-// CHECK-NEXT:  ('r_sym', 0x00000002)
-// CHECK-NEXT:  ('r_type', 0x0000000a)
-// CHECK-NEXT:  ('r_addend', 0x0000000000000000)
+// CHECK:        Section {
+// CHECK:          Index: 1
+// CHECK-NEXT:     Name: .text
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:       SHF_EXECINSTR
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x40
+// CHECK-NEXT:     Size: 4
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 4
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:     Relocations [
+// CHECK-NEXT:       0x0 R_X86_64_32 .text 0x0
+// CHECK-NEXT:     ]
+// CHECK-NEXT:   }
 
 // Symbol 2 is section 1
-// CHECK:      # Symbol 2
-// CHECK-NEXT: (('st_name', 0x00000000) # ''
-// CHECK-NEXT:  ('st_bind', 0x0)
-// CHECK-NEXT:  ('st_type', 0x3)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0001)
-// CHECK-NEXT:  ('st_value', 0x0000000000000000)
-// CHECK-NEXT:  ('st_size', 0x0000000000000000)
+// CHECK:        Symbol {
+// CHECK:          Name: .text (0)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: Section
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text (0x1)
+// CHECK-NEXT:   }
diff --git a/test/MC/ELF/section.s b/test/MC/ELF/section.s
index c71e1a7..a679403 100644
--- a/test/MC/ELF/section.s
+++ b/test/MC/ELF/section.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s | FileCheck %s
 
 // Test that these names are accepted.
 
@@ -7,10 +7,10 @@
 .section	.note.GNU-,"",@progbits
 .section	-.note.GNU,"",@progbits
 
-// CHECK: ('sh_name', 0x00000038) # '.note.GNU-stack'
-// CHECK: ('sh_name', 0x0000008f) # '.note.GNU-stack2'
-// CHECK: ('sh_name', 0x000000a0) # '.note.GNU-'
-// CHECK: ('sh_name', 0x00000084) # '-.note.GNU'
+// CHECK: Name: .note.GNU-stack (56)
+// CHECK: Name: .note.GNU-stack2 (143)
+// CHECK: Name: .note.GNU- (160)
+// CHECK: Name: -.note.GNU (132)
 
 // Test that the defaults are used
 
@@ -19,66 +19,81 @@
 .section	.rodata
 .section	zed, ""
 
-// CHECK:      (('sh_name', 0x00000012) # '.init'
-// CHECK-NEXT:  ('sh_type', 0x00000001)
-// CHECK-NEXT:  ('sh_flags', 0x0000000000000006)
-// CHECK-NEXT:  ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_offset', 0x0000000000000050)
-// CHECK-NEXT:  ('sh_size', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_link', 0x00000000)
-// CHECK-NEXT:  ('sh_info', 0x00000000)
-// CHECK-NEXT:  ('sh_addralign', 0x0000000000000001)
-// CHECK-NEXT:  ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Section 11
-// CHECK-NEXT: (('sh_name', 0x00000048) # '.fini'
-// CHECK-NEXT:  ('sh_type', 0x00000001)
-// CHECK-NEXT:  ('sh_flags', 0x0000000000000006)
-// CHECK-NEXT:  ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_offset', 0x0000000000000050)
-// CHECK-NEXT:  ('sh_size', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_link', 0x00000000)
-// CHECK-NEXT:  ('sh_info', 0x00000000)
-// CHECK-NEXT:  ('sh_addralign', 0x0000000000000001)
-// CHECK-NEXT:  ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Section 12
-// CHECK-NEXT: (('sh_name', 0x00000076) # '.rodata'
-// CHECK-NEXT:  ('sh_type', 0x00000001)
-// CHECK-NEXT:  ('sh_flags', 0x0000000000000002)
-// CHECK-NEXT:  ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_offset', 0x0000000000000050)
-// CHECK-NEXT:  ('sh_size', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_link', 0x00000000)
-// CHECK-NEXT:  ('sh_info', 0x00000000)
-// CHECK-NEXT:  ('sh_addralign', 0x0000000000000001)
-// CHECK-NEXT:  ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Section 13
-// CHECK-NEXT: (('sh_name', 0x00000058) # 'zed'
-// CHECK-NEXT:  ('sh_type', 0x00000001)
-// CHECK-NEXT:  ('sh_flags', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_offset', 0x0000000000000050)
-// CHECK-NEXT:  ('sh_size', 0x0000000000000000)
-// CHECK-NEXT:  ('sh_link', 0x00000000)
-// CHECK-NEXT:  ('sh_info', 0x00000000)
-// CHECK-NEXT:  ('sh_addralign', 0x0000000000000001)
-// CHECK-NEXT:  ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT: ),
+// CHECK:        Section {
+// CHECK:          Name: .init
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:       SHF_EXECINSTR
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x50
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 1
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Section {
+// CHECK-NEXT:     Index: 11
+// CHECK-NEXT:     Name: .fini
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:       SHF_EXECINSTR
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x50
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 1
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Section {
+// CHECK-NEXT:     Index: 12
+// CHECK-NEXT:     Name: .rodata
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x50
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 1
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Section {
+// CHECK-NEXT:     Index: 13
+// CHECK-NEXT:     Name: zed
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x50
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 1
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:   }
 
 .section	.note.test,"",@note
-// CHECK:       (('sh_name', 0x00000007) # '.note.test'
-// CHECK-NEXT:   ('sh_type', 0x00000007)
-// CHECK-NEXT:   ('sh_flags', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_offset', 0x0000000000000050)
-// CHECK-NEXT:   ('sh_size', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_link', 0x00000000)
-// CHECK-NEXT:   ('sh_info', 0x00000000)
-// CHECK-NEXT:   ('sh_addralign', 0x0000000000000001)
-// CHECK-NEXT:   ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT:  ),
+// CHECK:        Section {
+// CHECK:          Name: .note.test
+// CHECK-NEXT:     Type: SHT_NOTE
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x50
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 1
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:   }
 
 // Test that we can parse these
 foo:
@@ -90,21 +105,26 @@ bar:
 
 .section .eh_frame,"a",@unwind
 
-// CHECK:       (('sh_name', 0x0000004e) # '.eh_frame'
-// CHECK-NEXT:   ('sh_type', 0x70000001)
-// CHECK-NEXT:   ('sh_flags', 0x0000000000000002)
-// CHECK-NEXT:   ('sh_addr', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_offset', 0x0000000000000050)
-// CHECK-NEXT:   ('sh_size', 0x0000000000000000)
-// CHECK-NEXT:   ('sh_link', 0x00000000)
-// CHECK-NEXT:   ('sh_info', 0x00000000)
-// CHECK-NEXT:   ('sh_addralign', 0x0000000000000001)
-// CHECK-NEXT:   ('sh_entsize', 0x0000000000000000)
-// CHECK-NEXT:  ),
+// CHECK:        Section {
+// CHECK:          Name: .eh_frame
+// CHECK-NEXT:     Type: SHT_X86_64_UNWIND
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x50
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 1
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:   }
 
 // Test that we handle the strings like gas
 .section bar-"foo"
 .section "foo"
 
-// CHECK: ('sh_name', 0x000000ab) # 'bar-"foo"'
-// CHECK: ('sh_name', 0x00000034) # 'foo'
+// CHECK:        Section {
+// CHECK:          Name: bar-"foo" (171)
+// CHECK:        Section {
+// CHECK:          Name: foo (52)
diff --git a/test/MC/ELF/set.s b/test/MC/ELF/set.s
index 2258b19..f6965a5 100644
--- a/test/MC/ELF/set.s
+++ b/test/MC/ELF/set.s
@@ -1,17 +1,18 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck  %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -t | FileCheck  %s
 
 // Test that we emit the correct value.
 
 .set kernbase,0xffffffff80000000
 
-// CHECK:         (('st_name', 0x00000001) # 'kernbase'
-// CHECK-NEXT:     ('st_bind', 0x0)
-// CHECK-NEXT:     ('st_type', 0x0)
-// CHECK-NEXT:     ('st_other', 0x00)
-// CHECK-NEXT:     ('st_shndx', 0xfff1)
-// CHECK-NEXT:     ('st_value', 0xffffffff80000000)
-// CHECK-NEXT:     ('st_size', 0x0000000000000000)
-// CHECK-NEXT:    ),
+// CHECK:        Symbol {
+// CHECK:          Name: kernbase (1)
+// CHECK-NEXT:     Value: 0xFFFFFFFF80000000
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0xFFF1)
+// CHECK-NEXT:   }
 
 // Test that we accept .set of a symbol after it has been used in a statement.
 
@@ -24,11 +25,12 @@
 	.set	foo2,bar2
 
 // Test that there is an undefined reference to bar
-// CHECK:      (('st_name', 0x0000000a) # 'bar'
-// CHECK-NEXT:  ('st_bind', 0x1)
-// CHECK-NEXT:  ('st_type', 0x0)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0000)
-// CHECK-NEXT:  ('st_value', 0x0000000000000000)
-// CHECK-NEXT:  ('st_size', 0x0000000000000000)
-// CHECK-NEXT: ),
+// CHECK:        Symbol {
+// CHECK:          Name: bar (10)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
diff --git a/test/MC/ELF/sleb.s b/test/MC/ELF/sleb.s
index 00e5b4b..5cba582 100644
--- a/test/MC/ELF/sleb.s
+++ b/test/MC/ELF/sleb.s
@@ -1,5 +1,5 @@
-// RUN: llvm-mc -filetype=obj -triple i686-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck -check-prefix=ELF_32 %s
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck -check-prefix=ELF_64 %s
+// RUN: llvm-mc -filetype=obj -triple i686-pc-linux-gnu %s -o - | llvm-readobj -s -sd | FileCheck -check-prefix=ELF_32 %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sd | FileCheck -check-prefix=ELF_64 %s
 // RUN: llvm-mc -filetype=obj -triple i386-apple-darwin9 %s -o - | macho-dump  --dump-section-data | FileCheck -check-prefix=MACHO_32 %s
 // RUN: llvm-mc -filetype=obj -triple x86_64-apple-darwin9 %s -o - | macho-dump  --dump-section-data | FileCheck -check-prefix=MACHO_64 %s
 
@@ -19,10 +19,14 @@ foo:
 
 	.sleb128        8193
 
-// ELF_32: ('sh_name', 0x00000001) # '.text'
-// ELF_32: ('_section_data', '00017f3f 40c000bf 7fff3f80 4081c000')
-// ELF_64: ('sh_name', 0x00000001) # '.text'
-// ELF_64: ('_section_data', '00017f3f 40c000bf 7fff3f80 4081c000')
+// ELF_32:   Name: .text
+// ELF_32:   SectionData (
+// ELF_32:     0000: 00017F3F 40C000BF 7FFF3F80 4081C000
+// ELF_32:   )
+// ELF_64:   Name: .text
+// ELF_64:   SectionData (
+// ELF_64:     0000: 00017F3F 40C000BF 7FFF3F80 4081C000
+// ELF_64:   )
 // MACHO_32: ('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
 // MACHO_32: ('_section_data', '00017f3f 40c000bf 7fff3f80 4081c000')
 // MACHO_64: ('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
diff --git a/test/MC/ELF/subsection.s b/test/MC/ELF/subsection.s
new file mode 100644
index 0000000..d437cac
--- /dev/null
+++ b/test/MC/ELF/subsection.s
@@ -0,0 +1,37 @@
+// RUN: llvm-mc -filetype=obj %s -o - -triple x86_64-pc-linux | llvm-objdump -s - | FileCheck %s
+
+// CHECK: Contents of section .text:
+// CHECK-NEXT: 0000 03042502 00000003 04250100 0000ebf7
+.text 1
+add 1, %eax
+jmp label
+.subsection
+add 2, %eax
+label:
+
+// CHECK-NOT: Contents of section .rela.text:
+
+// CHECK: Contents of section .data:
+// CHECK-NEXT: 0000 01030402 74657374
+.data
+l0:
+.byte 1
+.subsection 1+1
+l1:
+.byte 2
+l2:
+.subsection l2-l1
+.byte l1-l0
+.subsection 3
+.ascii "test"
+.previous
+.byte 4
+
+// CHECK: Contents of section test:
+// CHECK-NEXT: 0000 010302
+.section test
+.byte 1
+.pushsection test, 1
+.byte 2
+.popsection
+.byte 3
diff --git a/test/MC/ELF/symref.s b/test/MC/ELF/symref.s
index 2dfa058..9a71a81 100644
--- a/test/MC/ELF/symref.s
+++ b/test/MC/ELF/symref.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -r -t | FileCheck %s
 
 defined1:
 defined2:
@@ -21,145 +21,122 @@ defined3:
         .symver global1, g1@@zed
 global1:
 
+// CHECK:      Relocations [
+// CHECK-NEXT:   Section (1) .text {
+// CHECK-NEXT:     0x0 R_X86_64_32 .text 0x0
+// CHECK-NEXT:     0x4 R_X86_64_32 bar2@zed 0x0
+// CHECK-NEXT:     0x8 R_X86_64_32 .text 0x0
+// CHECK-NEXT:     0xC R_X86_64_32 .text 0x0
+// CHECK-NEXT:     0x10 R_X86_64_32 bar6@zed 0x0
+// CHECK-NEXT:   }
+// CHECK-NEXT: ]
 
-// CHECK:      # Relocation 0
-// CHECK-NEXT: (('r_offset', 0x0000000000000000)
-// CHECK-NEXT:  ('r_sym', 0x00000006)
-// CHECK-NEXT:  ('r_type', 0x0000000a)
-// CHECK-NEXT:  ('r_addend', 0x0000000000000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Relocation 1
-// CHECK-NEXT: (('r_offset', 0x0000000000000004)
-// CHECK-NEXT:  ('r_sym', 0x0000000b)
-// CHECK-NEXT:  ('r_type', 0x0000000a)
-// CHECK-NEXT:  ('r_addend', 0x0000000000000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Relocation 2
-// CHECK-NEXT: (('r_offset', 0x0000000000000008)
-// CHECK-NEXT:  ('r_sym', 0x00000006)
-// CHECK-NEXT:  ('r_type', 0x0000000a)
-// CHECK-NEXT:  ('r_addend', 0x0000000000000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Relocation 3
-// CHECK-NEXT: (('r_offset', 0x000000000000000c)
-// CHECK-NEXT:  ('r_sym', 0x00000006)
-// CHECK-NEXT:  ('r_type', 0x0000000a)
-// CHECK-NEXT:  ('r_addend', 0x0000000000000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Relocation 4
-// CHECK-NEXT: (('r_offset', 0x0000000000000010)
-// CHECK-NEXT:  ('r_sym', 0x0000000c)
-// CHECK-NEXT:  ('r_type', 0x0000000a)
-// CHECK-NEXT:  ('r_addend', 0x0000000000000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT:])
-
-// CHECK:      # Symbol 1
-// CHECK-NEXT: (('st_name', 0x00000013) # 'bar1@zed'
-// CHECK-NEXT:  ('st_bind', 0x0)
-// CHECK-NEXT:  ('st_type', 0x0)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0001)
-// CHECK-NEXT:  ('st_value', 0x0000000000000000)
-// CHECK-NEXT:  ('st_size', 0x0000000000000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Symbol 2
-// CHECK-NEXT: (('st_name', 0x00000025) # 'bar3@@zed'
-// CHECK-NEXT:  ('st_bind', 0x0)
-// CHECK-NEXT:  ('st_type', 0x0)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0001)
-// CHECK-NEXT:  ('st_value', 0x0000000000000000)
-// CHECK-NEXT:  ('st_size', 0x0000000000000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Symbol 3
-// CHECK-NEXT: (('st_name', 0x0000002f) # 'bar5@@zed'
-// CHECK-NEXT:  ('st_bind', 0x0)
-// CHECK-NEXT:  ('st_type', 0x0)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0001)
-// CHECK-NEXT:  ('st_value', 0x0000000000000000)
-// CHECK-NEXT:  ('st_size', 0x0000000000000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Symbol 4
-// CHECK-NEXT: (('st_name', 0x00000001) # 'defined1'
-// CHECK-NEXT:  ('st_bind', 0x0)
-// CHECK-NEXT:  ('st_type', 0x0)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0001)
-// CHECK-NEXT:  ('st_value', 0x0000000000000000)
-// CHECK-NEXT:  ('st_size', 0x0000000000000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Symbol 5
-// CHECK-NEXT: (('st_name', 0x0000000a) # 'defined2'
-// CHECK-NEXT:  ('st_bind', 0x0)
-// CHECK-NEXT:  ('st_type', 0x0)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0001)
-// CHECK-NEXT:  ('st_value', 0x0000000000000000)
-// CHECK-NEXT:  ('st_size', 0x0000000000000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Symbol 6
-// CHECK-NEXT: (('st_name', 0x00000000) # ''
-// CHECK-NEXT:  ('st_bind', 0x0)
-// CHECK-NEXT:  ('st_type', 0x3)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0001)
-// CHECK-NEXT:  ('st_value', 0x0000000000000000)
-// CHECK-NEXT:  ('st_size', 0x0000000000000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Symbol 7
-// CHECK-NEXT: (('st_name', 0x00000000) # ''
-// CHECK-NEXT:  ('st_bind', 0x0)
-// CHECK-NEXT:  ('st_type', 0x3)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0003)
-// CHECK-NEXT:  ('st_value', 0x0000000000000000)
-// CHECK-NEXT:  ('st_size', 0x0000000000000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Symbol 8
-// CHECK-NEXT: (('st_name', 0x00000000) # ''
-// CHECK-NEXT:  ('st_bind', 0x0)
-// CHECK-NEXT:  ('st_type', 0x3)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0004)
-// CHECK-NEXT:  ('st_value', 0x0000000000000000)
-// CHECK-NEXT:  ('st_size', 0x0000000000000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Symbol 9
-// CHECK-NEXT: (('st_name', 0x0000004a) # 'g1@@zed'
-// CHECK-NEXT:  ('st_bind', 0x1)
-// CHECK-NEXT:  ('st_type', 0x0)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0001)
-// CHECK-NEXT:  ('st_value', 0x0000000000000014)
-// CHECK-NEXT:  ('st_size', 0x0000000000000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Symbol 10
-// CHECK-NEXT: (('st_name', 0x00000042) # 'global1'
-// CHECK-NEXT:  ('st_bind', 0x1)
-// CHECK-NEXT:  ('st_type', 0x0)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0001)
-// CHECK-NEXT:  ('st_value', 0x0000000000000014)
-// CHECK-NEXT:  ('st_size', 0x0000000000000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Symbol 11
-// CHECK-NEXT: (('st_name', 0x0000001c) # 'bar2@zed'
-// CHECK-NEXT:  ('st_bind', 0x1)
-// CHECK-NEXT:  ('st_type', 0x0)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0000)
-// CHECK-NEXT:  ('st_value', 0x0000000000000000)
-// CHECK-NEXT:  ('st_size', 0x0000000000000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Symbol 12
-// CHECK-NEXT: (('st_name', 0x00000039) # 'bar6@zed'
-// CHECK-NEXT:  ('st_bind', 0x1)
-// CHECK-NEXT:  ('st_type', 0x0)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0000)
-// CHECK-NEXT:  ('st_value', 0x0000000000000000)
-// CHECK-NEXT:  ('st_size', 0x0000000000000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT:])
+// CHECK:        Symbol {
+// CHECK:          Name: bar1@zed (19)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text (0x1)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: bar3@@zed (37)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text (0x1)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: bar5@@zed (47)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text (0x1)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: defined1 (1)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text (0x1)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: defined2 (10)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text (0x1)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: .text (0)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: Section
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text (0x1)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: .data (0)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: Section
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .data (0x3)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: .bss (0)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: Section
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .bss (0x4)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: g1@@zed (74)
+// CHECK-NEXT:     Value: 0x14
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text (0x1)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: global1 (66)
+// CHECK-NEXT:     Value: 0x14
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text (0x1)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: bar2@zed (28)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: bar6@zed (57)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
+// CHECK-NEXT: ]
diff --git a/test/MC/ELF/tls-i386.s b/test/MC/ELF/tls-i386.s
index 922d4c6..267046e 100644
--- a/test/MC/ELF/tls-i386.s
+++ b/test/MC/ELF/tls-i386.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple i386-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple i386-pc-linux-gnu %s -o - | llvm-readobj -t | FileCheck %s
 
 // Test that all symbols are of type STT_TLS.
 
@@ -17,129 +17,129 @@
         .long   fooD@DTPOFF
         .long   fooE@INDNTPOFF
 
-// CHECK:       (('st_name', 0x00000001) # 'foo1'
-// CHECK-NEXT:   ('st_value', 0x00000000)
-// CHECK-NEXT:   ('st_size', 0x00000000)
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x6)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 6
-// CHECK-NEXT:  (('st_name', 0x00000006) # 'foo2'
-// CHECK-NEXT:   ('st_value', 0x00000000)
-// CHECK-NEXT:   ('st_size', 0x00000000)
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x6)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 7
-// CHECK-NEXT:  (('st_name', 0x0000000b) # 'foo3'
-// CHECK-NEXT:   ('st_value', 0x00000000)
-// CHECK-NEXT:   ('st_size', 0x00000000)
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x6)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 8
-// CHECK-NEXT:  (('st_name', 0x00000010) # 'foo4'
-// CHECK-NEXT:   ('st_value', 0x00000000)
-// CHECK-NEXT:   ('st_size', 0x00000000)
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x6)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 9
-// CHECK-NEXT:  (('st_name', 0x00000015) # 'foo5'
-// CHECK-NEXT:   ('st_value', 0x00000000)
-// CHECK-NEXT:   ('st_size', 0x00000000)
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x6)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 10
-// CHECK-NEXT:  (('st_name', 0x0000001a) # 'foo6'
-// CHECK-NEXT:   ('st_value', 0x00000000)
-// CHECK-NEXT:   ('st_size', 0x00000000)
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x6)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 11
-// CHECK-NEXT:  (('st_name', 0x0000001f) # 'foo7'
-// CHECK-NEXT:   ('st_value', 0x00000000)
-// CHECK-NEXT:   ('st_size', 0x00000000)
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x6)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 12
-// CHECK-NEXT:  (('st_name', 0x00000024) # 'foo8'
-// CHECK-NEXT:   ('st_value', 0x00000000)
-// CHECK-NEXT:   ('st_size', 0x00000000)
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x6)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 13
-// CHECK-NEXT:  (('st_name', 0x00000029) # 'foo9'
-// CHECK-NEXT:   ('st_value', 0x00000000)
-// CHECK-NEXT:   ('st_size', 0x00000000)
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x6)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 14
-// CHECK-NEXT:  (('st_name', 0x0000002e) # 'fooA'
-// CHECK-NEXT:   ('st_value', 0x00000000)
-// CHECK-NEXT:   ('st_size', 0x00000000)
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x6)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 15
-// CHECK-NEXT:  (('st_name', 0x00000033) # 'fooB'
-// CHECK-NEXT:   ('st_value', 0x00000000)
-// CHECK-NEXT:   ('st_size', 0x00000000)
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x6)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 16
-// CHECK-NEXT:  (('st_name', 0x00000038) # 'fooC'
-// CHECK-NEXT:   ('st_value', 0x00000000)
-// CHECK-NEXT:   ('st_size', 0x00000000)
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x6)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 17
-// CHECK-NEXT:  (('st_name', 0x0000003d) # 'fooD'
-// CHECK-NEXT:   ('st_value', 0x00000000)
-// CHECK-NEXT:   ('st_size', 0x00000000)
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x6)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 18
-// CHECK-NEXT:  (('st_name', 0x00000042) # 'fooE'
-// CHECK-NEXT:   ('st_value', 0x00000000)
-// CHECK-NEXT:   ('st_size', 0x00000000)
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x6)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0000)
-// CHECK-NEXT:  ),
-
+// CHECK:        Symbol {
+// CHECK:          Name: foo1 (1)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: TLS
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: foo2 (6)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: TLS
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: foo3 (11)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: TLS
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: foo4 (16)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: TLS
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: foo5 (21)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: TLS
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: foo6 (26)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: TLS
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: foo7 (31)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: TLS
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: foo8 (36)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: TLS
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: foo9 (41)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: TLS
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: fooA (46)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: TLS
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: fooB (51)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: TLS
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: fooC (56)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: TLS
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: fooD (61)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: TLS
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: fooE (66)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: TLS
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
diff --git a/test/MC/ELF/tls.s b/test/MC/ELF/tls.s
index fe2bb4e..c71e396 100644
--- a/test/MC/ELF/tls.s
+++ b/test/MC/ELF/tls.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -t | FileCheck %s
 
 // Test that all symbols are of type STT_TLS.
 
@@ -12,66 +12,67 @@
 foobar:
 	.long	43
 
-// CHECK:      (('st_name', 0x0000001f) # 'foobar'
-// CHECK-NEXT:  ('st_bind', 0x0)
-// CHECK-NEXT:  ('st_type', 0x6)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0005)
-// CHECK-NEXT:  ('st_value', 0x0000000000000000)
-// CHECK-NEXT:  ('st_size', 0x0000000000000000)
-// CHECK-NEXT: ),
+// CHECK:        Symbol {
+// CHECK:          Name: foobar (31)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: TLS
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .zed (0x5)
+// CHECK-NEXT:   }
 
-// CHECK:       # Symbol 7
-// CHECK-NEXT:  (('st_name', 0x00000001) # 'foo1'
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x6)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0000)
-// CHECK-NEXT:   ('st_value', 0x0000000000000000)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 8
-// CHECK-NEXT:  (('st_name', 0x00000006) # 'foo2'
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x6)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0000)
-// CHECK-NEXT:   ('st_value', 0x0000000000000000)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 9
-// CHECK-NEXT:  (('st_name', 0x0000000b) # 'foo3'
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x6)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0000)
-// CHECK-NEXT:   ('st_value', 0x0000000000000000)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 10
-// CHECK-NEXT:  (('st_name', 0x00000010) # 'foo4'
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x6)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0000)
-// CHECK-NEXT:   ('st_value', 0x0000000000000000)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 11
-// CHECK-NEXT:  (('st_name', 0x00000015) # 'foo5'
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x6)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0000)
-// CHECK-NEXT:   ('st_value', 0x0000000000000000)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 12
-// CHECK-NEXT:  (('st_name', 0x0000001a) # 'foo6'
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x6)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0000)
-// CHECK-NEXT:   ('st_value', 0x0000000000000000)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
-// CHECK-NEXT:  ),
+// CHECK:        Symbol {
+// CHECK:          Name: foo1 (1)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: TLS
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: foo2 (6)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: TLS
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: foo3 (11)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: TLS
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: foo4 (16)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: TLS
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: foo5 (21)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: TLS
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: foo6 (26)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: TLS
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
diff --git a/test/MC/ELF/type.s b/test/MC/ELF/type.s
index ec53e4f..a5b9812 100644
--- a/test/MC/ELF/type.s
+++ b/test/MC/ELF/type.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -t | FileCheck %s
 
 // Test that both % and @ are accepted.
         .global foo
@@ -12,35 +12,76 @@ bar:
 // Test that gnu_unique_object is accepted.
         .type zed,@gnu_unique_object
 
+obj:
+        .global obj
+        .type obj,@object
+        .type obj,@notype
+
+func:
+        .global func
+        .type func,@function
+        .type func,@object
+
 ifunc:
         .global ifunc
         .type ifunc,@gnu_indirect_function
 
-// CHECK:      # Symbol 4
-// CHECK-NEXT: (('st_name', 0x00000005) # 'bar'
-// CHECK-NEXT:  ('st_bind', 0x1)
-// CHECK-NEXT:  ('st_type', 0x1)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0001)
-// CHECK-NEXT:  ('st_value', 0x0000000000000000)
-// CHECK-NEXT:  ('st_size', 0x0000000000000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Symbol 5
-// CHECK-NEXT: (('st_name', 0x00000001) # 'foo'
-// CHECK-NEXT:  ('st_bind', 0x1)
-// CHECK-NEXT:  ('st_type', 0x2)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0001)
-// CHECK-NEXT:  ('st_value', 0x0000000000000000)
-// CHECK-NEXT:  ('st_size', 0x0000000000000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Symbol 6
-// CHECK-NEXT: (('st_name', 0x00000009) # 'ifunc'
-// CHECK-NEXT:  ('st_bind', 0x1)
-// CHECK-NEXT:  ('st_type', 0xa)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0001)
-// CHECK-NEXT:  ('st_value', 0x0000000000000000)
-// CHECK-NEXT:  ('st_size', 0x0000000000000000)
-// CHECK-NEXT: ),
+tls:
+        .global tls
+        .type tls,@tls_object
+        .type tls,@gnu_indirect_function
 
+// CHECK:        Symbol {
+// CHECK:          Name: bar
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: Object
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text (0x1)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: foo
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: Function
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text (0x1)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: func
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: Function
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text (0x1)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: ifunc
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: GNU_IFunc
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text (0x1)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: obj
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: Object
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text (0x1)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: tls
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: TLS
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text (0x1)
+// CHECK-NEXT:   }
diff --git a/test/MC/ELF/uleb.s b/test/MC/ELF/uleb.s
index 1e4734b..d755cc2 100644
--- a/test/MC/ELF/uleb.s
+++ b/test/MC/ELF/uleb.s
@@ -1,5 +1,5 @@
-// RUN: llvm-mc -filetype=obj -triple i686-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck -check-prefix=ELF_32 %s
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck -check-prefix=ELF_64 %s
+// RUN: llvm-mc -filetype=obj -triple i686-pc-linux-gnu %s -o - | llvm-readobj -s -sd | FileCheck -check-prefix=ELF_32 %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sd | FileCheck -check-prefix=ELF_64 %s
 // RUN: llvm-mc -filetype=obj -triple i386-apple-darwin9 %s -o - | macho-dump  --dump-section-data | FileCheck -check-prefix=MACHO_32 %s
 // RUN: llvm-mc -filetype=obj -triple x86_64-apple-darwin9 %s -o - | macho-dump  --dump-section-data | FileCheck -check-prefix=MACHO_64 %s
 
@@ -12,10 +12,14 @@ foo:
 	.uleb128	16383
 	.uleb128	16384
 
-// ELF_32: ('sh_name', 0x00000001) # '.text'
-// ELF_32: ('_section_data', '00017f80 01ff7f80 8001')
-// ELF_64: ('sh_name', 0x00000001) # '.text'
-// ELF_64: ('_section_data', '00017f80 01ff7f80 8001')
+// ELF_32:   Name: .text
+// ELF_32:   SectionData (
+// ELF_32:     0000: 00017F80 01FF7F80 8001
+// ELF_32:   )
+// ELF_64:   Name: .text
+// ELF_64:   SectionData (
+// ELF_64:     0000: 00017F80 01FF7F80 8001
+// ELF_64:   )
 // MACHO_32: ('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
 // MACHO_32: ('_section_data', '00017f80 01ff7f80 8001')
 // MACHO_64: ('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
diff --git a/test/MC/ELF/undef.s b/test/MC/ELF/undef.s
index e377c63..0d89fb1 100644
--- a/test/MC/ELF/undef.s
+++ b/test/MC/ELF/undef.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -t | FileCheck %s
 
 // Test which symbols should be in the symbol table
 
@@ -19,28 +19,21 @@
         .text
         movsd   .Lsym8(%rip), %xmm1
 
-// CHECK:      ('_symbols', [
-// CHECK-NEXT:  # Symbol 0
-// CHECK-NEXT:  (('st_name', 0x00000000) # ''
-// CHECK:       # Symbol 1
-// CHECK-NEXT:  (('st_name', 0x0000000d) # '.Lsym8'
-// CHECK:       # Symbol 2
-// CHECK-NEXT:  (('st_name', 0x00000000) # ''
-// CHECK:       # Symbol 3
-// CHECK-NEXT:  (('st_name', 0x00000000) # ''
-// CHECK:       # Symbol 4
-// CHECK-NEXT:  (('st_name', 0x00000000) # ''
-// CHECK:       # Symbol 5
-// CHECK-NEXT:  (('st_name', 0x00000000) # ''
-// CHECK:       # Symbol 6
-// CHECK-NEXT:  (('st_name', 0x00000001) # '.Lsym1'
-// CHECK:       # Symbol 7
-// CHECK-NEXT:  (('st_name', 0x00000008) # 'sym6'
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x1)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0000)
-// CHECK-NEXT:   ('st_value', 0x0000000000000000)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
-// CHECK-NEXT:   ),
-// CHECK-NEXT:  ])
+// CHECK:      Symbols [
+
+// CHECK:        Symbol {
+// CHECK:          Name: .Lsym8
+
+// CHECK:        Symbol {
+// CHECK:          Name: .Lsym1
+
+// CHECK:        Symbol {
+// CHECK:          Name: sym6
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: Object
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
+// CHECK-NEXT: ]
diff --git a/test/MC/ELF/undef2.s b/test/MC/ELF/undef2.s
index 6f971c5..6aa66c0 100644
--- a/test/MC/ELF/undef2.s
+++ b/test/MC/ELF/undef2.s
@@ -1,10 +1,18 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -t | FileCheck %s
 
 // Test that this produces an undefined reference to .Lfoo
 
         je	.Lfoo
 
-// CHECK: ('_symbols', [
-// CHECK:      (('st_name', 0x00000001) # '.Lfoo'
-// CHECK-NEXT:  ('st_bind', 0x1)
-// CHECK: (('sh_name', 0x0000001b) # '.strtab'
+// CHECK:       Section {
+// CHECK:         Name: .strtab
+
+// CHECK:       Symbol {
+// CHECK:         Name: .Lfoo
+// CHECK-NEXT:    Value:
+// CHECK-NEXT:    Size:
+// CHECK-NEXT:    Binding: Global
+// CHECK-NEXT:    Type:
+// CHECK-NEXT:    Other:
+// CHECK-NEXT:    Section:
+// CHECK-NEXT:  }
diff --git a/test/MC/ELF/version.s b/test/MC/ELF/version.s
index 31e952a..0bc9c8b7 100644
--- a/test/MC/ELF/version.s
+++ b/test/MC/ELF/version.s
@@ -1,17 +1,23 @@
-// RUN: llvm-mc -filetype=obj -triple i386-pc-linux-gnu %s -o - | elf-dump --dump-section-data | FileCheck  %s
+// RUN: llvm-mc -filetype=obj -triple i386-pc-linux-gnu %s -o - | llvm-readobj -s -sd | FileCheck  %s
 
 .version "1234"
 .version "123"
 
-// CHECK:       (('sh_name', 0x0000000c) # '.note'
-// CHECK-NEXT:   ('sh_type', 0x00000007)
-// CHECK-NEXT:   ('sh_flags', 0x00000000)
-// CHECK-NEXT:   ('sh_addr', 0x00000000)
-// CHECK-NEXT:   ('sh_offset', 0x00000034)
-// CHECK-NEXT:   ('sh_size', 0x00000024)
-// CHECK-NEXT:   ('sh_link', 0x00000000)
-// CHECK-NEXT:   ('sh_info', 0x00000000)
-// CHECK-NEXT:   ('sh_addralign', 0x00000004)
-// CHECK-NEXT:   ('sh_entsize', 0x00000000)
-// CHECK-NEXT:   ('_section_data', '05000000 00000000 01000000 31323334 00000000 04000000 00000000 01000000 31323300')
-// CHECK-NEXT:  ),
+// CHECK:        Section {
+// CHECK:          Name: .note
+// CHECK-NEXT:     Type: SHT_NOTE
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x34
+// CHECK-NEXT:     Size: 36
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 4
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:     SectionData (
+// CHECK-NEXT:       0000: 05000000 00000000 01000000 31323334
+// CHECK-NEXT:       0010: 00000000 04000000 00000000 01000000
+// CHECK-NEXT:       0020: 31323300
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
diff --git a/test/MC/ELF/weak-relocation.s b/test/MC/ELF/weak-relocation.s
index 88e841e..0f5bba2 100644
--- a/test/MC/ELF/weak-relocation.s
+++ b/test/MC/ELF/weak-relocation.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -r | FileCheck %s
 
 // Test that weak symbols always produce relocations
 
@@ -7,9 +7,8 @@ foo:
 bar:
         call    foo
 
-//CHECK:        # Relocation 0
-//CHECK-NEXT:   (('r_offset', 0x0000000000000001)
-//CHECK-NEXT:    ('r_sym', 0x00000005)
-//CHECK-NEXT:    ('r_type', 0x00000002)
-//CHECK-NEXT:    ('r_addend', 0xfffffffffffffffc)
-//CHECK-NEXT:   ),
+// CHECK:      Relocations [
+// CHECK-NEXT:   Section ({{[0-9]+}}) .text {
+// CHECK-NEXT:     0x1 R_X86_64_PC32 foo 0xFFFFFFFFFFFFFFFC
+// CHECK-NEXT:   }
+// CHECK-NEXT: ]
diff --git a/test/MC/ELF/weak.s b/test/MC/ELF/weak.s
index 07a83913..2ed3eb7 100644
--- a/test/MC/ELF/weak.s
+++ b/test/MC/ELF/weak.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -t | FileCheck %s
 
 // Test that this produces a weak undefined symbol.
 
@@ -9,22 +9,22 @@
         .weak bar
 bar:
 
-//CHECK:        # Symbol 4
-//CHECK-NEXT:   (('st_name', 0x00000005) # 'bar'
-//CHECK-NEXT:    ('st_bind', 0x2)
-//CHECK-NEXT:    ('st_type', 0x0)
-//CHECK-NEXT:    ('st_other', 0x00)
-//CHECK-NEXT:    ('st_shndx', 0x0001)
-//CHECK-NEXT:    ('st_value', 0x0000000000000004)
-//CHECK-NEXT:    ('st_size', 0x0000000000000000)
-//CHECK-NEXT:   ),
-//CHECK-NEXT:   # Symbol 5
-//CHECK:       (('st_name', 0x00000001) # 'foo'
-//CHECK-NEXT:   ('st_bind', 0x2)
-//CHECK-NEXT:   ('st_type', 0x0)
-//CHECK-NEXT:   ('st_other', 0x00)
-//CHECK-NEXT:   ('st_shndx', 0x0000)
-//CHECK-NEXT:   ('st_value', 0x0000000000000000)
-//CHECK-NEXT:   ('st_size', 0x0000000000000000)
-//CHECK-NEXT:  ),
-//CHECK-NEXT: ])
+// CHECK:        Symbol {
+// CHECK:          Name: bar
+// CHECK-NEXT:     Value: 0x4
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Weak
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text
+// CHECK-NEXT:   }
+// CHECK:        Symbol {
+// CHECK:          Name: foo
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Weak
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
+// CHECK-NEXT:  ]
diff --git a/test/MC/ELF/weakref-plt.s b/test/MC/ELF/weakref-plt.s
index 2e50093..d6486dc 100644
--- a/test/MC/ELF/weakref-plt.s
+++ b/test/MC/ELF/weakref-plt.s
@@ -1,8 +1,14 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -t | FileCheck %s
 
 	.weakref	bar,foo
 	call	bar@PLT
 
-// CHECK:      # Symbol 5
-// CHECK-NEXT: (('st_name', 0x00000001) # 'foo'
-// CHECK-NEXT:  ('st_bind', 0x2)
+// CHECK:        Symbol {
+// CHECK:          Name: foo
+// CHECK-NEXT:     Value:
+// CHECK-NEXT:     Size:
+// CHECK-NEXT:     Binding: Weak
+// CHECK-NEXT:     Type:
+// CHECK-NEXT:     Other:
+// CHECK-NEXT:     Section:
+// CHECK-NEXT:   }
diff --git a/test/MC/ELF/weakref-reloc.s b/test/MC/ELF/weakref-reloc.s
index 4bbf264..48bda87 100644
--- a/test/MC/ELF/weakref-reloc.s
+++ b/test/MC/ELF/weakref-reloc.s
@@ -1,49 +1,44 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -r -t | FileCheck %s
 
 // Test that the relocations point to the correct symbols. We used to get the
 // symbol index wrong for weakrefs when creating _GLOBAL_OFFSET_TABLE_.
 
-	.weakref	bar,foo
+	.weakref bar,foo
         call    zed@PLT
-	call	bar
+     call	bar
 
-// CHECK:      # Relocation 0
-// CHECK-NEXT: (('r_offset', 0x0000000000000001)
-// CHECK-NEXT:  ('r_sym', 0x00000006)
-// CHECK-NEXT:  ('r_type', 0x00000004)
-// CHECK-NEXT:  ('r_addend', 0xfffffffffffffffc)
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Relocation 1
-// CHECK-NEXT: (('r_offset', 0x0000000000000006)
-// CHECK-NEXT:  ('r_sym', 0x00000005)
-// CHECK-NEXT:  ('r_type', 0x00000002)
-// CHECK-NEXT:  ('r_addend', 0xfffffffffffffffc)
-// CHECK-NEXT: ),
+// CHECK:      Relocations [
+// CHECK-NEXT:   Section ({{[0-9]+}}) {{[^ ]+}} {
+// CHECK-NEXT:     0x1 R_X86_64_PLT32 zed 0xFFFFFFFFFFFFFFFC
+// CHECK-NEXT:     0x6 R_X86_64_PC32 foo 0xFFFFFFFFFFFFFFFC
+// CHECK-NEXT:   }
+// CHECK-NEXT: ]
 
-// CHECK:      # Symbol 4
-// CHECK-NEXT: (('st_name', 0x00000009) # '_GLOBAL_OFFSET_TABLE_'
-// CHECK-NEXT:  ('st_bind', 0x1)
-// CHECK-NEXT:  ('st_type', 0x0)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0000)
-// CHECK-NEXT:  ('st_value', 0x0000000000000000)
-// CHECK-NEXT:  ('st_size', 0x0000000000000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Symbol 5
-// CHECK-NEXT: (('st_name', 0x00000001) # 'foo'
-// CHECK-NEXT:  ('st_bind', 0x2)
-// CHECK-NEXT:  ('st_type', 0x0)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0000)
-// CHECK-NEXT:  ('st_value', 0x0000000000000000)
-// CHECK-NEXT:  ('st_size', 0x0000000000000000)
-// CHECK-NEXT: ),
-// CHECK-NEXT: # Symbol 6
-// CHECK-NEXT: (('st_name', 0x00000005) # 'zed'
-// CHECK-NEXT:  ('st_bind', 0x1)
-// CHECK-NEXT:  ('st_type', 0x0)
-// CHECK-NEXT:  ('st_other', 0x00)
-// CHECK-NEXT:  ('st_shndx', 0x0000)
-// CHECK-NEXT:  ('st_value', 0x0000000000000000)
-// CHECK-NEXT:  ('st_size', 0x0000000000000000)
-// CHECK-NEXT: ),
+// CHECK:      Symbols [
+// CHECK:        Symbol {
+// CHECK:          Name: _GLOBAL_OFFSET_TABLE_ (9)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: foo (1)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Weak
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: zed (5)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
diff --git a/test/MC/ELF/weakref.s b/test/MC/ELF/weakref.s
index e12d2c7..8717364 100644
--- a/test/MC/ELF/weakref.s
+++ b/test/MC/ELF/weakref.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -t | FileCheck %s
 
 // This is a long test that checks that the aliases created by weakref are
 // never in the symbol table and that the only case it causes a symbol to
@@ -69,166 +69,158 @@ bar15:
         .long bar15
         .long foo15
 
-// CHECK:       # Symbol 0
-// CHECK-NEXT:  (('st_name', 0x00000000) # ''
-// CHECK-NEXT:   ('st_bind', 0x0)
-// CHECK-NEXT:   ('st_type', 0x0)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0000)
-// CHECK-NEXT:   ('st_value', 0x0000000000000000)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 1
-// CHECK-NEXT:  (('st_name', 0x00000015) # 'bar6'
-// CHECK-NEXT:   ('st_bind', 0x0)
-// CHECK-NEXT:   ('st_type', 0x0)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0001)
-// CHECK-NEXT:   ('st_value', 0x0000000000000018)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 2
-// CHECK-NEXT:  (('st_name', 0x0000001a) # 'bar7'
-// CHECK-NEXT:   ('st_bind', 0x0)
-// CHECK-NEXT:   ('st_type', 0x0)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0001)
-// CHECK-NEXT:   ('st_value', 0x0000000000000018)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 3
-// CHECK-NEXT:  (('st_name', 0x0000001f) # 'bar8'
-// CHECK-NEXT:   ('st_bind', 0x0)
-// CHECK-NEXT:   ('st_type', 0x0)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0001)
-// CHECK-NEXT:   ('st_value', 0x000000000000001c)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 4
-// CHECK-NEXT:  (('st_name', 0x00000024) # 'bar9'
-// CHECK-NEXT:   ('st_bind', 0x0)
-// CHECK-NEXT:   ('st_type', 0x0)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0001)
-// CHECK-NEXT:   ('st_value', 0x0000000000000020)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 5
-// CHECK-NEXT:  (('st_name', 0x00000000) # ''
-// CHECK-NEXT:   ('st_bind', 0x0)
-// CHECK-NEXT:   ('st_type', 0x3)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0001)
-// CHECK-NEXT:   ('st_value', 0x0000000000000000)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 6
-// CHECK-NEXT:  (('st_name', 0x00000000) # ''
-// CHECK-NEXT:   ('st_bind', 0x0)
-// CHECK-NEXT:   ('st_type', 0x3)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0003)
-// CHECK-NEXT:   ('st_value', 0x0000000000000000)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 7
-// CHECK-NEXT:  (('st_name', 0x00000000) # ''
-// CHECK-NEXT:   ('st_bind', 0x0)
-// CHECK-NEXT:   ('st_type', 0x3)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0004)
-// CHECK-NEXT:   ('st_value', 0x0000000000000000)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 8
-// CHECK-NEXT:  (('st_name', 0x00000029) # 'bar10'
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x0)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0001)
-// CHECK-NEXT:   ('st_value', 0x0000000000000028)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 9
-// CHECK-NEXT:  (('st_name', 0x0000002f) # 'bar11'
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x0)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0001)
-// CHECK-NEXT:   ('st_value', 0x0000000000000030)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 10
-// CHECK-NEXT:  (('st_name', 0x00000035) # 'bar12'
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x0)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0001)
-// CHECK-NEXT:   ('st_value', 0x0000000000000030)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 11
-// CHECK-NEXT:  (('st_name', 0x0000003b) # 'bar13'
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x0)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0001)
-// CHECK-NEXT:   ('st_value', 0x0000000000000034)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 12
-// CHECK-NEXT:  (('st_name', 0x00000041) # 'bar14'
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x0)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0001)
-// CHECK-NEXT:   ('st_value', 0x0000000000000038)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 13
-// CHECK-NEXT:  (('st_name', 0x00000047) # 'bar15'
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x0)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0001)
-// CHECK-NEXT:   ('st_value', 0x0000000000000040)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 14
-// CHECK-NEXT:  (('st_name', 0x00000001) # 'bar2'
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x0)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0000)
-// CHECK-NEXT:   ('st_value', 0x0000000000000000)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 15
-// CHECK-NEXT:  (('st_name', 0x00000006) # 'bar3'
-// CHECK-NEXT:   ('st_bind', 0x2)
-// CHECK-NEXT:   ('st_type', 0x0)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0000)
-// CHECK-NEXT:   ('st_value', 0x0000000000000000)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 16
-// CHECK-NEXT:  (('st_name', 0x0000000b) # 'bar4'
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x0)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0000)
-// CHECK-NEXT:   ('st_value', 0x0000000000000000)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT:  # Symbol 17
-// CHECK-NEXT:  (('st_name', 0x00000010) # 'bar5'
-// CHECK-NEXT:   ('st_bind', 0x1)
-// CHECK-NEXT:   ('st_type', 0x0)
-// CHECK-NEXT:   ('st_other', 0x00)
-// CHECK-NEXT:   ('st_shndx', 0x0000)
-// CHECK-NEXT:   ('st_value', 0x0000000000000000)
-// CHECK-NEXT:   ('st_size', 0x0000000000000000)
-// CHECK-NEXT:  ),
-// CHECK-NEXT: ])
+// CHECK:      Symbols [
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: bar6 (21)
+// CHECK-NEXT:     Value: 0x18
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text (0x1)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: bar7 (26)
+// CHECK-NEXT:     Value: 0x18
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text (0x1)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: bar8 (31)
+// CHECK-NEXT:     Value: 0x1C
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text (0x1)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: bar9 (36)
+// CHECK-NEXT:     Value: 0x20
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text (0x1)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: .text (0)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: Section
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text (0x1)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: .data (0)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: Section
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .data (0x3)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: .bss (0)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: Section
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .bss (0x4)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: bar10 (41)
+// CHECK-NEXT:     Value: 0x28
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text (0x1)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: bar11 (47)
+// CHECK-NEXT:     Value: 0x30
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text (0x1)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: bar12 (53)
+// CHECK-NEXT:     Value: 0x30
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text (0x1)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: bar13 (59)
+// CHECK-NEXT:     Value: 0x34
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text (0x1)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: bar14 (65)
+// CHECK-NEXT:     Value: 0x38
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text (0x1)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: bar15 (71)
+// CHECK-NEXT:     Value: 0x40
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text (0x1)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: bar2 (1)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: bar3 (6)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Weak
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: bar4 (11)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: bar5 (16)
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Global
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: (0x0)
+// CHECK-NEXT:   }
+// CHECK-NEXT: ]
diff --git a/test/MC/ELF/x86_64-reloc-sizetest.s b/test/MC/ELF/x86_64-reloc-sizetest.s
index acca2f5..bd67ee0 100644
--- a/test/MC/ELF/x86_64-reloc-sizetest.s
+++ b/test/MC/ELF/x86_64-reloc-sizetest.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple x86_64-linux-gnu -filetype=obj %s | elf-dump | FileCheck %s
+// RUN: llvm-mc -triple x86_64-linux-gnu -filetype=obj %s | llvm-readobj -r | FileCheck %s
 
 // Tests that relocation value fits in the provided size
 // Original bug http://llvm.org/bugs/show_bug.cgi?id=10568
@@ -6,8 +6,8 @@
 L: movq $(L + 2147483648),%rax
 
 
-// CHECK:          Relocation 0
-// CHECK-NEXT:     ('r_offset', 0x0000000000000003)
-// CHECK-NEXT:     ('r_sym'
-// CHECK-NEXT:     ('r_type', 0x0000000b)
-// CHECK-NEXT:     ('r_addend', 0x0000000080000000
+// CHECK:      Relocations [
+// CHECK-NEXT:   Section ({{[0-9]+}}) .text {
+// CHECK-NEXT:     0x3 R_X86_64_32S {{[^ ]+}} 0x80000000
+// CHECK-NEXT:   }
+// CHECK-NEXT: ]
diff --git a/test/MC/ELF/zero.s b/test/MC/ELF/zero.s
index 46ffe17..be92eb8 100644
--- a/test/MC/ELF/zero.s
+++ b/test/MC/ELF/zero.s
@@ -1,16 +1,23 @@
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -sd | FileCheck %s
 
 .zero 4
 .zero 1,42
 
-// CHECK: ('sh_name', 0x00000001) # '.text'
-// CHECK: ('sh_type', 0x00000001)
-// CHECK: ('sh_flags', 0x0000000000000006)
-// CHECK: ('sh_addr', 0x0000000000000000)
-// CHECK: ('sh_offset', 0x0000000000000040)
-// CHECK: ('sh_size', 0x0000000000000005)
-// CHECK: ('sh_link', 0x00000000)
-// CHECK: ('sh_info', 0x00000000)
-// CHECK: ('sh_addralign', 0x0000000000000004)
-// CHECK: ('sh_entsize', 0x0000000000000000)
-// CHECK: ('_section_data', '00000000 2a')
+// CHECK:        Section {
+// CHECK:          Name: .text
+// CHECK-NEXT:     Type: SHT_PROGBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:       SHF_EXECINSTR
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x40
+// CHECK-NEXT:     Size: 5
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 4
+// CHECK-NEXT:     EntrySize: 0
+// CHECK-NEXT:     SectionData (
+// CHECK-NEXT:       0000: 00000000 2A
+// CHECK-NEXT:     )
+// CHECK-NEXT:   }
diff --git a/test/MC/Mips/elf-N64.ll b/test/MC/Mips/elf-N64.ll
index ae6de78..a1ea34a 100644
--- a/test/MC/Mips/elf-N64.ll
+++ b/test/MC/Mips/elf-N64.ll
@@ -1,4 +1,4 @@
-; RUN: llc -filetype=obj -march=mips64el -mcpu=mips64 -disable-mips-delay-filler %s -o - | elf-dump --dump-section-data  | FileCheck %s
+; RUN: llc -filetype=obj -march=mips64el -mcpu=mips64 -disable-mips-delay-filler %s -o - | llvm-readobj -r | FileCheck %s
 
 ; Check for N64 relocation production.
 ;
@@ -12,25 +12,12 @@ define i32 @main() nounwind {
 entry:
 ; Check that the appropriate relocations were created.
 
-; R_MIPS_GPREL16/R_MIPS_SUB/R_MIPS_HI16
-; CHECK:     ('r_type3', 0x05)
-; CHECK-NEXT:     ('r_type2', 0x18)
-; CHECK-NEXT:     ('r_type', 0x07)
-
-; R_MIPS_GPREL16/R_MIPS_SUB/R_MIPS_LO16
-; CHECK:     ('r_type3', 0x06)
-; CHECK-NEXT:     ('r_type2', 0x18)
-; CHECK-NEXT:     ('r_type', 0x07)
-
-; R_MIPS_GOT_OFST/R_MIPS_NONE/R_MIPS_NONE
-; CHECK:     ('r_type3', 0x00)
-; CHECK-NEXT:     ('r_type2', 0x00)
-; CHECK-NEXT:     ('r_type', 0x14)
-
-; R_MIPS_GOT_OFST/R_MIPS_NONE/R_MIPS_NONE
-; CHECK:     ('r_type3', 0x00)
-; CHECK-NEXT:     ('r_type2', 0x00)
-; CHECK-NEXT:     ('r_type', 0x15)
+; CHECK: Relocations [
+; CHECK:   0x{{[0-9,A-F]+}} R_MIPS_GPREL16/R_MIPS_SUB/R_MIPS_HI16
+; CHECK:   0x{{[0-9,A-F]+}} R_MIPS_GPREL16/R_MIPS_SUB/R_MIPS_LO16
+; CHECK:   0x{{[0-9,A-F]+}} R_MIPS_GOT_PAGE/R_MIPS_NONE/R_MIPS_NONE
+; CHECK:   0x{{[0-9,A-F]+}} R_MIPS_GOT_OFST/R_MIPS_NONE/R_MIPS_NONE
+; CHECK: ]
 
   %puts = tail call i32 @puts(i8* getelementptr inbounds ([12 x i8]* @str, i64 0, i64 0))
   ret i32 0
diff --git a/test/MC/Mips/elf-bigendian.ll b/test/MC/Mips/elf-bigendian.ll
index 7111deb..a92fe33 100644
--- a/test/MC/Mips/elf-bigendian.ll
+++ b/test/MC/Mips/elf-bigendian.ll
@@ -1,24 +1,37 @@
-; DISABLE: llc -filetype=obj -mtriple mips-unknown-linux %s -o - | elf-dump --dump-section-data  | FileCheck %s
+; DISABLE: llc -filetype=obj -mtriple mips-unknown-linux %s -o - | llvm-readobj -h -s -sd | FileCheck %s
 ; RUN: false
 ; XFAIL: *
 
 ; Check that this is big endian.
-; CHECK: ('e_indent[EI_DATA]', 0x02)
+; CHECK: ElfHeader {
+; CHECK:   Ident {
+; CHECK:     DataEncoding: BigEndian
+; CHECK:   }
+; CHECK: }
 
 ; Make sure that a section table (text) entry is correct.
-; CHECK:   (('sh_name', 0x{{[0]*}}5) # '.text'
-; CHECK-NEXT:   ('sh_type', 0x{{[0]*}}1)
-; CHECK-NEXT:   ('sh_flags', 0x{{[0]*}}6)
-; CHECK-NEXT:   ('sh_addr', 0x{{[0-9,a-f]+}})
-; CHECK-NEXT:   ('sh_offset', 0x{{[0-9,a-f]+}})
-; CHECK-NEXT:   ('sh_size', 0x{{[0-9,a-f]+}})
-; CHECK-NEXT:   ('sh_link', 0x{{[0]+}})
-; CHECK-NEXT:   ('sh_info', 0x{{[0]+}})
-; CHECK-NEXT:   ('sh_addralign', 0x{{[0]*}}4)
-; CHECK-NEXT:   ('sh_entsize', 0x{{[0]+}})
+; CHECK:      Sections [
+; CHECK:        Section {
+; CHECK:          Index:
+; CHECK:          Name: .text
+; CHECK-NEXT:     Type: SHT_PROGBITS
+; CHECK-NEXT:     Flags [ (0x6)
+; CHECK-NEXT:       SHF_ALLOC
+; CHECK-NEXT:       SHF_EXECINSTR
+; CHECK-NEXT:     ]
+; CHECK-NEXT:     Address: 0x{{[0-9,A-F]+}}
+; CHECK-NEXT:     Offset: 0x{{[0-9,A-F]+}}
+; CHECK-NEXT:     Size: {{[0-9]+}}
+; CHECK-NEXT:     Link: 0
+; CHECK-NEXT:     Info: 0
+; CHECK-NEXT:     AddressAlignment: 4
+; CHECK-NEXT:     EntrySize: 0
 
 ; See that at least first 3 instructions are correct: GP prologue
-; CHECK-NEXT:   ('_section_data', '3c1c0000 279c0000 0399e021 {{[0-9,a-f, ]*}}')
+; CHECK-NEXT:     SectionData (
+; CHECK-NEXT:       0000: 3C1C0000 279C0000 0399E021 {{[0-9,A-F, ]*}}
+; CHECK:          )
+; CHECK:   }
 
 ; ModuleID = '../br1.c'
 target datalayout = "E-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-n32"
diff --git a/test/MC/Mips/elf-gprel-32-64.ll b/test/MC/Mips/elf-gprel-32-64.ll
index b946822..47003fa 100644
--- a/test/MC/Mips/elf-gprel-32-64.ll
+++ b/test/MC/Mips/elf-gprel-32-64.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -filetype=obj -march=mips64el -mcpu=mips64 %s -o - \
-; RUN: | elf-dump --dump-section-data \
+; RUN: | llvm-readobj -r \
 ; RUN: | FileCheck %s
 
 define i32 @test(i32 %c) nounwind {
@@ -30,8 +30,11 @@ return:
 ; Check that the appropriate relocations were created.
 
 ; R_MIPS_GPREL32/R_MIPS_64/R_MIPS_NONE
-; CHECK: (('sh_name', 0x{{[a-z0-9]+}}) # '.rela.rodata'
-; CHECK:      ('r_type3', 0x00)
-; CHECK-NEXT: ('r_type2', 0x12)
-; CHECK-NEXT: ('r_type', 0x0c)
-
+; CHECK:      Relocations [
+; CHECK:        Section ({{[a-z0-9]+}}) .rodata {
+; CHECK-NEXT:     0x{{[0-9,A-F]+}} R_MIPS_GPREL32/R_MIPS_64/R_MIPS_NONE
+; CHECK-NEXT:     0x{{[0-9,A-F]+}} R_MIPS_GPREL32/R_MIPS_64/R_MIPS_NONE
+; CHECK-NEXT:     0x{{[0-9,A-F]+}} R_MIPS_GPREL32/R_MIPS_64/R_MIPS_NONE
+; CHECK-NEXT:     0x{{[0-9,A-F]+}} R_MIPS_GPREL32/R_MIPS_64/R_MIPS_NONE
+; CHECK-NEXT:   }
+; CHECK-NEXT: ]
diff --git a/test/MC/Mips/elf-reginfo.ll b/test/MC/Mips/elf-reginfo.ll
index 1d7a188..a255af9 100644
--- a/test/MC/Mips/elf-reginfo.ll
+++ b/test/MC/Mips/elf-reginfo.ll
@@ -1,7 +1,7 @@
  ; RUN: llc -filetype=obj -march=mips64el -mcpu=mips64 %s -o - \
- ; RUN: | elf-dump --dump-section-data  | FileCheck --check-prefix=CHECK_64 %s
+ ; RUN: | llvm-readobj -s | FileCheck --check-prefix=CHECK_64 %s
  ; RUN: llc -filetype=obj -march=mipsel -mcpu=mips32 %s -o - \
- ; RUN: | elf-dump --dump-section-data  | FileCheck --check-prefix=CHECK_32 %s
+ ; RUN: | llvm-readobj -s | FileCheck --check-prefix=CHECK_32 %s
 
 ; Check for register information sections.
 ;
@@ -13,14 +13,18 @@ entry:
 ; Check that the appropriate relocations were created.
 
 ; check for .MIPS.options
-; CHECK_64:      (('sh_name', 0x{{[0-9|a-f]+}}) # '.MIPS.options'
-; CHECK_64-NEXT: ('sh_type', 0x7000000d)
-; CHECK_64-NEXT: ('sh_flags', 0x0000000008000002)
+; CHECK_64:      Sections [
+; CHECK_64:        Section {
+; CHECK_64:          Name: .MIPS.options
+; CHECK_64-NEXT:     Type: SHT_MIPS_OPTIONS
+; CHECK_64-NEXT:     Flags [ (0x8000002)
 
 ; check for .reginfo
-; CHECK_32:      (('sh_name', 0x{{[0-9|a-f]+}}) # '.reginfo'
-; CHECK_32-NEXT: ('sh_type', 0x70000006)
-; CHECK_32-NEXT: ('sh_flags', 0x00000002)
+; CHECK_32:      Sections [
+; CHECK_32:        Section {
+; CHECK_32:          Name: .reginfo
+; CHECK_32-NEXT:     Type:  SHT_MIPS_REGINFO
+; CHECK_32-NEXT:     Flags [ (0x2)
 
 
   %puts = tail call i32 @puts(i8* getelementptr inbounds ([12 x i8]* @str, i64 0, i64 0))
@@ -28,4 +32,3 @@ entry:
 
 }
 declare i32 @puts(i8* nocapture) nounwind
-  
diff --git a/test/MC/Mips/elf-relsym.ll b/test/MC/Mips/elf-relsym.ll
index 0f74437..6da9262 100644
--- a/test/MC/Mips/elf-relsym.ll
+++ b/test/MC/Mips/elf-relsym.ll
@@ -1,11 +1,21 @@
-; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux %s -o - | elf-dump --dump-section-data  | FileCheck %s
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux %s -o - | llvm-readobj -t | FileCheck %s
 
 ; Check that the appropriate symbols were created.
 
-; CHECK: (('st_name', 0x{{[0-9|a-f]+}}) # '$.str'
-; CHECK: (('st_name', 0x{{[0-9|a-f]+}}) # '$.str1'
-; CHECK: (('st_name', 0x{{[0-9|a-f]+}}) # '$CPI0_0'
-; CHECK: (('st_name', 0x{{[0-9|a-f]+}}) # '$CPI0_1'
+; CHECK: Symbols [
+; CHECK:   Symbol {
+; CHECK:     Name: $.str
+; CHECK:   }
+; CHECK:   Symbol {
+; CHECK:     Name: $.str1
+; CHECK:   }
+; CHECK:   Symbol {
+; CHECK:     Name: $CPI0_0
+; CHECK:   }
+; CHECK:   Symbol {
+; CHECK:     Name: $CPI0_1
+; CHECK:   }
+; CHECK: ]
 
 @.str = private unnamed_addr constant [6 x i8] c"abcde\00", align 1
 @gc1 = external global i8*
diff --git a/test/MC/Mips/elf-tls.ll b/test/MC/Mips/elf-tls.ll
index b4183b8..9f604e0 100644
--- a/test/MC/Mips/elf-tls.ll
+++ b/test/MC/Mips/elf-tls.ll
@@ -1,10 +1,14 @@
-; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux %s -o - | elf-dump --dump-section-data  | FileCheck %s
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux %s -o - | llvm-readobj -r | FileCheck %s
 
 ; Check that the appropriate relocations were created.
 
-; CHECK:     ('r_type', 0x2b)
-; CHECK:     ('r_type', 0x2c)
-; CHECK:     ('r_type', 0x2d)
+; CHECK: Relocations [
+; CHECK:   Section (1) .text {
+; CHECK:     R_MIPS_TLS_LDM
+; CHECK:     R_MIPS_TLS_DTPREL_HI16
+; CHECK:     R_MIPS_TLS_DTPREL_LO16
+; CHECK:   }
+; CHECK: ]
 
 @t1 = thread_local global i32 0, align 4
 
diff --git a/test/MC/Mips/elf_basic.s b/test/MC/Mips/elf_basic.s
index ffc3b11..6c1e769 100644
--- a/test/MC/Mips/elf_basic.s
+++ b/test/MC/Mips/elf_basic.s
@@ -1,35 +1,41 @@
 // 32 bit big endian
-// RUN: llvm-mc -filetype=obj -triple mips-unknown-linux %s -o - | elf-dump --dump-section-data  | FileCheck -check-prefix=CHECK-BE32 %s
+// RUN: llvm-mc -filetype=obj -triple mips-unknown-linux %s -o - | llvm-readobj -h | FileCheck -check-prefix=CHECK-BE32 %s
 // 32 bit little endian
-// RUN: llvm-mc -filetype=obj -triple mipsel-unknown-linux %s -o - | elf-dump --dump-section-data  | FileCheck -check-prefix=CHECK-LE32 %s
+// RUN: llvm-mc -filetype=obj -triple mipsel-unknown-linux %s -o - | llvm-readobj -h | FileCheck -check-prefix=CHECK-LE32 %s
 // 64 bit big endian
-// RUN: llvm-mc -filetype=obj -arch=mips64 -triple mips64-unknown-linux %s -o - | elf-dump --dump-section-data | FileCheck -check-prefix=CHECK-BE64 %s
+// RUN: llvm-mc -filetype=obj -arch=mips64 -triple mips64-unknown-linux %s -o - | llvm-readobj -h | FileCheck -check-prefix=CHECK-BE64 %s
 // 64 bit little endian
-// RUN: llvm-mc -filetype=obj -arch=mips64el -triple mips64el-unknown-linux %s -o - | elf-dump --dump-section-data | FileCheck -check-prefix=CHECK-LE64 %s
+// RUN: llvm-mc -filetype=obj -arch=mips64el -triple mips64el-unknown-linux %s -o - | llvm-readobj -h | FileCheck -check-prefix=CHECK-LE64 %s
 
 // Check that we produce 32 bit with each endian.
 
-// This is 32 bit.
-// CHECK-BE32: ('e_indent[EI_CLASS]', 0x01)
-// This is big endian.
-// CHECK-BE32: ('e_indent[EI_DATA]', 0x02)
+// CHECK-BE32: ElfHeader {
+// CHECK-BE32:   Ident {
+// CHECK-BE32:     Class: 32-bit
+// CHECK-BE32:     DataEncoding: BigEndian
+// CHECK-BE32:   }
+// CHECK-BE32: }
 
-// This is 32 bit.
-// CHECK-LE32: ('e_indent[EI_CLASS]', 0x01)
-// This is little endian.
-// CHECK-LE32: ('e_indent[EI_DATA]', 0x01)
+// CHECK-LE32: ElfHeader {
+// CHECK-LE32:   Ident {
+// CHECK-LE32:     Class: 32-bit
+// CHECK-LE32:     DataEncoding: LittleEndian
+// CHECK-LE32:   }
+// CHECK-LE32: }
 
 // Check that we produce 64 bit with each endian.
 
-// This is 64 bit.
-// CHECK-BE64: ('e_indent[EI_CLASS]', 0x02)
-// This is big endian.
-// CHECK-BE64: ('e_indent[EI_DATA]', 0x02)
+// CHECK-BE64: ElfHeader {
+// CHECK-BE64:   Ident {
+// CHECK-BE64:     Class: 64-bit
+// CHECK-BE64:     DataEncoding: BigEndian
+// CHECK-BE64:   }
+// CHECK-BE64: }
 
-// This is 64 bit.
-// CHECK-LE64: ('e_indent[EI_CLASS]', 0x02)
-// This is little endian.
-// CHECK-LE64: ('e_indent[EI_DATA]', 0x01)
-
-// Check that we are setting EI_OSABI to ELFOSABI_LINUX.
-// CHECK-LE64: ('e_indent[EI_OSABI]', 0x03)
+// CHECK-LE64: ElfHeader {
+// CHECK-LE64:   Ident {
+// CHECK-LE64:     Class: 64-bit
+// CHECK-LE64:     DataEncoding: LittleEndian
+// CHECK-LE64:     OS/ABI: GNU/Linux
+// CHECK-LE64:   }
+// CHECK-LE64: }
diff --git a/test/MC/Mips/elf_eflags.ll b/test/MC/Mips/elf_eflags.ll
index 315cb81..6d16a42 100644
--- a/test/MC/Mips/elf_eflags.ll
+++ b/test/MC/Mips/elf_eflags.ll
@@ -13,52 +13,52 @@
 ; EF_MIPS_ARCH_32R2 (0x70000000)
 ; EF_MIPS_ARCH_64R2 (0x80000000)
 
-; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32 -relocation-model=static %s -o - | elf-dump --dump-section-data  | FileCheck -check-prefix=CHECK-BE32 %s
-; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32 %s -o - | elf-dump --dump-section-data  | FileCheck -check-prefix=CHECK-BE32_PIC %s
-; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32r2 -relocation-model=static %s -o - | elf-dump --dump-section-data  | FileCheck -check-prefix=CHECK-BE32R2 %s
-; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32r2 %s -o - | elf-dump --dump-section-data  | FileCheck -check-prefix=CHECK-BE32R2_PIC %s
-; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32r2 -mattr=+micromips -relocation-model=static %s -o - | elf-dump --dump-section-data  | FileCheck -check-prefix=CHECK-BE32R2-MICROMIPS %s
-; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32r2 -mattr=+micromips %s -o - | elf-dump --dump-section-data  | FileCheck -check-prefix=CHECK-BE32R2-MICROMIPS_PIC %s
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32 -relocation-model=static %s -o - | llvm-readobj -h | FileCheck -check-prefix=CHECK-BE32 %s
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32 %s -o - | llvm-readobj -h | FileCheck -check-prefix=CHECK-BE32_PIC %s
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32r2 -relocation-model=static %s -o - | llvm-readobj -h | FileCheck -check-prefix=CHECK-BE32R2 %s
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32r2 %s -o - | llvm-readobj -h | FileCheck -check-prefix=CHECK-BE32R2_PIC %s
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32r2 -mattr=+micromips -relocation-model=static %s -o - | llvm-readobj -h | FileCheck -check-prefix=CHECK-BE32R2-MICROMIPS %s
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32r2 -mattr=+micromips %s -o - | llvm-readobj -h | FileCheck -check-prefix=CHECK-BE32R2-MICROMIPS_PIC %s
 
-; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips64 -relocation-model=static %s -o - | elf-dump --dump-section-data  | FileCheck -check-prefix=CHECK-BE64 %s
-; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips64 %s -o - | elf-dump --dump-section-data  | FileCheck -check-prefix=CHECK-BE64_PIC %s
-; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips64r2 -relocation-model=static %s -o - | elf-dump --dump-section-data  | FileCheck -check-prefix=CHECK-BE64R2 %s
-; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips64r2 %s -o - | elf-dump --dump-section-data  | FileCheck -check-prefix=CHECK-BE64R2_PIC %s
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips64 -relocation-model=static %s -o - | llvm-readobj -h | FileCheck -check-prefix=CHECK-BE64 %s
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips64 %s -o - | llvm-readobj -h | FileCheck -check-prefix=CHECK-BE64_PIC %s
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips64r2 -relocation-model=static %s -o - | llvm-readobj -h | FileCheck -check-prefix=CHECK-BE64R2 %s
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips64r2 %s -o - | llvm-readobj -h | FileCheck -check-prefix=CHECK-BE64R2_PIC %s
 
-; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32r2 -mattr=+mips16 -relocation-model=pic %s -o - | elf-dump --dump-section-data  | FileCheck -check-prefix=CHECK-LE32R2-MIPS16 %s
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32r2 -mattr=+mips16 -relocation-model=pic %s -o - | llvm-readobj -h | FileCheck -check-prefix=CHECK-LE32R2-MIPS16 %s
  
 ; 32(R1) bit with NO_REORDER and static
-; CHECK-BE32: ('e_flags', 0x50001001)
+; CHECK-BE32: Flags [ (0x50001001)
 ;
 ; 32(R1) bit with NO_REORDER and PIC
-; CHECK-BE32_PIC: ('e_flags', 0x50001003)
+; CHECK-BE32_PIC: Flags [ (0x50001003)
 ;
 ; 32R2 bit with NO_REORDER and static
-; CHECK-BE32R2: ('e_flags', 0x70001001)
+; CHECK-BE32R2: Flags [ (0x70001001)
 ;
 ; 32R2 bit with NO_REORDER and PIC
-; CHECK-BE32R2_PIC: ('e_flags', 0x70001003)
+; CHECK-BE32R2_PIC: Flags [ (0x70001003)
 ;
 ; 32R2 bit MICROMIPS with NO_REORDER and static
-; CHECK-BE32R2-MICROMIPS: ('e_flags', 0x72001001)
+; CHECK-BE32R2-MICROMIPS: Flags [ (0x72001001)
 ;
 ; 32R2 bit MICROMIPS with NO_REORDER and PIC
-;CHECK-BE32R2-MICROMIPS_PIC:  ('e_flags', 0x72001003)
+;CHECK-BE32R2-MICROMIPS_PIC: Flags [ (0x72001003)
 ;
 ; 64(R1) bit with NO_REORDER and static
-; CHECK-BE64: ('e_flags', 0x60000001)
+; CHECK-BE64: Flags [ (0x60000001)
 ;
 ; 64(R1) bit with NO_REORDER and PIC
-; CHECK-BE64_PIC: ('e_flags', 0x60000003)
+; CHECK-BE64_PIC: Flags [ (0x60000003)
 ;
 ; 64R2 bit with NO_REORDER and static
-; CHECK-BE64R2: ('e_flags', 0x80000001)
+; CHECK-BE64R2: Flags [ (0x80000001)
 ;
 ; 64R2 bit with NO_REORDER and PIC
-; CHECK-BE64R2_PIC: ('e_flags', 0x80000003)
+; CHECK-BE64R2_PIC: Flags [ (0x80000003)
 ;
 ; 32R2 bit MIPS16 with PIC
-; CHECK-LE32R2-MIPS16: ('e_flags', 0x74001002)
+; CHECK-LE32R2-MIPS16: Flags [ (0x74001002)
  
 define i32 @main() nounwind {
 entry:
diff --git a/test/MC/Mips/elf_st_other.ll b/test/MC/Mips/elf_st_other.ll
index f188ce7..bc56c00 100644
--- a/test/MC/Mips/elf_st_other.ll
+++ b/test/MC/Mips/elf_st_other.ll
@@ -1,13 +1,12 @@
 ; This tests value of ELF st_other field for function symbol table entries.
 ; For microMIPS value should be equal to STO_MIPS_MICROMIPS.
 
-; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32r2 -mattr=+micromips %s -o - | elf-dump --dump-section-data  | FileCheck %s
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32r2 -mattr=+micromips %s -o - | llvm-readobj -t | FileCheck %s
 
 define i32 @main() nounwind {
 entry:
   ret i32 0
 }
 
-; CHECK:  'main'
-; CHECK:  ('st_other', 0x80)
-
+; CHECK:     Name: main
+; CHECK:     Other: 128
diff --git a/test/MC/Mips/expr1.s b/test/MC/Mips/expr1.s
new file mode 100644
index 0000000..67664c1
--- /dev/null
+++ b/test/MC/Mips/expr1.s
@@ -0,0 +1,26 @@
+# RUN: llvm-mc  %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 | FileCheck %s
+# Check that the assembler can handle the expressions as operands.
+# CHECK:  .text
+# CHECK:  .globl  foo
+# CHECK:  foo:
+# CHECK:  lw   $4, %lo(foo)($4)    # encoding: [A,A,0x84,0x8c]
+# CHECK:                           #   fixup A - offset: 0, value: foo@ABS_LO, kind: fixup_Mips_LO16
+# CHECK:  lw   $4, 56($4)          # encoding: [0x38,0x00,0x84,0x8c]
+# CHECK:  lw   $4, %lo(foo+8)($4)  # encoding: [0x08'A',A,0x84,0x8c]
+# CHECK:                           #   fixup A - offset: 0, value: foo@ABS_LO, kind: fixup_Mips_LO16
+# CHECK:  lw   $4, %lo(foo+8)($4)  # encoding: [0x08'A',A,0x84,0x8c]
+# CHECK:                           #   fixup A - offset: 0, value: foo@ABS_LO, kind: fixup_Mips_LO16
+# CHECK:  lw   $4, %lo(foo+8)($4)  # encoding: [0x08'A',A,0x84,0x8c]
+# CHECK:                           #   fixup A - offset: 0, value: foo@ABS_LO, kind: fixup_Mips_LO16
+# CHECK:  .space  64
+
+  .globl  foo
+  .ent  foo
+foo:
+  lw  $4,%lo(foo)($4)
+  lw  $4,((10 + 4) * 4)($4)
+  lw  $4,%lo (2 * 4) + foo($4)
+  lw  $4,%lo((2 * 4) + foo)($4)
+  lw  $4,(((%lo ((2 * 4) + foo))))($4)
+  .space  64
+  .end  foo
diff --git a/test/MC/Mips/higher_highest.ll b/test/MC/Mips/higher_highest.ll
index 0c66522..6c3d71f 100644
--- a/test/MC/Mips/higher_highest.ll
+++ b/test/MC/Mips/higher_highest.ll
@@ -1,14 +1,16 @@
-; DISABLE: llc -march=mips64el -mcpu=mips64 -mattr=n64  -force-mips-long-branch -filetype=obj < %s -o - | elf-dump --dump-section-data | FileCheck %s
+; DISABLE: llc -march=mips64el -mcpu=mips64 -mattr=n64  -force-mips-long-branch -filetype=obj < %s -o - | llvm-readobj -r | FileCheck %s
 ; RUN: false
 ; XFAIL: *
 ; Disabled because currently we don't have a way to generate these relocations.
 ;
 ; Check that the R_MIPS_HIGHER and R_MIPS_HIGHEST relocations were created.
 
-; CHECK:     ('r_type', 0x1d)
-; CHECK:     ('r_type', 0x1d)
-; CHECK:     ('r_type', 0x1c)
-; CHECK:     ('r_type', 0x1c)
+; CHECK: Relocations [
+; CHECK:     0x{{[0-9,A-F]+}} R_MIPS_HIGHEST
+; CHECK:     0x{{[0-9,A-F]+}} R_MIPS_HIGHEST
+; CHECK:     0x{{[0-9,A-F]+}} R_MIPS_HIGHER
+; CHECK:     0x{{[0-9,A-F]+}} R_MIPS_HIGHER
+; CHECK: ]
 
 @g0 = external global i32
 
diff --git a/test/MC/Mips/micromips-alu-instructions.s b/test/MC/Mips/micromips-alu-instructions.s
new file mode 100644
index 0000000..c541e1a
--- /dev/null
+++ b/test/MC/Mips/micromips-alu-instructions.s
@@ -0,0 +1,64 @@
+# RUN: llvm-mc %s -triple=mipsel -show-encoding -mattr=micromips | FileCheck %s
+# Check that the assembler can handle the documented syntax
+# for arithmetic and logical instructions.
+#------------------------------------------------------------------------------
+# Arithmetic and Logical Instructions
+#------------------------------------------------------------------------------
+# CHECK: add   $9, $6, $7      # encoding: [0x10,0x49,0xe6,0x00]
+# CHECK: addi  $9, $6, 17767   # encoding: [0x67,0x45,0x26,0x11]
+# CHECK: addiu $9, $6, -15001  # encoding: [0x67,0xc5,0x26,0x31]
+# CHECK: addi  $9, $6, 17767   # encoding: [0x67,0x45,0x26,0x11]
+# CHECK: addiu $9, $6, -15001  # encoding: [0x67,0xc5,0x26,0x31]
+# CHECK: addu  $9, $6, $7      # encoding: [0x50,0x49,0xe6,0x00]
+# CHECK: sub   $9, $6, $7      # encoding: [0x90,0x49,0xe6,0x00]
+# CHECK: subu  $4, $3, $5      # encoding: [0xd0,0x21,0xa3,0x00]
+# CHECK: neg   $6, $7          # encoding: [0x90,0x31,0xe0,0x00]
+# CHECK: negu  $6, $7          # encoding: [0xd0,0x31,0xe0,0x00]
+# CHECK: move  $7, $8          # encoding: [0x50,0x39,0x08,0x00]
+# CHECK: slt    $3, $3, $5     # encoding: [0x50,0x1b,0xa3,0x00]
+# CHECK: slti   $3, $3, 103    # encoding: [0x67,0x00,0x63,0x90]
+# CHECK: slti   $3, $3, 103    # encoding: [0x67,0x00,0x63,0x90]
+# CHECK: sltiu  $3, $3, 103    # encoding: [0x67,0x00,0x63,0xb0]
+# CHECK: sltu   $3, $3, $5     # encoding: [0x90,0x1b,0xa3,0x00]
+# CHECK: and    $9, $6, $7     # encoding: [0x50,0x4a,0xe6,0x00]
+# CHECK: andi   $9, $6, 17767  # encoding: [0x67,0x45,0x26,0xd1]
+# CHECK: andi   $9, $6, 17767  # encoding: [0x67,0x45,0x26,0xd1]
+# CHECK: or     $3, $4, $5     # encoding: [0x90,0x1a,0xa4,0x00]
+# CHECK: ori    $9, $6, 17767  # encoding: [0x67,0x45,0x26,0x51]
+# CHECK: xor    $3, $3, $5     # encoding: [0x10,0x1b,0xa3,0x00]
+# CHECK: xori   $9, $6, 17767  # encoding: [0x67,0x45,0x26,0x71]
+# CHECK: xori   $9, $6, 17767  # encoding: [0x67,0x45,0x26,0x71]
+# CHECK: nor    $9, $6, $7     # encoding: [0xd0,0x4a,0xe6,0x00]
+# CHECK: not    $7, $8         # encoding: [0xd0,0x3a,0x08,0x00]
+# CHECK: mul    $9, $6, $7     # encoding: [0x10,0x4a,0xe6,0x00]
+# CHECK: mult   $9, $7         # encoding: [0x3c,0x8b,0xe9,0x00]
+# CHECK: multu  $9, $7         # encoding: [0x3c,0x9b,0xe9,0x00]
+    add    $9, $6, $7
+    add    $9, $6, 17767
+    addu   $9, $6, -15001
+    addi   $9, $6, 17767
+    addiu  $9, $6,-15001
+    addu   $9, $6, $7
+    sub    $9, $6, $7
+    subu   $4, $3, $5
+    neg    $6, $7
+    negu   $6, $7
+    move   $7, $8
+    slt    $3, $3, $5
+    slt    $3, $3, 103
+    slti   $3, $3, 103
+    sltiu  $3, $3, 103
+    sltu   $3, $3, $5
+    and    $9, $6, $7
+    and    $9, $6, 17767
+    andi   $9, $6, 17767
+    or     $3, $4, $5
+    ori    $9, $6, 17767
+    xor    $3, $3, $5
+    xor    $9, $6, 17767
+    xori   $9, $6, 17767
+    nor    $9, $6, $7
+    nor    $7, $8, $zero
+    mul    $9, $6, $7
+    mult   $9, $7
+    multu  $9, $7
diff --git a/test/MC/Mips/micromips-loadstore-instructions.s b/test/MC/Mips/micromips-loadstore-instructions.s
new file mode 100644
index 0000000..623e2ac
--- /dev/null
+++ b/test/MC/Mips/micromips-loadstore-instructions.s
@@ -0,0 +1,22 @@
+# RUN: llvm-mc %s -triple=mipsel -show-encoding -mattr=micromips | FileCheck %s
+# Check that the assembler can handle the documented syntax
+# for load and store instructions.
+#------------------------------------------------------------------------------
+# Load and Store Instructions
+#------------------------------------------------------------------------------
+# CHECK: lb     $5, 8($4)      # encoding: [0x08,0x00,0xa4,0x1c]
+# CHECK: lbu    $6, 8($4)      # encoding: [0x08,0x00,0xc4,0x14]
+# CHECK: lh     $2, 8($4)      # encoding: [0x08,0x00,0x44,0x3c]
+# CHECK: lhu    $4, 8($2)      # encoding: [0x08,0x00,0x82,0x34]
+# CHECK: lw     $6, 4($5)      # encoding: [0x04,0x00,0xc5,0xfc]
+# CHECK: sb     $5, 8($4)      # encoding: [0x08,0x00,0xa4,0x18]
+# CHECK: sh     $2, 8($4)      # encoding: [0x08,0x00,0x44,0x38]
+# CHECK: sw     $5, 4($6)      # encoding: [0x04,0x00,0xa6,0xf8]
+     lb     $5, 8($4)
+     lbu    $6, 8($4)
+     lh     $2, 8($4)
+     lhu    $4, 8($2)
+     lw     $6, 4($5)
+     sb     $5, 8($4)
+     sh     $2, 8($4)
+     sw     $5, 4($6)
diff --git a/test/MC/Mips/micromips-shift-instructions.s b/test/MC/Mips/micromips-shift-instructions.s
new file mode 100644
index 0000000..3b5060f
--- /dev/null
+++ b/test/MC/Mips/micromips-shift-instructions.s
@@ -0,0 +1,22 @@
+# RUN: llvm-mc %s -triple=mipsel -show-encoding -mcpu=mips32r2 -mattr=micromips | FileCheck %s
+# Check that the assembler can handle the documented syntax
+# for shift instructions.
+#------------------------------------------------------------------------------
+# Shift Instructions
+#------------------------------------------------------------------------------
+# CHECK: sll    $4, $3, 7      # encoding: [0x00,0x38,0x83,0x00]
+# CHECK: sllv   $2, $3, $5     # encoding: [0x10,0x10,0x65,0x00]
+# CHECK: sra    $4, $3, 7      # encoding: [0x80,0x38,0x83,0x00]
+# CHECK: srav   $2, $3, $5     # encoding: [0x90,0x10,0x65,0x00]
+# CHECK: srl    $4, $3, 7      # encoding: [0x40,0x38,0x83,0x00]
+# CHECK: srlv   $2, $3, $5     # encoding: [0x50,0x10,0x65,0x00]
+# CHECK: rotr   $9, $6, 7      # encoding: [0xc0,0x38,0x26,0x01]
+# CHECK: rotrv  $9, $6, $7     # encoding: [0xd0,0x48,0xc7,0x00]
+     sll    $4, $3, 7
+     sllv   $2, $3, $5
+     sra    $4, $3, 7
+     srav   $2, $3, $5
+     srl    $4, $3, 7
+     srlv   $2, $3, $5
+     rotr   $9, $6, 7
+     rotrv  $9, $6, $7
diff --git a/test/MC/Mips/mips-alu-instructions.s b/test/MC/Mips/mips-alu-instructions.s
index 7384d19..586e88b 100644
--- a/test/MC/Mips/mips-alu-instructions.s
+++ b/test/MC/Mips/mips-alu-instructions.s
@@ -1,7 +1,6 @@
 # RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 | FileCheck %s
 # Check that the assembler can handle the documented syntax
 # for arithmetic and logical instructions.
-# CHECK: .section __TEXT,__text,regular,pure_instructions
 #------------------------------------------------------------------------------
 # Logical instructions
 #------------------------------------------------------------------------------
diff --git a/test/MC/Mips/mips-expansions.s b/test/MC/Mips/mips-expansions.s
index 3385fe1..1622965 100644
--- a/test/MC/Mips/mips-expansions.s
+++ b/test/MC/Mips/mips-expansions.s
@@ -1,7 +1,6 @@
 # RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 | FileCheck %s
 # Check that the assembler can handle the documented syntax
 # for macro instructions
-# CHECK: .section __TEXT,__text,regular,pure_instructions
 #------------------------------------------------------------------------------
 # Load immediate instructions
 #------------------------------------------------------------------------------
diff --git a/test/MC/Mips/mips-fpu-instructions.s b/test/MC/Mips/mips-fpu-instructions.s
index a126c6f..e515872 100644
--- a/test/MC/Mips/mips-fpu-instructions.s
+++ b/test/MC/Mips/mips-fpu-instructions.s
@@ -1,7 +1,6 @@
 # RUN: llvm-mc  %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 | FileCheck %s
 # Check that the assembler can handle the documented syntax
 # for FPU instructions.
-# CHECK: .section __TEXT,__text,regular,pure_instructions
 #------------------------------------------------------------------------------
 # FP aritmetic  instructions
 #------------------------------------------------------------------------------
@@ -157,6 +156,8 @@
 # CHECK:  mtc0    $9, $8, 3               # encoding: [0x03,0x40,0x89,0x40]
 # CHECK:  mfc2    $5, $7, 4               # encoding: [0x04,0x38,0x05,0x48]
 # CHECK:  mtc2    $9, $4, 5               # encoding: [0x05,0x20,0x89,0x48]
+# CHECK:  movf    $2, $1, $fcc0           # encoding: [0x01,0x10,0x20,0x00]
+# CHECK:  movt    $2, $1, $fcc0           # encoding: [0x01,0x10,0x21,0x00]
 
    cfc1    $a2,$0
    mfc1    $a2,$f7
@@ -176,3 +177,5 @@
    mtc0    $9, $8, 3
    mfc2    $5, $7, 4
    mtc2    $9, $4, 5
+   movf    $2, $1, $fcc0
+   movt    $2, $1, $fcc0
diff --git a/test/MC/Mips/mips-jump-instructions.s b/test/MC/Mips/mips-jump-instructions.s
index 1dcb287..597f687 100644
--- a/test/MC/Mips/mips-jump-instructions.s
+++ b/test/MC/Mips/mips-jump-instructions.s
@@ -1,31 +1,54 @@
 # RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 | \
-# RUN: FileCheck %s
+# RUN: FileCheck -check-prefix=CHECK32  %s
+# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips64r2 | \
+# RUN: FileCheck -check-prefix=CHECK64  %s
+
 # Check that the assembler can handle the documented syntax
 # for jumps and branches.
-# CHECK: .section __TEXT,__text,regular,pure_instructions
 #------------------------------------------------------------------------------
 # Branch instructions
 #------------------------------------------------------------------------------
-# CHECK:   b 1332                 # encoding: [0x4d,0x01,0x00,0x10]
-# CHECK:   nop                    # encoding: [0x00,0x00,0x00,0x00]
-# CHECK:   bc1f 1332              # encoding: [0x4d,0x01,0x00,0x45]
-# CHECK:   nop                    # encoding: [0x00,0x00,0x00,0x00]
-# CHECK:   bc1t 1332              # encoding: [0x4d,0x01,0x01,0x45]
-# CHECK:   nop                    # encoding: [0x00,0x00,0x00,0x00]
-# CHECK:   beq $9, $6, 1332       # encoding: [0x4d,0x01,0x26,0x11]
-# CHECK:   nop                    # encoding: [0x00,0x00,0x00,0x00]
-# CHECK:   bgez $6, 1332          # encoding: [0x4d,0x01,0xc1,0x04]
-# CHECK:   nop                    # encoding: [0x00,0x00,0x00,0x00]
-# CHECK:   bgezal $6, 1332        # encoding: [0x4d,0x01,0xd1,0x04]
-# CHECK:   nop                    # encoding: [0x00,0x00,0x00,0x00]
-# CHECK:   bgtz $6, 1332          # encoding: [0x4d,0x01,0xc0,0x1c]
-# CHECK:   nop                    # encoding: [0x00,0x00,0x00,0x00]
-# CHECK:   blez $6, 1332          # encoding: [0x4d,0x01,0xc0,0x18]
-# CHECK:   nop                    # encoding: [0x00,0x00,0x00,0x00]
-# CHECK:   bne $9, $6, 1332       # encoding: [0x4d,0x01,0x26,0x15]
-# CHECK:   nop                    # encoding: [0x00,0x00,0x00,0x00]
-# CHECK:   bal     1332           # encoding: [0x4d,0x01,0x11,0x04]
-# CHECK:   nop                    # encoding: [0x00,0x00,0x00,0x00]
+# CHECK32:   b 1332                 # encoding: [0x4d,0x01,0x00,0x10]
+# CHECK32:   nop                    # encoding: [0x00,0x00,0x00,0x00]
+# CHECK32:   bc1f 1332              # encoding: [0x4d,0x01,0x00,0x45]
+# CHECK32:   nop                    # encoding: [0x00,0x00,0x00,0x00]
+# CHECK32:   bc1t 1332              # encoding: [0x4d,0x01,0x01,0x45]
+# CHECK32:   nop                    # encoding: [0x00,0x00,0x00,0x00]
+# CHECK32:   beq $9, $6, 1332       # encoding: [0x4d,0x01,0x26,0x11]
+# CHECK32:   nop                    # encoding: [0x00,0x00,0x00,0x00]
+# CHECK32:   bgez $6, 1332          # encoding: [0x4d,0x01,0xc1,0x04]
+# CHECK32:   nop                    # encoding: [0x00,0x00,0x00,0x00]
+# CHECK32:   bgezal $6, 1332        # encoding: [0x4d,0x01,0xd1,0x04]
+# CHECK32:   nop                    # encoding: [0x00,0x00,0x00,0x00]
+# CHECK32:   bgtz $6, 1332          # encoding: [0x4d,0x01,0xc0,0x1c]
+# CHECK32:   nop                    # encoding: [0x00,0x00,0x00,0x00]
+# CHECK32:   blez $6, 1332          # encoding: [0x4d,0x01,0xc0,0x18]
+# CHECK32:   nop                    # encoding: [0x00,0x00,0x00,0x00]
+# CHECK32:   bne $9, $6, 1332       # encoding: [0x4d,0x01,0x26,0x15]
+# CHECK32:   nop                    # encoding: [0x00,0x00,0x00,0x00]
+# CHECK32:   bal     1332           # encoding: [0x4d,0x01,0x11,0x04]
+# CHECK32:   nop                    # encoding: [0x00,0x00,0x00,0x00]
+
+# CHECK64:   b 1332                 # encoding: [0x4d,0x01,0x00,0x10]
+# CHECK64:   nop                    # encoding: [0x00,0x00,0x00,0x00]
+# CHECK64:   bc1f 1332              # encoding: [0x4d,0x01,0x00,0x45]
+# CHECK64:   nop                    # encoding: [0x00,0x00,0x00,0x00]
+# CHECK64:   bc1t 1332              # encoding: [0x4d,0x01,0x01,0x45]
+# CHECK64:   nop                    # encoding: [0x00,0x00,0x00,0x00]
+# CHECK64:   beq $9, $6, 1332       # encoding: [0x4d,0x01,0x26,0x11]
+# CHECK64:   nop                    # encoding: [0x00,0x00,0x00,0x00]
+# CHECK64:   bgez $6, 1332          # encoding: [0x4d,0x01,0xc1,0x04]
+# CHECK64:   nop                    # encoding: [0x00,0x00,0x00,0x00]
+# CHECK64:   bgezal $6, 1332        # encoding: [0x4d,0x01,0xd1,0x04]
+# CHECK64:   nop                    # encoding: [0x00,0x00,0x00,0x00]
+# CHECK64:   bgtz $6, 1332          # encoding: [0x4d,0x01,0xc0,0x1c]
+# CHECK64:   nop                    # encoding: [0x00,0x00,0x00,0x00]
+# CHECK64:   blez $6, 1332          # encoding: [0x4d,0x01,0xc0,0x18]
+# CHECK64:   nop                    # encoding: [0x00,0x00,0x00,0x00]
+# CHECK64:   bne $9, $6, 1332       # encoding: [0x4d,0x01,0x26,0x15]
+# CHECK64:   nop                    # encoding: [0x00,0x00,0x00,0x00]
+# CHECK64:   bal     1332           # encoding: [0x4d,0x01,0x11,0x04]
+# CHECK64:   nop                    # encoding: [0x00,0x00,0x00,0x00]
 
 .set noreorder
 
@@ -54,24 +77,43 @@ end_of_code:
 #------------------------------------------------------------------------------
 # Jump instructions
 #------------------------------------------------------------------------------
-# CHECK:   j 1328               # encoding: [0x4c,0x01,0x00,0x08]
-# CHECK:   nop                  # encoding: [0x00,0x00,0x00,0x00]
-# CHECK:   jal 1328             # encoding: [0x4c,0x01,0x00,0x0c]
-# CHECK:   nop                  # encoding: [0x00,0x00,0x00,0x00]
-# CHECK:   jalr $6              # encoding: [0x09,0xf8,0xc0,0x00]
-# CHECK:   nop                  # encoding: [0x00,0x00,0x00,0x00]
-# CHECK:   jalr $25             # encoding: [0x09,0xf8,0x20,0x03]
-# CHECK:   nop                  # encoding: [0x00,0x00,0x00,0x00]
-# CHECK:   jalr $10, $11        # encoding: [0x09,0x50,0x60,0x01]
-# CHECK:   nop                  # encoding: [0x00,0x00,0x00,0x00]
-# CHECK:   jr $7                # encoding: [0x08,0x00,0xe0,0x00]
-# CHECK:   nop                  # encoding: [0x00,0x00,0x00,0x00]
-# CHECK:   jr $7                # encoding: [0x08,0x00,0xe0,0x00]
-# CHECK:   nop                  # encoding: [0x00,0x00,0x00,0x00]
-# CHECK:   jalr  $25            # encoding: [0x09,0xf8,0x20,0x03]
-# CHECK:   nop                  # encoding: [0x00,0x00,0x00,0x00]
-# CHECK:   jalr  $4, $25        # encoding: [0x09,0x20,0x20,0x03]
-# CHECK:   nop                  # encoding: [0x00,0x00,0x00,0x00]
+# CHECK32:   j 1328               # encoding: [0x4c,0x01,0x00,0x08]
+# CHECK32:   nop                  # encoding: [0x00,0x00,0x00,0x00]
+# CHECK32:   jal 1328             # encoding: [0x4c,0x01,0x00,0x0c]
+# CHECK32:   nop                  # encoding: [0x00,0x00,0x00,0x00]
+# CHECK32:   jalr $6              # encoding: [0x09,0xf8,0xc0,0x00]
+# CHECK32:   nop                  # encoding: [0x00,0x00,0x00,0x00]
+# CHECK32:   jalr $25             # encoding: [0x09,0xf8,0x20,0x03]
+# CHECK32:   nop                  # encoding: [0x00,0x00,0x00,0x00]
+# CHECK32:   jalr $10, $11        # encoding: [0x09,0x50,0x60,0x01]
+# CHECK32:   nop                  # encoding: [0x00,0x00,0x00,0x00]
+# CHECK32:   jr $7                # encoding: [0x08,0x00,0xe0,0x00]
+# CHECK32:   nop                  # encoding: [0x00,0x00,0x00,0x00]
+# CHECK32:   jr $7                # encoding: [0x08,0x00,0xe0,0x00]
+# CHECK32:   nop                  # encoding: [0x00,0x00,0x00,0x00]
+# CHECK32:   jalr  $25            # encoding: [0x09,0xf8,0x20,0x03]
+# CHECK32:   nop                  # encoding: [0x00,0x00,0x00,0x00]
+# CHECK32:   jalr  $4, $25        # encoding: [0x09,0x20,0x20,0x03]
+# CHECK32:   nop                  # encoding: [0x00,0x00,0x00,0x00]
+
+# CHECK64:   j 1328               # encoding: [0x4c,0x01,0x00,0x08]
+# CHECK64:   nop                  # encoding: [0x00,0x00,0x00,0x00]
+# CHECK64:   jal 1328             # encoding: [0x4c,0x01,0x00,0x0c]
+# CHECK64:   nop                  # encoding: [0x00,0x00,0x00,0x00]
+# CHECK64:   jalr $6              # encoding: [0x09,0xf8,0xc0,0x00]
+# CHECK64:   nop                  # encoding: [0x00,0x00,0x00,0x00]
+# CHECK64:   jalr $25             # encoding: [0x09,0xf8,0x20,0x03]
+# CHECK64:   nop                  # encoding: [0x00,0x00,0x00,0x00]
+# CHECK64:   jalr $10, $11        # encoding: [0x09,0x50,0x60,0x01]
+# CHECK64:   nop                  # encoding: [0x00,0x00,0x00,0x00]
+# CHECK64:   jr $7                # encoding: [0x08,0x00,0xe0,0x00]
+# CHECK64:   nop                  # encoding: [0x00,0x00,0x00,0x00]
+# CHECK64:   jr $7                # encoding: [0x08,0x00,0xe0,0x00]
+# CHECK64:   nop                  # encoding: [0x00,0x00,0x00,0x00]
+# CHECK64:   jalr  $25            # encoding: [0x09,0xf8,0x20,0x03]
+# CHECK64:   nop                  # encoding: [0x00,0x00,0x00,0x00]
+# CHECK64:   jalr  $4, $25        # encoding: [0x09,0x20,0x20,0x03]
+# CHECK64:   nop                  # encoding: [0x00,0x00,0x00,0x00]
 
 
    j 1328
diff --git a/test/MC/Mips/mips-memory-instructions.s b/test/MC/Mips/mips-memory-instructions.s
index b5f1267..c8b0559 100644
--- a/test/MC/Mips/mips-memory-instructions.s
+++ b/test/MC/Mips/mips-memory-instructions.s
@@ -1,7 +1,6 @@
 # RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 | FileCheck %s
 # Check that the assembler can handle the documented syntax
 # for loads and stores.
-# CHECK: .section __TEXT,__text,regular,pure_instructions
 #------------------------------------------------------------------------------
 # Memory store instructions
 #------------------------------------------------------------------------------
diff --git a/test/MC/Mips/mips-relocations.s b/test/MC/Mips/mips-relocations.s
index ff71c75..6f095d1 100644
--- a/test/MC/Mips/mips-relocations.s
+++ b/test/MC/Mips/mips-relocations.s
@@ -1,7 +1,6 @@
 # RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 | FileCheck %s
 # Check that the assembler can handle the documented syntax
 # for relocations.
-# CHECK: .section __TEXT,__text,regular,pure_instructions
 # CHECK:  lui   $2, %hi(_gp_disp)     # encoding: [A,A,0x02,0x3c]
 # CHECK:                              #   fixup A - offset: 0, value: _gp_disp@ABS_HI, kind: fixup_Mips_HI16
 # CHECK:  addiu $2, $2, %lo(_gp_disp) # encoding: [A,A,0x42,0x24]
diff --git a/test/MC/Mips/mips64-alu-instructions.s b/test/MC/Mips/mips64-alu-instructions.s
index 1b4ebdf..db6c972 100644
--- a/test/MC/Mips/mips64-alu-instructions.s
+++ b/test/MC/Mips/mips64-alu-instructions.s
@@ -1,7 +1,6 @@
 # RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips64r2 | FileCheck %s
 # Check that the assembler can handle the documented syntax
 # for arithmetic and logical instructions.
-# CHECK: .section __TEXT,__text,regular,pure_instructions
 #------------------------------------------------------------------------------
 # Logical instructions
 #------------------------------------------------------------------------------
@@ -13,6 +12,7 @@
 # CHECK:  ins    $19, $9, 6, 7   # encoding: [0x84,0x61,0x33,0x7d]
 # CHECK:  nor    $9, $6, $7      # encoding: [0x27,0x48,0xc7,0x00]
 # CHECK:  or     $3, $3, $5      # encoding: [0x25,0x18,0x65,0x00]
+# CHECK:  ori    $4, $5, 17767   # encoding: [0x67,0x45,0xa4,0x34]
 # CHECK:  ori    $9, $6, 17767   # encoding: [0x67,0x45,0xc9,0x34]
 # CHECK:  rotr   $9, $6, 7       # encoding: [0xc2,0x49,0x26,0x00]
 # CHECK:  rotrv  $9, $6, $7      # encoding: [0x46,0x48,0xe6,0x00]
@@ -40,6 +40,7 @@
      ins    $19, $9, 6,7
      nor    $9,  $6, $7
      or     $3,  $3, $5
+     or     $4,  $5, 17767
      ori    $9,  $6, 17767
      rotr   $9,  $6, 7
      rotrv  $9,  $6, $7
diff --git a/test/MC/Mips/mips_directives.s b/test/MC/Mips/mips_directives.s
index df7e645..45247cd 100644
--- a/test/MC/Mips/mips_directives.s
+++ b/test/MC/Mips/mips_directives.s
@@ -1,11 +1,20 @@
 # RUN: llvm-mc -show-encoding -triple mips-unknown-unknown %s | FileCheck %s
 #
+# CHECK:  .text
+# CHECK:  $BB0_2:
 $BB0_2:
   .ent directives_test
     .frame    $sp,0,$ra
     .mask     0x00000000,0
     .fmask    0x00000000,0
+# CHECK:   b 1332               # encoding: [0x10,0x00,0x01,0x4d]
+# CHECK:   j 1328               # encoding: [0x08,0x00,0x01,0x4c]
+# CHECK:   jal 1328             # encoding: [0x0c,0x00,0x01,0x4c]
+
     .set    noreorder
+     b 1332
+     j 1328
+     jal 1328
     .set    nomacro
     .set    noat
 $JTI0_0:
@@ -15,7 +24,16 @@ $JTI0_0:
 # CHECK-NEXT:     .4byte    2013265916
     .set  at=$12
     .set macro
+# CHECK:   b 1332               # encoding: [0x10,0x00,0x01,0x4d]
+# CHECK:   nop                  # encoding: [0x00,0x00,0x00,0x00]
+# CHECK:   j 1328               # encoding: [0x08,0x00,0x01,0x4c]
+# CHECK:   nop                  # encoding: [0x00,0x00,0x00,0x00]
+# CHECK:   jal 1328             # encoding: [0x0c,0x00,0x01,0x4c]
+# CHECK:   nop                  # encoding: [0x00,0x00,0x00,0x00]
     .set reorder
+    b 1332
+    j 1328
+    jal 1328
     .set  at=$a0
     .set STORE_MASK,$t7
     .set FPU_MASK,$f7
diff --git a/test/MC/Mips/nabi-regs.s b/test/MC/Mips/nabi-regs.s
index 9371208..050fb81 100644
--- a/test/MC/Mips/nabi-regs.s
+++ b/test/MC/Mips/nabi-regs.s
@@ -8,7 +8,6 @@
 # RUN: -mcpu=mips64r2 -arch=mips64 | \
 # RUN: FileCheck %s
 
-# CHECK: .section    __TEXT,__text,regular,pure_instructions
     .text
 foo:
 
diff --git a/test/MC/Mips/r-mips-got-disp.ll b/test/MC/Mips/r-mips-got-disp.ll
index 73396ac..7e78a46 100644
--- a/test/MC/Mips/r-mips-got-disp.ll
+++ b/test/MC/Mips/r-mips-got-disp.ll
@@ -1,8 +1,9 @@
-; RUN: llc -march=mips64el -filetype=obj -mcpu=mips64r2 < %s -o - | elf-dump --dump-section-data  | FileCheck %s
+; RUN: llc -march=mips64el -filetype=obj -mcpu=mips64r2 < %s -o - | llvm-readobj -r | FileCheck %s
 
 ; Check that the R_MIPS_GOT_DISP relocations were created.
 
-; CHECK:     ('r_type', 0x13)
+; CHECK: Relocations [
+; CHECK:     0x{{[0-9,A-F]+}} R_MIPS_GOT_DISP
 
 @shl = global i64 1, align 8
 @.str = private unnamed_addr constant [8 x i8] c"0x%llx\0A\00", align 1
diff --git a/test/MC/Mips/set-at-directive.s b/test/MC/Mips/set-at-directive.s
index 98a3a35..828175a 100644
--- a/test/MC/Mips/set-at-directive.s
+++ b/test/MC/Mips/set-at-directive.s
@@ -3,7 +3,6 @@
 # Check that the assembler can handle the documented syntax
 # for ".set at" and set the correct value.
 
-# CHECK: .section __TEXT,__text,regular,pure_instructions
     .text
 foo:
 # CHECK:   jr    $1                      # encoding: [0x08,0x00,0x20,0x00]
diff --git a/test/MC/Mips/sym-offset.ll b/test/MC/Mips/sym-offset.ll
index 5162c91..c7450f7 100644
--- a/test/MC/Mips/sym-offset.ll
+++ b/test/MC/Mips/sym-offset.ll
@@ -1,4 +1,4 @@
-; DISABLED: llc -filetype=obj -mtriple mipsel-unknown-linux %s -o - | elf-dump --dump-section-data  | FileCheck %s
+; DISABLED: llc -filetype=obj -mtriple mipsel-unknown-linux %s -o - | llvm-readobj -s -sd | FileCheck %s
 ; RUN: false
 ; XFAIL: *
 
@@ -13,7 +13,9 @@ entry:
 ; 8841000e        lwl     at,14(v0)
 ; 9841000b        lwr     at,11(v0)
 
-; CHECK: ('_section_data', '00001c3c 00009c27 21e09903 0000828f 0e004188 0b004198
+; CHECK:          SectionData (
+; CHECK:            0000: 00001C3C 00009C27 21E09903 0000828F
+; CHECK-NEXT:       0010: 0E004188 0B004198
 
   %call = tail call i32 @memcmp(i8* getelementptr inbounds ([11 x i8]* @string1, i32 0, i32 0), i8* getelementptr inbounds ([10 x i8]* @string2, i32 0, i32 0), i32 4) nounwind readonly
   %cmp = icmp eq i32 %call, 0
diff --git a/test/MC/Mips/xgot.ll b/test/MC/Mips/xgot.ll
index bfe9b9a..e2a500f 100644
--- a/test/MC/Mips/xgot.ll
+++ b/test/MC/Mips/xgot.ll
@@ -1,4 +1,4 @@
-; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mxgot %s -o - | elf-dump --dump-section-data  | FileCheck %s
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mxgot %s -o - | llvm-readobj -r | FileCheck %s
 
 @.str = private unnamed_addr constant [16 x i8] c"ext_1=%d, i=%d\0A\00", align 1
 @ext_1 = external global i32
@@ -9,29 +9,16 @@ entry:
 ; Check that the appropriate relocations were created. 
 ; For the xgot case we want to see R_MIPS_[GOT|CALL]_[HI|LO]16.
 
-; R_MIPS_HI16
-; CHECK:     ('r_type', 0x05)
-
-; R_MIPS_LO16
-; CHECK:     ('r_type', 0x06)
-
-; R_MIPS_GOT_HI16
-; CHECK:     ('r_type', 0x16)
-
-; R_MIPS_GOT_LO16
-; CHECK:     ('r_type', 0x17)
-
-; R_MIPS_GOT
-; CHECK:     ('r_type', 0x09)
-
-; R_MIPS_LO16
-; CHECK:     ('r_type', 0x06)
-
-; R_MIPS_CALL_HI16
-; CHECK:     ('r_type', 0x1e)
-
-; R_MIPS_CALL_LO16
-; CHECK:     ('r_type', 0x1f)
+; CHECK: Relocations [
+; CHECK:     0x{{[0-9,A-F]+}} R_MIPS_HI16
+; CHECK:     0x{{[0-9,A-F]+}} R_MIPS_LO16
+; CHECK:     0x{{[0-9,A-F]+}} R_MIPS_GOT_HI16
+; CHECK:     0x{{[0-9,A-F]+}} R_MIPS_GOT_LO16
+; CHECK:     0x{{[0-9,A-F]+}} R_MIPS_GOT
+; CHECK:     0x{{[0-9,A-F]+}} R_MIPS_LO16
+; CHECK:     0x{{[0-9,A-F]+}} R_MIPS_CALL_HI16
+; CHECK:     0x{{[0-9,A-F]+}} R_MIPS_CALL_LO16
+; CHECK: ]
 
   %0 = load i32* @ext_1, align 4
   %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([16 x i8]* @.str, i32 0, i32 0), i32 %0) nounwind
diff --git a/test/MC/PowerPC/ppc64-encoding-bookII.s b/test/MC/PowerPC/ppc64-encoding-bookII.s
new file mode 100644
index 0000000..e74c971
--- /dev/null
+++ b/test/MC/PowerPC/ppc64-encoding-bookII.s
@@ -0,0 +1,58 @@
+
+# RUN: llvm-mc -triple powerpc64-unknown-unknown --show-encoding %s | FileCheck %s
+
+# Cache management instructions
+
+# FIXME: icbi 2, 3
+# FIXME: icbt 1, 2, 3
+
+# FIXME: dcbt 2, 3, 10
+# FIXME: dcbtst 2, 3, 10
+# CHECK: dcbz 2, 3                       # encoding: [0x7c,0x02,0x1f,0xec]
+         dcbz 2, 3
+# CHECK: dcbst 2, 3                      # encoding: [0x7c,0x02,0x18,0x6c]
+         dcbst 2, 3
+# FIXME: dcbf 2, 3, 1
+
+# Synchronization instructions
+
+# FIXME: isync
+
+# FIXME: lbarx 2, 3, 4, 1
+# FIXME: lharx 2, 3, 4, 1
+# FIXME: lwarx 2, 3, 4, 1
+# FIXME: ldarx 2, 3, 4, 1
+
+# FIXME: stbcx. 2, 3, 4
+# FIXME: sthcx. 2, 3, 4
+# CHECK: stwcx. 2, 3, 4                  # encoding: [0x7c,0x43,0x21,0x2d]
+         stwcx. 2, 3, 4
+# CHECK: stdcx. 2, 3, 4                  # encoding: [0x7c,0x43,0x21,0xad]
+         stdcx. 2, 3, 4
+
+# FIXME: sync 2
+# FIXME: eieio
+# FIXME: wait 2
+
+# Extended mnemonics
+
+# CHECK: dcbf 2, 3                       # encoding: [0x7c,0x02,0x18,0xac]
+         dcbf 2, 3
+# FIXME: dcbfl 2, 3
+
+# FIXME: lbarx 2, 3, 4
+# FIXME: lharx 2, 3, 4
+# CHECK: lwarx 2, 3, 4                   # encoding: [0x7c,0x43,0x20,0x28]
+         lwarx 2, 3, 4
+# CHECK: ldarx 2, 3, 4                   # encoding: [0x7c,0x43,0x20,0xa8]
+         ldarx 2, 3, 4
+
+# CHECK: sync                            # encoding: [0x7c,0x00,0x04,0xac]
+         sync
+# FIXME: lwsync
+# FIXME: ptesync
+
+# FIXME: wait
+# FIXME: waitrsv
+# FIXME: waitimpl
+
diff --git a/test/MC/PowerPC/ppc64-encoding-ext.s b/test/MC/PowerPC/ppc64-encoding-ext.s
new file mode 100644
index 0000000..4395b19
--- /dev/null
+++ b/test/MC/PowerPC/ppc64-encoding-ext.s
@@ -0,0 +1,331 @@
+
+# RUN: llvm-mc -triple powerpc64-unknown-unknown --show-encoding %s | FileCheck %s
+
+# FIXME: Condition register bit symbols
+
+# Branch mnemonics
+
+# CHECK: blr                             # encoding: [0x4e,0x80,0x00,0x20]
+         blr
+# CHECK: bctr                            # encoding: [0x4e,0x80,0x04,0x20]
+         bctr
+# FIXME: blrl
+# CHECK: bctrl                           # encoding: [0x4e,0x80,0x04,0x21]
+         bctrl
+
+# FIXME: bt 2, target
+# FIXME: bta 2, target
+# FIXME: btlr 2
+# FIXME: btctr 2
+# FIXME: btl 2, target
+# FIXME: btla 2, target
+# FIXME: btlrl 2
+# FIXME: btctrl 2
+
+# FIXME: bf 2, target
+# FIXME: bfa 2, target
+# FIXME: bflr 2
+# FIXME: bfctr 2
+# FIXME: bfl 2, target
+# FIXME: bfla 2, target
+# FIXME: bflrl 2
+# FIXME: bfctrl 2
+
+# CHECK: bdnz target                     # encoding: [0x42,0x00,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bdnz target
+# FIXME: bdnza target
+# CHECK: bdnzlr                          # encoding: [0x4e,0x00,0x00,0x20]
+         bdnzlr
+# FIXME: bdnzl target
+# FIXME: bdnzla target
+# FIXME: bdnzlrl
+
+# FIXME: bdnzt 2, target
+# FIXME: bdnzta 2, target
+# FIXME: bdnztlr 2
+# FIXME: bdnztl 2, target
+# FIXME: bdnztla 2, target
+# FIXME: bdnztlrl 2
+# FIXME: bdnzf 2, target
+# FIXME: bdnzfa 2, target
+# FIXME: bdnzflr 2
+# FIXME: bdnzfl 2, target
+# FIXME: bdnzfla 2, target
+# FIXME: bdnzflrl 2
+
+# CHECK: bdz target                      # encoding: [0x42,0x40,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bdz target
+# FIXME: bdza target
+# CHECK: bdzlr                           # encoding: [0x4e,0x40,0x00,0x20]
+         bdzlr
+# FIXME: bdzl target
+# FIXME: bdzla target
+# FIXME: bdzlrl
+
+# FIXME: bdzt 2, target
+# FIXME: bdzta 2, target
+# FIXME: bdztlr 2
+# FIXME: bdztl 2, target
+# FIXME: bdztla 2, target
+# FIXME: bdztlrl 2
+# FIXME: bdzf 2, target
+# FIXME: bdzfa 2, target
+# FIXME: bdzflr 2
+# FIXME: bdzfl 2, target
+# FIXME: bdzfla 2, target
+# FIXME: bdzflrl 2
+
+# CHECK: blt 2, target                   # encoding: [0x41,0x88,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         blt 2, target
+# FIXME: blta 2, target
+# CHECK: bltlr 2                         # encoding: [0x4d,0x88,0x00,0x20]
+         bltlr 2
+# CHECK: bltctr 2                        # encoding: [0x4d,0x88,0x04,0x20]
+         bltctr 2
+# FIXME: bltl 2, target
+# FIXME: bltla 2, target
+# FIXME: bltlrl 2
+# CHECK: bltctrl 2                       # encoding: [0x4d,0x88,0x04,0x21]
+         bltctrl 2
+
+# CHECK: ble 2, target                   # encoding: [0x40,0x89,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         ble 2, target
+# FIXME: blea 2, target
+# CHECK: blelr 2                         # encoding: [0x4c,0x89,0x00,0x20]
+         blelr 2
+# CHECK: blectr 2                        # encoding: [0x4c,0x89,0x04,0x20]
+         blectr 2
+# FIXME: blel 2, target
+# FIXME: blela 2, target
+# FIXME: blelrl 2
+# CHECK: blectrl 2                       # encoding: [0x4c,0x89,0x04,0x21]
+         blectrl 2
+
+# CHECK: beq 2, target                   # encoding: [0x41,0x8a,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         beq 2, target
+# FIXME: beqa 2, target
+# CHECK: beqlr 2                         # encoding: [0x4d,0x8a,0x00,0x20]
+         beqlr 2
+# CHECK: beqctr 2                        # encoding: [0x4d,0x8a,0x04,0x20]
+         beqctr 2
+# FIXME: beql 2, target
+# FIXME: beqla 2, target
+# FIXME: beqlrl 2
+# CHECK: beqctrl 2                       # encoding: [0x4d,0x8a,0x04,0x21]
+         beqctrl 2
+
+# CHECK: bge 2, target                   # encoding: [0x40,0x88,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bge 2, target
+# FIXME: bgea 2, target
+# CHECK: bgelr 2                         # encoding: [0x4c,0x88,0x00,0x20]
+         bgelr 2
+# CHECK: bgectr 2                        # encoding: [0x4c,0x88,0x04,0x20]
+         bgectr 2
+# FIXME: bgel 2, target
+# FIXME: bgela 2, target
+# FIXME: bgelrl 2
+# CHECK: bgectrl 2                       # encoding: [0x4c,0x88,0x04,0x21]
+         bgectrl 2
+
+# CHECK: bgt 2, target                   # encoding: [0x41,0x89,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bgt 2, target
+# FIXME: bgta 2, target
+# CHECK: bgtlr 2                         # encoding: [0x4d,0x89,0x00,0x20]
+         bgtlr 2
+# CHECK: bgtctr 2                        # encoding: [0x4d,0x89,0x04,0x20]
+         bgtctr 2
+# FIXME: bgtl 2, target
+# FIXME: bgtla 2, target
+# FIXME: bgtlrl 2
+# CHECK: bgtctrl 2                       # encoding: [0x4d,0x89,0x04,0x21]
+         bgtctrl 2
+
+# CHECK: bge 2, target                   # encoding: [0x40,0x88,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bnl 2, target
+# FIXME: bnla 2, target
+# CHECK: bgelr 2                         # encoding: [0x4c,0x88,0x00,0x20]
+         bnllr 2
+# CHECK: bgectr 2                        # encoding: [0x4c,0x88,0x04,0x20]
+         bnlctr 2
+# FIXME: bnll 2, target
+# FIXME: bnlla 2, target
+# FIXME: bnllrl 2
+# CHECK: bgectrl 2                       # encoding: [0x4c,0x88,0x04,0x21]
+         bnlctrl 2
+
+# CHECK: bne 2, target                   # encoding: [0x40,0x8a,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bne 2, target
+# FIXME: bnea 2, target
+# CHECK: bnelr 2                         # encoding: [0x4c,0x8a,0x00,0x20]
+         bnelr 2
+# CHECK: bnectr 2                        # encoding: [0x4c,0x8a,0x04,0x20]
+         bnectr 2
+# FIXME: bnel 2, target
+# FIXME: bnela 2, target
+# FIXME: bnelrl 2
+# CHECK: bnectrl 2                       # encoding: [0x4c,0x8a,0x04,0x21]
+         bnectrl 2
+
+# CHECK: ble 2, target                   # encoding: [0x40,0x89,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bng 2, target
+# FIXME: bnga 2, target
+# CHECK: blelr 2                         # encoding: [0x4c,0x89,0x00,0x20]
+         bnglr 2
+# CHECK: blectr 2                        # encoding: [0x4c,0x89,0x04,0x20]
+         bngctr 2
+# FIXME: bngl 2, target
+# FIXME: bngla 2, target
+# FIXME: bnglrl 2
+# CHECK: blectrl 2                       # encoding: [0x4c,0x89,0x04,0x21]
+         bngctrl 2
+
+# CHECK: bun 2, target                   # encoding: [0x41,0x8b,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bso 2, target
+# FIXME: bsoa 2, target
+# CHECK: bunlr 2                         # encoding: [0x4d,0x8b,0x00,0x20]
+         bsolr 2
+# CHECK: bunctr 2                        # encoding: [0x4d,0x8b,0x04,0x20]
+         bsoctr 2
+# FIXME: bsol 2, target
+# FIXME: bsola 2, target
+# FIXME: bsolrl 2
+# CHECK: bunctrl 2                       # encoding: [0x4d,0x8b,0x04,0x21]
+         bsoctrl 2
+
+# CHECK: bnu 2, target                   # encoding: [0x40,0x8b,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bns 2, target
+# FIXME: bnsa 2, target
+# CHECK: bnulr 2                         # encoding: [0x4c,0x8b,0x00,0x20]
+         bnslr 2
+# CHECK: bnuctr 2                        # encoding: [0x4c,0x8b,0x04,0x20]
+         bnsctr 2
+# FIXME: bnsl 2, target
+# FIXME: bnsla 2, target
+# FIXME: bnslrl 2
+# CHECK: bnuctrl 2                       # encoding: [0x4c,0x8b,0x04,0x21]
+         bnsctrl 2
+
+# CHECK: bun 2, target                   # encoding: [0x41,0x8b,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bun 2, target
+# FIXME: buna 2, target
+# CHECK: bunlr 2                         # encoding: [0x4d,0x8b,0x00,0x20]
+         bunlr 2
+# CHECK: bunctr 2                        # encoding: [0x4d,0x8b,0x04,0x20]
+         bunctr 2
+# FIXME: bunl 2, target
+# FIXME: bunla 2, target
+# FIXME: bunlrl 2
+# CHECK: bunctrl 2                       # encoding: [0x4d,0x8b,0x04,0x21]
+         bunctrl 2
+
+# CHECK: bnu 2, target                   # encoding: [0x40,0x8b,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_brcond14
+         bnu 2, target
+# FIXME: bnua 2, target
+# CHECK: bnulr 2                         # encoding: [0x4c,0x8b,0x00,0x20]
+         bnulr 2
+# CHECK: bnuctr 2                        # encoding: [0x4c,0x8b,0x04,0x20]
+         bnuctr 2
+# FIXME: bnul 2, target
+# FIXME: bnula 2, target
+# FIXME: bnulrl 2
+# CHECK: bnuctrl 2                       # encoding: [0x4c,0x8b,0x04,0x21]
+         bnuctrl 2
+
+# FIXME: Condition register logical mnemonics
+
+# FIXME: Subtract mnemonics
+
+# Compare mnemonics
+
+# CHECK: cmpdi 2, 3, 128                 # encoding: [0x2d,0x23,0x00,0x80]
+         cmpdi 2, 3, 128
+# CHECK: cmpd 2, 3, 4                    # encoding: [0x7d,0x23,0x20,0x00]
+         cmpd 2, 3, 4
+# CHECK: cmpldi 2, 3, 128                # encoding: [0x29,0x23,0x00,0x80]
+         cmpldi 2, 3, 128
+# CHECK: cmpld 2, 3, 4                   # encoding: [0x7d,0x23,0x20,0x40]
+         cmpld 2, 3, 4
+
+# CHECK: cmpwi 2, 3, 128                 # encoding: [0x2d,0x03,0x00,0x80]
+         cmpwi 2, 3, 128
+# CHECK: cmpw 2, 3, 4                    # encoding: [0x7d,0x03,0x20,0x00]
+         cmpw 2, 3, 4
+# CHECK: cmplwi 2, 3, 128                # encoding: [0x29,0x03,0x00,0x80]
+         cmplwi 2, 3, 128
+# CHECK: cmplw 2, 3, 4                   # encoding: [0x7d,0x03,0x20,0x40]
+         cmplw 2, 3, 4
+
+# FIXME: Trap mnemonics
+
+# Rotate and shift mnemonics
+
+# FIXME: extldi 2, 3, 4, 5
+# FIXME: extrdi 2, 3, 4, 5
+# FIXME: insrdi 2, 3, 4, 5
+# FIXME: rotldi 2, 3, 4
+# FIXME: rotrdi 2, 3, 4
+# FIXME: rotld 2, 3, 4
+# CHECK: sldi 2, 3, 4                    # encoding: [0x78,0x62,0x26,0xe4]
+         sldi 2, 3, 4
+# CHECK: rldicl 2, 3, 60, 4              # encoding: [0x78,0x62,0xe1,0x02]
+         srdi 2, 3, 4
+# FIXME: clrldi 2, 3, 4
+# FIXME: clrrdi 2, 3, 4
+# FIXME: clrlsldi 2, 3, 4, 5
+
+# FIXME: extlwi 2, 3, 4, 5
+# FIXME: extrwi 2, 3, 4, 5
+# FIXME: inslwi 2, 3, 4, 5
+# FIXME: insrwi 2, 3, 4, 5
+# FIXME: rotlwi 2, 3, 4
+# FIXME: rotrwi 2, 3, 4
+# FIXME: rotlw 2, 3, 4
+# CHECK: slwi 2, 3, 4                    # encoding: [0x54,0x62,0x20,0x36]
+         slwi 2, 3, 4
+# CHECK: srwi 2, 3, 4                    # encoding: [0x54,0x62,0xe1,0x3e]
+         srwi 2, 3, 4
+# FIXME: clrlwi 2, 3, 4
+# FIXME: clrrwi 2, 3, 4
+# FIXME: clrlslwi 2, 3, 4, 5
+
+# Move to/from special purpose register mnemonics
+
+# FIXME: mtxer 2
+# FIXME: mfxer 2
+# CHECK: mtlr 2                          # encoding: [0x7c,0x48,0x03,0xa6]
+         mtlr 2
+# CHECK: mflr 2                          # encoding: [0x7c,0x48,0x02,0xa6]
+         mflr 2
+# CHECK: mtctr 2                         # encoding: [0x7c,0x49,0x03,0xa6]
+         mtctr 2
+# CHECK: mfctr 2                         # encoding: [0x7c,0x49,0x02,0xa6]
+         mfctr 2
+
+# Miscellaneous mnemonics
+
+# CHECK: nop                             # encoding: [0x60,0x00,0x00,0x00]
+         nop
+# FIXME: xnop
+# CHECK: li 2, 128                       # encoding: [0x38,0x40,0x00,0x80]
+         li 2, 128
+# CHECK: lis 2, 128                      # encoding: [0x3c,0x40,0x00,0x80]
+         lis 2, 128
+# FIXME: la 2, 128(4)
+# CHECK: mr 2, 3                         # encoding: [0x7c,0x62,0x1b,0x78]
+         mr 2, 3
+# FIXME: not 2, 3
+
diff --git a/test/MC/PowerPC/ppc64-encoding-fp.s b/test/MC/PowerPC/ppc64-encoding-fp.s
new file mode 100644
index 0000000..ae0e286
--- /dev/null
+++ b/test/MC/PowerPC/ppc64-encoding-fp.s
@@ -0,0 +1,263 @@
+
+# RUN: llvm-mc -triple powerpc64-unknown-unknown --show-encoding %s | FileCheck %s
+
+# Floating-point facility
+
+# Floating-point load instructions
+
+# CHECK: lfs 2, 128(4)                   # encoding: [0xc0,0x44,0x00,0x80]
+         lfs 2, 128(4)
+# CHECK: lfsx 2, 3, 4                    # encoding: [0x7c,0x43,0x24,0x2e]
+         lfsx 2, 3, 4
+# CHECK: lfsu 2, 128(4)                  # encoding: [0xc4,0x44,0x00,0x80]
+         lfsu 2, 128(4)
+# CHECK: lfsux 2, 3, 4                   # encoding: [0x7c,0x43,0x24,0x6e]
+         lfsux 2, 3, 4
+# CHECK: lfd 2, 128(4)                   # encoding: [0xc8,0x44,0x00,0x80]
+         lfd 2, 128(4)
+# CHECK: lfdx 2, 3, 4                    # encoding: [0x7c,0x43,0x24,0xae]
+         lfdx 2, 3, 4
+# CHECK: lfdu 2, 128(4)                  # encoding: [0xcc,0x44,0x00,0x80]
+         lfdu 2, 128(4)
+# CHECK: lfdux 2, 3, 4                   # encoding: [0x7c,0x43,0x24,0xee]
+         lfdux 2, 3, 4
+# CHECK: lfiwax 2, 3, 4                  # encoding: [0x7c,0x43,0x26,0xae]
+         lfiwax 2, 3, 4
+# CHECK: lfiwzx 2, 3, 4                  # encoding: [0x7c,0x43,0x26,0xee]
+         lfiwzx 2, 3, 4
+
+# Floating-point store instructions
+
+# CHECK: stfs 2, 128(4)                  # encoding: [0xd0,0x44,0x00,0x80]
+         stfs 2, 128(4)
+# CHECK: stfsx 2, 3, 4                   # encoding: [0x7c,0x43,0x25,0x2e]
+         stfsx 2, 3, 4
+# CHECK: stfsu 2, 128(4)                 # encoding: [0xd4,0x44,0x00,0x80]
+         stfsu 2, 128(4)
+# CHECK: stfsux 2, 3, 4                  # encoding: [0x7c,0x43,0x25,0x6e]
+         stfsux 2, 3, 4
+# CHECK: stfd 2, 128(4)                  # encoding: [0xd8,0x44,0x00,0x80]
+         stfd 2, 128(4)
+# CHECK: stfdx 2, 3, 4                   # encoding: [0x7c,0x43,0x25,0xae]
+         stfdx 2, 3, 4
+# CHECK: stfdu 2, 128(4)                 # encoding: [0xdc,0x44,0x00,0x80]
+         stfdu 2, 128(4)
+# CHECK: stfdux 2, 3, 4                  # encoding: [0x7c,0x43,0x25,0xee]
+         stfdux 2, 3, 4
+# CHECK: stfiwx 2, 3, 4                  # encoding: [0x7c,0x43,0x27,0xae]
+         stfiwx 2, 3, 4
+
+# Floating-point move instructions
+
+# CHECK: fmr 2, 3                        # encoding: [0xfc,0x40,0x18,0x90]
+         fmr 2, 3
+# CHECK: fmr. 2, 3                       # encoding: [0xfc,0x40,0x18,0x91]
+         fmr. 2, 3
+# CHECK: fneg 2, 3                       # encoding: [0xfc,0x40,0x18,0x50]
+         fneg 2, 3
+# CHECK: fneg. 2, 3                      # encoding: [0xfc,0x40,0x18,0x51]
+         fneg. 2, 3
+# CHECK: fabs 2, 3                       # encoding: [0xfc,0x40,0x1a,0x10]
+         fabs 2, 3
+# CHECK: fabs. 2, 3                      # encoding: [0xfc,0x40,0x1a,0x11]
+         fabs. 2, 3
+# CHECK: fnabs 2, 3                      # encoding: [0xfc,0x40,0x19,0x10]
+         fnabs 2, 3
+# CHECK: fnabs. 2, 3                     # encoding: [0xfc,0x40,0x19,0x11]
+         fnabs. 2, 3
+# FIXME: fcpsgn 2, 3, 4
+# FIXME: fcpsgn. 2, 3, 4
+
+# Floating-point arithmetic instructions
+
+# CHECK: fadd 2, 3, 4                    # encoding: [0xfc,0x43,0x20,0x2a]
+         fadd 2, 3, 4
+# CHECK: fadd. 2, 3, 4                   # encoding: [0xfc,0x43,0x20,0x2b]
+         fadd. 2, 3, 4
+# CHECK: fadds 2, 3, 4                   # encoding: [0xec,0x43,0x20,0x2a]
+         fadds 2, 3, 4
+# CHECK: fadds. 2, 3, 4                  # encoding: [0xec,0x43,0x20,0x2b]
+         fadds. 2, 3, 4
+# CHECK: fsub 2, 3, 4                    # encoding: [0xfc,0x43,0x20,0x28]
+         fsub 2, 3, 4
+# CHECK: fsub. 2, 3, 4                   # encoding: [0xfc,0x43,0x20,0x29]
+         fsub. 2, 3, 4
+# CHECK: fsubs 2, 3, 4                   # encoding: [0xec,0x43,0x20,0x28]
+         fsubs 2, 3, 4
+# CHECK: fsubs. 2, 3, 4                  # encoding: [0xec,0x43,0x20,0x29]
+         fsubs. 2, 3, 4
+
+# CHECK: fmul 2, 3, 4                    # encoding: [0xfc,0x43,0x01,0x32]
+         fmul 2, 3, 4
+# CHECK: fmul. 2, 3, 4                   # encoding: [0xfc,0x43,0x01,0x33]
+         fmul. 2, 3, 4
+# CHECK: fmuls 2, 3, 4                   # encoding: [0xec,0x43,0x01,0x32]
+         fmuls 2, 3, 4
+# CHECK: fmuls. 2, 3, 4                  # encoding: [0xec,0x43,0x01,0x33]
+         fmuls. 2, 3, 4
+# CHECK: fdiv 2, 3, 4                    # encoding: [0xfc,0x43,0x20,0x24]
+         fdiv 2, 3, 4
+# CHECK: fdiv. 2, 3, 4                   # encoding: [0xfc,0x43,0x20,0x25]
+         fdiv. 2, 3, 4
+# CHECK: fdivs 2, 3, 4                   # encoding: [0xec,0x43,0x20,0x24]
+         fdivs 2, 3, 4
+# CHECK: fdivs. 2, 3, 4                  # encoding: [0xec,0x43,0x20,0x25]
+         fdivs. 2, 3, 4
+# CHECK: fsqrt 2, 3                      # encoding: [0xfc,0x40,0x18,0x2c]
+         fsqrt 2, 3
+# CHECK: fsqrt. 2, 3                     # encoding: [0xfc,0x40,0x18,0x2d]
+         fsqrt. 2, 3
+# CHECK: fsqrts 2, 3                     # encoding: [0xec,0x40,0x18,0x2c]
+         fsqrts 2, 3
+# CHECK: fsqrts. 2, 3                    # encoding: [0xec,0x40,0x18,0x2d]
+         fsqrts. 2, 3
+
+# CHECK: fre 2, 3                        # encoding: [0xfc,0x40,0x18,0x30]
+         fre 2, 3
+# CHECK: fre. 2, 3                       # encoding: [0xfc,0x40,0x18,0x31]
+         fre. 2, 3
+# CHECK: fres 2, 3                       # encoding: [0xec,0x40,0x18,0x30]
+         fres 2, 3
+# CHECK: fres. 2, 3                      # encoding: [0xec,0x40,0x18,0x31]
+         fres. 2, 3
+# CHECK: frsqrte 2, 3                    # encoding: [0xfc,0x40,0x18,0x34]
+         frsqrte 2, 3
+# CHECK: frsqrte. 2, 3                   # encoding: [0xfc,0x40,0x18,0x35]
+         frsqrte. 2, 3
+# CHECK: frsqrtes 2, 3                   # encoding: [0xec,0x40,0x18,0x34]
+         frsqrtes 2, 3
+# CHECK: frsqrtes. 2, 3                  # encoding: [0xec,0x40,0x18,0x35]
+         frsqrtes. 2, 3
+# FIXME: ftdiv 2, 3, 4
+# FIXME: ftsqrt 2, 3
+
+# CHECK: fmadd 2, 3, 4, 5                # encoding: [0xfc,0x43,0x29,0x3a]
+         fmadd 2, 3, 4, 5
+# CHECK: fmadd. 2, 3, 4, 5               # encoding: [0xfc,0x43,0x29,0x3b]
+         fmadd. 2, 3, 4, 5
+# CHECK: fmadds 2, 3, 4, 5               # encoding: [0xec,0x43,0x29,0x3a]
+         fmadds 2, 3, 4, 5
+# CHECK: fmadds. 2, 3, 4, 5              # encoding: [0xec,0x43,0x29,0x3b]
+         fmadds. 2, 3, 4, 5
+# CHECK: fmsub 2, 3, 4, 5                # encoding: [0xfc,0x43,0x29,0x38]
+         fmsub 2, 3, 4, 5
+# CHECK: fmsub. 2, 3, 4, 5               # encoding: [0xfc,0x43,0x29,0x39]
+         fmsub. 2, 3, 4, 5
+# CHECK: fmsubs 2, 3, 4, 5               # encoding: [0xec,0x43,0x29,0x38]
+         fmsubs 2, 3, 4, 5
+# CHECK: fmsubs. 2, 3, 4, 5              # encoding: [0xec,0x43,0x29,0x39]
+         fmsubs. 2, 3, 4, 5
+# CHECK: fnmadd 2, 3, 4, 5               # encoding: [0xfc,0x43,0x29,0x3e]
+         fnmadd 2, 3, 4, 5
+# CHECK: fnmadd. 2, 3, 4, 5              # encoding: [0xfc,0x43,0x29,0x3f]
+         fnmadd. 2, 3, 4, 5
+# CHECK: fnmadds 2, 3, 4, 5              # encoding: [0xec,0x43,0x29,0x3e]
+         fnmadds 2, 3, 4, 5
+# CHECK: fnmadds. 2, 3, 4, 5             # encoding: [0xec,0x43,0x29,0x3f]
+         fnmadds. 2, 3, 4, 5
+# CHECK: fnmsub 2, 3, 4, 5               # encoding: [0xfc,0x43,0x29,0x3c]
+         fnmsub 2, 3, 4, 5
+# CHECK: fnmsub. 2, 3, 4, 5              # encoding: [0xfc,0x43,0x29,0x3d]
+         fnmsub. 2, 3, 4, 5
+# CHECK: fnmsubs 2, 3, 4, 5              # encoding: [0xec,0x43,0x29,0x3c]
+         fnmsubs 2, 3, 4, 5
+# CHECK: fnmsubs. 2, 3, 4, 5             # encoding: [0xec,0x43,0x29,0x3d]
+         fnmsubs. 2, 3, 4, 5
+
+# Floating-point rounding and conversion instructions
+
+# CHECK: frsp 2, 3                       # encoding: [0xfc,0x40,0x18,0x18]
+         frsp 2, 3
+# CHECK: frsp. 2, 3                      # encoding: [0xfc,0x40,0x18,0x19]
+         frsp. 2, 3
+
+# FIXME: fctid 2, 3
+# FIXME: fctid. 2, 3
+# CHECK: fctidz 2, 3                     # encoding: [0xfc,0x40,0x1e,0x5e]
+         fctidz 2, 3
+# CHECK: fctidz. 2, 3                    # encoding: [0xfc,0x40,0x1e,0x5f]
+         fctidz. 2, 3
+# FIXME: fctidu 2, 3
+# FIXME: fctidu. 2, 3
+# CHECK: fctiduz 2, 3                    # encoding: [0xfc,0x40,0x1f,0x5e]
+         fctiduz 2, 3
+# CHECK: fctiduz. 2, 3                   # encoding: [0xfc,0x40,0x1f,0x5f]
+         fctiduz. 2, 3
+# FIXME: fctiw 2, 3
+# FIXME: fctiw. 2, 3
+# CHECK: fctiwz 2, 3                     # encoding: [0xfc,0x40,0x18,0x1e]
+         fctiwz 2, 3
+# CHECK: fctiwz. 2, 3                    # encoding: [0xfc,0x40,0x18,0x1f]
+         fctiwz. 2, 3
+# FIXME: fctiwu 2, 3
+# FIXME: fctiwu. 2, 3
+# CHECK: fctiwuz 2, 3                    # encoding: [0xfc,0x40,0x19,0x1e]
+         fctiwuz 2, 3
+# CHECK: fctiwuz. 2, 3                   # encoding: [0xfc,0x40,0x19,0x1f]
+         fctiwuz. 2, 3
+# CHECK: fcfid 2, 3                      # encoding: [0xfc,0x40,0x1e,0x9c]
+         fcfid 2, 3
+# CHECK: fcfid. 2, 3                     # encoding: [0xfc,0x40,0x1e,0x9d]
+         fcfid. 2, 3
+# CHECK: fcfidu 2, 3                     # encoding: [0xfc,0x40,0x1f,0x9c]
+         fcfidu 2, 3
+# CHECK: fcfidu. 2, 3                    # encoding: [0xfc,0x40,0x1f,0x9d]
+         fcfidu. 2, 3
+# CHECK: fcfids 2, 3                     # encoding: [0xec,0x40,0x1e,0x9c]
+         fcfids 2, 3
+# CHECK: fcfids. 2, 3                    # encoding: [0xec,0x40,0x1e,0x9d]
+         fcfids. 2, 3
+# CHECK: fcfidus 2, 3                    # encoding: [0xec,0x40,0x1f,0x9c]
+         fcfidus 2, 3
+# CHECK: fcfidus. 2, 3                   # encoding: [0xec,0x40,0x1f,0x9d]
+         fcfidus. 2, 3
+# CHECK: frin 2, 3                       # encoding: [0xfc,0x40,0x1b,0x10]
+         frin 2, 3
+# CHECK: frin. 2, 3                      # encoding: [0xfc,0x40,0x1b,0x11]
+         frin. 2, 3
+# CHECK: frip 2, 3                       # encoding: [0xfc,0x40,0x1b,0x90]
+         frip 2, 3
+# CHECK: frip. 2, 3                      # encoding: [0xfc,0x40,0x1b,0x91]
+         frip. 2, 3
+# CHECK: friz 2, 3                       # encoding: [0xfc,0x40,0x1b,0x50]
+         friz 2, 3
+# CHECK: friz. 2, 3                      # encoding: [0xfc,0x40,0x1b,0x51]
+         friz. 2, 3
+# CHECK: frim 2, 3                       # encoding: [0xfc,0x40,0x1b,0xd0]
+         frim 2, 3
+# CHECK: frim. 2, 3                      # encoding: [0xfc,0x40,0x1b,0xd1]
+         frim. 2, 3
+
+# Floating-point compare instructions
+
+# CHECK: fcmpu 2, 3, 4                   # encoding: [0xfd,0x03,0x20,0x00]
+         fcmpu 2, 3, 4
+# FIXME: fcmpo 2, 3, 4
+
+# Floating-point select instruction
+
+# CHECK: fsel 2, 3, 4, 5                 # encoding: [0xfc,0x43,0x29,0x2e]
+         fsel 2, 3, 4, 5
+# CHECK: fsel. 2, 3, 4, 5                # encoding: [0xfc,0x43,0x29,0x2f]
+         fsel. 2, 3, 4, 5
+
+# Floating-point status and control register instructions
+
+# CHECK: mffs 2                          # encoding: [0xfc,0x40,0x04,0x8e]
+         mffs 2
+# FIXME: mffs. 2
+
+# FIXME: mcrfs 2, 3
+
+# FIXME: mtfsfi 2, 3, 1
+# FIXME: mtfsfi. 2, 3, 1
+# FIXME: mtfsf 2, 3, 1, 1
+# FIXME: mtfsf. 2, 3, 1, 1
+
+# CHECK: mtfsb0 31                       # encoding: [0xff,0xe0,0x00,0x8c]
+         mtfsb0 31
+# FIXME: mtfsb0. 31
+# CHECK: mtfsb1 31                       # encoding: [0xff,0xe0,0x00,0x4c]
+         mtfsb1 31
+# FIXME: mtfsb1. 31
+
diff --git a/test/MC/PowerPC/ppc64-encoding-vmx.s b/test/MC/PowerPC/ppc64-encoding-vmx.s
new file mode 100644
index 0000000..0154076
--- /dev/null
+++ b/test/MC/PowerPC/ppc64-encoding-vmx.s
@@ -0,0 +1,384 @@
+# Verifies that llvm-mc assembles each vector-facility (VMX) instruction below and prints the expected encoding bytes.
+# RUN: llvm-mc -triple powerpc64-unknown-unknown --show-encoding %s | FileCheck %s
+
+# Vector facility
+
+# Vector storage access instructions
+
+# CHECK: lvebx 2, 3, 4                   # encoding: [0x7c,0x43,0x20,0x0e]
+         lvebx 2, 3, 4
+# CHECK: lvehx 2, 3, 4                   # encoding: [0x7c,0x43,0x20,0x4e]
+         lvehx 2, 3, 4
+# CHECK: lvewx 2, 3, 4                   # encoding: [0x7c,0x43,0x20,0x8e]
+         lvewx 2, 3, 4
+# CHECK: lvx 2, 3, 4                     # encoding: [0x7c,0x43,0x20,0xce]
+         lvx 2, 3, 4
+# CHECK: lvxl 2, 3, 4                    # encoding: [0x7c,0x43,0x22,0xce]
+         lvxl 2, 3, 4
+# CHECK: stvebx 2, 3, 4                  # encoding: [0x7c,0x43,0x21,0x0e]
+         stvebx 2, 3, 4
+# CHECK: stvehx 2, 3, 4                  # encoding: [0x7c,0x43,0x21,0x4e]
+         stvehx 2, 3, 4
+# CHECK: stvewx 2, 3, 4                  # encoding: [0x7c,0x43,0x21,0x8e]
+         stvewx 2, 3, 4
+# CHECK: stvx 2, 3, 4                    # encoding: [0x7c,0x43,0x21,0xce]
+         stvx 2, 3, 4
+# CHECK: stvxl 2, 3, 4                   # encoding: [0x7c,0x43,0x23,0xce]
+         stvxl 2, 3, 4
+# CHECK: lvsl 2, 3, 4                    # encoding: [0x7c,0x43,0x20,0x0c]
+         lvsl 2, 3, 4
+# CHECK: lvsr 2, 3, 4                    # encoding: [0x7c,0x43,0x20,0x4c]
+         lvsr 2, 3, 4
+
+# Vector permute and formatting instructions
+
+# CHECK: vpkpx 2, 3, 4                   # encoding: [0x10,0x43,0x23,0x0e]
+         vpkpx 2, 3, 4
+# CHECK: vpkshss 2, 3, 4                 # encoding: [0x10,0x43,0x21,0x8e]
+         vpkshss 2, 3, 4
+# CHECK: vpkshus 2, 3, 4                 # encoding: [0x10,0x43,0x21,0x0e]
+         vpkshus 2, 3, 4
+# CHECK: vpkswss 2, 3, 4                 # encoding: [0x10,0x43,0x21,0xce]
+         vpkswss 2, 3, 4
+# CHECK: vpkswus 2, 3, 4                 # encoding: [0x10,0x43,0x21,0x4e]
+         vpkswus 2, 3, 4
+# CHECK: vpkuhum 2, 3, 4                 # encoding: [0x10,0x43,0x20,0x0e]
+         vpkuhum 2, 3, 4
+# CHECK: vpkuhus 2, 3, 4                 # encoding: [0x10,0x43,0x20,0x8e]
+         vpkuhus 2, 3, 4
+# CHECK: vpkuwum 2, 3, 4                 # encoding: [0x10,0x43,0x20,0x4e]
+         vpkuwum 2, 3, 4
+# CHECK: vpkuwus 2, 3, 4                 # encoding: [0x10,0x43,0x20,0xce]
+         vpkuwus 2, 3, 4
+
+# CHECK: vupkhpx 2, 3                    # encoding: [0x10,0x40,0x1b,0x4e]
+         vupkhpx 2, 3
+# CHECK: vupkhsb 2, 3                    # encoding: [0x10,0x40,0x1a,0x0e]
+         vupkhsb 2, 3
+# CHECK: vupkhsh 2, 3                    # encoding: [0x10,0x40,0x1a,0x4e]
+         vupkhsh 2, 3
+# CHECK: vupklpx 2, 3                    # encoding: [0x10,0x40,0x1b,0xce]
+         vupklpx 2, 3
+# CHECK: vupklsb 2, 3                    # encoding: [0x10,0x40,0x1a,0x8e]
+         vupklsb 2, 3
+# CHECK: vupklsh 2, 3                    # encoding: [0x10,0x40,0x1a,0xce]
+         vupklsh 2, 3
+
+# CHECK: vmrghb 2, 3, 4                  # encoding: [0x10,0x43,0x20,0x0c]
+         vmrghb 2, 3, 4
+# CHECK: vmrghh 2, 3, 4                  # encoding: [0x10,0x43,0x20,0x4c]
+         vmrghh 2, 3, 4
+# CHECK: vmrghw 2, 3, 4                  # encoding: [0x10,0x43,0x20,0x8c]
+         vmrghw 2, 3, 4
+# CHECK: vmrglb 2, 3, 4                  # encoding: [0x10,0x43,0x21,0x0c]
+         vmrglb 2, 3, 4
+# CHECK: vmrglh 2, 3, 4                  # encoding: [0x10,0x43,0x21,0x4c]
+         vmrglh 2, 3, 4
+# CHECK: vmrglw 2, 3, 4                  # encoding: [0x10,0x43,0x21,0x8c]
+         vmrglw 2, 3, 4
+
+# CHECK: vspltb 2, 3, 1                  # encoding: [0x10,0x41,0x1a,0x0c]
+         vspltb 2, 3, 1
+# CHECK: vsplth 2, 3, 1                  # encoding: [0x10,0x41,0x1a,0x4c]
+         vsplth 2, 3, 1
+# CHECK: vspltw 2, 3, 1                  # encoding: [0x10,0x41,0x1a,0x8c]
+         vspltw 2, 3, 1
+# CHECK: vspltisb 2, 3                   # encoding: [0x10,0x43,0x03,0x0c]
+         vspltisb 2, 3
+# CHECK: vspltish 2, 3                   # encoding: [0x10,0x43,0x03,0x4c]
+         vspltish 2, 3
+# CHECK: vspltisw 2, 3                   # encoding: [0x10,0x43,0x03,0x8c]
+         vspltisw 2, 3
+
+# CHECK: vperm 2, 3, 4, 5                # encoding: [0x10,0x43,0x21,0x6b]
+         vperm 2, 3, 4, 5
+# CHECK: vsel 2, 3, 4, 5                 # encoding: [0x10,0x43,0x21,0x6a]
+         vsel 2, 3, 4, 5
+
+# CHECK: vsl 2, 3, 4                     # encoding: [0x10,0x43,0x21,0xc4]
+         vsl 2, 3, 4
+# CHECK: vsldoi 2, 3, 4, 5               # encoding: [0x10,0x43,0x21,0x6c]
+         vsldoi 2, 3, 4, 5
+# CHECK: vslo 2, 3, 4                    # encoding: [0x10,0x43,0x24,0x0c]
+         vslo 2, 3, 4
+# CHECK: vsr 2, 3, 4                     # encoding: [0x10,0x43,0x22,0xc4]
+         vsr 2, 3, 4
+# CHECK: vsro 2, 3, 4                    # encoding: [0x10,0x43,0x24,0x4c]
+         vsro 2, 3, 4
+
+# Vector integer arithmetic instructions
+
+# CHECK: vaddcuw 2, 3, 4                 # encoding: [0x10,0x43,0x21,0x80]
+         vaddcuw 2, 3, 4
+# CHECK: vaddsbs 2, 3, 4                 # encoding: [0x10,0x43,0x23,0x00]
+         vaddsbs 2, 3, 4
+# CHECK: vaddshs 2, 3, 4                 # encoding: [0x10,0x43,0x23,0x40]
+         vaddshs 2, 3, 4
+# CHECK: vaddsws 2, 3, 4                 # encoding: [0x10,0x43,0x23,0x80]
+         vaddsws 2, 3, 4
+# CHECK: vaddubm 2, 3, 4                 # encoding: [0x10,0x43,0x20,0x00]
+         vaddubm 2, 3, 4
+# CHECK: vadduhm 2, 3, 4                 # encoding: [0x10,0x43,0x20,0x40]
+         vadduhm 2, 3, 4
+# CHECK: vadduwm 2, 3, 4                 # encoding: [0x10,0x43,0x20,0x80]
+         vadduwm 2, 3, 4
+# CHECK: vaddubs 2, 3, 4                 # encoding: [0x10,0x43,0x22,0x00]
+         vaddubs 2, 3, 4
+# CHECK: vadduhs 2, 3, 4                 # encoding: [0x10,0x43,0x22,0x40]
+         vadduhs 2, 3, 4
+# CHECK: vadduws 2, 3, 4                 # encoding: [0x10,0x43,0x22,0x80]
+         vadduws 2, 3, 4
+
+# CHECK: vsubcuw 2, 3, 4                 # encoding: [0x10,0x43,0x25,0x80]
+         vsubcuw 2, 3, 4
+# CHECK: vsubsbs 2, 3, 4                 # encoding: [0x10,0x43,0x27,0x00]
+         vsubsbs 2, 3, 4
+# CHECK: vsubshs 2, 3, 4                 # encoding: [0x10,0x43,0x27,0x40]
+         vsubshs 2, 3, 4
+# CHECK: vsubsws 2, 3, 4                 # encoding: [0x10,0x43,0x27,0x80]
+         vsubsws 2, 3, 4
+# CHECK: vsububm 2, 3, 4                 # encoding: [0x10,0x43,0x24,0x00]
+         vsububm 2, 3, 4
+# CHECK: vsubuhm 2, 3, 4                 # encoding: [0x10,0x43,0x24,0x40]
+         vsubuhm 2, 3, 4
+# CHECK: vsubuwm 2, 3, 4                 # encoding: [0x10,0x43,0x24,0x80]
+         vsubuwm 2, 3, 4
+# CHECK: vsububs 2, 3, 4                 # encoding: [0x10,0x43,0x26,0x00]
+         vsububs 2, 3, 4
+# CHECK: vsubuhs 2, 3, 4                 # encoding: [0x10,0x43,0x26,0x40]
+         vsubuhs 2, 3, 4
+# CHECK: vsubuws 2, 3, 4                 # encoding: [0x10,0x43,0x26,0x80]
+         vsubuws 2, 3, 4
+
+# CHECK: vmulesb 2, 3, 4                 # encoding: [0x10,0x43,0x23,0x08]
+         vmulesb 2, 3, 4
+# CHECK: vmulesh 2, 3, 4                 # encoding: [0x10,0x43,0x23,0x48]
+         vmulesh 2, 3, 4
+# CHECK: vmuleub 2, 3, 4                 # encoding: [0x10,0x43,0x22,0x08]
+         vmuleub 2, 3, 4
+# CHECK: vmuleuh 2, 3, 4                 # encoding: [0x10,0x43,0x22,0x48]
+         vmuleuh 2, 3, 4
+# CHECK: vmulosb 2, 3, 4                 # encoding: [0x10,0x43,0x21,0x08]
+         vmulosb 2, 3, 4
+# CHECK: vmulosh 2, 3, 4                 # encoding: [0x10,0x43,0x21,0x48]
+         vmulosh 2, 3, 4
+# CHECK: vmuloub 2, 3, 4                 # encoding: [0x10,0x43,0x20,0x08]
+         vmuloub 2, 3, 4
+# CHECK: vmulouh 2, 3, 4                 # encoding: [0x10,0x43,0x20,0x48]
+         vmulouh 2, 3, 4
+
+# CHECK: vmhaddshs 2, 3, 4, 5            # encoding: [0x10,0x43,0x21,0x60]
+         vmhaddshs 2, 3, 4, 5
+# CHECK: vmhraddshs 2, 3, 4, 5           # encoding: [0x10,0x43,0x21,0x61]
+         vmhraddshs 2, 3, 4, 5
+# CHECK: vmladduhm 2, 3, 4, 5            # encoding: [0x10,0x43,0x21,0x62]
+         vmladduhm 2, 3, 4, 5
+# CHECK: vmsumubm 2, 3, 4, 5             # encoding: [0x10,0x43,0x21,0x64]
+         vmsumubm 2, 3, 4, 5
+# CHECK: vmsummbm 2, 3, 4, 5             # encoding: [0x10,0x43,0x21,0x65]
+         vmsummbm 2, 3, 4, 5
+# CHECK: vmsumshm 2, 3, 4, 5             # encoding: [0x10,0x43,0x21,0x68]
+         vmsumshm 2, 3, 4, 5
+# CHECK: vmsumshs 2, 3, 4, 5             # encoding: [0x10,0x43,0x21,0x69]
+         vmsumshs 2, 3, 4, 5
+# CHECK: vmsumuhm 2, 3, 4, 5             # encoding: [0x10,0x43,0x21,0x66]
+         vmsumuhm 2, 3, 4, 5
+# CHECK: vmsumuhs 2, 3, 4, 5             # encoding: [0x10,0x43,0x21,0x67]
+         vmsumuhs 2, 3, 4, 5
+
+# CHECK: vsumsws 2, 3, 4                 # encoding: [0x10,0x43,0x27,0x88]
+         vsumsws 2, 3, 4
+# CHECK: vsum2sws 2, 3, 4                # encoding: [0x10,0x43,0x26,0x88]
+         vsum2sws 2, 3, 4
+# CHECK: vsum4sbs 2, 3, 4                # encoding: [0x10,0x43,0x27,0x08]
+         vsum4sbs 2, 3, 4
+# CHECK: vsum4shs 2, 3, 4                # encoding: [0x10,0x43,0x26,0x48]
+         vsum4shs 2, 3, 4
+# CHECK: vsum4ubs 2, 3, 4                # encoding: [0x10,0x43,0x26,0x08]
+         vsum4ubs 2, 3, 4
+
+# CHECK: vavgsb 2, 3, 4                  # encoding: [0x10,0x43,0x25,0x02]
+         vavgsb 2, 3, 4
+# CHECK: vavgsh 2, 3, 4                  # encoding: [0x10,0x43,0x25,0x42]
+         vavgsh 2, 3, 4
+# CHECK: vavgsw 2, 3, 4                  # encoding: [0x10,0x43,0x25,0x82]
+         vavgsw 2, 3, 4
+# CHECK: vavgub 2, 3, 4                  # encoding: [0x10,0x43,0x24,0x02]
+         vavgub 2, 3, 4
+# CHECK: vavguh 2, 3, 4                  # encoding: [0x10,0x43,0x24,0x42]
+         vavguh 2, 3, 4
+# CHECK: vavguw 2, 3, 4                  # encoding: [0x10,0x43,0x24,0x82]
+         vavguw 2, 3, 4
+
+# CHECK: vmaxsb 2, 3, 4                  # encoding: [0x10,0x43,0x21,0x02]
+         vmaxsb 2, 3, 4
+# CHECK: vmaxsh 2, 3, 4                  # encoding: [0x10,0x43,0x21,0x42]
+         vmaxsh 2, 3, 4
+# CHECK: vmaxsw 2, 3, 4                  # encoding: [0x10,0x43,0x21,0x82]
+         vmaxsw 2, 3, 4
+# CHECK: vmaxub 2, 3, 4                  # encoding: [0x10,0x43,0x20,0x02]
+         vmaxub 2, 3, 4
+# CHECK: vmaxuh 2, 3, 4                  # encoding: [0x10,0x43,0x20,0x42]
+         vmaxuh 2, 3, 4
+# CHECK: vmaxuw 2, 3, 4                  # encoding: [0x10,0x43,0x20,0x82]
+         vmaxuw 2, 3, 4
+
+# CHECK: vminsb 2, 3, 4                  # encoding: [0x10,0x43,0x23,0x02]
+         vminsb 2, 3, 4
+# CHECK: vminsh 2, 3, 4                  # encoding: [0x10,0x43,0x23,0x42]
+         vminsh 2, 3, 4
+# CHECK: vminsw 2, 3, 4                  # encoding: [0x10,0x43,0x23,0x82]
+         vminsw 2, 3, 4
+# CHECK: vminub 2, 3, 4                  # encoding: [0x10,0x43,0x22,0x02]
+         vminub 2, 3, 4
+# CHECK: vminuh 2, 3, 4                  # encoding: [0x10,0x43,0x22,0x42]
+         vminuh 2, 3, 4
+# CHECK: vminuw 2, 3, 4                  # encoding: [0x10,0x43,0x22,0x82]
+         vminuw 2, 3, 4
+
+# Vector integer compare instructions
+
+# CHECK: vcmpequb 2, 3, 4                # encoding: [0x10,0x43,0x20,0x06]
+         vcmpequb 2, 3, 4
+# CHECK: vcmpequb. 2, 3, 4               # encoding: [0x10,0x43,0x24,0x06]
+         vcmpequb. 2, 3, 4
+# CHECK: vcmpequh 2, 3, 4                # encoding: [0x10,0x43,0x20,0x46]
+         vcmpequh 2, 3, 4
+# CHECK: vcmpequh. 2, 3, 4               # encoding: [0x10,0x43,0x24,0x46]
+         vcmpequh. 2, 3, 4
+# CHECK: vcmpequw 2, 3, 4                # encoding: [0x10,0x43,0x20,0x86]
+         vcmpequw 2, 3, 4
+# CHECK: vcmpequw. 2, 3, 4               # encoding: [0x10,0x43,0x24,0x86]
+         vcmpequw. 2, 3, 4
+# CHECK: vcmpgtsb 2, 3, 4                # encoding: [0x10,0x43,0x23,0x06]
+         vcmpgtsb 2, 3, 4
+# CHECK: vcmpgtsb. 2, 3, 4               # encoding: [0x10,0x43,0x27,0x06]
+         vcmpgtsb. 2, 3, 4
+# CHECK: vcmpgtsh 2, 3, 4                # encoding: [0x10,0x43,0x23,0x46]
+         vcmpgtsh 2, 3, 4
+# CHECK: vcmpgtsh. 2, 3, 4               # encoding: [0x10,0x43,0x27,0x46]
+         vcmpgtsh. 2, 3, 4
+# CHECK: vcmpgtsw 2, 3, 4                # encoding: [0x10,0x43,0x23,0x86]
+         vcmpgtsw 2, 3, 4
+# CHECK: vcmpgtsw. 2, 3, 4               # encoding: [0x10,0x43,0x27,0x86]
+         vcmpgtsw. 2, 3, 4
+# CHECK: vcmpgtub 2, 3, 4                # encoding: [0x10,0x43,0x22,0x06]
+         vcmpgtub 2, 3, 4
+# CHECK: vcmpgtub. 2, 3, 4               # encoding: [0x10,0x43,0x26,0x06]
+         vcmpgtub. 2, 3, 4
+# CHECK: vcmpgtuh 2, 3, 4                # encoding: [0x10,0x43,0x22,0x46]
+         vcmpgtuh 2, 3, 4
+# CHECK: vcmpgtuh. 2, 3, 4               # encoding: [0x10,0x43,0x26,0x46]
+         vcmpgtuh. 2, 3, 4
+# CHECK: vcmpgtuw 2, 3, 4                # encoding: [0x10,0x43,0x22,0x86]
+         vcmpgtuw 2, 3, 4
+# CHECK: vcmpgtuw. 2, 3, 4               # encoding: [0x10,0x43,0x26,0x86]
+         vcmpgtuw. 2, 3, 4
+
+# Vector integer logical instructions
+
+# CHECK: vand 2, 3, 4                    # encoding: [0x10,0x43,0x24,0x04]
+         vand 2, 3, 4
+# CHECK: vandc 2, 3, 4                   # encoding: [0x10,0x43,0x24,0x44]
+         vandc 2, 3, 4
+# CHECK: vnor 2, 3, 4                    # encoding: [0x10,0x43,0x25,0x04]
+         vnor 2, 3, 4
+# CHECK: vor 2, 3, 4                     # encoding: [0x10,0x43,0x24,0x84]
+         vor 2, 3, 4
+# CHECK: vxor 2, 3, 4                    # encoding: [0x10,0x43,0x24,0xc4]
+         vxor 2, 3, 4
+
+# Vector integer rotate and shift instructions
+
+# CHECK: vrlb 2, 3, 4                    # encoding: [0x10,0x43,0x20,0x04]
+         vrlb 2, 3, 4
+# CHECK: vrlh 2, 3, 4                    # encoding: [0x10,0x43,0x20,0x44]
+         vrlh 2, 3, 4
+# CHECK: vrlw 2, 3, 4                    # encoding: [0x10,0x43,0x20,0x84]
+         vrlw 2, 3, 4
+
+# CHECK: vslb 2, 3, 4                    # encoding: [0x10,0x43,0x21,0x04]
+         vslb 2, 3, 4
+# CHECK: vslh 2, 3, 4                    # encoding: [0x10,0x43,0x21,0x44]
+         vslh 2, 3, 4
+# CHECK: vslw 2, 3, 4                    # encoding: [0x10,0x43,0x21,0x84]
+         vslw 2, 3, 4
+# CHECK: vsrb 2, 3, 4                    # encoding: [0x10,0x43,0x22,0x04]
+         vsrb 2, 3, 4
+# CHECK: vsrh 2, 3, 4                    # encoding: [0x10,0x43,0x22,0x44]
+         vsrh 2, 3, 4
+# CHECK: vsrw 2, 3, 4                    # encoding: [0x10,0x43,0x22,0x84]
+         vsrw 2, 3, 4
+# CHECK: vsrab 2, 3, 4                   # encoding: [0x10,0x43,0x23,0x04]
+         vsrab 2, 3, 4
+# CHECK: vsrah 2, 3, 4                   # encoding: [0x10,0x43,0x23,0x44]
+         vsrah 2, 3, 4
+# CHECK: vsraw 2, 3, 4                   # encoding: [0x10,0x43,0x23,0x84]
+         vsraw 2, 3, 4
+
+# Vector floating-point instructions
+
+# CHECK: vaddfp 2, 3, 4                  # encoding: [0x10,0x43,0x20,0x0a]
+         vaddfp 2, 3, 4
+# CHECK: vsubfp 2, 3, 4                  # encoding: [0x10,0x43,0x20,0x4a]
+         vsubfp 2, 3, 4
+# CHECK: vmaddfp 2, 3, 4, 5              # encoding: [0x10,0x43,0x29,0x2e]
+         vmaddfp 2, 3, 4, 5
+# CHECK: vnmsubfp 2, 3, 4, 5             # encoding: [0x10,0x43,0x29,0x2f]
+         vnmsubfp 2, 3, 4, 5
+
+# CHECK: vmaxfp 2, 3, 4                  # encoding: [0x10,0x43,0x24,0x0a]
+         vmaxfp 2, 3, 4
+# CHECK: vminfp 2, 3, 4                  # encoding: [0x10,0x43,0x24,0x4a]
+         vminfp 2, 3, 4
+
+# CHECK: vctsxs 2, 3, 4                  # encoding: [0x10,0x44,0x1b,0xca]
+         vctsxs 2, 3, 4
+# CHECK: vctuxs 2, 3, 4                  # encoding: [0x10,0x44,0x1b,0x8a]
+         vctuxs 2, 3, 4
+# CHECK: vcfsx 2, 3, 4                   # encoding: [0x10,0x44,0x1b,0x4a]
+         vcfsx 2, 3, 4
+# CHECK: vcfux 2, 3, 4                   # encoding: [0x10,0x44,0x1b,0x0a]
+         vcfux 2, 3, 4
+# CHECK: vrfim 2, 3                      # encoding: [0x10,0x40,0x1a,0xca]
+         vrfim 2, 3
+# CHECK: vrfin 2, 3                      # encoding: [0x10,0x40,0x1a,0x0a]
+         vrfin 2, 3
+# CHECK: vrfip 2, 3                      # encoding: [0x10,0x40,0x1a,0x8a]
+         vrfip 2, 3
+# CHECK: vrfiz 2, 3                      # encoding: [0x10,0x40,0x1a,0x4a]
+         vrfiz 2, 3
+
+# CHECK: vcmpbfp 2, 3, 4                 # encoding: [0x10,0x43,0x23,0xc6]
+         vcmpbfp 2, 3, 4
+# CHECK: vcmpbfp. 2, 3, 4                # encoding: [0x10,0x43,0x27,0xc6]
+         vcmpbfp. 2, 3, 4
+# CHECK: vcmpeqfp 2, 3, 4                # encoding: [0x10,0x43,0x20,0xc6]
+         vcmpeqfp 2, 3, 4
+# CHECK: vcmpeqfp. 2, 3, 4               # encoding: [0x10,0x43,0x24,0xc6]
+         vcmpeqfp. 2, 3, 4
+# CHECK: vcmpgefp 2, 3, 4                # encoding: [0x10,0x43,0x21,0xc6]
+         vcmpgefp 2, 3, 4
+# CHECK: vcmpgefp. 2, 3, 4               # encoding: [0x10,0x43,0x25,0xc6]
+         vcmpgefp. 2, 3, 4
+# CHECK: vcmpgtfp 2, 3, 4                # encoding: [0x10,0x43,0x22,0xc6]
+         vcmpgtfp 2, 3, 4
+# CHECK: vcmpgtfp. 2, 3, 4               # encoding: [0x10,0x43,0x26,0xc6]
+         vcmpgtfp. 2, 3, 4
+
+# CHECK: vexptefp 2, 3                   # encoding: [0x10,0x40,0x19,0x8a]
+         vexptefp 2, 3
+# CHECK: vlogefp 2, 3                    # encoding: [0x10,0x40,0x19,0xca]
+         vlogefp 2, 3
+# CHECK: vrefp 2, 3                      # encoding: [0x10,0x40,0x19,0x0a]
+         vrefp 2, 3
+# CHECK: vrsqrtefp 2, 3                  # encoding: [0x10,0x40,0x19,0x4a]
+         vrsqrtefp 2, 3
+
+# Vector status and control register instructions
+
+# CHECK: mtvscr 2                        # encoding: [0x10,0x00,0x16,0x44]
+         mtvscr 2
+# CHECK: mfvscr 2                        # encoding: [0x10,0x40,0x06,0x04]
+         mfvscr 2
+
diff --git a/test/MC/PowerPC/ppc64-encoding.s b/test/MC/PowerPC/ppc64-encoding.s
new file mode 100644
index 0000000..dda7960
--- /dev/null
+++ b/test/MC/PowerPC/ppc64-encoding.s
@@ -0,0 +1,480 @@
+
+# RUN: llvm-mc -triple powerpc64-unknown-unknown --show-encoding %s | FileCheck %s
+
+# Branch facility
+
+# Branch instructions
+
+# CHECK: b target                        # encoding: [0b010010AA,A,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_br24
+         b target
+# FIXME: ba target
+# CHECK: bl target                       # encoding: [0b010010AA,A,A,0bAAAAAA01]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target, kind: fixup_ppc_br24
+         bl target
+# FIXME: bla target
+
+# FIXME: bc 4, 10, target
+# FIXME: bca 4, 10, target
+# FIXME: bcl 4, 10, target
+# FIXME: bcla 4, 10, target
+
+# FIXME: bclr 4, 10, 3
+# FIXME: bclrl 4, 10, 3
+# FIXME: bcctr 4, 10, 3
+# FIXME: bcctrl 4, 10, 3
+
+# Condition register instructions
+
+# FIXME: crand 2, 3, 4
+# FIXME: crnand 2, 3, 4
+# CHECK: cror 2, 3, 4                    # encoding: [0x4c,0x43,0x23,0x82]
+         cror 2, 3, 4
+# FIXME: crxor 2, 3, 4
+# FIXME: crnor 2, 3, 4
+# CHECK: creqv 2, 3, 4                   # encoding: [0x4c,0x43,0x22,0x42]
+         creqv 2, 3, 4
+# FIXME: crandc 2, 3, 4
+# FIXME: crorc 2, 3, 4
+# CHECK: mcrf 2, 3                       # encoding: [0x4d,0x0c,0x00,0x00]
+         mcrf 2, 3
+
+# System call instruction
+
+# FIXME: sc 1
+
+# Fixed-point facility
+
+# Fixed-point load instructions
+
+# CHECK: lbz 2, 128(4)                   # encoding: [0x88,0x44,0x00,0x80]
+         lbz 2, 128(4)
+# CHECK: lbzx 2, 3, 4                    # encoding: [0x7c,0x43,0x20,0xae]
+         lbzx 2, 3, 4
+# CHECK: lbzu 2, 128(4)                  # encoding: [0x8c,0x44,0x00,0x80]
+         lbzu 2, 128(4)
+# CHECK: lbzux 2, 3, 4                   # encoding: [0x7c,0x43,0x20,0xee]
+         lbzux 2, 3, 4
+# CHECK: lhz 2, 128(4)                   # encoding: [0xa0,0x44,0x00,0x80]
+         lhz 2, 128(4)
+# CHECK: lhzx 2, 3, 4                    # encoding: [0x7c,0x43,0x22,0x2e]
+         lhzx 2, 3, 4
+# CHECK: lhzu 2, 128(4)                  # encoding: [0xa4,0x44,0x00,0x80]
+         lhzu 2, 128(4)
+# CHECK: lhzux 2, 3, 4                   # encoding: [0x7c,0x43,0x22,0x6e]
+         lhzux 2, 3, 4
+# CHECK: lha 2, 128(4)                   # encoding: [0xa8,0x44,0x00,0x80]
+         lha 2, 128(4)
+# CHECK: lhax 2, 3, 4                    # encoding: [0x7c,0x43,0x22,0xae]
+         lhax 2, 3, 4
+# CHECK: lhau 2, 128(4)                  # encoding: [0xac,0x44,0x00,0x80]
+         lhau 2, 128(4)
+# CHECK: lhaux 2, 3, 4                   # encoding: [0x7c,0x43,0x22,0xee]
+         lhaux 2, 3, 4
+# CHECK: lwz 2, 128(4)                   # encoding: [0x80,0x44,0x00,0x80]
+         lwz 2, 128(4)
+# CHECK: lwzx 2, 3, 4                    # encoding: [0x7c,0x43,0x20,0x2e]
+         lwzx 2, 3, 4
+# CHECK: lwzu 2, 128(4)                  # encoding: [0x84,0x44,0x00,0x80]
+         lwzu 2, 128(4)
+# CHECK: lwzux 2, 3, 4                   # encoding: [0x7c,0x43,0x20,0x6e]
+         lwzux 2, 3, 4
+# CHECK: lwa 2, 128(4)                   # encoding: [0xe8,0x44,0x00,0x82]
+         lwa 2, 128(4)
+# CHECK: lwax 2, 3, 4                    # encoding: [0x7c,0x43,0x22,0xaa]
+         lwax 2, 3, 4
+# CHECK: lwaux 2, 3, 4                   # encoding: [0x7c,0x43,0x22,0xea]
+         lwaux 2, 3, 4
+# CHECK: ld 2, 128(4)                    # encoding: [0xe8,0x44,0x00,0x80]
+         ld 2, 128(4)
+# CHECK: ldx 2, 3, 4                     # encoding: [0x7c,0x43,0x20,0x2a]
+         ldx 2, 3, 4
+# CHECK: ldu 2, 128(4)                   # encoding: [0xe8,0x44,0x00,0x81]
+         ldu 2, 128(4)
+# CHECK: ldux 2, 3, 4                    # encoding: [0x7c,0x43,0x20,0x6a]
+         ldux 2, 3, 4
+
+# Fixed-point store instructions
+
+# CHECK: stb 2, 128(4)                   # encoding: [0x98,0x44,0x00,0x80]
+         stb 2, 128(4)
+# CHECK: stbx 2, 3, 4                    # encoding: [0x7c,0x43,0x21,0xae]
+         stbx 2, 3, 4
+# CHECK: stbu 2, 128(4)                  # encoding: [0x9c,0x44,0x00,0x80]
+         stbu 2, 128(4)
+# CHECK: stbux 2, 3, 4                   # encoding: [0x7c,0x43,0x21,0xee]
+         stbux 2, 3, 4
+# CHECK: sth 2, 128(4)                   # encoding: [0xb0,0x44,0x00,0x80]
+         sth 2, 128(4)
+# CHECK: sthx 2, 3, 4                    # encoding: [0x7c,0x43,0x23,0x2e]
+         sthx 2, 3, 4
+# CHECK: sthu 2, 128(4)                  # encoding: [0xb4,0x44,0x00,0x80]
+         sthu 2, 128(4)
+# CHECK: sthux 2, 3, 4                   # encoding: [0x7c,0x43,0x23,0x6e]
+         sthux 2, 3, 4
+# CHECK: stw 2, 128(4)                   # encoding: [0x90,0x44,0x00,0x80]
+         stw 2, 128(4)
+# CHECK: stwx 2, 3, 4                    # encoding: [0x7c,0x43,0x21,0x2e]
+         stwx 2, 3, 4
+# CHECK: stwu 2, 128(4)                  # encoding: [0x94,0x44,0x00,0x80]
+         stwu 2, 128(4)
+# CHECK: stwux 2, 3, 4                   # encoding: [0x7c,0x43,0x21,0x6e]
+         stwux 2, 3, 4
+# CHECK: std 2, 128(4)                   # encoding: [0xf8,0x44,0x00,0x80]
+         std 2, 128(4)
+# CHECK: stdx 2, 3, 4                    # encoding: [0x7c,0x43,0x21,0x2a]
+         stdx 2, 3, 4
+# CHECK: stdu 2, 128(4)                  # encoding: [0xf8,0x44,0x00,0x81]
+         stdu 2, 128(4)
+# CHECK: stdux 2, 3, 4                   # encoding: [0x7c,0x43,0x21,0x6a]
+         stdux 2, 3, 4
+
+# Fixed-point load and store with byte reversal instructions
+
+# CHECK: lhbrx 2, 3, 4                   # encoding: [0x7c,0x43,0x26,0x2c]
+         lhbrx 2, 3, 4
+# CHECK: sthbrx 2, 3, 4                  # encoding: [0x7c,0x43,0x27,0x2c]
+         sthbrx 2, 3, 4
+# CHECK: lwbrx 2, 3, 4                   # encoding: [0x7c,0x43,0x24,0x2c]
+         lwbrx 2, 3, 4
+# CHECK: stwbrx 2, 3, 4                  # encoding: [0x7c,0x43,0x25,0x2c]
+         stwbrx 2, 3, 4
+# CHECK: ldbrx 2, 3, 4                   # encoding: [0x7c,0x43,0x24,0x28]
+         ldbrx 2, 3, 4
+# CHECK: stdbrx 2, 3, 4                  # encoding: [0x7c,0x43,0x25,0x28]
+         stdbrx 2, 3, 4
+
+# FIXME: Fixed-point load and store multiple instructions
+
+# FIXME: Fixed-point move assist instructions
+
+# Fixed-point arithmetic instructions
+
+# CHECK: addi 2, 3, 128                  # encoding: [0x38,0x43,0x00,0x80]
+         addi 2, 3, 128
+# CHECK: addis 2, 3, 128                 # encoding: [0x3c,0x43,0x00,0x80]
+         addis 2, 3, 128
+# CHECK: add 2, 3, 4                     # encoding: [0x7c,0x43,0x22,0x14]
+         add 2, 3, 4
+# CHECK: add. 2, 3, 4                    # encoding: [0x7c,0x43,0x22,0x15]
+         add. 2, 3, 4
+# FIXME: addo 2, 3, 4
+# FIXME: addo. 2, 3, 4
+# CHECK: subf 2, 3, 4                    # encoding: [0x7c,0x43,0x20,0x50]
+         subf 2, 3, 4
+# CHECK: subf. 2, 3, 4                   # encoding: [0x7c,0x43,0x20,0x51]
+         subf. 2, 3, 4
+# FIXME: subfo 2, 3, 4
+# FIXME: subfo. 2, 3, 4
+# CHECK: addic 2, 3, 128                 # encoding: [0x30,0x43,0x00,0x80]
+         addic 2, 3, 128
+# CHECK: addic. 2, 3, 128                # encoding: [0x34,0x43,0x00,0x80]
+         addic. 2, 3, 128
+# CHECK: subfic 2, 3, 4                  # encoding: [0x20,0x43,0x00,0x04]
+         subfic 2, 3, 4
+
+# CHECK: addc 2, 3, 4                    # encoding: [0x7c,0x43,0x20,0x14]
+         addc 2, 3, 4
+# CHECK: addc. 2, 3, 4                   # encoding: [0x7c,0x43,0x20,0x15]
+         addc. 2, 3, 4
+# FIXME: addco 2, 3, 4
+# FIXME: addco. 2, 3, 4
+# CHECK: subfc 2, 3, 4                   # encoding: [0x7c,0x43,0x20,0x10]
+         subfc 2, 3, 4
+# CHECK: subfc. 2, 3, 4                  # encoding: [0x7c,0x43,0x20,0x11]
+         subfc. 2, 3, 4
+# FIXME: subfco 2, 3, 4
+# FIXME: subfco. 2, 3, 4
+
+# CHECK: adde 2, 3, 4                    # encoding: [0x7c,0x43,0x21,0x14]
+         adde 2, 3, 4
+# CHECK: adde. 2, 3, 4                   # encoding: [0x7c,0x43,0x21,0x15]
+         adde. 2, 3, 4
+# FIXME: addeo 2, 3, 4
+# FIXME: addeo. 2, 3, 4
+# CHECK: subfe 2, 3, 4                   # encoding: [0x7c,0x43,0x21,0x10]
+         subfe 2, 3, 4
+# CHECK: subfe. 2, 3, 4                  # encoding: [0x7c,0x43,0x21,0x11]
+         subfe. 2, 3, 4
+# FIXME: subfeo 2, 3, 4
+# FIXME: subfeo. 2, 3, 4
+
+# CHECK: addme 2, 3                      # encoding: [0x7c,0x43,0x01,0xd4]
+         addme 2, 3
+# CHECK: addme. 2, 3                     # encoding: [0x7c,0x43,0x01,0xd5]
+         addme. 2, 3
+# FIXME: addmeo 2, 3
+# FIXME: addmeo. 2, 3
+# CHECK: subfme 2, 3                     # encoding: [0x7c,0x43,0x01,0xd0]
+         subfme 2, 3
+# CHECK: subfme. 2, 3                    # encoding: [0x7c,0x43,0x01,0xd1]
+         subfme. 2, 3
+# FIXME: subfmeo 2, 3
+# FIXME: subfmeo. 2, 3
+
+# CHECK: addze 2, 3                      # encoding: [0x7c,0x43,0x01,0x94]
+         addze 2, 3
+# CHECK: addze. 2, 3                     # encoding: [0x7c,0x43,0x01,0x95]
+         addze. 2, 3
+# FIXME: addzeo 2, 3
+# FIXME: addzeo. 2, 3
+# CHECK: subfze 2, 3                     # encoding: [0x7c,0x43,0x01,0x90]
+         subfze 2, 3
+# CHECK: subfze. 2, 3                    # encoding: [0x7c,0x43,0x01,0x91]
+         subfze. 2, 3
+# FIXME: subfzeo 2, 3
+# FIXME: subfzeo. 2, 3
+
+# CHECK: neg 2, 3                        # encoding: [0x7c,0x43,0x00,0xd0]
+         neg 2, 3
+# CHECK: neg. 2, 3                       # encoding: [0x7c,0x43,0x00,0xd1]
+         neg. 2, 3
+# FIXME: nego 2, 3
+# FIXME: nego. 2, 3
+
+# CHECK: mulli 2, 3, 128                 # encoding: [0x1c,0x43,0x00,0x80]
+         mulli 2, 3, 128
+# CHECK: mulhw 2, 3, 4                   # encoding: [0x7c,0x43,0x20,0x96]
+         mulhw 2, 3, 4
+# CHECK: mulhw. 2, 3, 4                  # encoding: [0x7c,0x43,0x20,0x97]
+         mulhw. 2, 3, 4
+# CHECK: mullw 2, 3, 4                   # encoding: [0x7c,0x43,0x21,0xd6]
+         mullw 2, 3, 4
+# CHECK: mullw. 2, 3, 4                  # encoding: [0x7c,0x43,0x21,0xd7]
+         mullw. 2, 3, 4
+# FIXME: mullwo 2, 3, 4
+# FIXME: mullwo. 2, 3, 4
+# CHECK: mulhwu 2, 3, 4                  # encoding: [0x7c,0x43,0x20,0x16]
+         mulhwu 2, 3, 4
+# CHECK: mulhwu. 2, 3, 4                 # encoding: [0x7c,0x43,0x20,0x17]
+         mulhwu. 2, 3, 4
+
+# CHECK: divw 2, 3, 4                    # encoding: [0x7c,0x43,0x23,0xd6]
+         divw 2, 3, 4
+# CHECK: divw. 2, 3, 4                   # encoding: [0x7c,0x43,0x23,0xd7]
+         divw. 2, 3, 4
+# FIXME: divwo 2, 3, 4
+# FIXME: divwo. 2, 3, 4
+# CHECK: divwu 2, 3, 4                   # encoding: [0x7c,0x43,0x23,0x96]
+         divwu 2, 3, 4
+# CHECK: divwu. 2, 3, 4                  # encoding: [0x7c,0x43,0x23,0x97]
+         divwu. 2, 3, 4
+# FIXME: divwuo 2, 3, 4
+# FIXME: divwuo. 2, 3, 4
+# FIXME: divwe 2, 3, 4
+# FIXME: divwe. 2, 3, 4
+# FIXME: divweo 2, 3, 4
+# FIXME: divweo. 2, 3, 4
+# FIXME: divweu 2, 3, 4
+# FIXME: divweu. 2, 3, 4
+# FIXME: divweuo 2, 3, 4
+# FIXME: divweuo. 2, 3, 4
+
+# CHECK: mulld 2, 3, 4                   # encoding: [0x7c,0x43,0x21,0xd2]
+         mulld 2, 3, 4
+# CHECK: mulld. 2, 3, 4                  # encoding: [0x7c,0x43,0x21,0xd3]
+         mulld. 2, 3, 4
+# FIXME: mulldo 2, 3, 4
+# FIXME: mulldo. 2, 3, 4
+# CHECK: mulhd 2, 3, 4                   # encoding: [0x7c,0x43,0x20,0x92]
+         mulhd 2, 3, 4
+# CHECK: mulhd. 2, 3, 4                  # encoding: [0x7c,0x43,0x20,0x93]
+         mulhd. 2, 3, 4
+# CHECK: mulhdu 2, 3, 4                  # encoding: [0x7c,0x43,0x20,0x12]
+         mulhdu 2, 3, 4
+# CHECK: mulhdu. 2, 3, 4                 # encoding: [0x7c,0x43,0x20,0x13]
+         mulhdu. 2, 3, 4
+
+# CHECK: divd 2, 3, 4                    # encoding: [0x7c,0x43,0x23,0xd2]
+         divd 2, 3, 4
+# CHECK: divd. 2, 3, 4                   # encoding: [0x7c,0x43,0x23,0xd3]
+         divd. 2, 3, 4
+# FIXME: divdo 2, 3, 4
+# FIXME: divdo. 2, 3, 4
+# CHECK: divdu 2, 3, 4                   # encoding: [0x7c,0x43,0x23,0x92]
+         divdu 2, 3, 4
+# CHECK: divdu. 2, 3, 4                  # encoding: [0x7c,0x43,0x23,0x93]
+         divdu. 2, 3, 4
+# FIXME: divduo 2, 3, 4
+# FIXME: divduo. 2, 3, 4
+# FIXME: divde 2, 3, 4
+# FIXME: divde. 2, 3, 4
+# FIXME: divdeo 2, 3, 4
+# FIXME: divdeo. 2, 3, 4
+# FIXME: divdeu 2, 3, 4
+# FIXME: divdeu. 2, 3, 4
+# FIXME: divdeuo 2, 3, 4
+# FIXME: divdeuo. 2, 3, 4
+
+# FIXME: Fixed-point compare instructions
+
+# FIXME: Fixed-point trap instructions
+
+# Fixed-point select
+
+# CHECK: isel 2, 3, 4, 5                 # encoding: [0x7c,0x43,0x21,0x5e]
+         isel 2, 3, 4, 5
+
+# Fixed-point logical instructions
+
+# CHECK: andi. 2, 3, 128                 # encoding: [0x70,0x62,0x00,0x80]
+         andi. 2, 3, 128
+# CHECK: andis. 2, 3, 128                # encoding: [0x74,0x62,0x00,0x80]
+         andis. 2, 3, 128
+# CHECK: ori 2, 3, 128                   # encoding: [0x60,0x62,0x00,0x80]
+         ori 2, 3, 128
+# CHECK: oris 2, 3, 128                  # encoding: [0x64,0x62,0x00,0x80]
+         oris 2, 3, 128
+# CHECK: xori 2, 3, 128                  # encoding: [0x68,0x62,0x00,0x80]
+         xori 2, 3, 128
+# CHECK: xoris 2, 3, 128                 # encoding: [0x6c,0x62,0x00,0x80]
+         xoris 2, 3, 128
+# CHECK: and 2, 3, 4                     # encoding: [0x7c,0x62,0x20,0x38]
+         and 2, 3, 4
+# CHECK: and. 2, 3, 4                    # encoding: [0x7c,0x62,0x20,0x39]
+         and. 2, 3, 4
+# CHECK: xor 2, 3, 4                     # encoding: [0x7c,0x62,0x22,0x78]
+         xor 2, 3, 4
+# CHECK: xor. 2, 3, 4                    # encoding: [0x7c,0x62,0x22,0x79]
+         xor. 2, 3, 4
+# CHECK: nand 2, 3, 4                    # encoding: [0x7c,0x62,0x23,0xb8]
+         nand 2, 3, 4
+# CHECK: nand. 2, 3, 4                   # encoding: [0x7c,0x62,0x23,0xb9]
+         nand. 2, 3, 4
+# CHECK: or 2, 3, 4                      # encoding: [0x7c,0x62,0x23,0x78]
+         or 2, 3, 4
+# CHECK: or. 2, 3, 4                     # encoding: [0x7c,0x62,0x23,0x79]
+         or. 2, 3, 4
+# CHECK: nor 2, 3, 4                     # encoding: [0x7c,0x62,0x20,0xf8]
+         nor 2, 3, 4
+# CHECK: nor. 2, 3, 4                    # encoding: [0x7c,0x62,0x20,0xf9]
+         nor. 2, 3, 4
+# CHECK: eqv 2, 3, 4                     # encoding: [0x7c,0x62,0x22,0x38]
+         eqv 2, 3, 4
+# CHECK: eqv. 2, 3, 4                    # encoding: [0x7c,0x62,0x22,0x39]
+         eqv. 2, 3, 4
+# CHECK: andc 2, 3, 4                    # encoding: [0x7c,0x62,0x20,0x78]
+         andc 2, 3, 4
+# CHECK: andc. 2, 3, 4                   # encoding: [0x7c,0x62,0x20,0x79]
+         andc. 2, 3, 4
+# CHECK: orc 2, 3, 4                     # encoding: [0x7c,0x62,0x23,0x38]
+         orc 2, 3, 4
+# CHECK: orc. 2, 3, 4                    # encoding: [0x7c,0x62,0x23,0x39]
+         orc. 2, 3, 4
+
+# CHECK: extsb 2, 3                      # encoding: [0x7c,0x62,0x07,0x74]
+         extsb 2, 3
+# CHECK: extsb. 2, 3                     # encoding: [0x7c,0x62,0x07,0x75]
+         extsb. 2, 3
+# CHECK: extsh 2, 3                      # encoding: [0x7c,0x62,0x07,0x34]
+         extsh 2, 3
+# CHECK: extsh. 2, 3                     # encoding: [0x7c,0x62,0x07,0x35]
+         extsh. 2, 3
+
+# CHECK: cntlzw 2, 3                     # encoding: [0x7c,0x62,0x00,0x34]
+         cntlzw 2, 3
+# CHECK: cntlzw. 2, 3                    # encoding: [0x7c,0x62,0x00,0x35]
+         cntlzw. 2, 3
+# FIXME: cmpb 2, 3, 4
+# FIXME: popcntb 2, 3
+# CHECK: popcntw 2, 3                    # encoding: [0x7c,0x62,0x02,0xf4]
+         popcntw 2, 3
+# FIXME: prtyd 2, 3
+# FIXME: prtyw 2, 3
+
+# CHECK: extsw 2, 3                      # encoding: [0x7c,0x62,0x07,0xb4]
+         extsw 2, 3
+# CHECK: extsw. 2, 3                     # encoding: [0x7c,0x62,0x07,0xb5]
+         extsw. 2, 3
+
+# CHECK: cntlzd 2, 3                     # encoding: [0x7c,0x62,0x00,0x74]
+         cntlzd 2, 3
+# CHECK: cntlzd. 2, 3                    # encoding: [0x7c,0x62,0x00,0x75]
+         cntlzd. 2, 3
+# CHECK: popcntd 2, 3                    # encoding: [0x7c,0x62,0x03,0xf4]
+         popcntd 2, 3
+# FIXME: bpermd 2, 3, 4
+
+# Fixed-point rotate and shift instructions
+
+# CHECK: rlwinm 2, 3, 4, 5, 6            # encoding: [0x54,0x62,0x21,0x4c]
+         rlwinm 2, 3, 4, 5, 6
+# CHECK: rlwinm. 2, 3, 4, 5, 6           # encoding: [0x54,0x62,0x21,0x4d]
+         rlwinm. 2, 3, 4, 5, 6
+# CHECK: rlwnm 2, 3, 4, 5, 6             # encoding: [0x5c,0x62,0x21,0x4c]
+         rlwnm 2, 3, 4, 5, 6
+# CHECK: rlwnm. 2, 3, 4, 5, 6            # encoding: [0x5c,0x62,0x21,0x4d]
+         rlwnm. 2, 3, 4, 5, 6
+# CHECK: rlwimi 2, 3, 4, 5, 6            # encoding: [0x50,0x62,0x21,0x4c]
+         rlwimi 2, 3, 4, 5, 6
+# CHECK: rlwimi. 2, 3, 4, 5, 6           # encoding: [0x50,0x62,0x21,0x4d]
+         rlwimi. 2, 3, 4, 5, 6
+# CHECK: rldicl 2, 3, 4, 5               # encoding: [0x78,0x62,0x21,0x40]
+         rldicl 2, 3, 4, 5
+# CHECK: rldicl. 2, 3, 4, 5              # encoding: [0x78,0x62,0x21,0x41]
+         rldicl. 2, 3, 4, 5
+# CHECK: rldicr 2, 3, 4, 5               # encoding: [0x78,0x62,0x21,0x44]
+         rldicr 2, 3, 4, 5
+# CHECK: rldicr. 2, 3, 4, 5              # encoding: [0x78,0x62,0x21,0x45]
+         rldicr. 2, 3, 4, 5
+# FIXME: rldic 2, 3, 4, 5
+# FIXME: rldic. 2, 3, 4, 5
+# CHECK: rldcl 2, 3, 4, 5                # encoding: [0x78,0x62,0x21,0x50]
+         rldcl 2, 3, 4, 5
+# CHECK: rldcl. 2, 3, 4, 5               # encoding: [0x78,0x62,0x21,0x51]
+         rldcl. 2, 3, 4, 5
+# FIXME: rldcr 2, 3, 4, 5
+# FIXME: rldcr. 2, 3, 4, 5
+# CHECK: rldimi 2, 3, 4, 5               # encoding: [0x78,0x62,0x21,0x4c]
+         rldimi 2, 3, 4, 5
+# CHECK: rldimi. 2, 3, 4, 5              # encoding: [0x78,0x62,0x21,0x4d]
+         rldimi. 2, 3, 4, 5
+
+# CHECK: slw 2, 3, 4                     # encoding: [0x7c,0x62,0x20,0x30]
+         slw 2, 3, 4
+# CHECK: slw. 2, 3, 4                    # encoding: [0x7c,0x62,0x20,0x31]
+         slw. 2, 3, 4
+# CHECK: srw 2, 3, 4                     # encoding: [0x7c,0x62,0x24,0x30]
+         srw 2, 3, 4
+# CHECK: srw. 2, 3, 4                    # encoding: [0x7c,0x62,0x24,0x31]
+         srw. 2, 3, 4
+# CHECK: srawi 2, 3, 4                   # encoding: [0x7c,0x62,0x26,0x70]
+         srawi 2, 3, 4
+# CHECK: srawi. 2, 3, 4                  # encoding: [0x7c,0x62,0x26,0x71]
+         srawi. 2, 3, 4
+# CHECK: sraw 2, 3, 4                    # encoding: [0x7c,0x62,0x26,0x30]
+         sraw 2, 3, 4
+# CHECK: sraw. 2, 3, 4                   # encoding: [0x7c,0x62,0x26,0x31]
+         sraw. 2, 3, 4
+# CHECK: sld 2, 3, 4                     # encoding: [0x7c,0x62,0x20,0x36]
+         sld 2, 3, 4
+# CHECK: sld. 2, 3, 4                    # encoding: [0x7c,0x62,0x20,0x37]
+         sld. 2, 3, 4
+# CHECK: srd 2, 3, 4                     # encoding: [0x7c,0x62,0x24,0x36]
+         srd 2, 3, 4
+# CHECK: srd. 2, 3, 4                    # encoding: [0x7c,0x62,0x24,0x37]
+         srd. 2, 3, 4
+# CHECK: sradi 2, 3, 4                   # encoding: [0x7c,0x62,0x26,0x74]
+         sradi 2, 3, 4
+# CHECK: sradi. 2, 3, 4                  # encoding: [0x7c,0x62,0x26,0x75]
+         sradi. 2, 3, 4
+# CHECK: srad 2, 3, 4                    # encoding: [0x7c,0x62,0x26,0x34]
+         srad 2, 3, 4
+# CHECK: srad. 2, 3, 4                   # encoding: [0x7c,0x62,0x26,0x35]
+         srad. 2, 3, 4
+
+# FIXME: BCD assist instructions
+
+# Move to/from system register instructions
+
+# FIXME: mtspr 256, 2
+# FIXME: mfspr 2, 256
+# CHECK: mtcrf 16, 2                     # encoding: [0x7c,0x41,0x01,0x20]
+         mtcrf 16, 2
+# CHECK: mfcr 2                          # encoding: [0x7c,0x40,0x00,0x26]
+         mfcr 2
+# FIXME: mtocrf 16, 2
+# CHECK: mfocrf 16, 8                    # encoding: [0x7e,0x10,0x80,0x26]
+         mfocrf 16, 8
+# FIXME: mcrxr 2
+
diff --git a/test/MC/PowerPC/ppc64-errors.s b/test/MC/PowerPC/ppc64-errors.s
new file mode 100644
index 0000000..1da5753
--- /dev/null
+++ b/test/MC/PowerPC/ppc64-errors.s
@@ -0,0 +1,80 @@
+
+# RUN: not llvm-mc -triple powerpc64-unknown-unknown < %s 2> %t
+# RUN: FileCheck < %t %s
+
+# Register operands
+
+# CHECK: error: invalid operand for instruction
+# CHECK-NEXT: add 32, 32, 32
+              add 32, 32, 32
+
+# CHECK: error: invalid register name
+# CHECK-NEXT: add %r32, %r32, %r32
+              add %r32, %r32, %r32
+
+# Signed 16-bit immediate operands: values just outside [-32768, 32767]
+
+# CHECK: error: invalid operand for instruction
+# CHECK-NEXT: addi 1, 0, -32769
+              addi 1, 0, -32769
+
+# CHECK: error: invalid operand for instruction
+# CHECK-NEXT: addi 1, 0, 32768
+              addi 1, 0, 32768
+
+# Unsigned 16-bit immediate operands: values just outside [0, 65535]
+
+# CHECK: error: invalid operand for instruction
+# CHECK-NEXT: ori 1, 2, -1
+              ori 1, 2, -1
+
+# CHECK: error: invalid operand for instruction
+# CHECK-NEXT: ori 1, 2, 65536
+              ori 1, 2, 65536
+
+# D-Form memory operands: bad base register, displacement out of range, and (ld only) displacement 1, 2 or 3
+
+# CHECK: error: invalid register number
+# CHECK-NEXT: lwz 1, 0(32)
+              lwz 1, 0(32)
+
+# CHECK: error: invalid register name
+# CHECK-NEXT: lwz 1, 0(%r32)
+              lwz 1, 0(%r32)
+
+# CHECK: error: invalid operand for instruction
+# CHECK-NEXT: lwz 1, -32769(2)
+              lwz 1, -32769(2)
+
+# CHECK: error: invalid operand for instruction
+# CHECK-NEXT: lwz 1, 32768(2)
+              lwz 1, 32768(2)
+
+# CHECK: error: invalid register number
+# CHECK-NEXT: ld 1, 0(32)
+              ld 1, 0(32)
+
+# CHECK: error: invalid register name
+# CHECK-NEXT: ld 1, 0(%r32)
+              ld 1, 0(%r32)
+
+# CHECK: error: invalid operand for instruction
+# CHECK-NEXT: ld 1, 1(2)
+              ld 1, 1(2)
+
+# CHECK: error: invalid operand for instruction
+# CHECK-NEXT: ld 1, 2(2)
+              ld 1, 2(2)
+
+# CHECK: error: invalid operand for instruction
+# CHECK-NEXT: ld 1, 3(2)
+              ld 1, 3(2)
+
+# CHECK: error: invalid operand for instruction
+# CHECK-NEXT: ld 1, -32772(2)
+              ld 1, -32772(2)
+
+# CHECK: error: invalid operand for instruction
+# CHECK-NEXT: ld 1, 32768(2)
+              ld 1, 32768(2)
+
diff --git a/test/MC/PowerPC/ppc64-fixups.s b/test/MC/PowerPC/ppc64-fixups.s
new file mode 100644
index 0000000..1dcbca8
--- /dev/null
+++ b/test/MC/PowerPC/ppc64-fixups.s
@@ -0,0 +1,95 @@
+
+# RUN: llvm-mc -triple powerpc64-unknown-unknown --show-encoding %s | FileCheck %s
+
+# FIXME: .TOC.@tocbase
+
+# CHECK: li 3, target@l                  # encoding: [0x38,0x60,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target@l, kind: fixup_ppc_lo16
+         li 3, target@l
+
+# CHECK: addis 3, 3, target@ha           # encoding: [0x3c,0x63,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target@ha, kind: fixup_ppc_ha16
+         addis 3, 3, target@ha
+
+# CHECK: lis 3, target@ha                # encoding: [0x3c,0x60,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target@ha, kind: fixup_ppc_ha16
+         lis 3, target@ha
+
+# CHECK: addi 4, 3, target@l             # encoding: [0x38,0x83,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target@l, kind: fixup_ppc_lo16
+         addi 4, 3, target@l
+
+# CHECK: lwz 1, target@l(3)              # encoding: [0x80,0x23,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target@l, kind: fixup_ppc_lo16
+         lwz 1, target@l(3)
+
+# CHECK: ld 1, target@l(3)               # encoding: [0xe8,0x23,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target@l, kind: fixup_ppc_lo16_ds
+         ld 1, target@l(3)
+
+# CHECK: ld 1, target@toc(2)             # encoding: [0xe8,0x22,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target@toc, kind: fixup_ppc_lo16_ds
+         ld 1, target@toc(2)
+
+# CHECK: addis 3, 2, target@toc@ha       # encoding: [0x3c,0x62,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target@toc@ha, kind: fixup_ppc_ha16
+         addis 3, 2, target@toc@ha
+
+# CHECK: addi 4, 3, target@toc@l         # encoding: [0x38,0x83,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target@toc@l, kind: fixup_ppc_lo16
+         addi 4, 3, target@toc@l
+
+# CHECK: lwz 1, target@toc@l(3)          # encoding: [0x80,0x23,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target@toc@l, kind: fixup_ppc_lo16
+         lwz 1, target@toc@l(3)
+
+# CHECK: ld 1, target@toc@l(3)           # encoding: [0xe8,0x23,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target@toc@l, kind: fixup_ppc_lo16_ds
+         ld 1, target@toc@l(3)
+
+# FIXME: @tls
+
+
+# CHECK: addis 3, 2, target@tprel@ha     # encoding: [0x3c,0x62,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target@tprel@ha, kind: fixup_ppc_ha16
+         addis 3, 2, target@tprel@ha
+
+# CHECK: addi 3, 3, target@tprel@l       # encoding: [0x38,0x63,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target@tprel@l, kind: fixup_ppc_lo16
+         addi 3, 3, target@tprel@l
+
+# CHECK: addis 3, 2, target@dtprel@ha    # encoding: [0x3c,0x62,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target@dtprel@ha, kind: fixup_ppc_ha16
+         addis 3, 2, target@dtprel@ha
+
+# CHECK: addi 3, 3, target@dtprel@l      # encoding: [0x38,0x63,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target@dtprel@l, kind: fixup_ppc_lo16
+         addi 3, 3, target@dtprel@l
+
+
+# CHECK: addis 3, 2, target@got@tprel@ha # encoding: [0x3c,0x62,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target@got@tprel@ha, kind: fixup_ppc_ha16
+         addis 3, 2, target@got@tprel@ha
+
+# CHECK: ld 1, target@got@tprel@l(3)     # encoding: [0xe8,0x23,A,0bAAAAAA00]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target@got@tprel@l, kind: fixup_ppc_lo16_ds
+         ld 1, target@got@tprel@l(3)
+
+
+# CHECK: addis 3, 2, target@got@tlsgd@ha # encoding: [0x3c,0x62,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target@got@tlsgd@ha, kind: fixup_ppc_ha16
+         addis 3, 2, target@got@tlsgd@ha
+
+# CHECK: addi 3, 3, target@got@tlsgd@l   # encoding: [0x38,0x63,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target@got@tlsgd@l, kind: fixup_ppc_lo16
+         addi 3, 3, target@got@tlsgd@l
+
+
+# CHECK: addis 3, 2, target@got@tlsld@ha # encoding: [0x3c,0x62,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target@got@tlsld@ha, kind: fixup_ppc_ha16
+         addis 3, 2, target@got@tlsld@ha
+
+# CHECK: addi 3, 3, target@got@tlsld@l   # encoding: [0x38,0x63,A,A]
+# CHECK-NEXT:                            #   fixup A - offset: 0, value: target@got@tlsld@l, kind: fixup_ppc_lo16
+         addi 3, 3, target@got@tlsld@l
+
diff --git a/test/MC/PowerPC/ppc64-initial-cfa.ll b/test/MC/PowerPC/ppc64-initial-cfa.ll
index 16236c9..23a7738 100644
--- a/test/MC/PowerPC/ppc64-initial-cfa.ll
+++ b/test/MC/PowerPC/ppc64-initial-cfa.ll
@@ -1,7 +1,7 @@
 ; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -filetype=obj -relocation-model=static %s -o - | \
-; RUN: elf-dump --dump-section-data | FileCheck %s -check-prefix=STATIC
+; RUN: llvm-readobj -s -sr -sd | FileCheck %s -check-prefix=STATIC
 ; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -filetype=obj -relocation-model=pic %s -o - | \
-; RUN: elf-dump --dump-section-data | FileCheck %s -check-prefix=PIC
+; RUN: llvm-readobj -s -sr -sd | FileCheck %s -check-prefix=PIC
 
 ; FIXME: this file should be in .s form, change when asm parser is available.
 
@@ -10,69 +10,75 @@ entry:
   ret void
 }
 
-; STATIC:      ('sh_name', 0x{{.*}}) # '.eh_frame'
-; STATIC-NEXT: ('sh_type', 0x00000001)
-; STATIC-NEXT: ('sh_flags', 0x0000000000000002)
-; STATIC-NEXT: ('sh_addr', 0x{{.*}})
-; STATIC-NEXT: ('sh_offset', 0x{{.*}})
-; STATIC-NEXT: ('sh_size', 0x0000000000000028)
-; STATIC-NEXT: ('sh_link', 0x00000000)
-; STATIC-NEXT: ('sh_info', 0x00000000)
-; STATIC-NEXT: ('sh_addralign', 0x0000000000000008)
-; STATIC-NEXT: ('sh_entsize', 0x0000000000000000)
-; STATIC-NEXT: ('_section_data', '00000010 00000000 017a5200 01784101 1b0c0100 00000010 00000018 00000000 00000010 00000000')
+; STATIC:      Section {
+; STATIC:        Name: .eh_frame
+; STATIC-NEXT:   Type: SHT_PROGBITS
+; STATIC-NEXT:   Flags [ (0x2)
+; STATIC-NEXT:     SHF_ALLOC
+; STATIC-NEXT:   ]
+; STATIC-NEXT:   Address:
+; STATIC-NEXT:   Offset:
+; STATIC-NEXT:   Size: 40
+; STATIC-NEXT:   Link: 0
+; STATIC-NEXT:   Info: 0
+; STATIC-NEXT:   AddressAlignment: 8
+; STATIC-NEXT:   EntrySize: 0
+; STATIC-NEXT:   Relocations [
+; STATIC-NEXT:     0x1C R_PPC64_REL32 .text 0x0
+; STATIC-NEXT:   ]
+; STATIC-NEXT:   SectionData (
+; STATIC-NEXT:     0000: 00000010 00000000 017A5200 01784101
+; STATIC-NEXT:     0010: 1B0C0100 00000010 00000018 00000000
+; STATIC-NEXT:     0020: 00000010 00000000
+; STATIC-NEXT:   )
+; STATIC-NEXT: }
 
-; STATIC:      ('sh_name', 0x{{.*}}) # '.rela.eh_frame'
-; STATIC-NEXT: ('sh_type', 0x00000004)
-; STATIC-NEXT: ('sh_flags', 0x0000000000000000)
-; STATIC-NEXT: ('sh_addr', 0x{{.*}})
-; STATIC-NEXT: ('sh_offset', 0x{{.*}})
-; STATIC-NEXT: ('sh_size', 0x0000000000000018)
-; STATIC-NEXT: ('sh_link', 0x{{.*}})
-; STATIC-NEXT: ('sh_info', 0x{{.*}})
-; STATIC-NEXT: ('sh_addralign', 0x0000000000000008)
-; STATIC-NEXT: ('sh_entsize', 0x0000000000000018)
-; STATIC-NEXT: ('_relocations', [
+; STATIC:      Section {
+; STATIC:        Name: .rela.eh_frame
+; STATIC-NEXT:   Type: SHT_RELA
+; STATIC-NEXT:   Flags [ (0x0)
+; STATIC-NEXT:   ]
+; STATIC-NEXT:   Address:
+; STATIC-NEXT:   Offset:
+; STATIC-NEXT:   Size: 24
+; STATIC-NEXT:   Link:
+; STATIC-NEXT:   Info:
+; STATIC-NEXT:   AddressAlignment: 8
+; STATIC-NEXT:   EntrySize: 24
 
-; Static build should create R_PPC64_REL32 relocations
-; STATIC-NEXT:  # Relocation 0
-; STATIC-NEXT:  (('r_offset', 0x000000000000001c)
-; STATIC-NEXT:   ('r_sym', 0x{{.*}})
-; STATIC-NEXT:   ('r_type', 0x0000001a)
-; STATIC-NEXT:   ('r_addend', 0x0000000000000000)
-; STATIC-NEXT:  ),
-; STATIC-NEXT: ])
 
+; PIC:      Section {
+; PIC:        Name: .eh_frame
+; PIC-NEXT:   Type: SHT_PROGBITS
+; PIC-NEXT:   Flags [ (0x2)
+; PIC-NEXT:     SHF_ALLOC
+; PIC-NEXT:   ]
+; PIC-NEXT:   Address:
+; PIC-NEXT:   Offset:
+; PIC-NEXT:   Size: 40
+; PIC-NEXT:   Link: 0
+; PIC-NEXT:   Info: 0
+; PIC-NEXT:   AddressAlignment: 8
+; PIC-NEXT:   EntrySize: 0
+; PIC-NEXT:   Relocations [
+; PIC-NEXT:     0x1C R_PPC64_REL32 .text 0x0
+; PIC-NEXT:   ]
+; PIC-NEXT:   SectionData (
+; PIC-NEXT:     0000: 00000010 00000000 017A5200 01784101
+; PIC-NEXT:     0010: 1B0C0100 00000010 00000018 00000000
+; PIC-NEXT:     0020: 00000010 00000000
+; PIC-NEXT:   )
+; PIC-NEXT: }
 
-; PIC:      ('sh_name', 0x{{.*}}) # '.eh_frame'
-; PIC-NEXT: ('sh_type', 0x00000001)
-; PIC-NEXT: ('sh_flags', 0x0000000000000002)
-; PIC-NEXT: ('sh_addr', 0x{{.*}})
-; PIC-NEXT: ('sh_offset', 0x{{.*}})
-; PIC-NEXT: ('sh_size', 0x0000000000000028)
-; PIC-NEXT: ('sh_link', 0x00000000)
-; PIC-NEXT: ('sh_info', 0x00000000)
-; PIC-NEXT: ('sh_addralign', 0x0000000000000008)
-; PIC-NEXT: ('sh_entsize', 0x0000000000000000)
-; PIC-NEXT: ('_section_data', '00000010 00000000 017a5200 01784101 1b0c0100 00000010 00000018 00000000 00000010 00000000')
-
-; PIC:      ('sh_name', 0x{{.*}}) # '.rela.eh_frame'
-; PIC-NEXT: ('sh_type', 0x00000004)
-; PIC-NEXT: ('sh_flags', 0x0000000000000000)
-; PIC-NEXT: ('sh_addr', 0x{{.*}})
-; PIC-NEXT: ('sh_offset', 0x{{.*}})
-; PIC-NEXT: ('sh_size', 0x0000000000000018)
-; PIC-NEXT: ('sh_link', 0x{{.*}})
-; PIC-NEXT: ('sh_info', 0x{{.*}})
-; PIC-NEXT: ('sh_addralign', 0x0000000000000008)
-; PIC-NEXT: ('sh_entsize', 0x0000000000000018)
-; PIC-NEXT: ('_relocations', [
-
-; PIC build should create R_PPC64_REL32 relocations
-; PIC-NEXT:  # Relocation 0
-; PIC-NEXT:  (('r_offset', 0x000000000000001c)
-; PIC-NEXT:   ('r_sym', 0x{{.*}})
-; PIC-NEXT:   ('r_type', 0x0000001a)
-; PIC-NEXT:   ('r_addend', 0x0000000000000000)
-; PIC-NEXT:  ),
-; PIC-NEXT: ])
+; PIC:      Section {
+; PIC:        Name: .rela.eh_frame
+; PIC-NEXT:   Type: SHT_RELA
+; PIC-NEXT:   Flags [ (0x0)
+; PIC-NEXT:   ]
+; PIC-NEXT:   Address:
+; PIC-NEXT:   Offset:
+; PIC-NEXT:   Size: 24
+; PIC-NEXT:   Link:
+; PIC-NEXT:   Info:
+; PIC-NEXT:   AddressAlignment: 8
+; PIC-NEXT:   EntrySize: 24
diff --git a/test/MC/PowerPC/ppc64-operands.s b/test/MC/PowerPC/ppc64-operands.s
new file mode 100644
index 0000000..de5fcb0
--- /dev/null
+++ b/test/MC/PowerPC/ppc64-operands.s
@@ -0,0 +1,87 @@
+
+# RUN: llvm-mc -triple powerpc64-unknown-unknown --show-encoding %s | FileCheck %s
+
+# Register operands
+
+# CHECK: add 1, 2, 3                     # encoding: [0x7c,0x22,0x1a,0x14]
+         add 1, 2, 3
+
+# CHECK: add 1, 2, 3                     # encoding: [0x7c,0x22,0x1a,0x14]
+         add %r1, %r2, %r3
+
+# CHECK: add 0, 0, 0                     # encoding: [0x7c,0x00,0x02,0x14]
+         add 0, 0, 0
+
+# CHECK: add 31, 31, 31                  # encoding: [0x7f,0xff,0xfa,0x14]
+         add 31, 31, 31
+
+# CHECK: addi 1, 0, 0                    # encoding: [0x38,0x20,0x00,0x00]
+         addi 1, 0, 0
+
+# CHECK: addi 1, 0, 0                    # encoding: [0x38,0x20,0x00,0x00]
+         addi 1, %r0, 0
+
+# Signed 16-bit immediate operands
+
+# CHECK: addi 1, 2, 0                    # encoding: [0x38,0x22,0x00,0x00]
+         addi 1, 2, 0
+
+# CHECK: addi 1, 0, -32768               # encoding: [0x38,0x20,0x80,0x00]
+         addi 1, 0, -32768
+
+# CHECK: addi 1, 0, 32767                # encoding: [0x38,0x20,0x7f,0xff]
+         addi 1, 0, 32767
+
+# Unsigned 16-bit immediate operands
+
+# CHECK: ori 1, 2, 0                     # encoding: [0x60,0x41,0x00,0x00]
+         ori 1, 2, 0
+
+# CHECK: ori 1, 2, 65535                 # encoding: [0x60,0x41,0xff,0xff]
+         ori 1, 2, 65535
+
+# D-Form memory operands
+
+# CHECK: lwz 1, 0(0)                     # encoding: [0x80,0x20,0x00,0x00]
+         lwz 1, 0(0)
+
+# CHECK: lwz 1, 0(0)                     # encoding: [0x80,0x20,0x00,0x00]
+         lwz 1, 0(%r0)
+
+# CHECK: lwz 1, 0(31)                    # encoding: [0x80,0x3f,0x00,0x00]
+         lwz 1, 0(31)
+
+# CHECK: lwz 1, 0(31)                    # encoding: [0x80,0x3f,0x00,0x00]
+         lwz 1, 0(%r31)
+
+# CHECK: lwz 1, -32768(2)                # encoding: [0x80,0x22,0x80,0x00]
+         lwz 1, -32768(2)
+
+# CHECK: lwz 1, 32767(2)                 # encoding: [0x80,0x22,0x7f,0xff]
+         lwz 1, 32767(2)
+
+
+# CHECK: ld 1, 0(0)                      # encoding: [0xe8,0x20,0x00,0x00]
+         ld 1, 0(0)
+
+# CHECK: ld 1, 0(0)                      # encoding: [0xe8,0x20,0x00,0x00]
+         ld 1, 0(%r0)
+
+# CHECK: ld 1, 0(31)                     # encoding: [0xe8,0x3f,0x00,0x00]
+         ld 1, 0(31)
+
+# CHECK: ld 1, 0(31)                     # encoding: [0xe8,0x3f,0x00,0x00]
+         ld 1, 0(%r31)
+
+# CHECK: ld 1, -32768(2)                 # encoding: [0xe8,0x22,0x80,0x00]
+         ld 1, -32768(2)
+
+# CHECK: ld 1, 32764(2)                  # encoding: [0xe8,0x22,0x7f,0xfc]
+         ld 1, 32764(2)
+
+# CHECK: ld 1, 4(2)                      # encoding: [0xe8,0x22,0x00,0x04]
+         ld 1, 4(2)
+
+# CHECK: ld 1, -4(2)                     # encoding: [0xe8,0x22,0xff,0xfc]
+         ld 1, -4(2)
+
diff --git a/test/MC/PowerPC/ppc64-relocs-01.ll b/test/MC/PowerPC/ppc64-relocs-01.ll
index 4919e91..ac8d303 100644
--- a/test/MC/PowerPC/ppc64-relocs-01.ll
+++ b/test/MC/PowerPC/ppc64-relocs-01.ll
@@ -1,6 +1,6 @@
 ;; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -O3 -code-model=small  \
 ;; RUN:  -filetype=obj %s -o - | \
-;; RUN: elf-dump --dump-section-data | FileCheck %s
+;; RUN: llvm-readobj -r | FileCheck %s
 
 ;; FIXME: this file need to be in .s form, change when asm parse is done.
 
@@ -22,45 +22,28 @@ entry:
   ret double %add
 }
 
+;; CHECK:      Relocations [
+
 ;; The relocations in .rela.text are the 'number64' load using a
 ;; R_PPC64_TOC16_DS against the .toc and the 'sin' external function
 ;; address using a R_PPC64_REL24
-;; CHECK:       '.rela.text'
-;; CHECK:       Relocation 0
-;; CHECK-NEXT:  'r_offset',
-;; CHECK-NEXT:  'r_sym', 0x00000006
-;; CHECK-NEXT:  'r_type', 0x0000003f
-;; CHECK:       Relocation 1
-;; CHECK-NEXT:  'r_offset',
-;; CHECK-NEXT:  'r_sym', 0x0000000a
-;; CHECK-NEXT:  'r_type', 0x0000000a
+;; CHECK:        Section ({{[0-9]+}}) .text {
+;; CHECK-NEXT:     0x{{[0-9A-F]+}} R_PPC64_TOC16_DS .toc
+;; CHECK-NEXT:     0x{{[0-9A-F]+}} R_PPC64_REL24    sin
+;; CHECK-NEXT:   }
 
 ;; The .opd entry for the 'access_int64' function creates 2 relocations:
 ;; 1. A R_PPC64_ADDR64 against the .text segment plus addend (the function
 ;    address itself);
 ;; 2. And a R_PPC64_TOC against no symbol (the linker will replace for the
 ;;    module's TOC base).
-;; CHECK:       '.rela.opd'
-;; CHECK:       Relocation 0
-;; CHECK-NEXT:  'r_offset',
-;; CHECK-NEXT:  'r_sym', 0x00000002
-;; CHECK-NEXT:  'r_type', 0x00000026
-;; CHECK:       Relocation 1
-;; CHECK-NEXT:  'r_offset',
-;; CHECK-NEXT:  'r_sym', 0x00000000
-;; CHECK-NEXT:  'r_type', 0x00000033
+;; CHECK:        Section ({{[0-9]+}}) .opd {
+;; CHECK-NEXT:     0x{{[0-9A-F]+}} R_PPC64_ADDR64 .text 0x0
+;; CHECK-NEXT:     0x{{[0-9A-F]+}} R_PPC64_TOC - 0x0
 
 ;; Finally the TOC creates the relocation for the 'number64'.
-;; CHECK:       '.rela.toc'
-;; CHECK:       Relocation 0
-;; CHECK-NEXT:  'r_offset',
-;; CHECK-NEXT:  'r_sym', 0x00000008
-;; CHECK-NEXT:  'r_type', 0x00000026
+;; CHECK:        Section ({{[0-9]+}}) .toc {
+;; CHECK-NEXT:     0x{{[0-9A-F]+}} R_PPC64_ADDR64 number64 0x0
+;; CHECK-NEXT:   }
 
-;; Check if the relocation references are for correct symbols.
-;; CHECK:       Symbol 7
-;; CHECK-NEXT:  'access_int64'
-;; CHECK:       Symbol 8
-;; CHECK-NEXT:  'number64'
-;; CHECK:       Symbol 10
-;; CHECK-NEXT:  'sin'
+;; CHECK-NEXT: ]
diff --git a/test/MC/PowerPC/ppc64-tls-relocs-01.ll b/test/MC/PowerPC/ppc64-tls-relocs-01.ll
index 5e37311..4e901e8 100644
--- a/test/MC/PowerPC/ppc64-tls-relocs-01.ll
+++ b/test/MC/PowerPC/ppc64-tls-relocs-01.ll
@@ -1,5 +1,5 @@
 ;; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -filetype=obj %s -o - | \
-;; RUN: elf-dump --dump-section-data | FileCheck %s
+;; RUN: llvm-readobj -r | FileCheck %s
 
 ;; FIXME: this file should be in .s form, change when asm parser is available.
 
@@ -12,17 +12,8 @@ entry:
 
 ;; Check for a pair of R_PPC64_TPREL16_HA / R_PPC64_TPREL16_LO relocs
 ;; against the thread-local symbol 't'.
-;; CHECK:       '.rela.text'
-;; CHECK:       Relocation 0
-;; CHECK-NEXT:  'r_offset',
-;; CHECK-NEXT:  'r_sym', 0x00000008
-;; CHECK-NEXT:  'r_type', 0x00000048
-;; CHECK:       Relocation 1
-;; CHECK-NEXT:  'r_offset',
-;; CHECK-NEXT:  'r_sym', 0x00000008
-;; CHECK-NEXT:  'r_type', 0x00000046
-
-;; Check that we got the correct symbol.
-;; CHECK:       Symbol 8
-;; CHECK-NEXT:  't'
-
+;; CHECK:      Relocations [
+;; CHECK:        Section ({{[0-9]+}}) .text {
+;; CHECK-NEXT:     0x{{[0-9A-F]+}} R_PPC64_TPREL16_HA t
+;; CHECK-NEXT:     0x{{[0-9A-F]+}} R_PPC64_TPREL16_LO t
+;; CHECK-NEXT:   }
diff --git a/test/MC/SystemZ/insn-a-01.s b/test/MC/SystemZ/insn-a-01.s
new file mode 100644
index 0000000..7bb94b3
--- /dev/null
+++ b/test/MC/SystemZ/insn-a-01.s
@@ -0,0 +1,17 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: a	%r0, 0                  # encoding: [0x5a,0x00,0x00,0x00]
+#CHECK: a	%r0, 4095               # encoding: [0x5a,0x00,0x0f,0xff]
+#CHECK: a	%r0, 0(%r1)             # encoding: [0x5a,0x00,0x10,0x00]
+#CHECK: a	%r0, 0(%r15)            # encoding: [0x5a,0x00,0xf0,0x00]
+#CHECK: a	%r0, 4095(%r1,%r15)     # encoding: [0x5a,0x01,0xff,0xff]
+#CHECK: a	%r0, 4095(%r15,%r1)     # encoding: [0x5a,0x0f,0x1f,0xff]
+#CHECK: a	%r15, 0                 # encoding: [0x5a,0xf0,0x00,0x00]
+
+	a	%r0, 0
+	a	%r0, 4095
+	a	%r0, 0(%r1)
+	a	%r0, 0(%r15)
+	a	%r0, 4095(%r1,%r15)
+	a	%r0, 4095(%r15,%r1)
+	a	%r15, 0
diff --git a/test/MC/SystemZ/insn-a-02.s b/test/MC/SystemZ/insn-a-02.s
new file mode 100644
index 0000000..9cc967e
--- /dev/null
+++ b/test/MC/SystemZ/insn-a-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: a	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: a	%r0, 4096
+
+	a	%r0, -1
+	a	%r0, 4096
diff --git a/test/MC/SystemZ/insn-adb-01.s b/test/MC/SystemZ/insn-adb-01.s
new file mode 100644
index 0000000..b54be60
--- /dev/null
+++ b/test/MC/SystemZ/insn-adb-01.s
@@ -0,0 +1,17 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: adb	%f0, 0                  # encoding: [0xed,0x00,0x00,0x00,0x00,0x1a]
+#CHECK: adb	%f0, 4095               # encoding: [0xed,0x00,0x0f,0xff,0x00,0x1a]
+#CHECK: adb	%f0, 0(%r1)             # encoding: [0xed,0x00,0x10,0x00,0x00,0x1a]
+#CHECK: adb	%f0, 0(%r15)            # encoding: [0xed,0x00,0xf0,0x00,0x00,0x1a]
+#CHECK: adb	%f0, 4095(%r1,%r15)     # encoding: [0xed,0x01,0xff,0xff,0x00,0x1a]
+#CHECK: adb	%f0, 4095(%r15,%r1)     # encoding: [0xed,0x0f,0x1f,0xff,0x00,0x1a]
+#CHECK: adb	%f15, 0                 # encoding: [0xed,0xf0,0x00,0x00,0x00,0x1a]
+
+	adb	%f0, 0
+	adb	%f0, 4095
+	adb	%f0, 0(%r1)
+	adb	%f0, 0(%r15)
+	adb	%f0, 4095(%r1,%r15)
+	adb	%f0, 4095(%r15,%r1)
+	adb	%f15, 0
diff --git a/test/MC/SystemZ/insn-adb-02.s b/test/MC/SystemZ/insn-adb-02.s
new file mode 100644
index 0000000..ff97a51
--- /dev/null
+++ b/test/MC/SystemZ/insn-adb-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: adb	%f0, -1
+#CHECK: error: invalid operand
+#CHECK: adb	%f0, 4096
+
+	adb	%f0, -1
+	adb	%f0, 4096
diff --git a/test/MC/SystemZ/insn-adbr-01.s b/test/MC/SystemZ/insn-adbr-01.s
new file mode 100644
index 0000000..05724d2
--- /dev/null
+++ b/test/MC/SystemZ/insn-adbr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: adbr	%f0, %f0                # encoding: [0xb3,0x1a,0x00,0x00]
+#CHECK: adbr	%f0, %f15               # encoding: [0xb3,0x1a,0x00,0x0f]
+#CHECK: adbr	%f7, %f8                # encoding: [0xb3,0x1a,0x00,0x78]
+#CHECK: adbr	%f15, %f0               # encoding: [0xb3,0x1a,0x00,0xf0]
+
+	adbr	%f0, %f0
+	adbr	%f0, %f15
+	adbr	%f7, %f8
+	adbr	%f15, %f0
diff --git a/test/MC/SystemZ/insn-aeb-01.s b/test/MC/SystemZ/insn-aeb-01.s
new file mode 100644
index 0000000..b4268e5
--- /dev/null
+++ b/test/MC/SystemZ/insn-aeb-01.s
@@ -0,0 +1,17 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: aeb	%f0, 0                  # encoding: [0xed,0x00,0x00,0x00,0x00,0x0a]
+#CHECK: aeb	%f0, 4095               # encoding: [0xed,0x00,0x0f,0xff,0x00,0x0a]
+#CHECK: aeb	%f0, 0(%r1)             # encoding: [0xed,0x00,0x10,0x00,0x00,0x0a]
+#CHECK: aeb	%f0, 0(%r15)            # encoding: [0xed,0x00,0xf0,0x00,0x00,0x0a]
+#CHECK: aeb	%f0, 4095(%r1,%r15)     # encoding: [0xed,0x01,0xff,0xff,0x00,0x0a]
+#CHECK: aeb	%f0, 4095(%r15,%r1)     # encoding: [0xed,0x0f,0x1f,0xff,0x00,0x0a]
+#CHECK: aeb	%f15, 0                 # encoding: [0xed,0xf0,0x00,0x00,0x00,0x0a]
+
+	aeb	%f0, 0
+	aeb	%f0, 4095
+	aeb	%f0, 0(%r1)
+	aeb	%f0, 0(%r15)
+	aeb	%f0, 4095(%r1,%r15)
+	aeb	%f0, 4095(%r15,%r1)
+	aeb	%f15, 0
diff --git a/test/MC/SystemZ/insn-aeb-02.s b/test/MC/SystemZ/insn-aeb-02.s
new file mode 100644
index 0000000..4fade8e
--- /dev/null
+++ b/test/MC/SystemZ/insn-aeb-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: aeb	%f0, -1
+#CHECK: error: invalid operand
+#CHECK: aeb	%f0, 4096
+
+	aeb	%f0, -1
+	aeb	%f0, 4096
diff --git a/test/MC/SystemZ/insn-aebr-01.s b/test/MC/SystemZ/insn-aebr-01.s
new file mode 100644
index 0000000..2147627
--- /dev/null
+++ b/test/MC/SystemZ/insn-aebr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: aebr	%f0, %f0                # encoding: [0xb3,0x0a,0x00,0x00]
+#CHECK: aebr	%f0, %f15               # encoding: [0xb3,0x0a,0x00,0x0f]
+#CHECK: aebr	%f7, %f8                # encoding: [0xb3,0x0a,0x00,0x78]
+#CHECK: aebr	%f15, %f0               # encoding: [0xb3,0x0a,0x00,0xf0]
+
+	aebr	%f0, %f0
+	aebr	%f0, %f15
+	aebr	%f7, %f8
+	aebr	%f15, %f0
diff --git a/test/MC/SystemZ/insn-afi-01.s b/test/MC/SystemZ/insn-afi-01.s
new file mode 100644
index 0000000..f9a9118
--- /dev/null
+++ b/test/MC/SystemZ/insn-afi-01.s
@@ -0,0 +1,15 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: afi	%r0, -2147483648        # encoding: [0xc2,0x09,0x80,0x00,0x00,0x00]
+#CHECK: afi	%r0, -1                 # encoding: [0xc2,0x09,0xff,0xff,0xff,0xff]
+#CHECK: afi	%r0, 0                  # encoding: [0xc2,0x09,0x00,0x00,0x00,0x00]
+#CHECK: afi	%r0, 1                  # encoding: [0xc2,0x09,0x00,0x00,0x00,0x01]
+#CHECK: afi	%r0, 2147483647         # encoding: [0xc2,0x09,0x7f,0xff,0xff,0xff]
+#CHECK: afi	%r15, 0                 # encoding: [0xc2,0xf9,0x00,0x00,0x00,0x00]
+
+	afi	%r0, -1 << 31
+	afi	%r0, -1
+	afi	%r0, 0
+	afi	%r0, 1
+	afi	%r0, (1 << 31) - 1
+	afi	%r15, 0
diff --git a/test/MC/SystemZ/insn-afi-02.s b/test/MC/SystemZ/insn-afi-02.s
new file mode 100644
index 0000000..f848e19
--- /dev/null
+++ b/test/MC/SystemZ/insn-afi-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: afi	%r0, (-1 << 31) - 1
+#CHECK: error: invalid operand
+#CHECK: afi	%r0, (1 << 31)
+
+	afi	%r0, (-1 << 31) - 1
+	afi	%r0, (1 << 31)
diff --git a/test/MC/SystemZ/insn-ag-01.s b/test/MC/SystemZ/insn-ag-01.s
new file mode 100644
index 0000000..63029d7
--- /dev/null
+++ b/test/MC/SystemZ/insn-ag-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: ag	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x08]
+#CHECK: ag	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x08]
+#CHECK: ag	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x08]
+#CHECK: ag	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x08]
+#CHECK: ag	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x08]
+#CHECK: ag	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x08]
+#CHECK: ag	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x08]
+#CHECK: ag	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x08]
+#CHECK: ag	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x08]
+#CHECK: ag	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x08]
+
+	ag	%r0, -524288
+	ag	%r0, -1
+	ag	%r0, 0
+	ag	%r0, 1
+	ag	%r0, 524287
+	ag	%r0, 0(%r1)
+	ag	%r0, 0(%r15)
+	ag	%r0, 524287(%r1,%r15)
+	ag	%r0, 524287(%r15,%r1)
+	ag	%r15, 0
diff --git a/test/MC/SystemZ/insn-ag-02.s b/test/MC/SystemZ/insn-ag-02.s
new file mode 100644
index 0000000..59694cd
--- /dev/null
+++ b/test/MC/SystemZ/insn-ag-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: ag	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: ag	%r0, 524288
+
+	ag	%r0, -524289
+	ag	%r0, 524288
diff --git a/test/MC/SystemZ/insn-agf-01.s b/test/MC/SystemZ/insn-agf-01.s
new file mode 100644
index 0000000..40a9858
--- /dev/null
+++ b/test/MC/SystemZ/insn-agf-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: agf	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x18]
+#CHECK: agf	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x18]
+#CHECK: agf	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x18]
+#CHECK: agf	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x18]
+#CHECK: agf	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x18]
+#CHECK: agf	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x18]
+#CHECK: agf	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x18]
+#CHECK: agf	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x18]
+#CHECK: agf	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x18]
+#CHECK: agf	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x18]
+
+	agf	%r0, -524288
+	agf	%r0, -1
+	agf	%r0, 0
+	agf	%r0, 1
+	agf	%r0, 524287
+	agf	%r0, 0(%r1)
+	agf	%r0, 0(%r15)
+	agf	%r0, 524287(%r1,%r15)
+	agf	%r0, 524287(%r15,%r1)
+	agf	%r15, 0
diff --git a/test/MC/SystemZ/insn-agf-02.s b/test/MC/SystemZ/insn-agf-02.s
new file mode 100644
index 0000000..dee31dc
--- /dev/null
+++ b/test/MC/SystemZ/insn-agf-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: agf	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: agf	%r0, 524288
+
+	agf	%r0, -524289
+	agf	%r0, 524288
diff --git a/test/MC/SystemZ/insn-agfi-01.s b/test/MC/SystemZ/insn-agfi-01.s
new file mode 100644
index 0000000..a64721d
--- /dev/null
+++ b/test/MC/SystemZ/insn-agfi-01.s
@@ -0,0 +1,15 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: agfi	%r0, -2147483648        # encoding: [0xc2,0x08,0x80,0x00,0x00,0x00]
+#CHECK: agfi	%r0, -1                 # encoding: [0xc2,0x08,0xff,0xff,0xff,0xff]
+#CHECK: agfi	%r0, 0                  # encoding: [0xc2,0x08,0x00,0x00,0x00,0x00]
+#CHECK: agfi	%r0, 1                  # encoding: [0xc2,0x08,0x00,0x00,0x00,0x01]
+#CHECK: agfi	%r0, 2147483647         # encoding: [0xc2,0x08,0x7f,0xff,0xff,0xff]
+#CHECK: agfi	%r15, 0                 # encoding: [0xc2,0xf8,0x00,0x00,0x00,0x00]
+
+	agfi	%r0, -1 << 31
+	agfi	%r0, -1
+	agfi	%r0, 0
+	agfi	%r0, 1
+	agfi	%r0, (1 << 31) - 1
+	agfi	%r15, 0
diff --git a/test/MC/SystemZ/insn-agfi-02.s b/test/MC/SystemZ/insn-agfi-02.s
new file mode 100644
index 0000000..1db3eaa
--- /dev/null
+++ b/test/MC/SystemZ/insn-agfi-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: agfi	%r0, (-1 << 31) - 1
+#CHECK: error: invalid operand
+#CHECK: agfi	%r0, (1 << 31)
+
+	agfi	%r0, (-1 << 31) - 1
+	agfi	%r0, (1 << 31)
diff --git a/test/MC/SystemZ/insn-agfr-01.s b/test/MC/SystemZ/insn-agfr-01.s
new file mode 100644
index 0000000..cd17db9
--- /dev/null
+++ b/test/MC/SystemZ/insn-agfr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: agfr	%r0, %r0                # encoding: [0xb9,0x18,0x00,0x00]
+#CHECK: agfr	%r0, %r15               # encoding: [0xb9,0x18,0x00,0x0f]
+#CHECK: agfr	%r15, %r0               # encoding: [0xb9,0x18,0x00,0xf0]
+#CHECK: agfr	%r7, %r8                # encoding: [0xb9,0x18,0x00,0x78]
+
+	agfr	%r0,%r0
+	agfr	%r0,%r15
+	agfr	%r15,%r0
+	agfr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-aghi-01.s b/test/MC/SystemZ/insn-aghi-01.s
new file mode 100644
index 0000000..cd77c35
--- /dev/null
+++ b/test/MC/SystemZ/insn-aghi-01.s
@@ -0,0 +1,15 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: aghi	%r0, -32768             # encoding: [0xa7,0x0b,0x80,0x00]
+#CHECK: aghi	%r0, -1                 # encoding: [0xa7,0x0b,0xff,0xff]
+#CHECK: aghi	%r0, 0                  # encoding: [0xa7,0x0b,0x00,0x00]
+#CHECK: aghi	%r0, 1                  # encoding: [0xa7,0x0b,0x00,0x01]
+#CHECK: aghi	%r0, 32767              # encoding: [0xa7,0x0b,0x7f,0xff]
+#CHECK: aghi	%r15, 0                 # encoding: [0xa7,0xfb,0x00,0x00]
+
+	aghi	%r0, -32768
+	aghi	%r0, -1
+	aghi	%r0, 0
+	aghi	%r0, 1
+	aghi	%r0, 32767
+	aghi	%r15, 0
diff --git a/test/MC/SystemZ/insn-aghi-02.s b/test/MC/SystemZ/insn-aghi-02.s
new file mode 100644
index 0000000..e2657c6
--- /dev/null
+++ b/test/MC/SystemZ/insn-aghi-02.s
@@ -0,0 +1,13 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: aghi	%r0, -32769
+#CHECK: error: invalid operand
+#CHECK: aghi	%r0, 32768
+#CHECK: error: invalid operand
+#CHECK: aghi	%r0, foo
+
+	aghi	%r0, -32769
+	aghi	%r0, 32768
+	aghi	%r0, foo
diff --git a/test/MC/SystemZ/insn-agr-01.s b/test/MC/SystemZ/insn-agr-01.s
new file mode 100644
index 0000000..a84ff3d
--- /dev/null
+++ b/test/MC/SystemZ/insn-agr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: agr	%r0, %r0                # encoding: [0xb9,0x08,0x00,0x00]
+#CHECK: agr	%r0, %r15               # encoding: [0xb9,0x08,0x00,0x0f]
+#CHECK: agr	%r15, %r0               # encoding: [0xb9,0x08,0x00,0xf0]
+#CHECK: agr	%r7, %r8                # encoding: [0xb9,0x08,0x00,0x78]
+
+	agr	%r0,%r0
+	agr	%r0,%r15
+	agr	%r15,%r0
+	agr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-agsi-01.s b/test/MC/SystemZ/insn-agsi-01.s
new file mode 100644
index 0000000..9b2fe4b
--- /dev/null
+++ b/test/MC/SystemZ/insn-agsi-01.s
@@ -0,0 +1,29 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: agsi	-524288, 0              # encoding: [0xeb,0x00,0x00,0x00,0x80,0x7a]
+#CHECK: agsi	-1, 0                   # encoding: [0xeb,0x00,0x0f,0xff,0xff,0x7a]
+#CHECK: agsi	0, 0                    # encoding: [0xeb,0x00,0x00,0x00,0x00,0x7a]
+#CHECK: agsi	1, 0                    # encoding: [0xeb,0x00,0x00,0x01,0x00,0x7a]
+#CHECK: agsi	524287, 0               # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0x7a]
+#CHECK: agsi	0, -128                 # encoding: [0xeb,0x80,0x00,0x00,0x00,0x7a]
+#CHECK: agsi	0, -1                   # encoding: [0xeb,0xff,0x00,0x00,0x00,0x7a]
+#CHECK: agsi	0, 1                    # encoding: [0xeb,0x01,0x00,0x00,0x00,0x7a]
+#CHECK: agsi	0, 127                  # encoding: [0xeb,0x7f,0x00,0x00,0x00,0x7a]
+#CHECK: agsi	0(%r1), 42              # encoding: [0xeb,0x2a,0x10,0x00,0x00,0x7a]
+#CHECK: agsi	0(%r15), 42             # encoding: [0xeb,0x2a,0xf0,0x00,0x00,0x7a]
+#CHECK: agsi	524287(%r1), 42         # encoding: [0xeb,0x2a,0x1f,0xff,0x7f,0x7a]
+#CHECK: agsi	524287(%r15), 42        # encoding: [0xeb,0x2a,0xff,0xff,0x7f,0x7a]
+
+	agsi	-524288, 0
+	agsi	-1, 0
+	agsi	0, 0
+	agsi	1, 0
+	agsi	524287, 0
+	agsi	0, -128
+	agsi	0, -1
+	agsi	0, 1
+	agsi	0, 127
+	agsi	0(%r1), 42
+	agsi	0(%r15), 42
+	agsi	524287(%r1), 42
+	agsi	524287(%r15), 42
diff --git a/test/MC/SystemZ/insn-agsi-02.s b/test/MC/SystemZ/insn-agsi-02.s
new file mode 100644
index 0000000..a4b3d9a
--- /dev/null
+++ b/test/MC/SystemZ/insn-agsi-02.s
@@ -0,0 +1,19 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: agsi	-524289, 0
+#CHECK: error: invalid operand
+#CHECK: agsi	524288, 0
+#CHECK: error: invalid use of indexed addressing
+#CHECK: agsi	0(%r1,%r2), 0
+#CHECK: error: invalid operand
+#CHECK: agsi	0, -129
+#CHECK: error: invalid operand
+#CHECK: agsi	0, 128
+
+	agsi	-524289, 0
+	agsi	524288, 0
+	agsi	0(%r1,%r2), 0
+	agsi	0, -129
+	agsi	0, 128
diff --git a/test/MC/SystemZ/insn-ah-01.s b/test/MC/SystemZ/insn-ah-01.s
new file mode 100644
index 0000000..35012f0
--- /dev/null
+++ b/test/MC/SystemZ/insn-ah-01.s
@@ -0,0 +1,17 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: ah	%r0, 0                  # encoding: [0x4a,0x00,0x00,0x00]
+#CHECK: ah	%r0, 4095               # encoding: [0x4a,0x00,0x0f,0xff]
+#CHECK: ah	%r0, 0(%r1)             # encoding: [0x4a,0x00,0x10,0x00]
+#CHECK: ah	%r0, 0(%r15)            # encoding: [0x4a,0x00,0xf0,0x00]
+#CHECK: ah	%r0, 4095(%r1,%r15)     # encoding: [0x4a,0x01,0xff,0xff]
+#CHECK: ah	%r0, 4095(%r15,%r1)     # encoding: [0x4a,0x0f,0x1f,0xff]
+#CHECK: ah	%r15, 0                 # encoding: [0x4a,0xf0,0x00,0x00]
+
+	ah	%r0, 0
+	ah	%r0, 4095
+	ah	%r0, 0(%r1)
+	ah	%r0, 0(%r15)
+	ah	%r0, 4095(%r1,%r15)
+	ah	%r0, 4095(%r15,%r1)
+	ah	%r15, 0
diff --git a/test/MC/SystemZ/insn-ah-02.s b/test/MC/SystemZ/insn-ah-02.s
new file mode 100644
index 0000000..1a20cd7
--- /dev/null
+++ b/test/MC/SystemZ/insn-ah-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: ah	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: ah	%r0, 4096
+
+	ah	%r0, -1
+	ah	%r0, 4096
diff --git a/test/MC/SystemZ/insn-ahi-01.s b/test/MC/SystemZ/insn-ahi-01.s
new file mode 100644
index 0000000..e0a5fb3
--- /dev/null
+++ b/test/MC/SystemZ/insn-ahi-01.s
@@ -0,0 +1,15 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: ahi	%r0, -32768             # encoding: [0xa7,0x0a,0x80,0x00]
+#CHECK: ahi	%r0, -1                 # encoding: [0xa7,0x0a,0xff,0xff]
+#CHECK: ahi	%r0, 0                  # encoding: [0xa7,0x0a,0x00,0x00]
+#CHECK: ahi	%r0, 1                  # encoding: [0xa7,0x0a,0x00,0x01]
+#CHECK: ahi	%r0, 32767              # encoding: [0xa7,0x0a,0x7f,0xff]
+#CHECK: ahi	%r15, 0                 # encoding: [0xa7,0xfa,0x00,0x00]
+
+	ahi	%r0, -32768
+	ahi	%r0, -1
+	ahi	%r0, 0
+	ahi	%r0, 1
+	ahi	%r0, 32767
+	ahi	%r15, 0
diff --git a/test/MC/SystemZ/insn-ahi-02.s b/test/MC/SystemZ/insn-ahi-02.s
new file mode 100644
index 0000000..d41e2da
--- /dev/null
+++ b/test/MC/SystemZ/insn-ahi-02.s
@@ -0,0 +1,13 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: ahi	%r0, -32769
+#CHECK: error: invalid operand
+#CHECK: ahi	%r0, 32768
+#CHECK: error: invalid operand
+#CHECK: ahi	%r0, foo
+
+	ahi	%r0, -32769
+	ahi	%r0, 32768
+	ahi	%r0, foo
diff --git a/test/MC/SystemZ/insn-ahy-01.s b/test/MC/SystemZ/insn-ahy-01.s
new file mode 100644
index 0000000..ff25dc5
--- /dev/null
+++ b/test/MC/SystemZ/insn-ahy-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: ahy	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x7a]
+#CHECK: ahy	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x7a]
+#CHECK: ahy	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x7a]
+#CHECK: ahy	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x7a]
+#CHECK: ahy	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x7a]
+#CHECK: ahy	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x7a]
+#CHECK: ahy	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x7a]
+#CHECK: ahy	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x7a]
+#CHECK: ahy	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x7a]
+#CHECK: ahy	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x7a]
+
+	ahy	%r0, -524288
+	ahy	%r0, -1
+	ahy	%r0, 0
+	ahy	%r0, 1
+	ahy	%r0, 524287
+	ahy	%r0, 0(%r1)
+	ahy	%r0, 0(%r15)
+	ahy	%r0, 524287(%r1,%r15)
+	ahy	%r0, 524287(%r15,%r1)
+	ahy	%r15, 0
diff --git a/test/MC/SystemZ/insn-ahy-02.s b/test/MC/SystemZ/insn-ahy-02.s
new file mode 100644
index 0000000..e725e14
--- /dev/null
+++ b/test/MC/SystemZ/insn-ahy-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: ahy	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: ahy	%r0, 524288
+
+	ahy	%r0, -524289
+	ahy	%r0, 524288
diff --git a/test/MC/SystemZ/insn-al-01.s b/test/MC/SystemZ/insn-al-01.s
new file mode 100644
index 0000000..1efc33f
--- /dev/null
+++ b/test/MC/SystemZ/insn-al-01.s
@@ -0,0 +1,17 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: al	%r0, 0                  # encoding: [0x5e,0x00,0x00,0x00]
+#CHECK: al	%r0, 4095               # encoding: [0x5e,0x00,0x0f,0xff]
+#CHECK: al	%r0, 0(%r1)             # encoding: [0x5e,0x00,0x10,0x00]
+#CHECK: al	%r0, 0(%r15)            # encoding: [0x5e,0x00,0xf0,0x00]
+#CHECK: al	%r0, 4095(%r1,%r15)     # encoding: [0x5e,0x01,0xff,0xff]
+#CHECK: al	%r0, 4095(%r15,%r1)     # encoding: [0x5e,0x0f,0x1f,0xff]
+#CHECK: al	%r15, 0                 # encoding: [0x5e,0xf0,0x00,0x00]
+
+	al	%r0, 0
+	al	%r0, 4095
+	al	%r0, 0(%r1)
+	al	%r0, 0(%r15)
+	al	%r0, 4095(%r1,%r15)
+	al	%r0, 4095(%r15,%r1)
+	al	%r15, 0
diff --git a/test/MC/SystemZ/insn-al-02.s b/test/MC/SystemZ/insn-al-02.s
new file mode 100644
index 0000000..39b1b06
--- /dev/null
+++ b/test/MC/SystemZ/insn-al-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: al	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: al	%r0, 4096
+
+	al	%r0, -1
+	al	%r0, 4096
diff --git a/test/MC/SystemZ/insn-alc-01.s b/test/MC/SystemZ/insn-alc-01.s
new file mode 100644
index 0000000..5f8be6a
--- /dev/null
+++ b/test/MC/SystemZ/insn-alc-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: alc	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x98]
+#CHECK: alc	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x98]
+#CHECK: alc	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x98]
+#CHECK: alc	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x98]
+#CHECK: alc	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x98]
+#CHECK: alc	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x98]
+#CHECK: alc	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x98]
+#CHECK: alc	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x98]
+#CHECK: alc	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x98]
+#CHECK: alc	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x98]
+
+	alc	%r0, -524288
+	alc	%r0, -1
+	alc	%r0, 0
+	alc	%r0, 1
+	alc	%r0, 524287
+	alc	%r0, 0(%r1)
+	alc	%r0, 0(%r15)
+	alc	%r0, 524287(%r1,%r15)
+	alc	%r0, 524287(%r15,%r1)
+	alc	%r15, 0
diff --git a/test/MC/SystemZ/insn-alc-02.s b/test/MC/SystemZ/insn-alc-02.s
new file mode 100644
index 0000000..9c082f2
--- /dev/null
+++ b/test/MC/SystemZ/insn-alc-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: alc	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: alc	%r0, 524288
+
+	alc	%r0, -524289
+	alc	%r0, 524288
diff --git a/test/MC/SystemZ/insn-alcg-01.s b/test/MC/SystemZ/insn-alcg-01.s
new file mode 100644
index 0000000..c05207e
--- /dev/null
+++ b/test/MC/SystemZ/insn-alcg-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: alcg	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x88]
+#CHECK: alcg	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x88]
+#CHECK: alcg	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x88]
+#CHECK: alcg	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x88]
+#CHECK: alcg	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x88]
+#CHECK: alcg	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x88]
+#CHECK: alcg	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x88]
+#CHECK: alcg	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x88]
+#CHECK: alcg	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x88]
+#CHECK: alcg	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x88]
+
+	alcg	%r0, -524288
+	alcg	%r0, -1
+	alcg	%r0, 0
+	alcg	%r0, 1
+	alcg	%r0, 524287
+	alcg	%r0, 0(%r1)
+	alcg	%r0, 0(%r15)
+	alcg	%r0, 524287(%r1,%r15)
+	alcg	%r0, 524287(%r15,%r1)
+	alcg	%r15, 0
diff --git a/test/MC/SystemZ/insn-alcg-02.s b/test/MC/SystemZ/insn-alcg-02.s
new file mode 100644
index 0000000..3dab6dd
--- /dev/null
+++ b/test/MC/SystemZ/insn-alcg-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: alcg	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: alcg	%r0, 524288
+
+	alcg	%r0, -524289
+	alcg	%r0, 524288
diff --git a/test/MC/SystemZ/insn-alcgr-01.s b/test/MC/SystemZ/insn-alcgr-01.s
new file mode 100644
index 0000000..c9f3ce2
--- /dev/null
+++ b/test/MC/SystemZ/insn-alcgr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: alcgr	%r0, %r0                # encoding: [0xb9,0x88,0x00,0x00]
+#CHECK: alcgr	%r0, %r15               # encoding: [0xb9,0x88,0x00,0x0f]
+#CHECK: alcgr	%r15, %r0               # encoding: [0xb9,0x88,0x00,0xf0]
+#CHECK: alcgr	%r7, %r8                # encoding: [0xb9,0x88,0x00,0x78]
+
+	alcgr	%r0,%r0
+	alcgr	%r0,%r15
+	alcgr	%r15,%r0
+	alcgr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-alcr-01.s b/test/MC/SystemZ/insn-alcr-01.s
new file mode 100644
index 0000000..7369224
--- /dev/null
+++ b/test/MC/SystemZ/insn-alcr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: alcr	%r0, %r0                # encoding: [0xb9,0x98,0x00,0x00]
+#CHECK: alcr	%r0, %r15               # encoding: [0xb9,0x98,0x00,0x0f]
+#CHECK: alcr	%r15, %r0               # encoding: [0xb9,0x98,0x00,0xf0]
+#CHECK: alcr	%r7, %r8                # encoding: [0xb9,0x98,0x00,0x78]
+
+	alcr	%r0,%r0
+	alcr	%r0,%r15
+	alcr	%r15,%r0
+	alcr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-alfi-01.s b/test/MC/SystemZ/insn-alfi-01.s
new file mode 100644
index 0000000..332a74f
--- /dev/null
+++ b/test/MC/SystemZ/insn-alfi-01.s
@@ -0,0 +1,9 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: alfi	%r0, 0                  # encoding: [0xc2,0x0b,0x00,0x00,0x00,0x00]
+#CHECK: alfi	%r0, 4294967295         # encoding: [0xc2,0x0b,0xff,0xff,0xff,0xff]
+#CHECK: alfi	%r15, 0                 # encoding: [0xc2,0xfb,0x00,0x00,0x00,0x00]
+
+	alfi	%r0, 0
+	alfi	%r0, (1 << 32) - 1
+	alfi	%r15, 0
diff --git a/test/MC/SystemZ/insn-alfi-02.s b/test/MC/SystemZ/insn-alfi-02.s
new file mode 100644
index 0000000..a5d3894
--- /dev/null
+++ b/test/MC/SystemZ/insn-alfi-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: alfi	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: alfi	%r0, (1 << 32)
+
+	alfi	%r0, -1
+	alfi	%r0, (1 << 32)
diff --git a/test/MC/SystemZ/insn-alg-01.s b/test/MC/SystemZ/insn-alg-01.s
new file mode 100644
index 0000000..6df084c
--- /dev/null
+++ b/test/MC/SystemZ/insn-alg-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: alg	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x0a]
+#CHECK: alg	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x0a]
+#CHECK: alg	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x0a]
+#CHECK: alg	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x0a]
+#CHECK: alg	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x0a]
+#CHECK: alg	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x0a]
+#CHECK: alg	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x0a]
+#CHECK: alg	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x0a]
+#CHECK: alg	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x0a]
+#CHECK: alg	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x0a]
+
+	alg	%r0, -524288
+	alg	%r0, -1
+	alg	%r0, 0
+	alg	%r0, 1
+	alg	%r0, 524287
+	alg	%r0, 0(%r1)
+	alg	%r0, 0(%r15)
+	alg	%r0, 524287(%r1,%r15)
+	alg	%r0, 524287(%r15,%r1)
+	alg	%r15, 0
diff --git a/test/MC/SystemZ/insn-alg-02.s b/test/MC/SystemZ/insn-alg-02.s
new file mode 100644
index 0000000..407d73d
--- /dev/null
+++ b/test/MC/SystemZ/insn-alg-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: alg	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: alg	%r0, 524288
+
+	alg	%r0, -524289
+	alg	%r0, 524288
diff --git a/test/MC/SystemZ/insn-algf-01.s b/test/MC/SystemZ/insn-algf-01.s
new file mode 100644
index 0000000..751b590
--- /dev/null
+++ b/test/MC/SystemZ/insn-algf-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: algf	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x1a]
+#CHECK: algf	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x1a]
+#CHECK: algf	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x1a]
+#CHECK: algf	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x1a]
+#CHECK: algf	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x1a]
+#CHECK: algf	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x1a]
+#CHECK: algf	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x1a]
+#CHECK: algf	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x1a]
+#CHECK: algf	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x1a]
+#CHECK: algf	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x1a]
+
+	algf	%r0, -524288
+	algf	%r0, -1
+	algf	%r0, 0
+	algf	%r0, 1
+	algf	%r0, 524287
+	algf	%r0, 0(%r1)
+	algf	%r0, 0(%r15)
+	algf	%r0, 524287(%r1,%r15)
+	algf	%r0, 524287(%r15,%r1)
+	algf	%r15, 0
diff --git a/test/MC/SystemZ/insn-algf-02.s b/test/MC/SystemZ/insn-algf-02.s
new file mode 100644
index 0000000..64ef1c9
--- /dev/null
+++ b/test/MC/SystemZ/insn-algf-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: algf	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: algf	%r0, 524288
+
+	algf	%r0, -524289
+	algf	%r0, 524288
diff --git a/test/MC/SystemZ/insn-algfi-01.s b/test/MC/SystemZ/insn-algfi-01.s
new file mode 100644
index 0000000..b6ccb33
--- /dev/null
+++ b/test/MC/SystemZ/insn-algfi-01.s
@@ -0,0 +1,9 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: algfi	%r0, 0                  # encoding: [0xc2,0x0a,0x00,0x00,0x00,0x00]
+#CHECK: algfi	%r0, 4294967295         # encoding: [0xc2,0x0a,0xff,0xff,0xff,0xff]
+#CHECK: algfi	%r15, 0                 # encoding: [0xc2,0xfa,0x00,0x00,0x00,0x00]
+
+	algfi	%r0, 0
+	algfi	%r0, (1 << 32) - 1
+	algfi	%r15, 0
diff --git a/test/MC/SystemZ/insn-algfi-02.s b/test/MC/SystemZ/insn-algfi-02.s
new file mode 100644
index 0000000..a5ed4b0
--- /dev/null
+++ b/test/MC/SystemZ/insn-algfi-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: algfi	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: algfi	%r0, (1 << 32)
+
+	algfi	%r0, -1
+	algfi	%r0, (1 << 32)
diff --git a/test/MC/SystemZ/insn-algfr-01.s b/test/MC/SystemZ/insn-algfr-01.s
new file mode 100644
index 0000000..3ccb692
--- /dev/null
+++ b/test/MC/SystemZ/insn-algfr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: algfr	%r0, %r0                # encoding: [0xb9,0x1a,0x00,0x00]
+#CHECK: algfr	%r0, %r15               # encoding: [0xb9,0x1a,0x00,0x0f]
+#CHECK: algfr	%r15, %r0               # encoding: [0xb9,0x1a,0x00,0xf0]
+#CHECK: algfr	%r7, %r8                # encoding: [0xb9,0x1a,0x00,0x78]
+
+	algfr	%r0,%r0
+	algfr	%r0,%r15
+	algfr	%r15,%r0
+	algfr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-algr-01.s b/test/MC/SystemZ/insn-algr-01.s
new file mode 100644
index 0000000..c3758ee
--- /dev/null
+++ b/test/MC/SystemZ/insn-algr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: algr	%r0, %r0                # encoding: [0xb9,0x0a,0x00,0x00]
+#CHECK: algr	%r0, %r15               # encoding: [0xb9,0x0a,0x00,0x0f]
+#CHECK: algr	%r15, %r0               # encoding: [0xb9,0x0a,0x00,0xf0]
+#CHECK: algr	%r7, %r8                # encoding: [0xb9,0x0a,0x00,0x78]
+
+	algr	%r0,%r0
+	algr	%r0,%r15
+	algr	%r15,%r0
+	algr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-alr-01.s b/test/MC/SystemZ/insn-alr-01.s
new file mode 100644
index 0000000..e85173e
--- /dev/null
+++ b/test/MC/SystemZ/insn-alr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: alr	%r0, %r0                # encoding: [0x1e,0x00]
+#CHECK: alr	%r0, %r15               # encoding: [0x1e,0x0f]
+#CHECK: alr	%r15, %r0               # encoding: [0x1e,0xf0]
+#CHECK: alr	%r7, %r8                # encoding: [0x1e,0x78]
+
+	alr	%r0,%r0
+	alr	%r0,%r15
+	alr	%r15,%r0
+	alr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-aly-01.s b/test/MC/SystemZ/insn-aly-01.s
new file mode 100644
index 0000000..94afb07
--- /dev/null
+++ b/test/MC/SystemZ/insn-aly-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: aly	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x5e]
+#CHECK: aly	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x5e]
+#CHECK: aly	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x5e]
+#CHECK: aly	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x5e]
+#CHECK: aly	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x5e]
+#CHECK: aly	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x5e]
+#CHECK: aly	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x5e]
+#CHECK: aly	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x5e]
+#CHECK: aly	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x5e]
+#CHECK: aly	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x5e]
+
+	aly	%r0, -524288
+	aly	%r0, -1
+	aly	%r0, 0
+	aly	%r0, 1
+	aly	%r0, 524287
+	aly	%r0, 0(%r1)
+	aly	%r0, 0(%r15)
+	aly	%r0, 524287(%r1,%r15)
+	aly	%r0, 524287(%r15,%r1)
+	aly	%r15, 0
diff --git a/test/MC/SystemZ/insn-aly-02.s b/test/MC/SystemZ/insn-aly-02.s
new file mode 100644
index 0000000..01c6f3d
--- /dev/null
+++ b/test/MC/SystemZ/insn-aly-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: aly	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: aly	%r0, 524288
+
+	aly	%r0, -524289
+	aly	%r0, 524288
diff --git a/test/MC/SystemZ/insn-ar-01.s b/test/MC/SystemZ/insn-ar-01.s
new file mode 100644
index 0000000..7cd627a
--- /dev/null
+++ b/test/MC/SystemZ/insn-ar-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: ar	%r0, %r0                # encoding: [0x1a,0x00]
+#CHECK: ar	%r0, %r15               # encoding: [0x1a,0x0f]
+#CHECK: ar	%r15, %r0               # encoding: [0x1a,0xf0]
+#CHECK: ar	%r7, %r8                # encoding: [0x1a,0x78]
+
+	ar	%r0,%r0
+	ar	%r0,%r15
+	ar	%r15,%r0
+	ar	%r7,%r8
diff --git a/test/MC/SystemZ/insn-asi-01.s b/test/MC/SystemZ/insn-asi-01.s
new file mode 100644
index 0000000..7a1d241
--- /dev/null
+++ b/test/MC/SystemZ/insn-asi-01.s
@@ -0,0 +1,29 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: asi	-524288, 0              # encoding: [0xeb,0x00,0x00,0x00,0x80,0x6a]
+#CHECK: asi	-1, 0                   # encoding: [0xeb,0x00,0x0f,0xff,0xff,0x6a]
+#CHECK: asi	0, 0                    # encoding: [0xeb,0x00,0x00,0x00,0x00,0x6a]
+#CHECK: asi	1, 0                    # encoding: [0xeb,0x00,0x00,0x01,0x00,0x6a]
+#CHECK: asi	524287, 0               # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0x6a]
+#CHECK: asi	0, -128                 # encoding: [0xeb,0x80,0x00,0x00,0x00,0x6a]
+#CHECK: asi	0, -1                   # encoding: [0xeb,0xff,0x00,0x00,0x00,0x6a]
+#CHECK: asi	0, 1                    # encoding: [0xeb,0x01,0x00,0x00,0x00,0x6a]
+#CHECK: asi	0, 127                  # encoding: [0xeb,0x7f,0x00,0x00,0x00,0x6a]
+#CHECK: asi	0(%r1), 42              # encoding: [0xeb,0x2a,0x10,0x00,0x00,0x6a]
+#CHECK: asi	0(%r15), 42             # encoding: [0xeb,0x2a,0xf0,0x00,0x00,0x6a]
+#CHECK: asi	524287(%r1), 42         # encoding: [0xeb,0x2a,0x1f,0xff,0x7f,0x6a]
+#CHECK: asi	524287(%r15), 42        # encoding: [0xeb,0x2a,0xff,0xff,0x7f,0x6a]
+
+	asi	-524288, 0
+	asi	-1, 0
+	asi	0, 0
+	asi	1, 0
+	asi	524287, 0
+	asi	0, -128
+	asi	0, -1
+	asi	0, 1
+	asi	0, 127
+	asi	0(%r1), 42
+	asi	0(%r15), 42
+	asi	524287(%r1), 42
+	asi	524287(%r15), 42
diff --git a/test/MC/SystemZ/insn-asi-02.s b/test/MC/SystemZ/insn-asi-02.s
new file mode 100644
index 0000000..3c09f90
--- /dev/null
+++ b/test/MC/SystemZ/insn-asi-02.s
@@ -0,0 +1,19 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: asi	-524289, 0
+#CHECK: error: invalid operand
+#CHECK: asi	524288, 0
+#CHECK: error: invalid use of indexed addressing
+#CHECK: asi	0(%r1,%r2), 0
+#CHECK: error: invalid operand
+#CHECK: asi	0, -129
+#CHECK: error: invalid operand
+#CHECK: asi	0, 128
+
+	asi	-524289, 0
+	asi	524288, 0
+	asi	0(%r1,%r2), 0
+	asi	0, -129
+	asi	0, 128
diff --git a/test/MC/SystemZ/insn-axbr-01.s b/test/MC/SystemZ/insn-axbr-01.s
new file mode 100644
index 0000000..cb592ef
--- /dev/null
+++ b/test/MC/SystemZ/insn-axbr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: axbr	%f0, %f0                # encoding: [0xb3,0x4a,0x00,0x00]
+#CHECK: axbr	%f0, %f13               # encoding: [0xb3,0x4a,0x00,0x0d]
+#CHECK: axbr	%f8, %f8                # encoding: [0xb3,0x4a,0x00,0x88]
+#CHECK: axbr	%f13, %f0               # encoding: [0xb3,0x4a,0x00,0xd0]
+
+	axbr	%f0, %f0
+	axbr	%f0, %f13
+	axbr	%f8, %f8
+	axbr	%f13, %f0
diff --git a/test/MC/SystemZ/insn-axbr-02.s b/test/MC/SystemZ/insn-axbr-02.s
new file mode 100644
index 0000000..307664d
--- /dev/null
+++ b/test/MC/SystemZ/insn-axbr-02.s
@@ -0,0 +1,17 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid register
+#CHECK: axbr	%f0, %f2
+#CHECK: error: invalid register
+#CHECK: axbr	%f0, %f14
+#CHECK: error: invalid register
+#CHECK: axbr	%f2, %f0
+#CHECK: error: invalid register
+#CHECK: axbr	%f14, %f0
+
+	axbr	%f0, %f2
+	axbr	%f0, %f14
+	axbr	%f2, %f0
+	axbr	%f14, %f0
+
diff --git a/test/MC/SystemZ/insn-ay-01.s b/test/MC/SystemZ/insn-ay-01.s
new file mode 100644
index 0000000..3b65c9a
--- /dev/null
+++ b/test/MC/SystemZ/insn-ay-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: ay	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x5a]
+#CHECK: ay	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x5a]
+#CHECK: ay	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x5a]
+#CHECK: ay	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x5a]
+#CHECK: ay	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x5a]
+#CHECK: ay	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x5a]
+#CHECK: ay	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x5a]
+#CHECK: ay	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x5a]
+#CHECK: ay	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x5a]
+#CHECK: ay	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x5a]
+
+	ay	%r0, -524288
+	ay	%r0, -1
+	ay	%r0, 0
+	ay	%r0, 1
+	ay	%r0, 524287
+	ay	%r0, 0(%r1)
+	ay	%r0, 0(%r15)
+	ay	%r0, 524287(%r1,%r15)
+	ay	%r0, 524287(%r15,%r1)
+	ay	%r15, 0
diff --git a/test/MC/SystemZ/insn-ay-02.s b/test/MC/SystemZ/insn-ay-02.s
new file mode 100644
index 0000000..09704dfb
--- /dev/null
+++ b/test/MC/SystemZ/insn-ay-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: ay	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: ay	%r0, 524288
+
+	ay	%r0, -524289
+	ay	%r0, 524288
diff --git a/test/MC/SystemZ/insn-basr-01.s b/test/MC/SystemZ/insn-basr-01.s
new file mode 100644
index 0000000..a66cee8
--- /dev/null
+++ b/test/MC/SystemZ/insn-basr-01.s
@@ -0,0 +1,12 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: basr	%r0, %r1                # encoding: [0x0d,0x01]
+#CHECK: basr	%r0, %r15               # encoding: [0x0d,0x0f]
+#CHECK: basr	%r14, %r9               # encoding: [0x0d,0xe9]
+#CHECK: basr	%r15, %r1               # encoding: [0x0d,0xf1]
+
+	basr	%r0,%r1
+	basr	%r0,%r15
+	basr	%r14,%r9
+	basr	%r15,%r1
+
diff --git a/test/MC/SystemZ/insn-br-01.s b/test/MC/SystemZ/insn-br-01.s
new file mode 100644
index 0000000..8e2f2aa
--- /dev/null
+++ b/test/MC/SystemZ/insn-br-01.s
@@ -0,0 +1,9 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: br	%r1                     # encoding: [0x07,0xf1]
+#CHECK: br	%r14                    # encoding: [0x07,0xfe]
+#CHECK: br	%r15                    # encoding: [0x07,0xff]
+
+	br	%r1
+	br	%r14
+	br	%r15
diff --git a/test/MC/SystemZ/insn-bras-01.s b/test/MC/SystemZ/insn-bras-01.s
new file mode 100644
index 0000000..89f7f77
--- /dev/null
+++ b/test/MC/SystemZ/insn-bras-01.s
@@ -0,0 +1,31 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: bras	%r0, foo                # encoding: [0xa7,0x05,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: bras	%r14, foo               # encoding: [0xa7,0xe5,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: bras	%r15, foo               # encoding: [0xa7,0xf5,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	bras	%r0,foo
+	bras	%r14,foo
+	bras	%r15,foo
+
+#CHECK: bras	%r0, bar+100                # encoding: [0xa7,0x05,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+#CHECK: bras	%r14, bar+100               # encoding: [0xa7,0xe5,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+#CHECK: bras	%r15, bar+100               # encoding: [0xa7,0xf5,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	bras	%r0,bar+100
+	bras	%r14,bar+100
+	bras	%r15,bar+100
+
+#CHECK: bras	%r0, bar@PLT                # encoding: [0xa7,0x05,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+#CHECK: bras	%r14, bar@PLT               # encoding: [0xa7,0xe5,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+#CHECK: bras	%r15, bar@PLT               # encoding: [0xa7,0xf5,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	bras	%r0,bar@PLT
+	bras	%r14,bar@PLT
+	bras	%r15,bar@PLT
diff --git a/test/MC/SystemZ/insn-brasl-01.s b/test/MC/SystemZ/insn-brasl-01.s
new file mode 100644
index 0000000..86d0ced9
--- /dev/null
+++ b/test/MC/SystemZ/insn-brasl-01.s
@@ -0,0 +1,31 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: brasl	%r0, foo                # encoding: [0xc0,0x05,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: brasl	%r14, foo               # encoding: [0xc0,0xe5,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: brasl	%r15, foo               # encoding: [0xc0,0xf5,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+	brasl	%r0,foo
+	brasl	%r14,foo
+	brasl	%r15,foo
+
+#CHECK: brasl	%r0, bar+100                # encoding: [0xc0,0x05,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+#CHECK: brasl	%r14, bar+100               # encoding: [0xc0,0xe5,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+#CHECK: brasl	%r15, bar+100               # encoding: [0xc0,0xf5,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+	brasl	%r0,bar+100
+	brasl	%r14,bar+100
+	brasl	%r15,bar+100
+
+#CHECK: brasl	%r0, bar@PLT                # encoding: [0xc0,0x05,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC32DBL
+#CHECK: brasl	%r14, bar@PLT               # encoding: [0xc0,0xe5,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC32DBL
+#CHECK: brasl	%r15, bar@PLT               # encoding: [0xc0,0xf5,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC32DBL
+	brasl	%r0,bar@PLT
+	brasl	%r14,bar@PLT
+	brasl	%r15,bar@PLT
diff --git a/test/MC/SystemZ/insn-brc-01.s b/test/MC/SystemZ/insn-brc-01.s
new file mode 100644
index 0000000..a92ea45
--- /dev/null
+++ b/test/MC/SystemZ/insn-brc-01.s
@@ -0,0 +1,238 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: brc	0, foo                  # encoding: [0xa7,0x04,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	brc	0, foo
+
+#CHECK: brc	1, foo                  # encoding: [0xa7,0x14,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: jo	foo                     # encoding: [0xa7,0x14,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	brc	1, foo
+	jo	foo
+
+#CHECK: brc	2, foo                  # encoding: [0xa7,0x24,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: jh	foo                     # encoding: [0xa7,0x24,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	brc	2, foo
+	jh	foo
+
+#CHECK: brc	3, foo                  # encoding: [0xa7,0x34,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: jnle	foo                     # encoding: [0xa7,0x34,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	brc	3, foo
+	jnle	foo
+
+#CHECK: brc	4, foo                  # encoding: [0xa7,0x44,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: jl	foo                     # encoding: [0xa7,0x44,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	brc	4, foo
+	jl	foo
+
+#CHECK: brc	5, foo                  # encoding: [0xa7,0x54,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: jnhe	foo                     # encoding: [0xa7,0x54,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	brc	5, foo
+	jnhe	foo
+
+#CHECK: brc	6, foo                  # encoding: [0xa7,0x64,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: jlh	foo                     # encoding: [0xa7,0x64,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	brc	6, foo
+	jlh	foo
+
+#CHECK: brc	7, foo                  # encoding: [0xa7,0x74,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: jne	foo                     # encoding: [0xa7,0x74,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	brc	7, foo
+	jne	foo
+
+#CHECK: brc	8, foo                  # encoding: [0xa7,0x84,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: je	foo                     # encoding: [0xa7,0x84,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	brc	8, foo
+	je	foo
+
+#CHECK: brc	9, foo                  # encoding: [0xa7,0x94,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: jnlh	foo                     # encoding: [0xa7,0x94,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	brc	9, foo
+	jnlh	foo
+
+#CHECK: brc	10, foo                 # encoding: [0xa7,0xa4,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: jhe	foo                     # encoding: [0xa7,0xa4,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	brc	10, foo
+	jhe	foo
+
+#CHECK: brc	11, foo                 # encoding: [0xa7,0xb4,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: jnl	foo                     # encoding: [0xa7,0xb4,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	brc	11, foo
+	jnl	foo
+
+#CHECK: brc	12, foo                 # encoding: [0xa7,0xc4,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: jle	foo                     # encoding: [0xa7,0xc4,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	brc	12, foo
+	jle	foo
+
+#CHECK: brc	13, foo                 # encoding: [0xa7,0xd4,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: jnh	foo                     # encoding: [0xa7,0xd4,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	brc	13, foo
+	jnh	foo
+
+#CHECK: brc	14, foo                 # encoding: [0xa7,0xe4,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: jno	foo                     # encoding: [0xa7,0xe4,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	brc	14, foo
+	jno	foo
+
+#CHECK: brc	15, foo                 # encoding: [0xa7,0xf4,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+#CHECK: j	foo                     # encoding: [0xa7,0xf4,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC16DBL
+	brc	15, foo
+	j	foo
+
+#CHECK: brc	0, bar+100              # encoding: [0xa7,0x04,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	brc	0, bar+100
+
+#CHECK: jo	bar+100                 # encoding: [0xa7,0x14,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	jo	bar+100
+
+#CHECK: jh	bar+100                 # encoding: [0xa7,0x24,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	jh	bar+100
+
+#CHECK: jnle	bar+100                 # encoding: [0xa7,0x34,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	jnle	bar+100
+
+#CHECK: jl	bar+100                 # encoding: [0xa7,0x44,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	jl	bar+100
+
+#CHECK: jnhe	bar+100                 # encoding: [0xa7,0x54,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	jnhe	bar+100
+
+#CHECK: jlh	bar+100                 # encoding: [0xa7,0x64,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	jlh	bar+100
+
+#CHECK: jne	bar+100                 # encoding: [0xa7,0x74,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	jne	bar+100
+
+#CHECK: je	bar+100                 # encoding: [0xa7,0x84,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	je	bar+100
+
+#CHECK: jnlh	bar+100                 # encoding: [0xa7,0x94,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	jnlh	bar+100
+
+#CHECK: jhe	bar+100                 # encoding: [0xa7,0xa4,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	jhe	bar+100
+
+#CHECK: jnl	bar+100                 # encoding: [0xa7,0xb4,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	jnl	bar+100
+
+#CHECK: jle	bar+100                 # encoding: [0xa7,0xc4,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	jle	bar+100
+
+#CHECK: jnh	bar+100                 # encoding: [0xa7,0xd4,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	jnh	bar+100
+
+#CHECK: jno	bar+100                 # encoding: [0xa7,0xe4,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	jno	bar+100
+
+#CHECK: j	bar+100                 # encoding: [0xa7,0xf4,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC16DBL
+	j	bar+100
+
+#CHECK: brc	0, bar@PLT              # encoding: [0xa7,0x04,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	brc	0, bar@PLT
+
+#CHECK: jo	bar@PLT                 # encoding: [0xa7,0x14,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	jo	bar@PLT
+
+#CHECK: jh	bar@PLT                 # encoding: [0xa7,0x24,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	jh	bar@PLT
+
+#CHECK: jnle	bar@PLT                 # encoding: [0xa7,0x34,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	jnle	bar@PLT
+
+#CHECK: jl	bar@PLT                 # encoding: [0xa7,0x44,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	jl	bar@PLT
+
+#CHECK: jnhe	bar@PLT                 # encoding: [0xa7,0x54,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	jnhe	bar@PLT
+
+#CHECK: jlh	bar@PLT                 # encoding: [0xa7,0x64,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	jlh	bar@PLT
+
+#CHECK: jne	bar@PLT                 # encoding: [0xa7,0x74,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	jne	bar@PLT
+
+#CHECK: je	bar@PLT                 # encoding: [0xa7,0x84,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	je	bar@PLT
+
+#CHECK: jnlh	bar@PLT                 # encoding: [0xa7,0x94,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	jnlh	bar@PLT
+
+#CHECK: jhe	bar@PLT                 # encoding: [0xa7,0xa4,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	jhe	bar@PLT
+
+#CHECK: jnl	bar@PLT                 # encoding: [0xa7,0xb4,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	jnl	bar@PLT
+
+#CHECK: jle	bar@PLT                 # encoding: [0xa7,0xc4,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	jle	bar@PLT
+
+#CHECK: jnh	bar@PLT                 # encoding: [0xa7,0xd4,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	jnh	bar@PLT
+
+#CHECK: jno	bar@PLT                 # encoding: [0xa7,0xe4,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	jno	bar@PLT
+
+#CHECK: j	bar@PLT                 # encoding: [0xa7,0xf4,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC16DBL
+	j	bar@PLT
diff --git a/test/MC/SystemZ/insn-brc-02.s b/test/MC/SystemZ/insn-brc-02.s
new file mode 100644
index 0000000..941cc45
--- /dev/null
+++ b/test/MC/SystemZ/insn-brc-02.s
@@ -0,0 +1,13 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: brc	foo, bar
+#CHECK: error: invalid operand
+#CHECK: brc	-1, bar
+#CHECK: error: invalid operand
+#CHECK: brc	16, bar
+
+	brc	foo, bar
+	brc	-1, bar
+	brc	16, bar
diff --git a/test/MC/SystemZ/insn-brcl-01.s b/test/MC/SystemZ/insn-brcl-01.s
new file mode 100644
index 0000000..f7138bf
--- /dev/null
+++ b/test/MC/SystemZ/insn-brcl-01.s
@@ -0,0 +1,238 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: brcl	0, foo                  # encoding: [0xc0,0x04,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+	brcl	0, foo
+
+#CHECK: brcl	1, foo                  # encoding: [0xc0,0x14,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: jgo	foo                     # encoding: [0xc0,0x14,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+	brcl	1, foo
+	jgo	foo
+
+#CHECK: brcl	2, foo                  # encoding: [0xc0,0x24,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: jgh	foo                     # encoding: [0xc0,0x24,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+	brcl	2, foo
+	jgh	foo
+
+#CHECK: brcl	3, foo                  # encoding: [0xc0,0x34,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: jgnle	foo                     # encoding: [0xc0,0x34,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+	brcl	3, foo
+	jgnle	foo
+
+#CHECK: brcl	4, foo                  # encoding: [0xc0,0x44,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: jgl	foo                     # encoding: [0xc0,0x44,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+	brcl	4, foo
+	jgl	foo
+
+#CHECK: brcl	5, foo                  # encoding: [0xc0,0x54,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: jgnhe	foo                     # encoding: [0xc0,0x54,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+	brcl	5, foo
+	jgnhe	foo
+
+#CHECK: brcl	6, foo                  # encoding: [0xc0,0x64,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: jglh	foo                     # encoding: [0xc0,0x64,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+	brcl	6, foo
+	jglh	foo
+
+#CHECK: brcl	7, foo                  # encoding: [0xc0,0x74,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: jgne	foo                     # encoding: [0xc0,0x74,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+	brcl	7, foo
+	jgne	foo
+
+#CHECK: brcl	8, foo                  # encoding: [0xc0,0x84,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: jge	foo                     # encoding: [0xc0,0x84,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+	brcl	8, foo
+	jge	foo
+
+#CHECK: brcl	9, foo                  # encoding: [0xc0,0x94,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: jgnlh	foo                     # encoding: [0xc0,0x94,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+	brcl	9, foo
+	jgnlh	foo
+
+#CHECK: brcl	10, foo                 # encoding: [0xc0,0xa4,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: jghe	foo                     # encoding: [0xc0,0xa4,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+	brcl	10, foo
+	jghe	foo
+
+#CHECK: brcl	11, foo                 # encoding: [0xc0,0xb4,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: jgnl	foo                     # encoding: [0xc0,0xb4,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+	brcl	11, foo
+	jgnl	foo
+
+#CHECK: brcl	12, foo                 # encoding: [0xc0,0xc4,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: jgle	foo                     # encoding: [0xc0,0xc4,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+	brcl	12, foo
+	jgle	foo
+
+#CHECK: brcl	13, foo                 # encoding: [0xc0,0xd4,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: jgnh	foo                     # encoding: [0xc0,0xd4,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+	brcl	13, foo
+	jgnh	foo
+
+#CHECK: brcl	14, foo                 # encoding: [0xc0,0xe4,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: jgno	foo                     # encoding: [0xc0,0xe4,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+	brcl	14, foo
+	jgno	foo
+
+#CHECK: brcl	15, foo                 # encoding: [0xc0,0xf4,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: jg	foo                     # encoding: [0xc0,0xf4,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+	brcl	15, foo
+	jg	foo
+
+#CHECK: brcl	0, bar+100              # encoding: [0xc0,0x04,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+	brcl	0, bar+100
+
+#CHECK: jgo	bar+100                 # encoding: [0xc0,0x14,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+	jgo	bar+100
+
+#CHECK: jgh	bar+100                 # encoding: [0xc0,0x24,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+	jgh	bar+100
+
+#CHECK: jgnle	bar+100                 # encoding: [0xc0,0x34,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+	jgnle	bar+100
+
+#CHECK: jgl	bar+100                 # encoding: [0xc0,0x44,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+	jgl	bar+100
+
+#CHECK: jgnhe	bar+100                 # encoding: [0xc0,0x54,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+	jgnhe	bar+100
+
+#CHECK: jglh	bar+100                 # encoding: [0xc0,0x64,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+	jglh	bar+100
+
+#CHECK: jgne	bar+100                 # encoding: [0xc0,0x74,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+	jgne	bar+100
+
+#CHECK: jge	bar+100                 # encoding: [0xc0,0x84,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+	jge	bar+100
+
+#CHECK: jgnlh	bar+100                 # encoding: [0xc0,0x94,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+	jgnlh	bar+100
+
+#CHECK: jghe	bar+100                 # encoding: [0xc0,0xa4,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+	jghe	bar+100
+
+#CHECK: jgnl	bar+100                 # encoding: [0xc0,0xb4,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+	jgnl	bar+100
+
+#CHECK: jgle	bar+100                 # encoding: [0xc0,0xc4,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+	jgle	bar+100
+
+#CHECK: jgnh	bar+100                 # encoding: [0xc0,0xd4,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+	jgnh	bar+100
+
+#CHECK: jgno	bar+100                 # encoding: [0xc0,0xe4,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+	jgno	bar+100
+
+#CHECK: jg	bar+100                 # encoding: [0xc0,0xf4,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+	jg	bar+100
+
+#CHECK: brcl	0, bar@PLT              # encoding: [0xc0,0x04,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC32DBL
+	brcl	0, bar@PLT
+
+#CHECK: jgo	bar@PLT                 # encoding: [0xc0,0x14,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC32DBL
+	jgo	bar@PLT
+
+#CHECK: jgh	bar@PLT                 # encoding: [0xc0,0x24,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC32DBL
+	jgh	bar@PLT
+
+#CHECK: jgnle	bar@PLT                 # encoding: [0xc0,0x34,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC32DBL
+	jgnle	bar@PLT
+
+#CHECK: jgl	bar@PLT                 # encoding: [0xc0,0x44,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC32DBL
+	jgl	bar@PLT
+
+#CHECK: jgnhe	bar@PLT                 # encoding: [0xc0,0x54,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC32DBL
+	jgnhe	bar@PLT
+
+#CHECK: jglh	bar@PLT                 # encoding: [0xc0,0x64,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC32DBL
+	jglh	bar@PLT
+
+#CHECK: jgne	bar@PLT                 # encoding: [0xc0,0x74,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC32DBL
+	jgne	bar@PLT
+
+#CHECK: jge	bar@PLT                 # encoding: [0xc0,0x84,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC32DBL
+	jge	bar@PLT
+
+#CHECK: jgnlh	bar@PLT                 # encoding: [0xc0,0x94,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC32DBL
+	jgnlh	bar@PLT
+
+#CHECK: jghe	bar@PLT                 # encoding: [0xc0,0xa4,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC32DBL
+	jghe	bar@PLT
+
+#CHECK: jgnl	bar@PLT                 # encoding: [0xc0,0xb4,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC32DBL
+	jgnl	bar@PLT
+
+#CHECK: jgle	bar@PLT                 # encoding: [0xc0,0xc4,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC32DBL
+	jgle	bar@PLT
+
+#CHECK: jgnh	bar@PLT                 # encoding: [0xc0,0xd4,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC32DBL
+	jgnh	bar@PLT
+
+#CHECK: jgno	bar@PLT                 # encoding: [0xc0,0xe4,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC32DBL
+	jgno	bar@PLT
+
+#CHECK: jg	bar@PLT                 # encoding: [0xc0,0xf4,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: bar@PLT+2, kind: FK_390_PC32DBL
+	jg	bar@PLT
diff --git a/test/MC/SystemZ/insn-brcl-02.s b/test/MC/SystemZ/insn-brcl-02.s
new file mode 100644
index 0000000..ded5f7e
--- /dev/null
+++ b/test/MC/SystemZ/insn-brcl-02.s
@@ -0,0 +1,13 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: brcl	foo, bar
+#CHECK: error: invalid operand
+#CHECK: brcl	-1, bar
+#CHECK: error: invalid operand
+#CHECK: brcl	16, bar
+
+	brcl	foo, bar
+	brcl	-1, bar
+	brcl	16, bar
diff --git a/test/MC/SystemZ/insn-c-01.s b/test/MC/SystemZ/insn-c-01.s
new file mode 100644
index 0000000..e8a8ada
--- /dev/null
+++ b/test/MC/SystemZ/insn-c-01.s
@@ -0,0 +1,17 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: c	%r0, 0                  # encoding: [0x59,0x00,0x00,0x00]
+#CHECK: c	%r0, 4095               # encoding: [0x59,0x00,0x0f,0xff]
+#CHECK: c	%r0, 0(%r1)             # encoding: [0x59,0x00,0x10,0x00]
+#CHECK: c	%r0, 0(%r15)            # encoding: [0x59,0x00,0xf0,0x00]
+#CHECK: c	%r0, 4095(%r1,%r15)     # encoding: [0x59,0x01,0xff,0xff]
+#CHECK: c	%r0, 4095(%r15,%r1)     # encoding: [0x59,0x0f,0x1f,0xff]
+#CHECK: c	%r15, 0                 # encoding: [0x59,0xf0,0x00,0x00]
+
+	c	%r0, 0
+	c	%r0, 4095
+	c	%r0, 0(%r1)
+	c	%r0, 0(%r15)
+	c	%r0, 4095(%r1,%r15)
+	c	%r0, 4095(%r15,%r1)
+	c	%r15, 0
diff --git a/test/MC/SystemZ/insn-c-02.s b/test/MC/SystemZ/insn-c-02.s
new file mode 100644
index 0000000..81fe251
--- /dev/null
+++ b/test/MC/SystemZ/insn-c-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: c	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: c	%r0, 4096
+
+	c	%r0, -1
+	c	%r0, 4096
diff --git a/test/MC/SystemZ/insn-cdb-01.s b/test/MC/SystemZ/insn-cdb-01.s
new file mode 100644
index 0000000..7f6bb59
--- /dev/null
+++ b/test/MC/SystemZ/insn-cdb-01.s
@@ -0,0 +1,17 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: cdb	%f0, 0                  # encoding: [0xed,0x00,0x00,0x00,0x00,0x19]
+#CHECK: cdb	%f0, 4095               # encoding: [0xed,0x00,0x0f,0xff,0x00,0x19]
+#CHECK: cdb	%f0, 0(%r1)             # encoding: [0xed,0x00,0x10,0x00,0x00,0x19]
+#CHECK: cdb	%f0, 0(%r15)            # encoding: [0xed,0x00,0xf0,0x00,0x00,0x19]
+#CHECK: cdb	%f0, 4095(%r1,%r15)     # encoding: [0xed,0x01,0xff,0xff,0x00,0x19]
+#CHECK: cdb	%f0, 4095(%r15,%r1)     # encoding: [0xed,0x0f,0x1f,0xff,0x00,0x19]
+#CHECK: cdb	%f15, 0                 # encoding: [0xed,0xf0,0x00,0x00,0x00,0x19]
+
+	cdb	%f0, 0
+	cdb	%f0, 4095
+	cdb	%f0, 0(%r1)
+	cdb	%f0, 0(%r15)
+	cdb	%f0, 4095(%r1,%r15)
+	cdb	%f0, 4095(%r15,%r1)
+	cdb	%f15, 0
diff --git a/test/MC/SystemZ/insn-cdb-02.s b/test/MC/SystemZ/insn-cdb-02.s
new file mode 100644
index 0000000..5f02b84
--- /dev/null
+++ b/test/MC/SystemZ/insn-cdb-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: cdb	%f0, -1
+#CHECK: error: invalid operand
+#CHECK: cdb	%f0, 4096
+
+	cdb	%f0, -1
+	cdb	%f0, 4096
diff --git a/test/MC/SystemZ/insn-cdbr-01.s b/test/MC/SystemZ/insn-cdbr-01.s
new file mode 100644
index 0000000..d2acfc0
--- /dev/null
+++ b/test/MC/SystemZ/insn-cdbr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: cdbr	%f0, %f0                # encoding: [0xb3,0x19,0x00,0x00]
+#CHECK: cdbr	%f0, %f15               # encoding: [0xb3,0x19,0x00,0x0f]
+#CHECK: cdbr	%f7, %f8                # encoding: [0xb3,0x19,0x00,0x78]
+#CHECK: cdbr	%f15, %f0               # encoding: [0xb3,0x19,0x00,0xf0]
+
+	cdbr	%f0, %f0
+	cdbr	%f0, %f15
+	cdbr	%f7, %f8
+	cdbr	%f15, %f0
diff --git a/test/MC/SystemZ/insn-cdfbr-01.s b/test/MC/SystemZ/insn-cdfbr-01.s
new file mode 100644
index 0000000..94c9b07
--- /dev/null
+++ b/test/MC/SystemZ/insn-cdfbr-01.s
@@ -0,0 +1,13 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: cdfbr	%f0, %r0                # encoding: [0xb3,0x95,0x00,0x00]
+#CHECK: cdfbr	%f0, %r15               # encoding: [0xb3,0x95,0x00,0x0f]
+#CHECK: cdfbr	%f15, %r0               # encoding: [0xb3,0x95,0x00,0xf0]
+#CHECK: cdfbr	%f7, %r8                # encoding: [0xb3,0x95,0x00,0x78]
+#CHECK: cdfbr	%f15, %r15              # encoding: [0xb3,0x95,0x00,0xff]
+
+	cdfbr	%f0, %r0
+	cdfbr	%f0, %r15
+	cdfbr	%f15, %r0
+	cdfbr	%f7, %r8
+	cdfbr	%f15, %r15
diff --git a/test/MC/SystemZ/insn-cdfbr-02.s b/test/MC/SystemZ/insn-cdfbr-02.s
new file mode 100644
index 0000000..14caa1e
--- /dev/null
+++ b/test/MC/SystemZ/insn-cdfbr-02.s
@@ -0,0 +1,16 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid register
+#CHECK: cdfbr	%r0, %r0
+#CHECK: error: invalid register
+#CHECK: cdfbr	%f0, %f0
+#CHECK: error: invalid register
+#CHECK: cdfbr	%f0, %a0
+#CHECK: error: invalid register
+#CHECK: cdfbr	%a0, %r0
+
+	cdfbr	%r0, %r0
+	cdfbr	%f0, %f0
+	cdfbr	%f0, %a0
+	cdfbr	%a0, %r0
diff --git a/test/MC/SystemZ/insn-cdgbr-01.s b/test/MC/SystemZ/insn-cdgbr-01.s
new file mode 100644
index 0000000..6a994af
--- /dev/null
+++ b/test/MC/SystemZ/insn-cdgbr-01.s
@@ -0,0 +1,13 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: cdgbr	%f0, %r0                # encoding: [0xb3,0xa5,0x00,0x00]
+#CHECK: cdgbr	%f0, %r15               # encoding: [0xb3,0xa5,0x00,0x0f]
+#CHECK: cdgbr	%f15, %r0               # encoding: [0xb3,0xa5,0x00,0xf0]
+#CHECK: cdgbr	%f7, %r8                # encoding: [0xb3,0xa5,0x00,0x78]
+#CHECK: cdgbr	%f15, %r15              # encoding: [0xb3,0xa5,0x00,0xff]
+
+	cdgbr	%f0, %r0
+	cdgbr	%f0, %r15
+	cdgbr	%f15, %r0
+	cdgbr	%f7, %r8
+	cdgbr	%f15, %r15
diff --git a/test/MC/SystemZ/insn-cdgbr-02.s b/test/MC/SystemZ/insn-cdgbr-02.s
new file mode 100644
index 0000000..8fa9d4f
--- /dev/null
+++ b/test/MC/SystemZ/insn-cdgbr-02.s
@@ -0,0 +1,16 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid register
+#CHECK: cdgbr	%r0, %r0
+#CHECK: error: invalid register
+#CHECK: cdgbr	%f0, %f0
+#CHECK: error: invalid register
+#CHECK: cdgbr	%f0, %a0
+#CHECK: error: invalid register
+#CHECK: cdgbr	%a0, %r0
+
+	cdgbr	%r0, %r0
+	cdgbr	%f0, %f0
+	cdgbr	%f0, %a0
+	cdgbr	%a0, %r0
diff --git a/test/MC/SystemZ/insn-ceb-01.s b/test/MC/SystemZ/insn-ceb-01.s
new file mode 100644
index 0000000..d576e9d
--- /dev/null
+++ b/test/MC/SystemZ/insn-ceb-01.s
@@ -0,0 +1,17 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: ceb	%f0, 0                  # encoding: [0xed,0x00,0x00,0x00,0x00,0x09]
+#CHECK: ceb	%f0, 4095               # encoding: [0xed,0x00,0x0f,0xff,0x00,0x09]
+#CHECK: ceb	%f0, 0(%r1)             # encoding: [0xed,0x00,0x10,0x00,0x00,0x09]
+#CHECK: ceb	%f0, 0(%r15)            # encoding: [0xed,0x00,0xf0,0x00,0x00,0x09]
+#CHECK: ceb	%f0, 4095(%r1,%r15)     # encoding: [0xed,0x01,0xff,0xff,0x00,0x09]
+#CHECK: ceb	%f0, 4095(%r15,%r1)     # encoding: [0xed,0x0f,0x1f,0xff,0x00,0x09]
+#CHECK: ceb	%f15, 0                 # encoding: [0xed,0xf0,0x00,0x00,0x00,0x09]
+
+	ceb	%f0, 0
+	ceb	%f0, 4095
+	ceb	%f0, 0(%r1)
+	ceb	%f0, 0(%r15)
+	ceb	%f0, 4095(%r1,%r15)
+	ceb	%f0, 4095(%r15,%r1)
+	ceb	%f15, 0
diff --git a/test/MC/SystemZ/insn-ceb-02.s b/test/MC/SystemZ/insn-ceb-02.s
new file mode 100644
index 0000000..90829db
--- /dev/null
+++ b/test/MC/SystemZ/insn-ceb-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: ceb	%f0, -1
+#CHECK: error: invalid operand
+#CHECK: ceb	%f0, 4096
+
+	ceb	%f0, -1
+	ceb	%f0, 4096
diff --git a/test/MC/SystemZ/insn-cebr-01.s b/test/MC/SystemZ/insn-cebr-01.s
new file mode 100644
index 0000000..b820e39
--- /dev/null
+++ b/test/MC/SystemZ/insn-cebr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: cebr	%f0, %f0                # encoding: [0xb3,0x09,0x00,0x00]
+#CHECK: cebr	%f0, %f15               # encoding: [0xb3,0x09,0x00,0x0f]
+#CHECK: cebr	%f7, %f8                # encoding: [0xb3,0x09,0x00,0x78]
+#CHECK: cebr	%f15, %f0               # encoding: [0xb3,0x09,0x00,0xf0]
+
+	cebr	%f0, %f0
+	cebr	%f0, %f15
+	cebr	%f7, %f8
+	cebr	%f15, %f0
diff --git a/test/MC/SystemZ/insn-cefbr-01.s b/test/MC/SystemZ/insn-cefbr-01.s
new file mode 100644
index 0000000..f1068f5
--- /dev/null
+++ b/test/MC/SystemZ/insn-cefbr-01.s
@@ -0,0 +1,13 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: cefbr	%f0, %r0                # encoding: [0xb3,0x94,0x00,0x00]
+#CHECK: cefbr	%f0, %r15               # encoding: [0xb3,0x94,0x00,0x0f]
+#CHECK: cefbr	%f15, %r0               # encoding: [0xb3,0x94,0x00,0xf0]
+#CHECK: cefbr	%f7, %r8                # encoding: [0xb3,0x94,0x00,0x78]
+#CHECK: cefbr	%f15, %r15              # encoding: [0xb3,0x94,0x00,0xff]
+
+	cefbr	%f0, %r0
+	cefbr	%f0, %r15
+	cefbr	%f15, %r0
+	cefbr	%f7, %r8
+	cefbr	%f15, %r15
diff --git a/test/MC/SystemZ/insn-cefbr-02.s b/test/MC/SystemZ/insn-cefbr-02.s
new file mode 100644
index 0000000..b894fb9
--- /dev/null
+++ b/test/MC/SystemZ/insn-cefbr-02.s
@@ -0,0 +1,16 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid register
+#CHECK: cefbr	%r0, %r0
+#CHECK: error: invalid register
+#CHECK: cefbr	%f0, %f0
+#CHECK: error: invalid register
+#CHECK: cefbr	%f0, %a0
+#CHECK: error: invalid register
+#CHECK: cefbr	%a0, %r0
+
+	cefbr	%r0, %r0
+	cefbr	%f0, %f0
+	cefbr	%f0, %a0
+	cefbr	%a0, %r0
diff --git a/test/MC/SystemZ/insn-cegbr-01.s b/test/MC/SystemZ/insn-cegbr-01.s
new file mode 100644
index 0000000..5b2e6ca
--- /dev/null
+++ b/test/MC/SystemZ/insn-cegbr-01.s
@@ -0,0 +1,13 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: cegbr	%f0, %r0                # encoding: [0xb3,0xa4,0x00,0x00]
+#CHECK: cegbr	%f0, %r15               # encoding: [0xb3,0xa4,0x00,0x0f]
+#CHECK: cegbr	%f15, %r0               # encoding: [0xb3,0xa4,0x00,0xf0]
+#CHECK: cegbr	%f7, %r8                # encoding: [0xb3,0xa4,0x00,0x78]
+#CHECK: cegbr	%f15, %r15              # encoding: [0xb3,0xa4,0x00,0xff]
+
+	cegbr	%f0, %r0
+	cegbr	%f0, %r15
+	cegbr	%f15, %r0
+	cegbr	%f7, %r8
+	cegbr	%f15, %r15
diff --git a/test/MC/SystemZ/insn-cegbr-02.s b/test/MC/SystemZ/insn-cegbr-02.s
new file mode 100644
index 0000000..bf0c31a
--- /dev/null
+++ b/test/MC/SystemZ/insn-cegbr-02.s
@@ -0,0 +1,16 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid register
+#CHECK: cegbr	%r0, %r0
+#CHECK: error: invalid register
+#CHECK: cegbr	%f0, %f0
+#CHECK: error: invalid register
+#CHECK: cegbr	%f0, %a0
+#CHECK: error: invalid register
+#CHECK: cegbr	%a0, %r0
+
+	cegbr	%r0, %r0
+	cegbr	%f0, %f0
+	cegbr	%f0, %a0
+	cegbr	%a0, %r0
diff --git a/test/MC/SystemZ/insn-cfdbr-01.s b/test/MC/SystemZ/insn-cfdbr-01.s
new file mode 100644
index 0000000..be4f87f
--- /dev/null
+++ b/test/MC/SystemZ/insn-cfdbr-01.s
@@ -0,0 +1,13 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: cfdbr	%r0, 0, %f0             # encoding: [0xb3,0x99,0x00,0x00]
+#CHECK: cfdbr	%r0, 0, %f15            # encoding: [0xb3,0x99,0x00,0x0f]
+#CHECK: cfdbr	%r0, 15, %f0            # encoding: [0xb3,0x99,0xf0,0x00]
+#CHECK: cfdbr	%r4, 5, %f6             # encoding: [0xb3,0x99,0x50,0x46]
+#CHECK: cfdbr	%r15, 0, %f0            # encoding: [0xb3,0x99,0x00,0xf0]
+
+	cfdbr	%r0, 0, %f0
+	cfdbr	%r0, 0, %f15
+	cfdbr	%r0, 15, %f0
+	cfdbr	%r4, 5, %f6
+	cfdbr	%r15, 0, %f0
diff --git a/test/MC/SystemZ/insn-cfdbr-02.s b/test/MC/SystemZ/insn-cfdbr-02.s
new file mode 100644
index 0000000..0017595
--- /dev/null
+++ b/test/MC/SystemZ/insn-cfdbr-02.s
@@ -0,0 +1,16 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid register
+#CHECK: cfdbr	%r0, 0, %r0
+#CHECK: error: invalid register
+#CHECK: cfdbr	%f0, 0, %f0
+#CHECK: error: invalid operand
+#CHECK: cfdbr	%r0, -1, %f0
+#CHECK: error: invalid operand
+#CHECK: cfdbr	%r0, 16, %f0
+
+	cfdbr	%r0, 0, %r0
+	cfdbr	%f0, 0, %f0
+	cfdbr	%r0, -1, %f0
+	cfdbr	%r0, 16, %f0
diff --git a/test/MC/SystemZ/insn-cfebr-01.s b/test/MC/SystemZ/insn-cfebr-01.s
new file mode 100644
index 0000000..6f7ab2c
--- /dev/null
+++ b/test/MC/SystemZ/insn-cfebr-01.s
@@ -0,0 +1,13 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: cfebr	%r0, 0, %f0             # encoding: [0xb3,0x98,0x00,0x00]
+#CHECK: cfebr	%r0, 0, %f15            # encoding: [0xb3,0x98,0x00,0x0f]
+#CHECK: cfebr	%r0, 15, %f0            # encoding: [0xb3,0x98,0xf0,0x00]
+#CHECK: cfebr	%r4, 5, %f6             # encoding: [0xb3,0x98,0x50,0x46]
+#CHECK: cfebr	%r15, 0, %f0            # encoding: [0xb3,0x98,0x00,0xf0]
+
+	cfebr	%r0, 0, %f0
+	cfebr	%r0, 0, %f15
+	cfebr	%r0, 15, %f0
+	cfebr	%r4, 5, %f6
+	cfebr	%r15, 0, %f0
diff --git a/test/MC/SystemZ/insn-cfebr-02.s b/test/MC/SystemZ/insn-cfebr-02.s
new file mode 100644
index 0000000..c3c5ada
--- /dev/null
+++ b/test/MC/SystemZ/insn-cfebr-02.s
@@ -0,0 +1,16 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid register
+#CHECK: cfebr	%r0, 0, %r0
+#CHECK: error: invalid register
+#CHECK: cfebr	%f0, 0, %f0
+#CHECK: error: invalid operand
+#CHECK: cfebr	%r0, -1, %f0
+#CHECK: error: invalid operand
+#CHECK: cfebr	%r0, 16, %f0
+
+	cfebr	%r0, 0, %r0
+	cfebr	%f0, 0, %f0
+	cfebr	%r0, -1, %f0
+	cfebr	%r0, 16, %f0
diff --git a/test/MC/SystemZ/insn-cfi-01.s b/test/MC/SystemZ/insn-cfi-01.s
new file mode 100644
index 0000000..52e34c0
--- /dev/null
+++ b/test/MC/SystemZ/insn-cfi-01.s
@@ -0,0 +1,15 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: cfi	%r0, -2147483648        # encoding: [0xc2,0x0d,0x80,0x00,0x00,0x00]
+#CHECK: cfi	%r0, -1                 # encoding: [0xc2,0x0d,0xff,0xff,0xff,0xff]
+#CHECK: cfi	%r0, 0                  # encoding: [0xc2,0x0d,0x00,0x00,0x00,0x00]
+#CHECK: cfi	%r0, 1                  # encoding: [0xc2,0x0d,0x00,0x00,0x00,0x01]
+#CHECK: cfi	%r0, 2147483647         # encoding: [0xc2,0x0d,0x7f,0xff,0xff,0xff]
+#CHECK: cfi	%r15, 0                 # encoding: [0xc2,0xfd,0x00,0x00,0x00,0x00]
+
+	cfi	%r0, -1 << 31
+	cfi	%r0, -1
+	cfi	%r0, 0
+	cfi	%r0, 1
+	cfi	%r0, (1 << 31) - 1
+	cfi	%r15, 0
diff --git a/test/MC/SystemZ/insn-cfi-02.s b/test/MC/SystemZ/insn-cfi-02.s
new file mode 100644
index 0000000..cf7c726
--- /dev/null
+++ b/test/MC/SystemZ/insn-cfi-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: cfi	%r0, (-1 << 31) - 1
+#CHECK: error: invalid operand
+#CHECK: cfi	%r0, (1 << 31)
+
+	cfi	%r0, (-1 << 31) - 1
+	cfi	%r0, (1 << 31)
diff --git a/test/MC/SystemZ/insn-cfxbr-01.s b/test/MC/SystemZ/insn-cfxbr-01.s
new file mode 100644
index 0000000..c509106
--- /dev/null
+++ b/test/MC/SystemZ/insn-cfxbr-01.s
@@ -0,0 +1,13 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: cfxbr	%r0, 0, %f0             # encoding: [0xb3,0x9a,0x00,0x00]
+#CHECK: cfxbr	%r0, 0, %f13            # encoding: [0xb3,0x9a,0x00,0x0d]
+#CHECK: cfxbr	%r0, 15, %f0            # encoding: [0xb3,0x9a,0xf0,0x00]
+#CHECK: cfxbr	%r4, 5, %f8             # encoding: [0xb3,0x9a,0x50,0x48]
+#CHECK: cfxbr	%r15, 0, %f0            # encoding: [0xb3,0x9a,0x00,0xf0]
+
+	cfxbr	%r0, 0, %f0
+	cfxbr	%r0, 0, %f13
+	cfxbr	%r0, 15, %f0
+	cfxbr	%r4, 5, %f8
+	cfxbr	%r15, 0, %f0
diff --git a/test/MC/SystemZ/insn-cfxbr-02.s b/test/MC/SystemZ/insn-cfxbr-02.s
new file mode 100644
index 0000000..3802c51
--- /dev/null
+++ b/test/MC/SystemZ/insn-cfxbr-02.s
@@ -0,0 +1,23 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid register
+#CHECK: cfxbr	%r0, 0, %r0
+#CHECK: error: invalid register
+#CHECK: cfxbr	%f0, 0, %f0
+#CHECK: error: invalid operand
+#CHECK: cfxbr	%r0, -1, %f0
+#CHECK: error: invalid operand
+#CHECK: cfxbr	%r0, 16, %f0
+#CHECK: error: invalid register
+#CHECK: cfxbr	%r0, 0, %f2
+#CHECK: error: invalid register
+#CHECK: cfxbr	%r0, 0, %f14
+
+	cfxbr	%r0, 0, %r0
+	cfxbr	%f0, 0, %f0
+	cfxbr	%r0, -1, %f0
+	cfxbr	%r0, 16, %f0
+	cfxbr	%r0, 0, %f2
+	cfxbr	%r0, 0, %f14
+
diff --git a/test/MC/SystemZ/insn-cg-01.s b/test/MC/SystemZ/insn-cg-01.s
new file mode 100644
index 0000000..1eb185f
--- /dev/null
+++ b/test/MC/SystemZ/insn-cg-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: cg	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x20]
+#CHECK: cg	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x20]
+#CHECK: cg	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x20]
+#CHECK: cg	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x20]
+#CHECK: cg	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x20]
+#CHECK: cg	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x20]
+#CHECK: cg	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x20]
+#CHECK: cg	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x20]
+#CHECK: cg	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x20]
+#CHECK: cg	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x20]
+
+	cg	%r0, -524288
+	cg	%r0, -1
+	cg	%r0, 0
+	cg	%r0, 1
+	cg	%r0, 524287
+	cg	%r0, 0(%r1)
+	cg	%r0, 0(%r15)
+	cg	%r0, 524287(%r1,%r15)
+	cg	%r0, 524287(%r15,%r1)
+	cg	%r15, 0
diff --git a/test/MC/SystemZ/insn-cg-02.s b/test/MC/SystemZ/insn-cg-02.s
new file mode 100644
index 0000000..e093ccd
--- /dev/null
+++ b/test/MC/SystemZ/insn-cg-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: cg	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: cg	%r0, 524288
+
+	cg	%r0, -524289
+	cg	%r0, 524288
diff --git a/test/MC/SystemZ/insn-cgdbr-01.s b/test/MC/SystemZ/insn-cgdbr-01.s
new file mode 100644
index 0000000..718f50a
--- /dev/null
+++ b/test/MC/SystemZ/insn-cgdbr-01.s
@@ -0,0 +1,13 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: cgdbr	%r0, 0, %f0             # encoding: [0xb3,0xa9,0x00,0x00]
+#CHECK: cgdbr	%r0, 0, %f15            # encoding: [0xb3,0xa9,0x00,0x0f]
+#CHECK: cgdbr	%r0, 15, %f0            # encoding: [0xb3,0xa9,0xf0,0x00]
+#CHECK: cgdbr	%r4, 5, %f6             # encoding: [0xb3,0xa9,0x50,0x46]
+#CHECK: cgdbr	%r15, 0, %f0            # encoding: [0xb3,0xa9,0x00,0xf0]
+
+	cgdbr	%r0, 0, %f0
+	cgdbr	%r0, 0, %f15
+	cgdbr	%r0, 15, %f0
+	cgdbr	%r4, 5, %f6
+	cgdbr	%r15, 0, %f0
diff --git a/test/MC/SystemZ/insn-cgdbr-02.s b/test/MC/SystemZ/insn-cgdbr-02.s
new file mode 100644
index 0000000..3a3e01f
--- /dev/null
+++ b/test/MC/SystemZ/insn-cgdbr-02.s
@@ -0,0 +1,16 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid register
+#CHECK: cgdbr	%r0, 0, %r0
+#CHECK: error: invalid register
+#CHECK: cgdbr	%f0, 0, %f0
+#CHECK: error: invalid operand
+#CHECK: cgdbr	%r0, -1, %f0
+#CHECK: error: invalid operand
+#CHECK: cgdbr	%r0, 16, %f0
+
+	cgdbr	%r0, 0, %r0
+	cgdbr	%f0, 0, %f0
+	cgdbr	%r0, -1, %f0
+	cgdbr	%r0, 16, %f0
diff --git a/test/MC/SystemZ/insn-cgebr-01.s b/test/MC/SystemZ/insn-cgebr-01.s
new file mode 100644
index 0000000..dc6a7db
--- /dev/null
+++ b/test/MC/SystemZ/insn-cgebr-01.s
@@ -0,0 +1,13 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: cgebr	%r0, 0, %f0             # encoding: [0xb3,0xa8,0x00,0x00]
+#CHECK: cgebr	%r0, 0, %f15            # encoding: [0xb3,0xa8,0x00,0x0f]
+#CHECK: cgebr	%r0, 15, %f0            # encoding: [0xb3,0xa8,0xf0,0x00]
+#CHECK: cgebr	%r4, 5, %f6             # encoding: [0xb3,0xa8,0x50,0x46]
+#CHECK: cgebr	%r15, 0, %f0            # encoding: [0xb3,0xa8,0x00,0xf0]
+
+	cgebr	%r0, 0, %f0
+	cgebr	%r0, 0, %f15
+	cgebr	%r0, 15, %f0
+	cgebr	%r4, 5, %f6
+	cgebr	%r15, 0, %f0
diff --git a/test/MC/SystemZ/insn-cgebr-02.s b/test/MC/SystemZ/insn-cgebr-02.s
new file mode 100644
index 0000000..9b817a4
--- /dev/null
+++ b/test/MC/SystemZ/insn-cgebr-02.s
@@ -0,0 +1,16 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid register
+#CHECK: cgebr	%r0, 0, %r0
+#CHECK: error: invalid register
+#CHECK: cgebr	%f0, 0, %f0
+#CHECK: error: invalid operand
+#CHECK: cgebr	%r0, -1, %f0
+#CHECK: error: invalid operand
+#CHECK: cgebr	%r0, 16, %f0
+
+	cgebr	%r0, 0, %r0
+	cgebr	%f0, 0, %f0
+	cgebr	%r0, -1, %f0
+	cgebr	%r0, 16, %f0
diff --git a/test/MC/SystemZ/insn-cgf-01.s b/test/MC/SystemZ/insn-cgf-01.s
new file mode 100644
index 0000000..03c439f
--- /dev/null
+++ b/test/MC/SystemZ/insn-cgf-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: cgf	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x30]
+#CHECK: cgf	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x30]
+#CHECK: cgf	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x30]
+#CHECK: cgf	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x30]
+#CHECK: cgf	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x30]
+#CHECK: cgf	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x30]
+#CHECK: cgf	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x30]
+#CHECK: cgf	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x30]
+#CHECK: cgf	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x30]
+#CHECK: cgf	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x30]
+
+	cgf	%r0, -524288
+	cgf	%r0, -1
+	cgf	%r0, 0
+	cgf	%r0, 1
+	cgf	%r0, 524287
+	cgf	%r0, 0(%r1)
+	cgf	%r0, 0(%r15)
+	cgf	%r0, 524287(%r1,%r15)
+	cgf	%r0, 524287(%r15,%r1)
+	cgf	%r15, 0
diff --git a/test/MC/SystemZ/insn-cgf-02.s b/test/MC/SystemZ/insn-cgf-02.s
new file mode 100644
index 0000000..7171c6e
--- /dev/null
+++ b/test/MC/SystemZ/insn-cgf-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: cgf	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: cgf	%r0, 524288
+
+	cgf	%r0, -524289
+	cgf	%r0, 524288
diff --git a/test/MC/SystemZ/insn-cgfi-01.s b/test/MC/SystemZ/insn-cgfi-01.s
new file mode 100644
index 0000000..d6f72d5
--- /dev/null
+++ b/test/MC/SystemZ/insn-cgfi-01.s
@@ -0,0 +1,15 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: cgfi	%r0, -2147483648        # encoding: [0xc2,0x0c,0x80,0x00,0x00,0x00]
+#CHECK: cgfi	%r0, -1                 # encoding: [0xc2,0x0c,0xff,0xff,0xff,0xff]
+#CHECK: cgfi	%r0, 0                  # encoding: [0xc2,0x0c,0x00,0x00,0x00,0x00]
+#CHECK: cgfi	%r0, 1                  # encoding: [0xc2,0x0c,0x00,0x00,0x00,0x01]
+#CHECK: cgfi	%r0, 2147483647         # encoding: [0xc2,0x0c,0x7f,0xff,0xff,0xff]
+#CHECK: cgfi	%r15, 0                 # encoding: [0xc2,0xfc,0x00,0x00,0x00,0x00]
+
+	cgfi	%r0, -1 << 31
+	cgfi	%r0, -1
+	cgfi	%r0, 0
+	cgfi	%r0, 1
+	cgfi	%r0, (1 << 31) - 1
+	cgfi	%r15, 0
diff --git a/test/MC/SystemZ/insn-cgfi-02.s b/test/MC/SystemZ/insn-cgfi-02.s
new file mode 100644
index 0000000..6f72a24
--- /dev/null
+++ b/test/MC/SystemZ/insn-cgfi-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: cgfi	%r0, (-1 << 31) - 1
+#CHECK: error: invalid operand
+#CHECK: cgfi	%r0, (1 << 31)
+
+	cgfi	%r0, (-1 << 31) - 1
+	cgfi	%r0, (1 << 31)
diff --git a/test/MC/SystemZ/insn-cgfr-01.s b/test/MC/SystemZ/insn-cgfr-01.s
new file mode 100644
index 0000000..6bd1792
--- /dev/null
+++ b/test/MC/SystemZ/insn-cgfr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: cgfr	%r0, %r0                # encoding: [0xb9,0x30,0x00,0x00]
+#CHECK: cgfr	%r0, %r15               # encoding: [0xb9,0x30,0x00,0x0f]
+#CHECK: cgfr	%r15, %r0               # encoding: [0xb9,0x30,0x00,0xf0]
+#CHECK: cgfr	%r7, %r8                # encoding: [0xb9,0x30,0x00,0x78]
+
+	cgfr	%r0,%r0
+	cgfr	%r0,%r15
+	cgfr	%r15,%r0
+	cgfr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-cgfrl-01.s b/test/MC/SystemZ/insn-cgfrl-01.s
new file mode 100644
index 0000000..2792fb4
--- /dev/null
+++ b/test/MC/SystemZ/insn-cgfrl-01.s
@@ -0,0 +1,31 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: cgfrl	%r0, 2864434397         # encoding: [0xc6,0x0c,0x55,0x5d,0xe6,0x6e]
+#CHECK: cgfrl	%r15, 2864434397        # encoding: [0xc6,0xfc,0x55,0x5d,0xe6,0x6e]
+
+	cgfrl	%r0,0xaabbccdd
+	cgfrl	%r15,0xaabbccdd
+
+#CHECK: cgfrl	%r0, foo                # encoding: [0xc6,0x0c,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: cgfrl	%r15, foo               # encoding: [0xc6,0xfc,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+
+	cgfrl	%r0,foo
+	cgfrl	%r15,foo
+
+#CHECK: cgfrl	%r3, bar+100            # encoding: [0xc6,0x3c,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+#CHECK: cgfrl	%r4, bar+100            # encoding: [0xc6,0x4c,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+
+	cgfrl	%r3,bar+100
+	cgfrl	%r4,bar+100
+
+#CHECK: cgfrl	%r7, frob@PLT           # encoding: [0xc6,0x7c,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+#CHECK: cgfrl	%r8, frob@PLT           # encoding: [0xc6,0x8c,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+
+	cgfrl	%r7,frob@PLT
+	cgfrl	%r8,frob@PLT
diff --git a/test/MC/SystemZ/insn-cgh-01.s b/test/MC/SystemZ/insn-cgh-01.s
new file mode 100644
index 0000000..31c86ab
--- /dev/null
+++ b/test/MC/SystemZ/insn-cgh-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: cgh	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x34]
+#CHECK: cgh	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x34]
+#CHECK: cgh	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x34]
+#CHECK: cgh	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x34]
+#CHECK: cgh	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x34]
+#CHECK: cgh	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x34]
+#CHECK: cgh	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x34]
+#CHECK: cgh	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x34]
+#CHECK: cgh	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x34]
+#CHECK: cgh	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x34]
+
+	cgh	%r0, -524288
+	cgh	%r0, -1
+	cgh	%r0, 0
+	cgh	%r0, 1
+	cgh	%r0, 524287
+	cgh	%r0, 0(%r1)
+	cgh	%r0, 0(%r15)
+	cgh	%r0, 524287(%r1,%r15)
+	cgh	%r0, 524287(%r15,%r1)
+	cgh	%r15, 0
diff --git a/test/MC/SystemZ/insn-cgh-02.s b/test/MC/SystemZ/insn-cgh-02.s
new file mode 100644
index 0000000..60e665f2
--- /dev/null
+++ b/test/MC/SystemZ/insn-cgh-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: cgh	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: cgh	%r0, 524288
+
+	cgh	%r0, -524289
+	cgh	%r0, 524288
diff --git a/test/MC/SystemZ/insn-cghi-01.s b/test/MC/SystemZ/insn-cghi-01.s
new file mode 100644
index 0000000..575ad89
--- /dev/null
+++ b/test/MC/SystemZ/insn-cghi-01.s
@@ -0,0 +1,15 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: cghi	%r0, -32768             # encoding: [0xa7,0x0f,0x80,0x00]
+#CHECK: cghi	%r0, -1                 # encoding: [0xa7,0x0f,0xff,0xff]
+#CHECK: cghi	%r0, 0                  # encoding: [0xa7,0x0f,0x00,0x00]
+#CHECK: cghi	%r0, 1                  # encoding: [0xa7,0x0f,0x00,0x01]
+#CHECK: cghi	%r0, 32767              # encoding: [0xa7,0x0f,0x7f,0xff]
+#CHECK: cghi	%r15, 0                 # encoding: [0xa7,0xff,0x00,0x00]
+
+	cghi	%r0, -32768
+	cghi	%r0, -1
+	cghi	%r0, 0
+	cghi	%r0, 1
+	cghi	%r0, 32767
+	cghi	%r15, 0
diff --git a/test/MC/SystemZ/insn-cghi-02.s b/test/MC/SystemZ/insn-cghi-02.s
new file mode 100644
index 0000000..bd4a52a
--- /dev/null
+++ b/test/MC/SystemZ/insn-cghi-02.s
@@ -0,0 +1,13 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: cghi	%r0, -32769
+#CHECK: error: invalid operand
+#CHECK: cghi	%r0, 32768
+#CHECK: error: invalid operand
+#CHECK: cghi	%r0, foo
+
+	cghi	%r0, -32769
+	cghi	%r0, 32768
+	cghi	%r0, foo
diff --git a/test/MC/SystemZ/insn-cghrl-01.s b/test/MC/SystemZ/insn-cghrl-01.s
new file mode 100644
index 0000000..c48c5ec
--- /dev/null
+++ b/test/MC/SystemZ/insn-cghrl-01.s
@@ -0,0 +1,31 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: cghrl	%r0, 2864434397         # encoding: [0xc6,0x04,0x55,0x5d,0xe6,0x6e]
+#CHECK: cghrl	%r15, 2864434397        # encoding: [0xc6,0xf4,0x55,0x5d,0xe6,0x6e]
+
+	cghrl	%r0,0xaabbccdd
+	cghrl	%r15,0xaabbccdd
+
+#CHECK: cghrl	%r0, foo                # encoding: [0xc6,0x04,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: cghrl	%r15, foo               # encoding: [0xc6,0xf4,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+
+	cghrl	%r0,foo
+	cghrl	%r15,foo
+
+#CHECK: cghrl	%r3, bar+100            # encoding: [0xc6,0x34,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+#CHECK: cghrl	%r4, bar+100            # encoding: [0xc6,0x44,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+
+	cghrl	%r3,bar+100
+	cghrl	%r4,bar+100
+
+#CHECK: cghrl	%r7, frob@PLT           # encoding: [0xc6,0x74,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+#CHECK: cghrl	%r8, frob@PLT           # encoding: [0xc6,0x84,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+
+	cghrl	%r7,frob@PLT
+	cghrl	%r8,frob@PLT
diff --git a/test/MC/SystemZ/insn-cghsi-01.s b/test/MC/SystemZ/insn-cghsi-01.s
new file mode 100644
index 0000000..7d67e20
--- /dev/null
+++ b/test/MC/SystemZ/insn-cghsi-01.s
@@ -0,0 +1,25 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: cghsi	0, 0                    # encoding: [0xe5,0x58,0x00,0x00,0x00,0x00]
+#CHECK: cghsi	4095, 0                 # encoding: [0xe5,0x58,0x0f,0xff,0x00,0x00]
+#CHECK: cghsi	0, -32768               # encoding: [0xe5,0x58,0x00,0x00,0x80,0x00]
+#CHECK: cghsi	0, -1                   # encoding: [0xe5,0x58,0x00,0x00,0xff,0xff]
+#CHECK: cghsi	0, 0                    # encoding: [0xe5,0x58,0x00,0x00,0x00,0x00]
+#CHECK: cghsi	0, 1                    # encoding: [0xe5,0x58,0x00,0x00,0x00,0x01]
+#CHECK: cghsi	0, 32767                # encoding: [0xe5,0x58,0x00,0x00,0x7f,0xff]
+#CHECK: cghsi	0(%r1), 42              # encoding: [0xe5,0x58,0x10,0x00,0x00,0x2a]
+#CHECK: cghsi	0(%r15), 42             # encoding: [0xe5,0x58,0xf0,0x00,0x00,0x2a]
+#CHECK: cghsi	4095(%r1), 42           # encoding: [0xe5,0x58,0x1f,0xff,0x00,0x2a]
+#CHECK: cghsi	4095(%r15), 42          # encoding: [0xe5,0x58,0xff,0xff,0x00,0x2a]
+
+	cghsi	0, 0
+	cghsi	4095, 0
+	cghsi	0, -32768
+	cghsi	0, -1
+	cghsi	0, 0
+	cghsi	0, 1
+	cghsi	0, 32767
+	cghsi	0(%r1), 42
+	cghsi	0(%r15), 42
+	cghsi	4095(%r1), 42
+	cghsi	4095(%r15), 42
diff --git a/test/MC/SystemZ/insn-cghsi-02.s b/test/MC/SystemZ/insn-cghsi-02.s
new file mode 100644
index 0000000..773ee5c
--- /dev/null
+++ b/test/MC/SystemZ/insn-cghsi-02.s
@@ -0,0 +1,19 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: cghsi	-1, 0
+#CHECK: error: invalid operand
+#CHECK: cghsi	4096, 0
+#CHECK: error: invalid use of indexed addressing
+#CHECK: cghsi	0(%r1,%r2), 0
+#CHECK: error: invalid operand
+#CHECK: cghsi	0, -32769
+#CHECK: error: invalid operand
+#CHECK: cghsi	0, 32768
+
+	cghsi	-1, 0
+	cghsi	4096, 0
+	cghsi	0(%r1,%r2), 0
+	cghsi	0, -32769
+	cghsi	0, 32768
diff --git a/test/MC/SystemZ/insn-cgr-01.s b/test/MC/SystemZ/insn-cgr-01.s
new file mode 100644
index 0000000..334a0f6
--- /dev/null
+++ b/test/MC/SystemZ/insn-cgr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: cgr	%r0, %r0                # encoding: [0xb9,0x20,0x00,0x00]
+#CHECK: cgr	%r0, %r15               # encoding: [0xb9,0x20,0x00,0x0f]
+#CHECK: cgr	%r15, %r0               # encoding: [0xb9,0x20,0x00,0xf0]
+#CHECK: cgr	%r7, %r8                # encoding: [0xb9,0x20,0x00,0x78]
+
+	cgr	%r0,%r0
+	cgr	%r0,%r15
+	cgr	%r15,%r0
+	cgr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-cgrl-01.s b/test/MC/SystemZ/insn-cgrl-01.s
new file mode 100644
index 0000000..af878cb
--- /dev/null
+++ b/test/MC/SystemZ/insn-cgrl-01.s
@@ -0,0 +1,31 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: cgrl	%r0, 2864434397         # encoding: [0xc6,0x08,0x55,0x5d,0xe6,0x6e]
+#CHECK: cgrl	%r15, 2864434397        # encoding: [0xc6,0xf8,0x55,0x5d,0xe6,0x6e]
+
+	cgrl	%r0,0xaabbccdd
+	cgrl	%r15,0xaabbccdd
+
+#CHECK: cgrl	%r0, foo                # encoding: [0xc6,0x08,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: cgrl	%r15, foo               # encoding: [0xc6,0xf8,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+
+	cgrl	%r0,foo
+	cgrl	%r15,foo
+
+#CHECK: cgrl	%r3, bar+100            # encoding: [0xc6,0x38,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+#CHECK: cgrl	%r4, bar+100            # encoding: [0xc6,0x48,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+
+	cgrl	%r3,bar+100
+	cgrl	%r4,bar+100
+
+#CHECK: cgrl	%r7, frob@PLT           # encoding: [0xc6,0x78,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+#CHECK: cgrl	%r8, frob@PLT           # encoding: [0xc6,0x88,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+
+	cgrl	%r7,frob@PLT
+	cgrl	%r8,frob@PLT
diff --git a/test/MC/SystemZ/insn-cgxbr-01.s b/test/MC/SystemZ/insn-cgxbr-01.s
new file mode 100644
index 0000000..0250b52
--- /dev/null
+++ b/test/MC/SystemZ/insn-cgxbr-01.s
@@ -0,0 +1,13 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: cgxbr	%r0, 0, %f0             # encoding: [0xb3,0xaa,0x00,0x00]
+#CHECK: cgxbr	%r0, 0, %f13            # encoding: [0xb3,0xaa,0x00,0x0d]
+#CHECK: cgxbr	%r0, 15, %f0            # encoding: [0xb3,0xaa,0xf0,0x00]
+#CHECK: cgxbr	%r4, 5, %f8             # encoding: [0xb3,0xaa,0x50,0x48]
+#CHECK: cgxbr	%r15, 0, %f0            # encoding: [0xb3,0xaa,0x00,0xf0]
+
+	cgxbr	%r0, 0, %f0
+	cgxbr	%r0, 0, %f13
+	cgxbr	%r0, 15, %f0
+	cgxbr	%r4, 5, %f8
+	cgxbr	%r15, 0, %f0
diff --git a/test/MC/SystemZ/insn-cgxbr-02.s b/test/MC/SystemZ/insn-cgxbr-02.s
new file mode 100644
index 0000000..9caab9f
--- /dev/null
+++ b/test/MC/SystemZ/insn-cgxbr-02.s
@@ -0,0 +1,23 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid register
+#CHECK: cgxbr	%r0, 0, %r0
+#CHECK: error: invalid register
+#CHECK: cgxbr	%f0, 0, %f0
+#CHECK: error: invalid operand
+#CHECK: cgxbr	%r0, -1, %f0
+#CHECK: error: invalid operand
+#CHECK: cgxbr	%r0, 16, %f0
+#CHECK: error: invalid register
+#CHECK: cgxbr	%r0, 0, %f2
+#CHECK: error: invalid register
+#CHECK: cgxbr	%r0, 0, %f14
+
+	cgxbr	%r0, 0, %r0
+	cgxbr	%f0, 0, %f0
+	cgxbr	%r0, -1, %f0
+	cgxbr	%r0, 16, %f0
+	cgxbr	%r0, 0, %f2
+	cgxbr	%r0, 0, %f14
+
diff --git a/test/MC/SystemZ/insn-ch-01.s b/test/MC/SystemZ/insn-ch-01.s
new file mode 100644
index 0000000..dfb0b7f
--- /dev/null
+++ b/test/MC/SystemZ/insn-ch-01.s
@@ -0,0 +1,17 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: ch	%r0, 0                  # encoding: [0x49,0x00,0x00,0x00]
+#CHECK: ch	%r0, 4095               # encoding: [0x49,0x00,0x0f,0xff]
+#CHECK: ch	%r0, 0(%r1)             # encoding: [0x49,0x00,0x10,0x00]
+#CHECK: ch	%r0, 0(%r15)            # encoding: [0x49,0x00,0xf0,0x00]
+#CHECK: ch	%r0, 4095(%r1,%r15)     # encoding: [0x49,0x01,0xff,0xff]
+#CHECK: ch	%r0, 4095(%r15,%r1)     # encoding: [0x49,0x0f,0x1f,0xff]
+#CHECK: ch	%r15, 0                 # encoding: [0x49,0xf0,0x00,0x00]
+
+	ch	%r0, 0
+	ch	%r0, 4095
+	ch	%r0, 0(%r1)
+	ch	%r0, 0(%r15)
+	ch	%r0, 4095(%r1,%r15)
+	ch	%r0, 4095(%r15,%r1)
+	ch	%r15, 0
diff --git a/test/MC/SystemZ/insn-ch-02.s b/test/MC/SystemZ/insn-ch-02.s
new file mode 100644
index 0000000..2034c2b
--- /dev/null
+++ b/test/MC/SystemZ/insn-ch-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: ch	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: ch	%r0, 4096
+
+	ch	%r0, -1
+	ch	%r0, 4096
diff --git a/test/MC/SystemZ/insn-chhsi-01.s b/test/MC/SystemZ/insn-chhsi-01.s
new file mode 100644
index 0000000..0fd50bc
--- /dev/null
+++ b/test/MC/SystemZ/insn-chhsi-01.s
@@ -0,0 +1,25 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: chhsi	0, 0                    # encoding: [0xe5,0x54,0x00,0x00,0x00,0x00]
+#CHECK: chhsi	4095, 0                 # encoding: [0xe5,0x54,0x0f,0xff,0x00,0x00]
+#CHECK: chhsi	0, -32768               # encoding: [0xe5,0x54,0x00,0x00,0x80,0x00]
+#CHECK: chhsi	0, -1                   # encoding: [0xe5,0x54,0x00,0x00,0xff,0xff]
+#CHECK: chhsi	0, 0                    # encoding: [0xe5,0x54,0x00,0x00,0x00,0x00]
+#CHECK: chhsi	0, 1                    # encoding: [0xe5,0x54,0x00,0x00,0x00,0x01]
+#CHECK: chhsi	0, 32767                # encoding: [0xe5,0x54,0x00,0x00,0x7f,0xff]
+#CHECK: chhsi	0(%r1), 42              # encoding: [0xe5,0x54,0x10,0x00,0x00,0x2a]
+#CHECK: chhsi	0(%r15), 42             # encoding: [0xe5,0x54,0xf0,0x00,0x00,0x2a]
+#CHECK: chhsi	4095(%r1), 42           # encoding: [0xe5,0x54,0x1f,0xff,0x00,0x2a]
+#CHECK: chhsi	4095(%r15), 42          # encoding: [0xe5,0x54,0xff,0xff,0x00,0x2a]
+
+	chhsi	0, 0
+	chhsi	4095, 0
+	chhsi	0, -32768
+	chhsi	0, -1
+	chhsi	0, 0
+	chhsi	0, 1
+	chhsi	0, 32767
+	chhsi	0(%r1), 42
+	chhsi	0(%r15), 42
+	chhsi	4095(%r1), 42
+	chhsi	4095(%r15), 42
diff --git a/test/MC/SystemZ/insn-chhsi-02.s b/test/MC/SystemZ/insn-chhsi-02.s
new file mode 100644
index 0000000..24e8c0c
--- /dev/null
+++ b/test/MC/SystemZ/insn-chhsi-02.s
@@ -0,0 +1,19 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: chhsi	-1, 0
+#CHECK: error: invalid operand
+#CHECK: chhsi	4096, 0
+#CHECK: error: invalid use of indexed addressing
+#CHECK: chhsi	0(%r1,%r2), 0
+#CHECK: error: invalid operand
+#CHECK: chhsi	0, -32769
+#CHECK: error: invalid operand
+#CHECK: chhsi	0, 32768
+
+	chhsi	-1, 0
+	chhsi	4096, 0
+	chhsi	0(%r1,%r2), 0
+	chhsi	0, -32769
+	chhsi	0, 32768
diff --git a/test/MC/SystemZ/insn-chi-01.s b/test/MC/SystemZ/insn-chi-01.s
new file mode 100644
index 0000000..fb44cfc
--- /dev/null
+++ b/test/MC/SystemZ/insn-chi-01.s
@@ -0,0 +1,15 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: chi	%r0, -32768             # encoding: [0xa7,0x0e,0x80,0x00]
+#CHECK: chi	%r0, -1                 # encoding: [0xa7,0x0e,0xff,0xff]
+#CHECK: chi	%r0, 0                  # encoding: [0xa7,0x0e,0x00,0x00]
+#CHECK: chi	%r0, 1                  # encoding: [0xa7,0x0e,0x00,0x01]
+#CHECK: chi	%r0, 32767              # encoding: [0xa7,0x0e,0x7f,0xff]
+#CHECK: chi	%r15, 0                 # encoding: [0xa7,0xfe,0x00,0x00]
+
+	chi	%r0, -32768
+	chi	%r0, -1
+	chi	%r0, 0
+	chi	%r0, 1
+	chi	%r0, 32767
+	chi	%r15, 0
diff --git a/test/MC/SystemZ/insn-chi-02.s b/test/MC/SystemZ/insn-chi-02.s
new file mode 100644
index 0000000..bb9ffdc
--- /dev/null
+++ b/test/MC/SystemZ/insn-chi-02.s
@@ -0,0 +1,13 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: chi	%r0, -32769
+#CHECK: error: invalid operand
+#CHECK: chi	%r0, 32768
+#CHECK: error: invalid operand
+#CHECK: chi	%r0, foo
+
+	chi	%r0, -32769
+	chi	%r0, 32768
+	chi	%r0, foo
diff --git a/test/MC/SystemZ/insn-chrl-01.s b/test/MC/SystemZ/insn-chrl-01.s
new file mode 100644
index 0000000..c133a32
--- /dev/null
+++ b/test/MC/SystemZ/insn-chrl-01.s
@@ -0,0 +1,31 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: chrl	%r0, 2864434397         # encoding: [0xc6,0x05,0x55,0x5d,0xe6,0x6e]
+#CHECK: chrl	%r15, 2864434397        # encoding: [0xc6,0xf5,0x55,0x5d,0xe6,0x6e]
+
+	chrl	%r0,0xaabbccdd
+	chrl	%r15,0xaabbccdd
+
+#CHECK: chrl	%r0, foo                # encoding: [0xc6,0x05,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: chrl	%r15, foo               # encoding: [0xc6,0xf5,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+
+	chrl	%r0,foo
+	chrl	%r15,foo
+
+#CHECK: chrl	%r3, bar+100            # encoding: [0xc6,0x35,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+#CHECK: chrl	%r4, bar+100            # encoding: [0xc6,0x45,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+
+	chrl	%r3,bar+100
+	chrl	%r4,bar+100
+
+#CHECK: chrl	%r7, frob@PLT           # encoding: [0xc6,0x75,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+#CHECK: chrl	%r8, frob@PLT           # encoding: [0xc6,0x85,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+
+	chrl	%r7,frob@PLT
+	chrl	%r8,frob@PLT
diff --git a/test/MC/SystemZ/insn-chsi-01.s b/test/MC/SystemZ/insn-chsi-01.s
new file mode 100644
index 0000000..6d92202
--- /dev/null
+++ b/test/MC/SystemZ/insn-chsi-01.s
@@ -0,0 +1,25 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: chsi	0, 0                    # encoding: [0xe5,0x5c,0x00,0x00,0x00,0x00]
+#CHECK: chsi	4095, 0                 # encoding: [0xe5,0x5c,0x0f,0xff,0x00,0x00]
+#CHECK: chsi	0, -32768               # encoding: [0xe5,0x5c,0x00,0x00,0x80,0x00]
+#CHECK: chsi	0, -1                   # encoding: [0xe5,0x5c,0x00,0x00,0xff,0xff]
+#CHECK: chsi	0, 0                    # encoding: [0xe5,0x5c,0x00,0x00,0x00,0x00]
+#CHECK: chsi	0, 1                    # encoding: [0xe5,0x5c,0x00,0x00,0x00,0x01]
+#CHECK: chsi	0, 32767                # encoding: [0xe5,0x5c,0x00,0x00,0x7f,0xff]
+#CHECK: chsi	0(%r1), 42              # encoding: [0xe5,0x5c,0x10,0x00,0x00,0x2a]
+#CHECK: chsi	0(%r15), 42             # encoding: [0xe5,0x5c,0xf0,0x00,0x00,0x2a]
+#CHECK: chsi	4095(%r1), 42           # encoding: [0xe5,0x5c,0x1f,0xff,0x00,0x2a]
+#CHECK: chsi	4095(%r15), 42          # encoding: [0xe5,0x5c,0xff,0xff,0x00,0x2a]
+
+	chsi	0, 0
+	chsi	4095, 0
+	chsi	0, -32768
+	chsi	0, -1
+	chsi	0, 0
+	chsi	0, 1
+	chsi	0, 32767
+	chsi	0(%r1), 42
+	chsi	0(%r15), 42
+	chsi	4095(%r1), 42
+	chsi	4095(%r15), 42
diff --git a/test/MC/SystemZ/insn-chsi-02.s b/test/MC/SystemZ/insn-chsi-02.s
new file mode 100644
index 0000000..16ace53
--- /dev/null
+++ b/test/MC/SystemZ/insn-chsi-02.s
@@ -0,0 +1,19 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: chsi	-1, 0
+#CHECK: error: invalid operand
+#CHECK: chsi	4096, 0
+#CHECK: error: invalid use of indexed addressing
+#CHECK: chsi	0(%r1,%r2), 0
+#CHECK: error: invalid operand
+#CHECK: chsi	0, -32769
+#CHECK: error: invalid operand
+#CHECK: chsi	0, 32768
+
+	chsi	-1, 0
+	chsi	4096, 0
+	chsi	0(%r1,%r2), 0
+	chsi	0, -32769
+	chsi	0, 32768
diff --git a/test/MC/SystemZ/insn-chy-01.s b/test/MC/SystemZ/insn-chy-01.s
new file mode 100644
index 0000000..9ecc055
--- /dev/null
+++ b/test/MC/SystemZ/insn-chy-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: chy	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x79]
+#CHECK: chy	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x79]
+#CHECK: chy	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x79]
+#CHECK: chy	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x79]
+#CHECK: chy	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x79]
+#CHECK: chy	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x79]
+#CHECK: chy	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x79]
+#CHECK: chy	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x79]
+#CHECK: chy	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x79]
+#CHECK: chy	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x79]
+
+	chy	%r0, -524288
+	chy	%r0, -1
+	chy	%r0, 0
+	chy	%r0, 1
+	chy	%r0, 524287
+	chy	%r0, 0(%r1)
+	chy	%r0, 0(%r15)
+	chy	%r0, 524287(%r1,%r15)
+	chy	%r0, 524287(%r15,%r1)
+	chy	%r15, 0
diff --git a/test/MC/SystemZ/insn-chy-02.s b/test/MC/SystemZ/insn-chy-02.s
new file mode 100644
index 0000000..8ab849f
--- /dev/null
+++ b/test/MC/SystemZ/insn-chy-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: chy	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: chy	%r0, 524288
+
+	chy	%r0, -524289
+	chy	%r0, 524288
diff --git a/test/MC/SystemZ/insn-cl-01.s b/test/MC/SystemZ/insn-cl-01.s
new file mode 100644
index 0000000..7face8f
--- /dev/null
+++ b/test/MC/SystemZ/insn-cl-01.s
@@ -0,0 +1,17 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: cl	%r0, 0                  # encoding: [0x55,0x00,0x00,0x00]
+#CHECK: cl	%r0, 4095               # encoding: [0x55,0x00,0x0f,0xff]
+#CHECK: cl	%r0, 0(%r1)             # encoding: [0x55,0x00,0x10,0x00]
+#CHECK: cl	%r0, 0(%r15)            # encoding: [0x55,0x00,0xf0,0x00]
+#CHECK: cl	%r0, 4095(%r1,%r15)     # encoding: [0x55,0x01,0xff,0xff]
+#CHECK: cl	%r0, 4095(%r15,%r1)     # encoding: [0x55,0x0f,0x1f,0xff]
+#CHECK: cl	%r15, 0                 # encoding: [0x55,0xf0,0x00,0x00]
+
+	cl	%r0, 0
+	cl	%r0, 4095
+	cl	%r0, 0(%r1)
+	cl	%r0, 0(%r15)
+	cl	%r0, 4095(%r1,%r15)
+	cl	%r0, 4095(%r15,%r1)
+	cl	%r15, 0
diff --git a/test/MC/SystemZ/insn-cl-02.s b/test/MC/SystemZ/insn-cl-02.s
new file mode 100644
index 0000000..ec3e1df
--- /dev/null
+++ b/test/MC/SystemZ/insn-cl-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: cl	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: cl	%r0, 4096
+
+	cl	%r0, -1
+	cl	%r0, 4096
diff --git a/test/MC/SystemZ/insn-clfhsi-01.s b/test/MC/SystemZ/insn-clfhsi-01.s
new file mode 100644
index 0000000..910515d
--- /dev/null
+++ b/test/MC/SystemZ/insn-clfhsi-01.s
@@ -0,0 +1,17 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: clfhsi	0, 0                    # encoding: [0xe5,0x5d,0x00,0x00,0x00,0x00]
+#CHECK: clfhsi	4095, 0                 # encoding: [0xe5,0x5d,0x0f,0xff,0x00,0x00]
+#CHECK: clfhsi	0, 65535                # encoding: [0xe5,0x5d,0x00,0x00,0xff,0xff]
+#CHECK: clfhsi	0(%r1), 42              # encoding: [0xe5,0x5d,0x10,0x00,0x00,0x2a]
+#CHECK: clfhsi	0(%r15), 42             # encoding: [0xe5,0x5d,0xf0,0x00,0x00,0x2a]
+#CHECK: clfhsi	4095(%r1), 42           # encoding: [0xe5,0x5d,0x1f,0xff,0x00,0x2a]
+#CHECK: clfhsi	4095(%r15), 42          # encoding: [0xe5,0x5d,0xff,0xff,0x00,0x2a]
+
+	clfhsi	0, 0
+	clfhsi	4095, 0
+	clfhsi	0, 65535
+	clfhsi	0(%r1), 42
+	clfhsi	0(%r15), 42
+	clfhsi	4095(%r1), 42
+	clfhsi	4095(%r15), 42
diff --git a/test/MC/SystemZ/insn-clfhsi-02.s b/test/MC/SystemZ/insn-clfhsi-02.s
new file mode 100644
index 0000000..4a6f404
--- /dev/null
+++ b/test/MC/SystemZ/insn-clfhsi-02.s
@@ -0,0 +1,19 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: clfhsi	-1, 0
+#CHECK: error: invalid operand
+#CHECK: clfhsi	4096, 0
+#CHECK: error: invalid use of indexed addressing
+#CHECK: clfhsi	0(%r1,%r2), 0
+#CHECK: error: invalid operand
+#CHECK: clfhsi	0, -1
+#CHECK: error: invalid operand
+#CHECK: clfhsi	0, 65536
+
+	clfhsi	-1, 0
+	clfhsi	4096, 0
+	clfhsi	0(%r1,%r2), 0
+	clfhsi	0, -1
+	clfhsi	0, 65536
diff --git a/test/MC/SystemZ/insn-clfi-01.s b/test/MC/SystemZ/insn-clfi-01.s
new file mode 100644
index 0000000..4156c7f
--- /dev/null
+++ b/test/MC/SystemZ/insn-clfi-01.s
@@ -0,0 +1,9 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: clfi	%r0, 0                  # encoding: [0xc2,0x0f,0x00,0x00,0x00,0x00]
+#CHECK: clfi	%r0, 4294967295         # encoding: [0xc2,0x0f,0xff,0xff,0xff,0xff]
+#CHECK: clfi	%r15, 0                 # encoding: [0xc2,0xff,0x00,0x00,0x00,0x00]
+
+	clfi	%r0, 0
+	clfi	%r0, (1 << 32) - 1
+	clfi	%r15, 0
diff --git a/test/MC/SystemZ/insn-clfi-02.s b/test/MC/SystemZ/insn-clfi-02.s
new file mode 100644
index 0000000..9d3f806
--- /dev/null
+++ b/test/MC/SystemZ/insn-clfi-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: clfi	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: clfi	%r0, (1 << 32)
+
+	clfi	%r0, -1
+	clfi	%r0, (1 << 32)
diff --git a/test/MC/SystemZ/insn-clg-01.s b/test/MC/SystemZ/insn-clg-01.s
new file mode 100644
index 0000000..596bae1
--- /dev/null
+++ b/test/MC/SystemZ/insn-clg-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: clg	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x21]
+#CHECK: clg	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x21]
+#CHECK: clg	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x21]
+#CHECK: clg	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x21]
+#CHECK: clg	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x21]
+#CHECK: clg	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x21]
+#CHECK: clg	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x21]
+#CHECK: clg	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x21]
+#CHECK: clg	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x21]
+#CHECK: clg	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x21]
+
+	clg	%r0, -524288
+	clg	%r0, -1
+	clg	%r0, 0
+	clg	%r0, 1
+	clg	%r0, 524287
+	clg	%r0, 0(%r1)
+	clg	%r0, 0(%r15)
+	clg	%r0, 524287(%r1,%r15)
+	clg	%r0, 524287(%r15,%r1)
+	clg	%r15, 0
diff --git a/test/MC/SystemZ/insn-clg-02.s b/test/MC/SystemZ/insn-clg-02.s
new file mode 100644
index 0000000..a17aab5
--- /dev/null
+++ b/test/MC/SystemZ/insn-clg-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: clg	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: clg	%r0, 524288
+
+	clg	%r0, -524289
+	clg	%r0, 524288
diff --git a/test/MC/SystemZ/insn-clgf-01.s b/test/MC/SystemZ/insn-clgf-01.s
new file mode 100644
index 0000000..003ba83
--- /dev/null
+++ b/test/MC/SystemZ/insn-clgf-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: clgf	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x31]
+#CHECK: clgf	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x31]
+#CHECK: clgf	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x31]
+#CHECK: clgf	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x31]
+#CHECK: clgf	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x31]
+#CHECK: clgf	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x31]
+#CHECK: clgf	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x31]
+#CHECK: clgf	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x31]
+#CHECK: clgf	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x31]
+#CHECK: clgf	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x31]
+
+	clgf	%r0, -524288
+	clgf	%r0, -1
+	clgf	%r0, 0
+	clgf	%r0, 1
+	clgf	%r0, 524287
+	clgf	%r0, 0(%r1)
+	clgf	%r0, 0(%r15)
+	clgf	%r0, 524287(%r1,%r15)
+	clgf	%r0, 524287(%r15,%r1)
+	clgf	%r15, 0
diff --git a/test/MC/SystemZ/insn-clgf-02.s b/test/MC/SystemZ/insn-clgf-02.s
new file mode 100644
index 0000000..9de49ae
--- /dev/null
+++ b/test/MC/SystemZ/insn-clgf-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: clgf	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: clgf	%r0, 524288
+
+	clgf	%r0, -524289
+	clgf	%r0, 524288
diff --git a/test/MC/SystemZ/insn-clgfi-01.s b/test/MC/SystemZ/insn-clgfi-01.s
new file mode 100644
index 0000000..dbf4a0e
--- /dev/null
+++ b/test/MC/SystemZ/insn-clgfi-01.s
@@ -0,0 +1,9 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: clgfi	%r0, 0                  # encoding: [0xc2,0x0e,0x00,0x00,0x00,0x00]
+#CHECK: clgfi	%r0, 4294967295         # encoding: [0xc2,0x0e,0xff,0xff,0xff,0xff]
+#CHECK: clgfi	%r15, 0                 # encoding: [0xc2,0xfe,0x00,0x00,0x00,0x00]
+
+	clgfi	%r0, 0
+	clgfi	%r0, (1 << 32) - 1
+	clgfi	%r15, 0
diff --git a/test/MC/SystemZ/insn-clgfi-02.s b/test/MC/SystemZ/insn-clgfi-02.s
new file mode 100644
index 0000000..3f2db33
--- /dev/null
+++ b/test/MC/SystemZ/insn-clgfi-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: clgfi	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: clgfi	%r0, (1 << 32)
+
+	clgfi	%r0, -1
+	clgfi	%r0, (1 << 32)
diff --git a/test/MC/SystemZ/insn-clgfr-01.s b/test/MC/SystemZ/insn-clgfr-01.s
new file mode 100644
index 0000000..37f1e24
--- /dev/null
+++ b/test/MC/SystemZ/insn-clgfr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: clgfr	%r0, %r0                # encoding: [0xb9,0x31,0x00,0x00]
+#CHECK: clgfr	%r0, %r15               # encoding: [0xb9,0x31,0x00,0x0f]
+#CHECK: clgfr	%r15, %r0               # encoding: [0xb9,0x31,0x00,0xf0]
+#CHECK: clgfr	%r7, %r8                # encoding: [0xb9,0x31,0x00,0x78]
+
+	clgfr	%r0,%r0
+	clgfr	%r0,%r15
+	clgfr	%r15,%r0
+	clgfr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-clgfrl-01.s b/test/MC/SystemZ/insn-clgfrl-01.s
new file mode 100644
index 0000000..6fc6d5e
--- /dev/null
+++ b/test/MC/SystemZ/insn-clgfrl-01.s
@@ -0,0 +1,31 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: clgfrl	%r0, 2864434397         # encoding: [0xc6,0x0e,0x55,0x5d,0xe6,0x6e]
+#CHECK: clgfrl	%r15, 2864434397        # encoding: [0xc6,0xfe,0x55,0x5d,0xe6,0x6e]
+
+	clgfrl	%r0,0xaabbccdd
+	clgfrl	%r15,0xaabbccdd
+
+#CHECK: clgfrl	%r0, foo                # encoding: [0xc6,0x0e,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: clgfrl	%r15, foo               # encoding: [0xc6,0xfe,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+
+	clgfrl	%r0,foo
+	clgfrl	%r15,foo
+
+#CHECK: clgfrl	%r3, bar+100            # encoding: [0xc6,0x3e,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+#CHECK: clgfrl	%r4, bar+100            # encoding: [0xc6,0x4e,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+
+	clgfrl	%r3,bar+100
+	clgfrl	%r4,bar+100
+
+#CHECK: clgfrl	%r7, frob@PLT           # encoding: [0xc6,0x7e,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+#CHECK: clgfrl	%r8, frob@PLT           # encoding: [0xc6,0x8e,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+
+	clgfrl	%r7,frob@PLT
+	clgfrl	%r8,frob@PLT
diff --git a/test/MC/SystemZ/insn-clghrl-01.s b/test/MC/SystemZ/insn-clghrl-01.s
new file mode 100644
index 0000000..41c2580
--- /dev/null
+++ b/test/MC/SystemZ/insn-clghrl-01.s
@@ -0,0 +1,31 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: clghrl	%r0, 2864434397         # encoding: [0xc6,0x06,0x55,0x5d,0xe6,0x6e]
+#CHECK: clghrl	%r15, 2864434397        # encoding: [0xc6,0xf6,0x55,0x5d,0xe6,0x6e]
+
+	clghrl	%r0,0xaabbccdd
+	clghrl	%r15,0xaabbccdd
+
+#CHECK: clghrl	%r0, foo                # encoding: [0xc6,0x06,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: clghrl	%r15, foo               # encoding: [0xc6,0xf6,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+
+	clghrl	%r0,foo
+	clghrl	%r15,foo
+
+#CHECK: clghrl	%r3, bar+100            # encoding: [0xc6,0x36,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+#CHECK: clghrl	%r4, bar+100            # encoding: [0xc6,0x46,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+
+	clghrl	%r3,bar+100
+	clghrl	%r4,bar+100
+
+#CHECK: clghrl	%r7, frob@PLT           # encoding: [0xc6,0x76,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+#CHECK: clghrl	%r8, frob@PLT           # encoding: [0xc6,0x86,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+
+	clghrl	%r7,frob@PLT
+	clghrl	%r8,frob@PLT
diff --git a/test/MC/SystemZ/insn-clghsi-01.s b/test/MC/SystemZ/insn-clghsi-01.s
new file mode 100644
index 0000000..05e0c58
--- /dev/null
+++ b/test/MC/SystemZ/insn-clghsi-01.s
@@ -0,0 +1,17 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: clghsi	0, 0                    # encoding: [0xe5,0x59,0x00,0x00,0x00,0x00]
+#CHECK: clghsi	4095, 0                 # encoding: [0xe5,0x59,0x0f,0xff,0x00,0x00]
+#CHECK: clghsi	0, 65535                # encoding: [0xe5,0x59,0x00,0x00,0xff,0xff]
+#CHECK: clghsi	0(%r1), 42              # encoding: [0xe5,0x59,0x10,0x00,0x00,0x2a]
+#CHECK: clghsi	0(%r15), 42             # encoding: [0xe5,0x59,0xf0,0x00,0x00,0x2a]
+#CHECK: clghsi	4095(%r1), 42           # encoding: [0xe5,0x59,0x1f,0xff,0x00,0x2a]
+#CHECK: clghsi	4095(%r15), 42          # encoding: [0xe5,0x59,0xff,0xff,0x00,0x2a]
+
+	clghsi	0, 0
+	clghsi	4095, 0
+	clghsi	0, 65535
+	clghsi	0(%r1), 42
+	clghsi	0(%r15), 42
+	clghsi	4095(%r1), 42
+	clghsi	4095(%r15), 42
diff --git a/test/MC/SystemZ/insn-clghsi-02.s b/test/MC/SystemZ/insn-clghsi-02.s
new file mode 100644
index 0000000..f036128
--- /dev/null
+++ b/test/MC/SystemZ/insn-clghsi-02.s
@@ -0,0 +1,19 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: clghsi	-1, 0
+#CHECK: error: invalid operand
+#CHECK: clghsi	4096, 0
+#CHECK: error: invalid use of indexed addressing
+#CHECK: clghsi	0(%r1,%r2), 0
+#CHECK: error: invalid operand
+#CHECK: clghsi	0, -1
+#CHECK: error: invalid operand
+#CHECK: clghsi	0, 65536
+
+	clghsi	-1, 0
+	clghsi	4096, 0
+	clghsi	0(%r1,%r2), 0
+	clghsi	0, -1
+	clghsi	0, 65536
diff --git a/test/MC/SystemZ/insn-clgr-01.s b/test/MC/SystemZ/insn-clgr-01.s
new file mode 100644
index 0000000..7e9d2ad
--- /dev/null
+++ b/test/MC/SystemZ/insn-clgr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: clgr	%r0, %r0                # encoding: [0xb9,0x21,0x00,0x00]
+#CHECK: clgr	%r0, %r15               # encoding: [0xb9,0x21,0x00,0x0f]
+#CHECK: clgr	%r15, %r0               # encoding: [0xb9,0x21,0x00,0xf0]
+#CHECK: clgr	%r7, %r8                # encoding: [0xb9,0x21,0x00,0x78]
+
+	clgr	%r0,%r0
+	clgr	%r0,%r15
+	clgr	%r15,%r0
+	clgr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-clgrl-01.s b/test/MC/SystemZ/insn-clgrl-01.s
new file mode 100644
index 0000000..439bcd9
--- /dev/null
+++ b/test/MC/SystemZ/insn-clgrl-01.s
@@ -0,0 +1,31 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: clgrl	%r0, 2864434397         # encoding: [0xc6,0x0a,0x55,0x5d,0xe6,0x6e]
+#CHECK: clgrl	%r15, 2864434397        # encoding: [0xc6,0xfa,0x55,0x5d,0xe6,0x6e]
+
+	clgrl	%r0,0xaabbccdd
+	clgrl	%r15,0xaabbccdd
+
+#CHECK: clgrl	%r0, foo                # encoding: [0xc6,0x0a,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: clgrl	%r15, foo               # encoding: [0xc6,0xfa,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+
+	clgrl	%r0,foo
+	clgrl	%r15,foo
+
+#CHECK: clgrl	%r3, bar+100            # encoding: [0xc6,0x3a,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+#CHECK: clgrl	%r4, bar+100            # encoding: [0xc6,0x4a,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+
+	clgrl	%r3,bar+100
+	clgrl	%r4,bar+100
+
+#CHECK: clgrl	%r7, frob@PLT           # encoding: [0xc6,0x7a,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+#CHECK: clgrl	%r8, frob@PLT           # encoding: [0xc6,0x8a,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+
+	clgrl	%r7,frob@PLT
+	clgrl	%r8,frob@PLT
diff --git a/test/MC/SystemZ/insn-clhhsi-01.s b/test/MC/SystemZ/insn-clhhsi-01.s
new file mode 100644
index 0000000..ae72ffa
--- /dev/null
+++ b/test/MC/SystemZ/insn-clhhsi-01.s
@@ -0,0 +1,17 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: clhhsi	0, 0                    # encoding: [0xe5,0x55,0x00,0x00,0x00,0x00]
+#CHECK: clhhsi	4095, 0                 # encoding: [0xe5,0x55,0x0f,0xff,0x00,0x00]
+#CHECK: clhhsi	0, 65535                # encoding: [0xe5,0x55,0x00,0x00,0xff,0xff]
+#CHECK: clhhsi	0(%r1), 42              # encoding: [0xe5,0x55,0x10,0x00,0x00,0x2a]
+#CHECK: clhhsi	0(%r15), 42             # encoding: [0xe5,0x55,0xf0,0x00,0x00,0x2a]
+#CHECK: clhhsi	4095(%r1), 42           # encoding: [0xe5,0x55,0x1f,0xff,0x00,0x2a]
+#CHECK: clhhsi	4095(%r15), 42          # encoding: [0xe5,0x55,0xff,0xff,0x00,0x2a]
+
+	clhhsi	0, 0
+	clhhsi	4095, 0
+	clhhsi	0, 65535
+	clhhsi	0(%r1), 42
+	clhhsi	0(%r15), 42
+	clhhsi	4095(%r1), 42
+	clhhsi	4095(%r15), 42
diff --git a/test/MC/SystemZ/insn-clhhsi-02.s b/test/MC/SystemZ/insn-clhhsi-02.s
new file mode 100644
index 0000000..bbdf7cd
--- /dev/null
+++ b/test/MC/SystemZ/insn-clhhsi-02.s
@@ -0,0 +1,19 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: clhhsi	-1, 0
+#CHECK: error: invalid operand
+#CHECK: clhhsi	4096, 0
+#CHECK: error: invalid use of indexed addressing
+#CHECK: clhhsi	0(%r1,%r2), 0
+#CHECK: error: invalid operand
+#CHECK: clhhsi	0, -1
+#CHECK: error: invalid operand
+#CHECK: clhhsi	0, 65536
+
+	clhhsi	-1, 0
+	clhhsi	4096, 0
+	clhhsi	0(%r1,%r2), 0
+	clhhsi	0, -1
+	clhhsi	0, 65536
diff --git a/test/MC/SystemZ/insn-clhrl-01.s b/test/MC/SystemZ/insn-clhrl-01.s
new file mode 100644
index 0000000..b424de8
--- /dev/null
+++ b/test/MC/SystemZ/insn-clhrl-01.s
@@ -0,0 +1,31 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: clhrl	%r0, 2864434397         # encoding: [0xc6,0x07,0x55,0x5d,0xe6,0x6e]
+#CHECK: clhrl	%r15, 2864434397        # encoding: [0xc6,0xf7,0x55,0x5d,0xe6,0x6e]
+
+	clhrl	%r0,0xaabbccdd
+	clhrl	%r15,0xaabbccdd
+
+#CHECK: clhrl	%r0, foo                # encoding: [0xc6,0x07,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: clhrl	%r15, foo               # encoding: [0xc6,0xf7,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+
+	clhrl	%r0,foo
+	clhrl	%r15,foo
+
+#CHECK: clhrl	%r3, bar+100            # encoding: [0xc6,0x37,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+#CHECK: clhrl	%r4, bar+100            # encoding: [0xc6,0x47,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+
+	clhrl	%r3,bar+100
+	clhrl	%r4,bar+100
+
+#CHECK: clhrl	%r7, frob@PLT           # encoding: [0xc6,0x77,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+#CHECK: clhrl	%r8, frob@PLT           # encoding: [0xc6,0x87,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+
+	clhrl	%r7,frob@PLT
+	clhrl	%r8,frob@PLT
diff --git a/test/MC/SystemZ/insn-cli-01.s b/test/MC/SystemZ/insn-cli-01.s
new file mode 100644
index 0000000..23bccfa
--- /dev/null
+++ b/test/MC/SystemZ/insn-cli-01.s
@@ -0,0 +1,17 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: cli	0, 0                    # encoding: [0x95,0x00,0x00,0x00]
+#CHECK: cli	4095, 0                 # encoding: [0x95,0x00,0x0f,0xff]
+#CHECK: cli	0, 255                  # encoding: [0x95,0xff,0x00,0x00]
+#CHECK: cli	0(%r1), 42              # encoding: [0x95,0x2a,0x10,0x00]
+#CHECK: cli	0(%r15), 42             # encoding: [0x95,0x2a,0xf0,0x00]
+#CHECK: cli	4095(%r1), 42           # encoding: [0x95,0x2a,0x1f,0xff]
+#CHECK: cli	4095(%r15), 42          # encoding: [0x95,0x2a,0xff,0xff]
+
+	cli	0, 0
+	cli	4095, 0
+	cli	0, 255
+	cli	0(%r1), 42
+	cli	0(%r15), 42
+	cli	4095(%r1), 42
+	cli	4095(%r15), 42
diff --git a/test/MC/SystemZ/insn-cli-02.s b/test/MC/SystemZ/insn-cli-02.s
new file mode 100644
index 0000000..7fe6fda
--- /dev/null
+++ b/test/MC/SystemZ/insn-cli-02.s
@@ -0,0 +1,19 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: cli	-1, 0
+#CHECK: error: invalid operand
+#CHECK: cli	4096, 0
+#CHECK: error: invalid use of indexed addressing
+#CHECK: cli	0(%r1,%r2), 0
+#CHECK: error: invalid operand
+#CHECK: cli	0, -1
+#CHECK: error: invalid operand
+#CHECK: cli	0, 256
+
+	cli	-1, 0
+	cli	4096, 0
+	cli	0(%r1,%r2), 0
+	cli	0, -1
+	cli	0, 256
diff --git a/test/MC/SystemZ/insn-cliy-01.s b/test/MC/SystemZ/insn-cliy-01.s
new file mode 100644
index 0000000..1a26f60
--- /dev/null
+++ b/test/MC/SystemZ/insn-cliy-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: cliy	-524288, 0              # encoding: [0xeb,0x00,0x00,0x00,0x80,0x55]
+#CHECK: cliy	-1, 0                   # encoding: [0xeb,0x00,0x0f,0xff,0xff,0x55]
+#CHECK: cliy	0, 0                    # encoding: [0xeb,0x00,0x00,0x00,0x00,0x55]
+#CHECK: cliy	1, 0                    # encoding: [0xeb,0x00,0x00,0x01,0x00,0x55]
+#CHECK: cliy	524287, 0               # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0x55]
+#CHECK: cliy	0, 255                  # encoding: [0xeb,0xff,0x00,0x00,0x00,0x55]
+#CHECK: cliy	0(%r1), 42              # encoding: [0xeb,0x2a,0x10,0x00,0x00,0x55]
+#CHECK: cliy	0(%r15), 42             # encoding: [0xeb,0x2a,0xf0,0x00,0x00,0x55]
+#CHECK: cliy	524287(%r1), 42         # encoding: [0xeb,0x2a,0x1f,0xff,0x7f,0x55]
+#CHECK: cliy	524287(%r15), 42        # encoding: [0xeb,0x2a,0xff,0xff,0x7f,0x55]
+
+	cliy	-524288, 0
+	cliy	-1, 0
+	cliy	0, 0
+	cliy	1, 0
+	cliy	524287, 0
+	cliy	0, 255
+	cliy	0(%r1), 42
+	cliy	0(%r15), 42
+	cliy	524287(%r1), 42
+	cliy	524287(%r15), 42
diff --git a/test/MC/SystemZ/insn-cliy-02.s b/test/MC/SystemZ/insn-cliy-02.s
new file mode 100644
index 0000000..3e80563
--- /dev/null
+++ b/test/MC/SystemZ/insn-cliy-02.s
@@ -0,0 +1,19 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: cliy	-524289, 0
+#CHECK: error: invalid operand
+#CHECK: cliy	524288, 0
+#CHECK: error: invalid use of indexed addressing
+#CHECK: cliy	0(%r1,%r2), 0
+#CHECK: error: invalid operand
+#CHECK: cliy	0, -1
+#CHECK: error: invalid operand
+#CHECK: cliy	0, 256
+
+	cliy	-524289, 0
+	cliy	524288, 0
+	cliy	0(%r1,%r2), 0
+	cliy	0, -1
+	cliy	0, 256
diff --git a/test/MC/SystemZ/insn-clr-01.s b/test/MC/SystemZ/insn-clr-01.s
new file mode 100644
index 0000000..d187d4e
--- /dev/null
+++ b/test/MC/SystemZ/insn-clr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: clr	%r0, %r0                # encoding: [0x15,0x00]
+#CHECK: clr	%r0, %r15               # encoding: [0x15,0x0f]
+#CHECK: clr	%r15, %r0               # encoding: [0x15,0xf0]
+#CHECK: clr	%r7, %r8                # encoding: [0x15,0x78]
+
+	clr	%r0,%r0
+	clr	%r0,%r15
+	clr	%r15,%r0
+	clr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-clrl-01.s b/test/MC/SystemZ/insn-clrl-01.s
new file mode 100644
index 0000000..4c6e649
--- /dev/null
+++ b/test/MC/SystemZ/insn-clrl-01.s
@@ -0,0 +1,31 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: clrl	%r0, 2864434397         # encoding: [0xc6,0x0f,0x55,0x5d,0xe6,0x6e]
+#CHECK: clrl	%r15, 2864434397        # encoding: [0xc6,0xff,0x55,0x5d,0xe6,0x6e]
+
+	clrl	%r0,0xaabbccdd
+	clrl	%r15,0xaabbccdd
+
+#CHECK: clrl	%r0, foo                # encoding: [0xc6,0x0f,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: clrl	%r15, foo               # encoding: [0xc6,0xff,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+
+	clrl	%r0,foo
+	clrl	%r15,foo
+
+#CHECK: clrl	%r3, bar+100            # encoding: [0xc6,0x3f,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+#CHECK: clrl	%r4, bar+100            # encoding: [0xc6,0x4f,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+
+	clrl	%r3,bar+100
+	clrl	%r4,bar+100
+
+#CHECK: clrl	%r7, frob@PLT           # encoding: [0xc6,0x7f,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+#CHECK: clrl	%r8, frob@PLT           # encoding: [0xc6,0x8f,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+
+	clrl	%r7,frob@PLT
+	clrl	%r8,frob@PLT
diff --git a/test/MC/SystemZ/insn-cly-01.s b/test/MC/SystemZ/insn-cly-01.s
new file mode 100644
index 0000000..8fb4af6
--- /dev/null
+++ b/test/MC/SystemZ/insn-cly-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: cly	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x55]
+#CHECK: cly	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x55]
+#CHECK: cly	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x55]
+#CHECK: cly	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x55]
+#CHECK: cly	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x55]
+#CHECK: cly	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x55]
+#CHECK: cly	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x55]
+#CHECK: cly	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x55]
+#CHECK: cly	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x55]
+#CHECK: cly	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x55]
+
+	cly	%r0, -524288
+	cly	%r0, -1
+	cly	%r0, 0
+	cly	%r0, 1
+	cly	%r0, 524287
+	cly	%r0, 0(%r1)
+	cly	%r0, 0(%r15)
+	cly	%r0, 524287(%r1,%r15)
+	cly	%r0, 524287(%r15,%r1)
+	cly	%r15, 0
diff --git a/test/MC/SystemZ/insn-cly-02.s b/test/MC/SystemZ/insn-cly-02.s
new file mode 100644
index 0000000..23f37a9
--- /dev/null
+++ b/test/MC/SystemZ/insn-cly-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: cly	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: cly	%r0, 524288
+
+	cly	%r0, -524289
+	cly	%r0, 524288
diff --git a/test/MC/SystemZ/insn-cpsdr-01.s b/test/MC/SystemZ/insn-cpsdr-01.s
new file mode 100644
index 0000000..23d773d
--- /dev/null
+++ b/test/MC/SystemZ/insn-cpsdr-01.s
@@ -0,0 +1,16 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: cpsdr	%f0, %f0, %f0           # encoding: [0xb3,0x72,0x00,0x00]
+#CHECK: cpsdr	%f0, %f0, %f15          # encoding: [0xb3,0x72,0x00,0x0f]
+#CHECK: cpsdr	%f0, %f15, %f0          # encoding: [0xb3,0x72,0xf0,0x00]
+#CHECK: cpsdr	%f15, %f0, %f0          # encoding: [0xb3,0x72,0x00,0xf0]
+#CHECK: cpsdr	%f1, %f2, %f3           # encoding: [0xb3,0x72,0x20,0x13]
+#CHECK: cpsdr	%f15, %f15, %f15        # encoding: [0xb3,0x72,0xf0,0xff]
+
+	cpsdr	%f0, %f0, %f0
+	cpsdr	%f0, %f0, %f15
+	cpsdr	%f0, %f15, %f0
+	cpsdr	%f15, %f0, %f0
+	cpsdr	%f1, %f2, %f3
+	cpsdr	%f15, %f15, %f15
+
diff --git a/test/MC/SystemZ/insn-cr-01.s b/test/MC/SystemZ/insn-cr-01.s
new file mode 100644
index 0000000..d77e085
--- /dev/null
+++ b/test/MC/SystemZ/insn-cr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: cr	%r0, %r0                # encoding: [0x19,0x00]
+#CHECK: cr	%r0, %r15               # encoding: [0x19,0x0f]
+#CHECK: cr	%r15, %r0               # encoding: [0x19,0xf0]
+#CHECK: cr	%r7, %r8                # encoding: [0x19,0x78]
+
+	cr	%r0,%r0
+	cr	%r0,%r15
+	cr	%r15,%r0
+	cr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-crl-01.s b/test/MC/SystemZ/insn-crl-01.s
new file mode 100644
index 0000000..2451b4c
--- /dev/null
+++ b/test/MC/SystemZ/insn-crl-01.s
@@ -0,0 +1,31 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: crl	%r0, 2864434397         # encoding: [0xc6,0x0d,0x55,0x5d,0xe6,0x6e]
+#CHECK: crl	%r15, 2864434397        # encoding: [0xc6,0xfd,0x55,0x5d,0xe6,0x6e]
+
+	crl	%r0,0xaabbccdd
+	crl	%r15,0xaabbccdd
+
+#CHECK: crl	%r0, foo                # encoding: [0xc6,0x0d,A,A,A,A]
+#CHECK-NEXT: fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: crl	%r15, foo               # encoding: [0xc6,0xfd,A,A,A,A]
+#CHECK-NEXT: fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+
+	crl	%r0,foo
+	crl	%r15,foo
+
+#CHECK: crl	%r3, bar+100            # encoding: [0xc6,0x3d,A,A,A,A]
+#CHECK-NEXT: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+#CHECK: crl	%r4, bar+100            # encoding: [0xc6,0x4d,A,A,A,A]
+#CHECK-NEXT: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+
+	crl	%r3,bar+100
+	crl	%r4,bar+100
+
+#CHECK: crl	%r7, frob@PLT           # encoding: [0xc6,0x7d,A,A,A,A]
+#CHECK-NEXT: fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+#CHECK: crl	%r8, frob@PLT           # encoding: [0xc6,0x8d,A,A,A,A]
+#CHECK-NEXT: fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+
+	crl	%r7,frob@PLT
+	crl	%r8,frob@PLT
diff --git a/test/MC/SystemZ/insn-cs-01.s b/test/MC/SystemZ/insn-cs-01.s
new file mode 100644
index 0000000..3fc6c34
--- /dev/null
+++ b/test/MC/SystemZ/insn-cs-01.s
@@ -0,0 +1,19 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: cs	%r0, %r0, 0             # encoding: [0xba,0x00,0x00,0x00]
+#CHECK: cs	%r0, %r0, 4095          # encoding: [0xba,0x00,0x0f,0xff]
+#CHECK: cs	%r0, %r0, 0(%r1)        # encoding: [0xba,0x00,0x10,0x00]
+#CHECK: cs	%r0, %r0, 0(%r15)       # encoding: [0xba,0x00,0xf0,0x00]
+#CHECK: cs	%r0, %r0, 4095(%r1)     # encoding: [0xba,0x00,0x1f,0xff]
+#CHECK: cs	%r0, %r0, 4095(%r15)    # encoding: [0xba,0x00,0xff,0xff]
+#CHECK: cs	%r0, %r15, 0            # encoding: [0xba,0x0f,0x00,0x00]
+#CHECK: cs	%r15, %r0, 0            # encoding: [0xba,0xf0,0x00,0x00]
+
+	cs	%r0, %r0, 0
+	cs	%r0, %r0, 4095
+	cs	%r0, %r0, 0(%r1)
+	cs	%r0, %r0, 0(%r15)
+	cs	%r0, %r0, 4095(%r1)
+	cs	%r0, %r0, 4095(%r15)
+	cs	%r0, %r15, 0
+	cs	%r15, %r0, 0
diff --git a/test/MC/SystemZ/insn-cs-02.s b/test/MC/SystemZ/insn-cs-02.s
new file mode 100644
index 0000000..c227959
--- /dev/null
+++ b/test/MC/SystemZ/insn-cs-02.s
@@ -0,0 +1,13 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: cs	%r0, %r0, -1
+#CHECK: error: invalid operand
+#CHECK: cs	%r0, %r0, 4096
+#CHECK: error: invalid use of indexed addressing
+#CHECK: cs	%r0, %r0, 0(%r1,%r2)
+
+	cs	%r0, %r0, -1
+	cs	%r0, %r0, 4096
+	cs	%r0, %r0, 0(%r1,%r2)
diff --git a/test/MC/SystemZ/insn-csg-01.s b/test/MC/SystemZ/insn-csg-01.s
new file mode 100644
index 0000000..b0fcfa6
--- /dev/null
+++ b/test/MC/SystemZ/insn-csg-01.s
@@ -0,0 +1,25 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: csg	%r0, %r0, -524288       # encoding: [0xeb,0x00,0x00,0x00,0x80,0x30]
+#CHECK: csg	%r0, %r0, -1            # encoding: [0xeb,0x00,0x0f,0xff,0xff,0x30]
+#CHECK: csg	%r0, %r0, 0             # encoding: [0xeb,0x00,0x00,0x00,0x00,0x30]
+#CHECK: csg	%r0, %r0, 1             # encoding: [0xeb,0x00,0x00,0x01,0x00,0x30]
+#CHECK: csg	%r0, %r0, 524287        # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0x30]
+#CHECK: csg	%r0, %r0, 0(%r1)        # encoding: [0xeb,0x00,0x10,0x00,0x00,0x30]
+#CHECK: csg	%r0, %r0, 0(%r15)       # encoding: [0xeb,0x00,0xf0,0x00,0x00,0x30]
+#CHECK: csg	%r0, %r0, 524287(%r1)   # encoding: [0xeb,0x00,0x1f,0xff,0x7f,0x30]
+#CHECK: csg	%r0, %r0, 524287(%r15)  # encoding: [0xeb,0x00,0xff,0xff,0x7f,0x30]
+#CHECK: csg	%r0, %r15, 0            # encoding: [0xeb,0x0f,0x00,0x00,0x00,0x30]
+#CHECK: csg	%r15, %r0, 0            # encoding: [0xeb,0xf0,0x00,0x00,0x00,0x30]
+
+	csg	%r0, %r0, -524288
+	csg	%r0, %r0, -1
+	csg	%r0, %r0, 0
+	csg	%r0, %r0, 1
+	csg	%r0, %r0, 524287
+	csg	%r0, %r0, 0(%r1)
+	csg	%r0, %r0, 0(%r15)
+	csg	%r0, %r0, 524287(%r1)
+	csg	%r0, %r0, 524287(%r15)
+	csg	%r0, %r15, 0
+	csg	%r15, %r0, 0
diff --git a/test/MC/SystemZ/insn-csg-02.s b/test/MC/SystemZ/insn-csg-02.s
new file mode 100644
index 0000000..816b155
--- /dev/null
+++ b/test/MC/SystemZ/insn-csg-02.s
@@ -0,0 +1,13 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: csg	%r0, %r0, -524289
+#CHECK: error: invalid operand
+#CHECK: csg	%r0, %r0, 524288
+#CHECK: error: invalid use of indexed addressing
+#CHECK: csg	%r0, %r0, 0(%r1,%r2)
+
+	csg	%r0, %r0, -524289
+	csg	%r0, %r0, 524288
+	csg	%r0, %r0, 0(%r1,%r2)
diff --git a/test/MC/SystemZ/insn-csy-01.s b/test/MC/SystemZ/insn-csy-01.s
new file mode 100644
index 0000000..d19b2df
--- /dev/null
+++ b/test/MC/SystemZ/insn-csy-01.s
@@ -0,0 +1,25 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: csy	%r0, %r0, -524288       # encoding: [0xeb,0x00,0x00,0x00,0x80,0x14]
+#CHECK: csy	%r0, %r0, -1            # encoding: [0xeb,0x00,0x0f,0xff,0xff,0x14]
+#CHECK: csy	%r0, %r0, 0             # encoding: [0xeb,0x00,0x00,0x00,0x00,0x14]
+#CHECK: csy	%r0, %r0, 1             # encoding: [0xeb,0x00,0x00,0x01,0x00,0x14]
+#CHECK: csy	%r0, %r0, 524287        # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0x14]
+#CHECK: csy	%r0, %r0, 0(%r1)        # encoding: [0xeb,0x00,0x10,0x00,0x00,0x14]
+#CHECK: csy	%r0, %r0, 0(%r15)       # encoding: [0xeb,0x00,0xf0,0x00,0x00,0x14]
+#CHECK: csy	%r0, %r0, 524287(%r1)   # encoding: [0xeb,0x00,0x1f,0xff,0x7f,0x14]
+#CHECK: csy	%r0, %r0, 524287(%r15)  # encoding: [0xeb,0x00,0xff,0xff,0x7f,0x14]
+#CHECK: csy	%r0, %r15, 0            # encoding: [0xeb,0x0f,0x00,0x00,0x00,0x14]
+#CHECK: csy	%r15, %r0, 0            # encoding: [0xeb,0xf0,0x00,0x00,0x00,0x14]
+
+	csy	%r0, %r0, -524288
+	csy	%r0, %r0, -1
+	csy	%r0, %r0, 0
+	csy	%r0, %r0, 1
+	csy	%r0, %r0, 524287
+	csy	%r0, %r0, 0(%r1)
+	csy	%r0, %r0, 0(%r15)
+	csy	%r0, %r0, 524287(%r1)
+	csy	%r0, %r0, 524287(%r15)
+	csy	%r0, %r15, 0
+	csy	%r15, %r0, 0
diff --git a/test/MC/SystemZ/insn-csy-02.s b/test/MC/SystemZ/insn-csy-02.s
new file mode 100644
index 0000000..3ff7959
--- /dev/null
+++ b/test/MC/SystemZ/insn-csy-02.s
@@ -0,0 +1,13 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: csy	%r0, %r0, -524289
+#CHECK: error: invalid operand
+#CHECK: csy	%r0, %r0, 524288
+#CHECK: error: invalid use of indexed addressing
+#CHECK: csy	%r0, %r0, 0(%r1,%r2)
+
+	csy	%r0, %r0, -524289
+	csy	%r0, %r0, 524288
+	csy	%r0, %r0, 0(%r1,%r2)
diff --git a/test/MC/SystemZ/insn-cxbr-01.s b/test/MC/SystemZ/insn-cxbr-01.s
new file mode 100644
index 0000000..79527f0
--- /dev/null
+++ b/test/MC/SystemZ/insn-cxbr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: cxbr	%f0, %f0                # encoding: [0xb3,0x49,0x00,0x00]
+#CHECK: cxbr	%f0, %f13               # encoding: [0xb3,0x49,0x00,0x0d]
+#CHECK: cxbr	%f8, %f8                # encoding: [0xb3,0x49,0x00,0x88]
+#CHECK: cxbr	%f13, %f0               # encoding: [0xb3,0x49,0x00,0xd0]
+
+	cxbr	%f0, %f0
+	cxbr	%f0, %f13
+	cxbr	%f8, %f8
+	cxbr	%f13, %f0
diff --git a/test/MC/SystemZ/insn-cxbr-02.s b/test/MC/SystemZ/insn-cxbr-02.s
new file mode 100644
index 0000000..7aaca91
--- /dev/null
+++ b/test/MC/SystemZ/insn-cxbr-02.s
@@ -0,0 +1,17 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid register
+#CHECK: cxbr	%f0, %f2
+#CHECK: error: invalid register
+#CHECK: cxbr	%f0, %f14
+#CHECK: error: invalid register
+#CHECK: cxbr	%f2, %f0
+#CHECK: error: invalid register
+#CHECK: cxbr	%f14, %f0
+
+	cxbr	%f0, %f2
+	cxbr	%f0, %f14
+	cxbr	%f2, %f0
+	cxbr	%f14, %f0
+
diff --git a/test/MC/SystemZ/insn-cxfbr-01.s b/test/MC/SystemZ/insn-cxfbr-01.s
new file mode 100644
index 0000000..14bafd1
--- /dev/null
+++ b/test/MC/SystemZ/insn-cxfbr-01.s
@@ -0,0 +1,13 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: cxfbr	%f0, %r0                # encoding: [0xb3,0x96,0x00,0x00]
+#CHECK: cxfbr	%f0, %r15               # encoding: [0xb3,0x96,0x00,0x0f]
+#CHECK: cxfbr	%f13, %r0               # encoding: [0xb3,0x96,0x00,0xd0]
+#CHECK: cxfbr	%f8, %r7                # encoding: [0xb3,0x96,0x00,0x87]
+#CHECK: cxfbr	%f13, %r15              # encoding: [0xb3,0x96,0x00,0xdf]
+
+	cxfbr	%f0, %r0
+	cxfbr	%f0, %r15
+	cxfbr	%f13, %r0
+	cxfbr	%f8, %r7
+	cxfbr	%f13, %r15
diff --git a/test/MC/SystemZ/insn-cxfbr-02.s b/test/MC/SystemZ/insn-cxfbr-02.s
new file mode 100644
index 0000000..5343378
--- /dev/null
+++ b/test/MC/SystemZ/insn-cxfbr-02.s
@@ -0,0 +1,22 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid register
+#CHECK: cxfbr	%r0, %r0
+#CHECK: error: invalid register
+#CHECK: cxfbr	%f0, %f0
+#CHECK: error: invalid register
+#CHECK: cxfbr	%f0, %a0
+#CHECK: error: invalid register
+#CHECK: cxfbr	%a0, %r0
+#CHECK: error: invalid register
+#CHECK: cxfbr	%f2, %r0
+#CHECK: error: invalid register
+#CHECK: cxfbr	%f14, %r0
+
+	cxfbr	%r0, %r0
+	cxfbr	%f0, %f0
+	cxfbr	%f0, %a0
+	cxfbr	%a0, %r0
+	cxfbr	%f2, %r0
+	cxfbr	%f14, %r0
diff --git a/test/MC/SystemZ/insn-cxgbr-01.s b/test/MC/SystemZ/insn-cxgbr-01.s
new file mode 100644
index 0000000..90914b4
--- /dev/null
+++ b/test/MC/SystemZ/insn-cxgbr-01.s
@@ -0,0 +1,13 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: cxgbr	%f0, %r0                # encoding: [0xb3,0xa6,0x00,0x00]
+#CHECK: cxgbr	%f0, %r15               # encoding: [0xb3,0xa6,0x00,0x0f]
+#CHECK: cxgbr	%f13, %r0               # encoding: [0xb3,0xa6,0x00,0xd0]
+#CHECK: cxgbr	%f8, %r7                # encoding: [0xb3,0xa6,0x00,0x87]
+#CHECK: cxgbr	%f13, %r15              # encoding: [0xb3,0xa6,0x00,0xdf]
+
+	cxgbr	%f0, %r0
+	cxgbr	%f0, %r15
+	cxgbr	%f13, %r0
+	cxgbr	%f8, %r7
+	cxgbr	%f13, %r15
diff --git a/test/MC/SystemZ/insn-cxgbr-02.s b/test/MC/SystemZ/insn-cxgbr-02.s
new file mode 100644
index 0000000..d10664d
--- /dev/null
+++ b/test/MC/SystemZ/insn-cxgbr-02.s
@@ -0,0 +1,22 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid register
+#CHECK: cxgbr	%r0, %r0
+#CHECK: error: invalid register
+#CHECK: cxgbr	%f0, %f0
+#CHECK: error: invalid register
+#CHECK: cxgbr	%f0, %a0
+#CHECK: error: invalid register
+#CHECK: cxgbr	%a0, %r0
+#CHECK: error: invalid register
+#CHECK: cxgbr	%f2, %r0
+#CHECK: error: invalid register
+#CHECK: cxgbr	%f14, %r0
+
+	cxgbr	%r0, %r0
+	cxgbr	%f0, %f0
+	cxgbr	%f0, %a0
+	cxgbr	%a0, %r0
+	cxgbr	%f2, %r0
+	cxgbr	%f14, %r0
diff --git a/test/MC/SystemZ/insn-cy-01.s b/test/MC/SystemZ/insn-cy-01.s
new file mode 100644
index 0000000..5f21b96
--- /dev/null
+++ b/test/MC/SystemZ/insn-cy-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: cy	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x59]
+#CHECK: cy	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x59]
+#CHECK: cy	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x59]
+#CHECK: cy	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x59]
+#CHECK: cy	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x59]
+#CHECK: cy	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x59]
+#CHECK: cy	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x59]
+#CHECK: cy	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x59]
+#CHECK: cy	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x59]
+#CHECK: cy	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x59]
+
+	cy	%r0, -524288
+	cy	%r0, -1
+	cy	%r0, 0
+	cy	%r0, 1
+	cy	%r0, 524287
+	cy	%r0, 0(%r1)
+	cy	%r0, 0(%r15)
+	cy	%r0, 524287(%r1,%r15)
+	cy	%r0, 524287(%r15,%r1)
+	cy	%r15, 0
diff --git a/test/MC/SystemZ/insn-cy-02.s b/test/MC/SystemZ/insn-cy-02.s
new file mode 100644
index 0000000..1c996cd
--- /dev/null
+++ b/test/MC/SystemZ/insn-cy-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: cy	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: cy	%r0, 524288
+
+	cy	%r0, -524289
+	cy	%r0, 524288
diff --git a/test/MC/SystemZ/insn-ddb-01.s b/test/MC/SystemZ/insn-ddb-01.s
new file mode 100644
index 0000000..417af11
--- /dev/null
+++ b/test/MC/SystemZ/insn-ddb-01.s
@@ -0,0 +1,17 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: ddb	%f0, 0                  # encoding: [0xed,0x00,0x00,0x00,0x00,0x1d]
+#CHECK: ddb	%f0, 4095               # encoding: [0xed,0x00,0x0f,0xff,0x00,0x1d]
+#CHECK: ddb	%f0, 0(%r1)             # encoding: [0xed,0x00,0x10,0x00,0x00,0x1d]
+#CHECK: ddb	%f0, 0(%r15)            # encoding: [0xed,0x00,0xf0,0x00,0x00,0x1d]
+#CHECK: ddb	%f0, 4095(%r1,%r15)     # encoding: [0xed,0x01,0xff,0xff,0x00,0x1d]
+#CHECK: ddb	%f0, 4095(%r15,%r1)     # encoding: [0xed,0x0f,0x1f,0xff,0x00,0x1d]
+#CHECK: ddb	%f15, 0                 # encoding: [0xed,0xf0,0x00,0x00,0x00,0x1d]
+
+	ddb	%f0, 0
+	ddb	%f0, 4095
+	ddb	%f0, 0(%r1)
+	ddb	%f0, 0(%r15)
+	ddb	%f0, 4095(%r1,%r15)
+	ddb	%f0, 4095(%r15,%r1)
+	ddb	%f15, 0
diff --git a/test/MC/SystemZ/insn-ddb-02.s b/test/MC/SystemZ/insn-ddb-02.s
new file mode 100644
index 0000000..c6357d1
--- /dev/null
+++ b/test/MC/SystemZ/insn-ddb-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: ddb	%f0, -1
+#CHECK: error: invalid operand
+#CHECK: ddb	%f0, 4096
+
+	ddb	%f0, -1
+	ddb	%f0, 4096
diff --git a/test/MC/SystemZ/insn-ddbr-01.s b/test/MC/SystemZ/insn-ddbr-01.s
new file mode 100644
index 0000000..7ee1fee
--- /dev/null
+++ b/test/MC/SystemZ/insn-ddbr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: ddbr	%f0, %f0                # encoding: [0xb3,0x1d,0x00,0x00]
+#CHECK: ddbr	%f0, %f15               # encoding: [0xb3,0x1d,0x00,0x0f]
+#CHECK: ddbr	%f7, %f8                # encoding: [0xb3,0x1d,0x00,0x78]
+#CHECK: ddbr	%f15, %f0               # encoding: [0xb3,0x1d,0x00,0xf0]
+
+	ddbr	%f0, %f0
+	ddbr	%f0, %f15
+	ddbr	%f7, %f8
+	ddbr	%f15, %f0
diff --git a/test/MC/SystemZ/insn-deb-01.s b/test/MC/SystemZ/insn-deb-01.s
new file mode 100644
index 0000000..93cfb02
--- /dev/null
+++ b/test/MC/SystemZ/insn-deb-01.s
@@ -0,0 +1,17 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: deb	%f0, 0                  # encoding: [0xed,0x00,0x00,0x00,0x00,0x0d]
+#CHECK: deb	%f0, 4095               # encoding: [0xed,0x00,0x0f,0xff,0x00,0x0d]
+#CHECK: deb	%f0, 0(%r1)             # encoding: [0xed,0x00,0x10,0x00,0x00,0x0d]
+#CHECK: deb	%f0, 0(%r15)            # encoding: [0xed,0x00,0xf0,0x00,0x00,0x0d]
+#CHECK: deb	%f0, 4095(%r1,%r15)     # encoding: [0xed,0x01,0xff,0xff,0x00,0x0d]
+#CHECK: deb	%f0, 4095(%r15,%r1)     # encoding: [0xed,0x0f,0x1f,0xff,0x00,0x0d]
+#CHECK: deb	%f15, 0                 # encoding: [0xed,0xf0,0x00,0x00,0x00,0x0d]
+
+	deb	%f0, 0
+	deb	%f0, 4095
+	deb	%f0, 0(%r1)
+	deb	%f0, 0(%r15)
+	deb	%f0, 4095(%r1,%r15)
+	deb	%f0, 4095(%r15,%r1)
+	deb	%f15, 0
diff --git a/test/MC/SystemZ/insn-deb-02.s b/test/MC/SystemZ/insn-deb-02.s
new file mode 100644
index 0000000..e4edd4e
--- /dev/null
+++ b/test/MC/SystemZ/insn-deb-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: deb	%f0, -1
+#CHECK: error: invalid operand
+#CHECK: deb	%f0, 4096
+
+	deb	%f0, -1
+	deb	%f0, 4096
diff --git a/test/MC/SystemZ/insn-debr-01.s b/test/MC/SystemZ/insn-debr-01.s
new file mode 100644
index 0000000..02ee16c
--- /dev/null
+++ b/test/MC/SystemZ/insn-debr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: debr	%f0, %f0                # encoding: [0xb3,0x0d,0x00,0x00]
+#CHECK: debr	%f0, %f15               # encoding: [0xb3,0x0d,0x00,0x0f]
+#CHECK: debr	%f7, %f8                # encoding: [0xb3,0x0d,0x00,0x78]
+#CHECK: debr	%f15, %f0               # encoding: [0xb3,0x0d,0x00,0xf0]
+
+	debr	%f0, %f0
+	debr	%f0, %f15
+	debr	%f7, %f8
+	debr	%f15, %f0
diff --git a/test/MC/SystemZ/insn-dl-01.s b/test/MC/SystemZ/insn-dl-01.s
new file mode 100644
index 0000000..50b24e7
--- /dev/null
+++ b/test/MC/SystemZ/insn-dl-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: dl	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x97]
+#CHECK: dl	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x97]
+#CHECK: dl	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x97]
+#CHECK: dl	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x97]
+#CHECK: dl	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x97]
+#CHECK: dl	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x97]
+#CHECK: dl	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x97]
+#CHECK: dl	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x97]
+#CHECK: dl	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x97]
+#CHECK: dl	%r14, 0                 # encoding: [0xe3,0xe0,0x00,0x00,0x00,0x97]
+
+	dl	%r0, -524288
+	dl	%r0, -1
+	dl	%r0, 0
+	dl	%r0, 1
+	dl	%r0, 524287
+	dl	%r0, 0(%r1)
+	dl	%r0, 0(%r15)
+	dl	%r0, 524287(%r1,%r15)
+	dl	%r0, 524287(%r15,%r1)
+	dl	%r14, 0
diff --git a/test/MC/SystemZ/insn-dl-02.s b/test/MC/SystemZ/insn-dl-02.s
new file mode 100644
index 0000000..8f9f373
--- /dev/null
+++ b/test/MC/SystemZ/insn-dl-02.s
@@ -0,0 +1,16 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: dl	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: dl	%r0, 524288
+#CHECK: error: invalid register
+#CHECK: dl	%r1, 0
+#CHECK: error: invalid register
+#CHECK: dl	%r15, 0
+
+	dl	%r0, -524289
+	dl	%r0, 524288
+	dl	%r1, 0
+	dl	%r15, 0
diff --git a/test/MC/SystemZ/insn-dlg-01.s b/test/MC/SystemZ/insn-dlg-01.s
new file mode 100644
index 0000000..8a304f8
--- /dev/null
+++ b/test/MC/SystemZ/insn-dlg-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: dlg	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x87]
+#CHECK: dlg	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x87]
+#CHECK: dlg	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x87]
+#CHECK: dlg	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x87]
+#CHECK: dlg	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x87]
+#CHECK: dlg	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x87]
+#CHECK: dlg	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x87]
+#CHECK: dlg	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x87]
+#CHECK: dlg	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x87]
+#CHECK: dlg	%r14, 0                 # encoding: [0xe3,0xe0,0x00,0x00,0x00,0x87]
+
+	dlg	%r0, -524288
+	dlg	%r0, -1
+	dlg	%r0, 0
+	dlg	%r0, 1
+	dlg	%r0, 524287
+	dlg	%r0, 0(%r1)
+	dlg	%r0, 0(%r15)
+	dlg	%r0, 524287(%r1,%r15)
+	dlg	%r0, 524287(%r15,%r1)
+	dlg	%r14, 0
diff --git a/test/MC/SystemZ/insn-dlg-02.s b/test/MC/SystemZ/insn-dlg-02.s
new file mode 100644
index 0000000..cbed898
--- /dev/null
+++ b/test/MC/SystemZ/insn-dlg-02.s
@@ -0,0 +1,16 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: dlg	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: dlg	%r0, 524288
+#CHECK: error: invalid register
+#CHECK: dlg	%r1, 0
+#CHECK: error: invalid register
+#CHECK: dlg	%r15, 0
+
+	dlg	%r0, -524289
+	dlg	%r0, 524288
+	dlg	%r1, 0
+	dlg	%r15, 0
diff --git a/test/MC/SystemZ/insn-dlgr-01.s b/test/MC/SystemZ/insn-dlgr-01.s
new file mode 100644
index 0000000..b2a4de5
--- /dev/null
+++ b/test/MC/SystemZ/insn-dlgr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: dlgr	%r0, %r0                # encoding: [0xb9,0x87,0x00,0x00]
+#CHECK: dlgr	%r0, %r15               # encoding: [0xb9,0x87,0x00,0x0f]
+#CHECK: dlgr	%r14, %r0               # encoding: [0xb9,0x87,0x00,0xe0]
+#CHECK: dlgr	%r6, %r9                # encoding: [0xb9,0x87,0x00,0x69]
+
+	dlgr	%r0,%r0
+	dlgr	%r0,%r15
+	dlgr	%r14,%r0
+	dlgr	%r6,%r9
diff --git a/test/MC/SystemZ/insn-dlgr-02.s b/test/MC/SystemZ/insn-dlgr-02.s
new file mode 100644
index 0000000..c407b4f
--- /dev/null
+++ b/test/MC/SystemZ/insn-dlgr-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid register
+#CHECK: dlgr	%r1, %r0
+#CHECK: error: invalid register
+#CHECK: dlgr	%r15, %r0
+
+	dlgr	%r1, %r0
+	dlgr	%r15, %r0
diff --git a/test/MC/SystemZ/insn-dlr-01.s b/test/MC/SystemZ/insn-dlr-01.s
new file mode 100644
index 0000000..7e8be79
--- /dev/null
+++ b/test/MC/SystemZ/insn-dlr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: dlr	%r0, %r0                # encoding: [0xb9,0x97,0x00,0x00]
+#CHECK: dlr	%r0, %r15               # encoding: [0xb9,0x97,0x00,0x0f]
+#CHECK: dlr	%r14, %r0               # encoding: [0xb9,0x97,0x00,0xe0]
+#CHECK: dlr	%r6, %r9                # encoding: [0xb9,0x97,0x00,0x69]
+
+	dlr	%r0,%r0
+	dlr	%r0,%r15
+	dlr	%r14,%r0
+	dlr	%r6,%r9
diff --git a/test/MC/SystemZ/insn-dlr-02.s b/test/MC/SystemZ/insn-dlr-02.s
new file mode 100644
index 0000000..eb31e18
--- /dev/null
+++ b/test/MC/SystemZ/insn-dlr-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid register
+#CHECK: dlr	%r1, %r0
+#CHECK: error: invalid register
+#CHECK: dlr	%r15, %r0
+
+	dlr	%r1, %r0
+	dlr	%r15, %r0
diff --git a/test/MC/SystemZ/insn-dsg-01.s b/test/MC/SystemZ/insn-dsg-01.s
new file mode 100644
index 0000000..5cd0b40
--- /dev/null
+++ b/test/MC/SystemZ/insn-dsg-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: dsg	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x0d]
+#CHECK: dsg	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x0d]
+#CHECK: dsg	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x0d]
+#CHECK: dsg	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x0d]
+#CHECK: dsg	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x0d]
+#CHECK: dsg	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x0d]
+#CHECK: dsg	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x0d]
+#CHECK: dsg	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x0d]
+#CHECK: dsg	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x0d]
+#CHECK: dsg	%r14, 0                 # encoding: [0xe3,0xe0,0x00,0x00,0x00,0x0d]
+
+	dsg	%r0, -524288
+	dsg	%r0, -1
+	dsg	%r0, 0
+	dsg	%r0, 1
+	dsg	%r0, 524287
+	dsg	%r0, 0(%r1)
+	dsg	%r0, 0(%r15)
+	dsg	%r0, 524287(%r1,%r15)
+	dsg	%r0, 524287(%r15,%r1)
+	dsg	%r14, 0
diff --git a/test/MC/SystemZ/insn-dsg-02.s b/test/MC/SystemZ/insn-dsg-02.s
new file mode 100644
index 0000000..1697941
--- /dev/null
+++ b/test/MC/SystemZ/insn-dsg-02.s
@@ -0,0 +1,16 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: dsg	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: dsg	%r0, 524288
+#CHECK: error: invalid register
+#CHECK: dsg	%r1, 0
+#CHECK: error: invalid register
+#CHECK: dsg	%r15, 0
+
+	dsg	%r0, -524289
+	dsg	%r0, 524288
+	dsg	%r1, 0
+	dsg	%r15, 0
diff --git a/test/MC/SystemZ/insn-dsgf-01.s b/test/MC/SystemZ/insn-dsgf-01.s
new file mode 100644
index 0000000..2cde0c7
--- /dev/null
+++ b/test/MC/SystemZ/insn-dsgf-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: dsgf	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x1d]
+#CHECK: dsgf	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x1d]
+#CHECK: dsgf	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x1d]
+#CHECK: dsgf	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x1d]
+#CHECK: dsgf	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x1d]
+#CHECK: dsgf	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x1d]
+#CHECK: dsgf	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x1d]
+#CHECK: dsgf	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x1d]
+#CHECK: dsgf	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x1d]
+#CHECK: dsgf	%r14, 0                 # encoding: [0xe3,0xe0,0x00,0x00,0x00,0x1d]
+
+	dsgf	%r0, -524288
+	dsgf	%r0, -1
+	dsgf	%r0, 0
+	dsgf	%r0, 1
+	dsgf	%r0, 524287
+	dsgf	%r0, 0(%r1)
+	dsgf	%r0, 0(%r15)
+	dsgf	%r0, 524287(%r1,%r15)
+	dsgf	%r0, 524287(%r15,%r1)
+	dsgf	%r14, 0
diff --git a/test/MC/SystemZ/insn-dsgf-02.s b/test/MC/SystemZ/insn-dsgf-02.s
new file mode 100644
index 0000000..253d9ad
--- /dev/null
+++ b/test/MC/SystemZ/insn-dsgf-02.s
@@ -0,0 +1,16 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: dsgf	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: dsgf	%r0, 524288
+#CHECK: error: invalid register
+#CHECK: dsgf	%r1, 0
+#CHECK: error: invalid register
+#CHECK: dsgf	%r15, 0
+
+	dsgf	%r0, -524289
+	dsgf	%r0, 524288
+	dsgf	%r1, 0
+	dsgf	%r15, 0
diff --git a/test/MC/SystemZ/insn-dsgfr-01.s b/test/MC/SystemZ/insn-dsgfr-01.s
new file mode 100644
index 0000000..9b61550
--- /dev/null
+++ b/test/MC/SystemZ/insn-dsgfr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: dsgfr	%r0, %r0                # encoding: [0xb9,0x1d,0x00,0x00]
+#CHECK: dsgfr	%r0, %r15               # encoding: [0xb9,0x1d,0x00,0x0f]
+#CHECK: dsgfr	%r14, %r0               # encoding: [0xb9,0x1d,0x00,0xe0]
+#CHECK: dsgfr	%r6, %r9                # encoding: [0xb9,0x1d,0x00,0x69]
+
+	dsgfr	%r0,%r0
+	dsgfr	%r0,%r15
+	dsgfr	%r14,%r0
+	dsgfr	%r6,%r9
diff --git a/test/MC/SystemZ/insn-dsgfr-02.s b/test/MC/SystemZ/insn-dsgfr-02.s
new file mode 100644
index 0000000..2eb8b23
--- /dev/null
+++ b/test/MC/SystemZ/insn-dsgfr-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid register
+#CHECK: dsgfr	%r1, %r0
+#CHECK: error: invalid register
+#CHECK: dsgfr	%r15, %r0
+
+	dsgfr	%r1, %r0
+	dsgfr	%r15, %r0
diff --git a/test/MC/SystemZ/insn-dsgr-01.s b/test/MC/SystemZ/insn-dsgr-01.s
new file mode 100644
index 0000000..02b4099
--- /dev/null
+++ b/test/MC/SystemZ/insn-dsgr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: dsgr	%r0, %r0                # encoding: [0xb9,0x0d,0x00,0x00]
+#CHECK: dsgr	%r0, %r15               # encoding: [0xb9,0x0d,0x00,0x0f]
+#CHECK: dsgr	%r14, %r0               # encoding: [0xb9,0x0d,0x00,0xe0]
+#CHECK: dsgr	%r6, %r9                # encoding: [0xb9,0x0d,0x00,0x69]
+
+	dsgr	%r0,%r0
+	dsgr	%r0,%r15
+	dsgr	%r14,%r0
+	dsgr	%r6,%r9
diff --git a/test/MC/SystemZ/insn-dsgr-02.s b/test/MC/SystemZ/insn-dsgr-02.s
new file mode 100644
index 0000000..0194295
--- /dev/null
+++ b/test/MC/SystemZ/insn-dsgr-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid register
+#CHECK: dsgr	%r1, %r0
+#CHECK: error: invalid register
+#CHECK: dsgr	%r15, %r0
+
+	dsgr	%r1, %r0
+	dsgr	%r15, %r0
diff --git a/test/MC/SystemZ/insn-dxbr-01.s b/test/MC/SystemZ/insn-dxbr-01.s
new file mode 100644
index 0000000..6a45208
--- /dev/null
+++ b/test/MC/SystemZ/insn-dxbr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: dxbr	%f0, %f0                # encoding: [0xb3,0x4d,0x00,0x00]
+#CHECK: dxbr	%f0, %f13               # encoding: [0xb3,0x4d,0x00,0x0d]
+#CHECK: dxbr	%f8, %f8                # encoding: [0xb3,0x4d,0x00,0x88]
+#CHECK: dxbr	%f13, %f0               # encoding: [0xb3,0x4d,0x00,0xd0]
+
+	dxbr	%f0, %f0
+	dxbr	%f0, %f13
+	dxbr	%f8, %f8
+	dxbr	%f13, %f0
diff --git a/test/MC/SystemZ/insn-dxbr-02.s b/test/MC/SystemZ/insn-dxbr-02.s
new file mode 100644
index 0000000..cac6419
--- /dev/null
+++ b/test/MC/SystemZ/insn-dxbr-02.s
@@ -0,0 +1,17 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid register
+#CHECK: dxbr	%f0, %f2
+#CHECK: error: invalid register
+#CHECK: dxbr	%f0, %f14
+#CHECK: error: invalid register
+#CHECK: dxbr	%f2, %f0
+#CHECK: error: invalid register
+#CHECK: dxbr	%f14, %f0
+
+	dxbr	%f0, %f2
+	dxbr	%f0, %f14
+	dxbr	%f2, %f0
+	dxbr	%f14, %f0
+
diff --git a/test/MC/SystemZ/insn-ear-01.s b/test/MC/SystemZ/insn-ear-01.s
new file mode 100644
index 0000000..f614f86
--- /dev/null
+++ b/test/MC/SystemZ/insn-ear-01.s
@@ -0,0 +1,13 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: ear	%r0, %a0                # encoding: [0xb2,0x4f,0x00,0x00]
+#CHECK: ear	%r0, %a15               # encoding: [0xb2,0x4f,0x00,0x0f]
+#CHECK: ear	%r15, %a0               # encoding: [0xb2,0x4f,0x00,0xf0]
+#CHECK: ear	%r7, %a8                # encoding: [0xb2,0x4f,0x00,0x78]
+#CHECK: ear	%r15, %a15              # encoding: [0xb2,0x4f,0x00,0xff]
+
+	ear	%r0, %a0
+	ear	%r0, %a15
+	ear	%r15, %a0
+	ear	%r7, %a8
+	ear	%r15, %a15
diff --git a/test/MC/SystemZ/insn-ear-02.s b/test/MC/SystemZ/insn-ear-02.s
new file mode 100644
index 0000000..7fb35ea
--- /dev/null
+++ b/test/MC/SystemZ/insn-ear-02.s
@@ -0,0 +1,13 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: ear	%r0, 0
+#CHECK: error: invalid register
+#CHECK: ear	%r0, %r0
+#CHECK: error: invalid register
+#CHECK: ear	%a0, %r0
+
+	ear	%r0, 0
+	ear	%r0, %r0
+	ear	%a0, %r0
diff --git a/test/MC/SystemZ/insn-fidbr-01.s b/test/MC/SystemZ/insn-fidbr-01.s
new file mode 100644
index 0000000..e52c91c
--- /dev/null
+++ b/test/MC/SystemZ/insn-fidbr-01.s
@@ -0,0 +1,13 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: fidbr	%f0, 0, %f0             # encoding: [0xb3,0x5f,0x00,0x00]
+#CHECK: fidbr	%f0, 0, %f15            # encoding: [0xb3,0x5f,0x00,0x0f]
+#CHECK: fidbr	%f0, 15, %f0            # encoding: [0xb3,0x5f,0xf0,0x00]
+#CHECK: fidbr	%f4, 5, %f6             # encoding: [0xb3,0x5f,0x50,0x46]
+#CHECK: fidbr	%f15, 0, %f0            # encoding: [0xb3,0x5f,0x00,0xf0]
+
+	fidbr	%f0, 0, %f0
+	fidbr	%f0, 0, %f15
+	fidbr	%f0, 15, %f0
+	fidbr	%f4, 5, %f6
+	fidbr	%f15, 0, %f0
diff --git a/test/MC/SystemZ/insn-fidbr-02.s b/test/MC/SystemZ/insn-fidbr-02.s
new file mode 100644
index 0000000..5a35f46
--- /dev/null
+++ b/test/MC/SystemZ/insn-fidbr-02.s
@@ -0,0 +1,16 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid register
+#CHECK: fidbr	%r0, 0, %f0
+#CHECK: error: invalid register
+#CHECK: fidbr	%f0, 0, %r0
+#CHECK: error: invalid operand
+#CHECK: fidbr	%f0, -1, %f0
+#CHECK: error: invalid operand
+#CHECK: fidbr	%f0, 16, %f0
+
+	fidbr	%r0, 0, %f0
+	fidbr	%f0, 0, %r0
+	fidbr	%f0, -1, %f0
+	fidbr	%f0, 16, %f0
diff --git a/test/MC/SystemZ/insn-fiebr-01.s b/test/MC/SystemZ/insn-fiebr-01.s
new file mode 100644
index 0000000..0b4e633
--- /dev/null
+++ b/test/MC/SystemZ/insn-fiebr-01.s
@@ -0,0 +1,13 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: fiebr	%f0, 0, %f0             # encoding: [0xb3,0x57,0x00,0x00]
+#CHECK: fiebr	%f0, 0, %f15            # encoding: [0xb3,0x57,0x00,0x0f]
+#CHECK: fiebr	%f0, 15, %f0            # encoding: [0xb3,0x57,0xf0,0x00]
+#CHECK: fiebr	%f4, 5, %f6             # encoding: [0xb3,0x57,0x50,0x46]
+#CHECK: fiebr	%f15, 0, %f0            # encoding: [0xb3,0x57,0x00,0xf0]
+
+	fiebr	%f0, 0, %f0
+	fiebr	%f0, 0, %f15
+	fiebr	%f0, 15, %f0
+	fiebr	%f4, 5, %f6
+	fiebr	%f15, 0, %f0
diff --git a/test/MC/SystemZ/insn-fiebr-02.s b/test/MC/SystemZ/insn-fiebr-02.s
new file mode 100644
index 0000000..2ecdd4d
--- /dev/null
+++ b/test/MC/SystemZ/insn-fiebr-02.s
@@ -0,0 +1,16 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid register
+#CHECK: fiebr	%r0, 0, %f0
+#CHECK: error: invalid register
+#CHECK: fiebr	%f0, 0, %r0
+#CHECK: error: invalid operand
+#CHECK: fiebr	%f0, -1, %f0
+#CHECK: error: invalid operand
+#CHECK: fiebr	%f0, 16, %f0
+
+	fiebr	%r0, 0, %f0
+	fiebr	%f0, 0, %r0
+	fiebr	%f0, -1, %f0
+	fiebr	%f0, 16, %f0
diff --git a/test/MC/SystemZ/insn-fixbr-01.s b/test/MC/SystemZ/insn-fixbr-01.s
new file mode 100644
index 0000000..02676ed
--- /dev/null
+++ b/test/MC/SystemZ/insn-fixbr-01.s
@@ -0,0 +1,13 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: fixbr	%f0, 0, %f0             # encoding: [0xb3,0x47,0x00,0x00]
+#CHECK: fixbr	%f0, 0, %f13            # encoding: [0xb3,0x47,0x00,0x0d]
+#CHECK: fixbr	%f0, 15, %f0            # encoding: [0xb3,0x47,0xf0,0x00]
+#CHECK: fixbr	%f4, 5, %f8             # encoding: [0xb3,0x47,0x50,0x48]
+#CHECK: fixbr	%f13, 0, %f0            # encoding: [0xb3,0x47,0x00,0xd0]
+
+	fixbr	%f0, 0, %f0
+	fixbr	%f0, 0, %f13
+	fixbr	%f0, 15, %f0
+	fixbr	%f4, 5, %f8
+	fixbr	%f13, 0, %f0
diff --git a/test/MC/SystemZ/insn-fixbr-02.s b/test/MC/SystemZ/insn-fixbr-02.s
new file mode 100644
index 0000000..3f68734
--- /dev/null
+++ b/test/MC/SystemZ/insn-fixbr-02.s
@@ -0,0 +1,28 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid register
+#CHECK: fixbr	%r0, 0, %f0
+#CHECK: error: invalid register
+#CHECK: fixbr	%f0, 0, %r0
+#CHECK: error: invalid operand
+#CHECK: fixbr	%f0, -1, %f0
+#CHECK: error: invalid operand
+#CHECK: fixbr	%f0, 16, %f0
+#CHECK: error: invalid register
+#CHECK: fixbr	%f0, 0, %f2
+#CHECK: error: invalid register
+#CHECK: fixbr	%f0, 0, %f14
+#CHECK: error: invalid register
+#CHECK: fixbr	%f2, 0, %f0
+#CHECK: error: invalid register
+#CHECK: fixbr	%f14, 0, %f0
+
+	fixbr	%r0, 0, %f0
+	fixbr	%f0, 0, %r0
+	fixbr	%f0, -1, %f0
+	fixbr	%f0, 16, %f0
+	fixbr	%f0, 0, %f2
+	fixbr	%f0, 0, %f14
+	fixbr	%f2, 0, %f0
+	fixbr	%f14, 0, %f0
diff --git a/test/MC/SystemZ/insn-flogr-01.s b/test/MC/SystemZ/insn-flogr-01.s
new file mode 100644
index 0000000..f6031ce
--- /dev/null
+++ b/test/MC/SystemZ/insn-flogr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: flogr	%r0, %r0                # encoding: [0xb9,0x83,0x00,0x00]
+#CHECK: flogr	%r0, %r15               # encoding: [0xb9,0x83,0x00,0x0f]
+#CHECK: flogr	%r10, %r9               # encoding: [0xb9,0x83,0x00,0xa9]
+#CHECK: flogr	%r14, %r0               # encoding: [0xb9,0x83,0x00,0xe0]
+
+	flogr	%r0, %r0
+	flogr	%r0, %r15
+	flogr	%r10, %r9
+	flogr	%r14, %r0
diff --git a/test/MC/SystemZ/insn-flogr-02.s b/test/MC/SystemZ/insn-flogr-02.s
new file mode 100644
index 0000000..e0d117c
--- /dev/null
+++ b/test/MC/SystemZ/insn-flogr-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid register
+#CHECK: flogr	%r1, %r0
+#CHECK: error: invalid register
+#CHECK: flogr	%r15, %r0
+
+	flogr	%r1, %r0
+	flogr	%r15, %r0
diff --git a/test/MC/SystemZ/insn-ic-01.s b/test/MC/SystemZ/insn-ic-01.s
new file mode 100644
index 0000000..76772f9
--- /dev/null
+++ b/test/MC/SystemZ/insn-ic-01.s
@@ -0,0 +1,17 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: ic	%r0, 0                  # encoding: [0x43,0x00,0x00,0x00]
+#CHECK: ic	%r0, 4095               # encoding: [0x43,0x00,0x0f,0xff]
+#CHECK: ic	%r0, 0(%r1)             # encoding: [0x43,0x00,0x10,0x00]
+#CHECK: ic	%r0, 0(%r15)            # encoding: [0x43,0x00,0xf0,0x00]
+#CHECK: ic	%r0, 4095(%r1,%r15)     # encoding: [0x43,0x01,0xff,0xff]
+#CHECK: ic	%r0, 4095(%r15,%r1)     # encoding: [0x43,0x0f,0x1f,0xff]
+#CHECK: ic	%r15, 0                 # encoding: [0x43,0xf0,0x00,0x00]
+
+	ic	%r0, 0
+	ic	%r0, 4095
+	ic	%r0, 0(%r1)
+	ic	%r0, 0(%r15)
+	ic	%r0, 4095(%r1,%r15)
+	ic	%r0, 4095(%r15,%r1)
+	ic	%r15, 0
diff --git a/test/MC/SystemZ/insn-ic-02.s b/test/MC/SystemZ/insn-ic-02.s
new file mode 100644
index 0000000..e70ef1c
--- /dev/null
+++ b/test/MC/SystemZ/insn-ic-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: ic	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: ic	%r0, 4096
+
+	ic	%r0, -1
+	ic	%r0, 4096
diff --git a/test/MC/SystemZ/insn-icy-01.s b/test/MC/SystemZ/insn-icy-01.s
new file mode 100644
index 0000000..079ae21
--- /dev/null
+++ b/test/MC/SystemZ/insn-icy-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: icy	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x73]
+#CHECK: icy	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x73]
+#CHECK: icy	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x73]
+#CHECK: icy	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x73]
+#CHECK: icy	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x73]
+#CHECK: icy	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x73]
+#CHECK: icy	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x73]
+#CHECK: icy	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x73]
+#CHECK: icy	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x73]
+#CHECK: icy	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x73]
+
+	icy	%r0, -524288
+	icy	%r0, -1
+	icy	%r0, 0
+	icy	%r0, 1
+	icy	%r0, 524287
+	icy	%r0, 0(%r1)
+	icy	%r0, 0(%r15)
+	icy	%r0, 524287(%r1,%r15)
+	icy	%r0, 524287(%r15,%r1)
+	icy	%r15, 0
diff --git a/test/MC/SystemZ/insn-icy-02.s b/test/MC/SystemZ/insn-icy-02.s
new file mode 100644
index 0000000..321c86f
--- /dev/null
+++ b/test/MC/SystemZ/insn-icy-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: icy	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: icy	%r0, 524288
+
+	icy	%r0, -524289
+	icy	%r0, 524288
diff --git a/test/MC/SystemZ/insn-iihf-01.s b/test/MC/SystemZ/insn-iihf-01.s
new file mode 100644
index 0000000..bf8d48f
--- /dev/null
+++ b/test/MC/SystemZ/insn-iihf-01.s
@@ -0,0 +1,9 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: iihf	%r0, 0                  # encoding: [0xc0,0x08,0x00,0x00,0x00,0x00]
+#CHECK: iihf	%r0, 4294967295         # encoding: [0xc0,0x08,0xff,0xff,0xff,0xff]
+#CHECK: iihf	%r15, 0                 # encoding: [0xc0,0xf8,0x00,0x00,0x00,0x00]
+
+	iihf	%r0, 0
+	iihf	%r0, 0xffffffff
+	iihf	%r15, 0
diff --git a/test/MC/SystemZ/insn-iihf-02.s b/test/MC/SystemZ/insn-iihf-02.s
new file mode 100644
index 0000000..1c7a69a
--- /dev/null
+++ b/test/MC/SystemZ/insn-iihf-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: iihf	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: iihf	%r0, 1 << 32
+
+	iihf	%r0, -1
+	iihf	%r0, 1 << 32
diff --git a/test/MC/SystemZ/insn-iihh-01.s b/test/MC/SystemZ/insn-iihh-01.s
new file mode 100644
index 0000000..a2ba9a3
--- /dev/null
+++ b/test/MC/SystemZ/insn-iihh-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: iihh	%r0, 0                  # encoding: [0xa5,0x00,0x00,0x00]
+#CHECK: iihh	%r0, 32768              # encoding: [0xa5,0x00,0x80,0x00]
+#CHECK: iihh	%r0, 65535              # encoding: [0xa5,0x00,0xff,0xff]
+#CHECK: iihh	%r15, 0                 # encoding: [0xa5,0xf0,0x00,0x00]
+
+	iihh	%r0, 0
+	iihh	%r0, 0x8000
+	iihh	%r0, 0xffff
+	iihh	%r15, 0
diff --git a/test/MC/SystemZ/insn-iihh-02.s b/test/MC/SystemZ/insn-iihh-02.s
new file mode 100644
index 0000000..2d8f854
--- /dev/null
+++ b/test/MC/SystemZ/insn-iihh-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: iihh	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: iihh	%r0, 0x10000
+
+	iihh	%r0, -1
+	iihh	%r0, 0x10000
diff --git a/test/MC/SystemZ/insn-iihl-01.s b/test/MC/SystemZ/insn-iihl-01.s
new file mode 100644
index 0000000..ff591e8
--- /dev/null
+++ b/test/MC/SystemZ/insn-iihl-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: iihl	%r0, 0                  # encoding: [0xa5,0x01,0x00,0x00]
+#CHECK: iihl	%r0, 32768              # encoding: [0xa5,0x01,0x80,0x00]
+#CHECK: iihl	%r0, 65535              # encoding: [0xa5,0x01,0xff,0xff]
+#CHECK: iihl	%r15, 0                 # encoding: [0xa5,0xf1,0x00,0x00]
+
+	iihl	%r0, 0
+	iihl	%r0, 0x8000
+	iihl	%r0, 0xffff
+	iihl	%r15, 0
diff --git a/test/MC/SystemZ/insn-iihl-02.s b/test/MC/SystemZ/insn-iihl-02.s
new file mode 100644
index 0000000..262955e
--- /dev/null
+++ b/test/MC/SystemZ/insn-iihl-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: iihl	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: iihl	%r0, 0x10000
+
+	iihl	%r0, -1
+	iihl	%r0, 0x10000
diff --git a/test/MC/SystemZ/insn-iilf-01.s b/test/MC/SystemZ/insn-iilf-01.s
new file mode 100644
index 0000000..228e147
--- /dev/null
+++ b/test/MC/SystemZ/insn-iilf-01.s
@@ -0,0 +1,9 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: iilf	%r0, 0                  # encoding: [0xc0,0x09,0x00,0x00,0x00,0x00]
+#CHECK: iilf	%r0, 4294967295         # encoding: [0xc0,0x09,0xff,0xff,0xff,0xff]
+#CHECK: iilf	%r15, 0                 # encoding: [0xc0,0xf9,0x00,0x00,0x00,0x00]
+
+	iilf	%r0, 0
+	iilf	%r0, 0xffffffff
+	iilf	%r15, 0
diff --git a/test/MC/SystemZ/insn-iilf-02.s b/test/MC/SystemZ/insn-iilf-02.s
new file mode 100644
index 0000000..c7571e8
--- /dev/null
+++ b/test/MC/SystemZ/insn-iilf-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: iilf	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: iilf	%r0, 1 << 32
+
+	iilf	%r0, -1
+	iilf	%r0, 1 << 32
diff --git a/test/MC/SystemZ/insn-iilh-01.s b/test/MC/SystemZ/insn-iilh-01.s
new file mode 100644
index 0000000..045ccfe
--- /dev/null
+++ b/test/MC/SystemZ/insn-iilh-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: iilh	%r0, 0                  # encoding: [0xa5,0x02,0x00,0x00]
+#CHECK: iilh	%r0, 32768              # encoding: [0xa5,0x02,0x80,0x00]
+#CHECK: iilh	%r0, 65535              # encoding: [0xa5,0x02,0xff,0xff]
+#CHECK: iilh	%r15, 0                 # encoding: [0xa5,0xf2,0x00,0x00]
+
+	iilh	%r0, 0
+	iilh	%r0, 0x8000
+	iilh	%r0, 0xffff
+	iilh	%r15, 0
diff --git a/test/MC/SystemZ/insn-iilh-02.s b/test/MC/SystemZ/insn-iilh-02.s
new file mode 100644
index 0000000..af5bdac
--- /dev/null
+++ b/test/MC/SystemZ/insn-iilh-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: iilh	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: iilh	%r0, 0x10000
+
+	iilh	%r0, -1
+	iilh	%r0, 0x10000
diff --git a/test/MC/SystemZ/insn-iill-01.s b/test/MC/SystemZ/insn-iill-01.s
new file mode 100644
index 0000000..bf50eeb
--- /dev/null
+++ b/test/MC/SystemZ/insn-iill-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: iill	%r0, 0                  # encoding: [0xa5,0x03,0x00,0x00]
+#CHECK: iill	%r0, 32768              # encoding: [0xa5,0x03,0x80,0x00]
+#CHECK: iill	%r0, 65535              # encoding: [0xa5,0x03,0xff,0xff]
+#CHECK: iill	%r15, 0                 # encoding: [0xa5,0xf3,0x00,0x00]
+
+	iill	%r0, 0
+	iill	%r0, 0x8000
+	iill	%r0, 0xffff
+	iill	%r15, 0
diff --git a/test/MC/SystemZ/insn-iill-02.s b/test/MC/SystemZ/insn-iill-02.s
new file mode 100644
index 0000000..fe31e44
--- /dev/null
+++ b/test/MC/SystemZ/insn-iill-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: iill	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: iill	%r0, 0x10000
+
+	iill	%r0, -1
+	iill	%r0, 0x10000
diff --git a/test/MC/SystemZ/insn-l-01.s b/test/MC/SystemZ/insn-l-01.s
new file mode 100644
index 0000000..a589116
--- /dev/null
+++ b/test/MC/SystemZ/insn-l-01.s
@@ -0,0 +1,17 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: l	%r0, 0                  # encoding: [0x58,0x00,0x00,0x00]
+#CHECK: l	%r0, 4095               # encoding: [0x58,0x00,0x0f,0xff]
+#CHECK: l	%r0, 0(%r1)             # encoding: [0x58,0x00,0x10,0x00]
+#CHECK: l	%r0, 0(%r15)            # encoding: [0x58,0x00,0xf0,0x00]
+#CHECK: l	%r0, 4095(%r1,%r15)     # encoding: [0x58,0x01,0xff,0xff]
+#CHECK: l	%r0, 4095(%r15,%r1)     # encoding: [0x58,0x0f,0x1f,0xff]
+#CHECK: l	%r15, 0                 # encoding: [0x58,0xf0,0x00,0x00]
+
+	l	%r0, 0
+	l	%r0, 4095
+	l	%r0, 0(%r1)
+	l	%r0, 0(%r15)
+	l	%r0, 4095(%r1,%r15)
+	l	%r0, 4095(%r15,%r1)
+	l	%r15, 0
diff --git a/test/MC/SystemZ/insn-l-02.s b/test/MC/SystemZ/insn-l-02.s
new file mode 100644
index 0000000..fad96ff
--- /dev/null
+++ b/test/MC/SystemZ/insn-l-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: l	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: l	%r0, 4096
+
+	l	%r0, -1
+	l	%r0, 4096
diff --git a/test/MC/SystemZ/insn-la-01.s b/test/MC/SystemZ/insn-la-01.s
new file mode 100644
index 0000000..d4776ab
--- /dev/null
+++ b/test/MC/SystemZ/insn-la-01.s
@@ -0,0 +1,17 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: la	%r0, 0                  # encoding: [0x41,0x00,0x00,0x00]
+#CHECK: la	%r0, 4095               # encoding: [0x41,0x00,0x0f,0xff]
+#CHECK: la	%r0, 0(%r1)             # encoding: [0x41,0x00,0x10,0x00]
+#CHECK: la	%r0, 0(%r15)            # encoding: [0x41,0x00,0xf0,0x00]
+#CHECK: la	%r0, 4095(%r1,%r15)     # encoding: [0x41,0x01,0xff,0xff]
+#CHECK: la	%r0, 4095(%r15,%r1)     # encoding: [0x41,0x0f,0x1f,0xff]
+#CHECK: la	%r15, 0                 # encoding: [0x41,0xf0,0x00,0x00]
+
+	la	%r0, 0
+	la	%r0, 4095
+	la	%r0, 0(%r1)
+	la	%r0, 0(%r15)
+	la	%r0, 4095(%r1,%r15)
+	la	%r0, 4095(%r15,%r1)
+	la	%r15, 0
diff --git a/test/MC/SystemZ/insn-la-02.s b/test/MC/SystemZ/insn-la-02.s
new file mode 100644
index 0000000..35c1ab0
--- /dev/null
+++ b/test/MC/SystemZ/insn-la-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: la	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: la	%r0, 4096
+
+	la	%r0, -1
+	la	%r0, 4096
diff --git a/test/MC/SystemZ/insn-larl-01.s b/test/MC/SystemZ/insn-larl-01.s
new file mode 100644
index 0000000..3d0f98f
--- /dev/null
+++ b/test/MC/SystemZ/insn-larl-01.s
@@ -0,0 +1,31 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: larl	%r0, 2864434397         # encoding: [0xc0,0x00,0x55,0x5d,0xe6,0x6e]
+#CHECK: larl	%r15, 2864434397        # encoding: [0xc0,0xf0,0x55,0x5d,0xe6,0x6e]
+
+	larl	%r0,0xaabbccdd
+	larl	%r15,0xaabbccdd
+
+#CHECK: larl	%r0, foo                # encoding: [0xc0,0x00,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: larl	%r15, foo               # encoding: [0xc0,0xf0,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+
+	larl	%r0,foo
+	larl	%r15,foo
+
+#CHECK: larl	%r3, bar+100            # encoding: [0xc0,0x30,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+#CHECK: larl	%r4, bar+100            # encoding: [0xc0,0x40,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+
+	larl	%r3,bar+100
+	larl	%r4,bar+100
+
+#CHECK: larl	%r7, frob@PLT           # encoding: [0xc0,0x70,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+#CHECK: larl	%r8, frob@PLT           # encoding: [0xc0,0x80,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+
+	larl	%r7,frob@PLT
+	larl	%r8,frob@PLT
diff --git a/test/MC/SystemZ/insn-lay-01.s b/test/MC/SystemZ/insn-lay-01.s
new file mode 100644
index 0000000..daa8828
--- /dev/null
+++ b/test/MC/SystemZ/insn-lay-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: lay	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x71]
+#CHECK: lay	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x71]
+#CHECK: lay	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x71]
+#CHECK: lay	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x71]
+#CHECK: lay	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x71]
+#CHECK: lay	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x71]
+#CHECK: lay	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x71]
+#CHECK: lay	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x71]
+#CHECK: lay	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x71]
+#CHECK: lay	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x71]
+
+	lay	%r0, -524288
+	lay	%r0, -1
+	lay	%r0, 0
+	lay	%r0, 1
+	lay	%r0, 524287
+	lay	%r0, 0(%r1)
+	lay	%r0, 0(%r15)
+	lay	%r0, 524287(%r1,%r15)
+	lay	%r0, 524287(%r15,%r1)
+	lay	%r15, 0
diff --git a/test/MC/SystemZ/insn-lay-02.s b/test/MC/SystemZ/insn-lay-02.s
new file mode 100644
index 0000000..2729eea
--- /dev/null
+++ b/test/MC/SystemZ/insn-lay-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: lay	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: lay	%r0, 524288
+
+	lay	%r0, -524289
+	lay	%r0, 524288
diff --git a/test/MC/SystemZ/insn-lb-01.s b/test/MC/SystemZ/insn-lb-01.s
new file mode 100644
index 0000000..e9ee0fa
--- /dev/null
+++ b/test/MC/SystemZ/insn-lb-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: lb	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x76]
+#CHECK: lb	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x76]
+#CHECK: lb	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x76]
+#CHECK: lb	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x76]
+#CHECK: lb	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x76]
+#CHECK: lb	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x76]
+#CHECK: lb	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x76]
+#CHECK: lb	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x76]
+#CHECK: lb	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x76]
+#CHECK: lb	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x76]
+
+	lb	%r0, -524288
+	lb	%r0, -1
+	lb	%r0, 0
+	lb	%r0, 1
+	lb	%r0, 524287
+	lb	%r0, 0(%r1)
+	lb	%r0, 0(%r15)
+	lb	%r0, 524287(%r1,%r15)
+	lb	%r0, 524287(%r15,%r1)
+	lb	%r15, 0
diff --git a/test/MC/SystemZ/insn-lb-02.s b/test/MC/SystemZ/insn-lb-02.s
new file mode 100644
index 0000000..e65edaf
--- /dev/null
+++ b/test/MC/SystemZ/insn-lb-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: lb	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: lb	%r0, 524288
+
+	lb	%r0, -524289
+	lb	%r0, 524288
diff --git a/test/MC/SystemZ/insn-lbr-01.s b/test/MC/SystemZ/insn-lbr-01.s
new file mode 100644
index 0000000..cb4ead0
--- /dev/null
+++ b/test/MC/SystemZ/insn-lbr-01.s
@@ -0,0 +1,9 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: lbr	%r0, %r15               # encoding: [0xb9,0x26,0x00,0x0f]
+#CHECK: lbr	%r7, %r8                # encoding: [0xb9,0x26,0x00,0x78]
+#CHECK: lbr	%r15, %r0               # encoding: [0xb9,0x26,0x00,0xf0]
+
+	lbr	%r0, %r15
+	lbr	%r7, %r8
+	lbr	%r15, %r0
diff --git a/test/MC/SystemZ/insn-lcdbr-01.s b/test/MC/SystemZ/insn-lcdbr-01.s
new file mode 100644
index 0000000..347cab5
--- /dev/null
+++ b/test/MC/SystemZ/insn-lcdbr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: lcdbr	%f0, %f9                # encoding: [0xb3,0x13,0x00,0x09]
+#CHECK: lcdbr	%f0, %f15               # encoding: [0xb3,0x13,0x00,0x0f]
+#CHECK: lcdbr	%f15, %f0               # encoding: [0xb3,0x13,0x00,0xf0]
+#CHECK: lcdbr	%f15, %f9               # encoding: [0xb3,0x13,0x00,0xf9]
+
+	lcdbr	%f0,%f9
+	lcdbr	%f0,%f15
+	lcdbr	%f15,%f0
+	lcdbr	%f15,%f9
diff --git a/test/MC/SystemZ/insn-lcebr-01.s b/test/MC/SystemZ/insn-lcebr-01.s
new file mode 100644
index 0000000..e31822f
--- /dev/null
+++ b/test/MC/SystemZ/insn-lcebr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: lcebr	%f0, %f9                # encoding: [0xb3,0x03,0x00,0x09]
+#CHECK: lcebr	%f0, %f15               # encoding: [0xb3,0x03,0x00,0x0f]
+#CHECK: lcebr	%f15, %f0               # encoding: [0xb3,0x03,0x00,0xf0]
+#CHECK: lcebr	%f15, %f9               # encoding: [0xb3,0x03,0x00,0xf9]
+
+	lcebr	%f0,%f9
+	lcebr	%f0,%f15
+	lcebr	%f15,%f0
+	lcebr	%f15,%f9
diff --git a/test/MC/SystemZ/insn-lcgfr-01.s b/test/MC/SystemZ/insn-lcgfr-01.s
new file mode 100644
index 0000000..bca430b
--- /dev/null
+++ b/test/MC/SystemZ/insn-lcgfr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: lcgfr	%r0, %r0                # encoding: [0xb9,0x13,0x00,0x00]
+#CHECK: lcgfr	%r0, %r15               # encoding: [0xb9,0x13,0x00,0x0f]
+#CHECK: lcgfr	%r15, %r0               # encoding: [0xb9,0x13,0x00,0xf0]
+#CHECK: lcgfr	%r7, %r8                # encoding: [0xb9,0x13,0x00,0x78]
+
+	lcgfr	%r0,%r0
+	lcgfr	%r0,%r15
+	lcgfr	%r15,%r0
+	lcgfr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-lcgr-01.s b/test/MC/SystemZ/insn-lcgr-01.s
new file mode 100644
index 0000000..dc4e94f
--- /dev/null
+++ b/test/MC/SystemZ/insn-lcgr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: lcgr	%r0, %r0                # encoding: [0xb9,0x03,0x00,0x00]
+#CHECK: lcgr	%r0, %r15               # encoding: [0xb9,0x03,0x00,0x0f]
+#CHECK: lcgr	%r15, %r0               # encoding: [0xb9,0x03,0x00,0xf0]
+#CHECK: lcgr	%r7, %r8                # encoding: [0xb9,0x03,0x00,0x78]
+
+	lcgr	%r0,%r0
+	lcgr	%r0,%r15
+	lcgr	%r15,%r0
+	lcgr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-lcr-01.s b/test/MC/SystemZ/insn-lcr-01.s
new file mode 100644
index 0000000..52471cb
--- /dev/null
+++ b/test/MC/SystemZ/insn-lcr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: lcr	%r0, %r0                # encoding: [0x13,0x00]
+#CHECK: lcr	%r0, %r15               # encoding: [0x13,0x0f]
+#CHECK: lcr	%r15, %r0               # encoding: [0x13,0xf0]
+#CHECK: lcr	%r7, %r8                # encoding: [0x13,0x78]
+
+	lcr	%r0,%r0
+	lcr	%r0,%r15
+	lcr	%r15,%r0
+	lcr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-lcxbr-01.s b/test/MC/SystemZ/insn-lcxbr-01.s
new file mode 100644
index 0000000..48c0b8a
--- /dev/null
+++ b/test/MC/SystemZ/insn-lcxbr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: lcxbr	%f0, %f8                # encoding: [0xb3,0x43,0x00,0x08]
+#CHECK: lcxbr	%f0, %f13               # encoding: [0xb3,0x43,0x00,0x0d]
+#CHECK: lcxbr	%f13, %f0               # encoding: [0xb3,0x43,0x00,0xd0]
+#CHECK: lcxbr	%f13, %f9               # encoding: [0xb3,0x43,0x00,0xd9]
+
+	lcxbr	%f0,%f8
+	lcxbr	%f0,%f13
+	lcxbr	%f13,%f0
+	lcxbr	%f13,%f9
diff --git a/test/MC/SystemZ/insn-lcxbr-02.s b/test/MC/SystemZ/insn-lcxbr-02.s
new file mode 100644
index 0000000..ab3d6bf
--- /dev/null
+++ b/test/MC/SystemZ/insn-lcxbr-02.s
@@ -0,0 +1,17 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid register
+#CHECK: lcxbr	%f0, %f2
+#CHECK: error: invalid register
+#CHECK: lcxbr	%f0, %f14
+#CHECK: error: invalid register
+#CHECK: lcxbr	%f2, %f0
+#CHECK: error: invalid register
+#CHECK: lcxbr	%f14, %f0
+
+	lcxbr	%f0, %f2
+	lcxbr	%f0, %f14
+	lcxbr	%f2, %f0
+	lcxbr	%f14, %f0
+
diff --git a/test/MC/SystemZ/insn-ld-01.s b/test/MC/SystemZ/insn-ld-01.s
new file mode 100644
index 0000000..653eab5
--- /dev/null
+++ b/test/MC/SystemZ/insn-ld-01.s
@@ -0,0 +1,17 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: ld	%f0, 0                  # encoding: [0x68,0x00,0x00,0x00]
+#CHECK: ld	%f0, 4095               # encoding: [0x68,0x00,0x0f,0xff]
+#CHECK: ld	%f0, 0(%r1)             # encoding: [0x68,0x00,0x10,0x00]
+#CHECK: ld	%f0, 0(%r15)            # encoding: [0x68,0x00,0xf0,0x00]
+#CHECK: ld	%f0, 4095(%r1,%r15)     # encoding: [0x68,0x01,0xff,0xff]
+#CHECK: ld	%f0, 4095(%r15,%r1)     # encoding: [0x68,0x0f,0x1f,0xff]
+#CHECK: ld	%f15, 0                 # encoding: [0x68,0xf0,0x00,0x00]
+
+	ld	%f0, 0
+	ld	%f0, 4095
+	ld	%f0, 0(%r1)
+	ld	%f0, 0(%r15)
+	ld	%f0, 4095(%r1,%r15)
+	ld	%f0, 4095(%r15,%r1)
+	ld	%f15, 0
diff --git a/test/MC/SystemZ/insn-ld-02.s b/test/MC/SystemZ/insn-ld-02.s
new file mode 100644
index 0000000..5d786b5
--- /dev/null
+++ b/test/MC/SystemZ/insn-ld-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: ld	%f0, -1
+#CHECK: error: invalid operand
+#CHECK: ld	%f0, 4096
+
+	ld	%f0, -1
+	ld	%f0, 4096
diff --git a/test/MC/SystemZ/insn-ldeb-01.s b/test/MC/SystemZ/insn-ldeb-01.s
new file mode 100644
index 0000000..a06344d
--- /dev/null
+++ b/test/MC/SystemZ/insn-ldeb-01.s
@@ -0,0 +1,17 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: ldeb	%f0, 0                  # encoding: [0xed,0x00,0x00,0x00,0x00,0x04]
+#CHECK: ldeb	%f0, 4095               # encoding: [0xed,0x00,0x0f,0xff,0x00,0x04]
+#CHECK: ldeb	%f0, 0(%r1)             # encoding: [0xed,0x00,0x10,0x00,0x00,0x04]
+#CHECK: ldeb	%f0, 0(%r15)            # encoding: [0xed,0x00,0xf0,0x00,0x00,0x04]
+#CHECK: ldeb	%f0, 4095(%r1,%r15)     # encoding: [0xed,0x01,0xff,0xff,0x00,0x04]
+#CHECK: ldeb	%f0, 4095(%r15,%r1)     # encoding: [0xed,0x0f,0x1f,0xff,0x00,0x04]
+#CHECK: ldeb	%f15, 0                 # encoding: [0xed,0xf0,0x00,0x00,0x00,0x04]
+
+	ldeb	%f0, 0
+	ldeb	%f0, 4095
+	ldeb	%f0, 0(%r1)
+	ldeb	%f0, 0(%r15)
+	ldeb	%f0, 4095(%r1,%r15)
+	ldeb	%f0, 4095(%r15,%r1)
+	ldeb	%f15, 0
diff --git a/test/MC/SystemZ/insn-ldeb-02.s b/test/MC/SystemZ/insn-ldeb-02.s
new file mode 100644
index 0000000..6df5e7b
--- /dev/null
+++ b/test/MC/SystemZ/insn-ldeb-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: ldeb	%f0, -1
+#CHECK: error: invalid operand
+#CHECK: ldeb	%f0, 4096
+
+	ldeb	%f0, -1
+	ldeb	%f0, 4096
diff --git a/test/MC/SystemZ/insn-ldebr-01.s b/test/MC/SystemZ/insn-ldebr-01.s
new file mode 100644
index 0000000..2df932c
--- /dev/null
+++ b/test/MC/SystemZ/insn-ldebr-01.s
@@ -0,0 +1,9 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: ldebr	%f0, %f15               # encoding: [0xb3,0x04,0x00,0x0f]
+#CHECK: ldebr	%f7, %f8                # encoding: [0xb3,0x04,0x00,0x78]
+#CHECK: ldebr	%f15, %f0               # encoding: [0xb3,0x04,0x00,0xf0]
+
+	ldebr	%f0, %f15
+	ldebr	%f7, %f8
+	ldebr	%f15, %f0
diff --git a/test/MC/SystemZ/insn-ldgr-01.s b/test/MC/SystemZ/insn-ldgr-01.s
new file mode 100644
index 0000000..61a4529
--- /dev/null
+++ b/test/MC/SystemZ/insn-ldgr-01.s
@@ -0,0 +1,13 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: ldgr	%f0, %r0                # encoding: [0xb3,0xc1,0x00,0x00]
+#CHECK: ldgr	%f0, %r15               # encoding: [0xb3,0xc1,0x00,0x0f]
+#CHECK: ldgr	%f15, %r0               # encoding: [0xb3,0xc1,0x00,0xf0]
+#CHECK: ldgr	%f7, %r9                # encoding: [0xb3,0xc1,0x00,0x79]
+#CHECK: ldgr	%f15, %r15              # encoding: [0xb3,0xc1,0x00,0xff]
+
+	ldgr	%f0,%r0
+	ldgr	%f0,%r15
+	ldgr	%f15,%r0
+	ldgr	%f7,%r9
+	ldgr	%f15,%r15
diff --git a/test/MC/SystemZ/insn-ldgr-02.s b/test/MC/SystemZ/insn-ldgr-02.s
new file mode 100644
index 0000000..900174a
--- /dev/null
+++ b/test/MC/SystemZ/insn-ldgr-02.s
@@ -0,0 +1,16 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid register
+#CHECK: ldgr	%f0, %f0
+#CHECK: error: invalid register
+#CHECK: ldgr	%r0, %r0
+#CHECK: error: invalid register
+#CHECK: ldgr	%f0, %a0
+#CHECK: error: invalid register
+#CHECK: ldgr	%a0, %r0
+
+	ldgr	%f0, %f0
+	ldgr	%r0, %r0
+	ldgr	%f0, %a0
+	ldgr	%a0, %r0
diff --git a/test/MC/SystemZ/insn-ldr-01.s b/test/MC/SystemZ/insn-ldr-01.s
new file mode 100644
index 0000000..895ed34
--- /dev/null
+++ b/test/MC/SystemZ/insn-ldr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+# Encoding test: two-byte 'ldr' register-to-register forms using %f0, %f9 and %f15.
+#CHECK: ldr	%f0, %f9                # encoding: [0x28,0x09]
+#CHECK: ldr	%f0, %f15               # encoding: [0x28,0x0f]
+#CHECK: ldr	%f15, %f0               # encoding: [0x28,0xf0]
+#CHECK: ldr	%f15, %f9               # encoding: [0x28,0xf9]
+
+	ldr	%f0,%f9
+	ldr	%f0,%f15
+	ldr	%f15,%f0
+	ldr	%f15,%f9
diff --git a/test/MC/SystemZ/insn-ldxbr-01.s b/test/MC/SystemZ/insn-ldxbr-01.s
new file mode 100644
index 0000000..49e1d2a
--- /dev/null
+++ b/test/MC/SystemZ/insn-ldxbr-01.s
@@ -0,0 +1,13 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+# Encoding test: 'ldxbr' with %f0, %f8, %f12 and %f13 as operands.
+#CHECK: ldxbr	%f0, %f0                # encoding: [0xb3,0x45,0x00,0x00]
+#CHECK: ldxbr	%f0, %f13               # encoding: [0xb3,0x45,0x00,0x0d]
+#CHECK: ldxbr	%f8, %f12               # encoding: [0xb3,0x45,0x00,0x8c]
+#CHECK: ldxbr	%f13, %f0               # encoding: [0xb3,0x45,0x00,0xd0]
+#CHECK: ldxbr	%f13, %f13              # encoding: [0xb3,0x45,0x00,0xdd]
+
+	ldxbr	%f0, %f0
+	ldxbr	%f0, %f13
+	ldxbr	%f8, %f12
+	ldxbr	%f13, %f0
+	ldxbr	%f13, %f13
diff --git a/test/MC/SystemZ/insn-ldxbr-02.s b/test/MC/SystemZ/insn-ldxbr-02.s
new file mode 100644
index 0000000..89914df
--- /dev/null
+++ b/test/MC/SystemZ/insn-ldxbr-02.s
@@ -0,0 +1,16 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+# Error test: %f2 and %f14 must be rejected in either operand position of 'ldxbr'.
+#CHECK: error: invalid register
+#CHECK: ldxbr	%f0, %f2
+#CHECK: error: invalid register
+#CHECK: ldxbr	%f0, %f14
+#CHECK: error: invalid register
+#CHECK: ldxbr	%f2, %f0
+#CHECK: error: invalid register
+#CHECK: ldxbr	%f14, %f0
+
+	ldxbr	%f0, %f2
+	ldxbr	%f0, %f14
+	ldxbr	%f2, %f0
+	ldxbr	%f14, %f0
diff --git a/test/MC/SystemZ/insn-ldy-01.s b/test/MC/SystemZ/insn-ldy-01.s
new file mode 100644
index 0000000..5c2d145
--- /dev/null
+++ b/test/MC/SystemZ/insn-ldy-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+# Encoding test: 'ldy' over displacements -524288..524287, with and without base/index registers.
+#CHECK: ldy	%f0, -524288            # encoding: [0xed,0x00,0x00,0x00,0x80,0x65]
+#CHECK: ldy	%f0, -1                 # encoding: [0xed,0x00,0x0f,0xff,0xff,0x65]
+#CHECK: ldy	%f0, 0                  # encoding: [0xed,0x00,0x00,0x00,0x00,0x65]
+#CHECK: ldy	%f0, 1                  # encoding: [0xed,0x00,0x00,0x01,0x00,0x65]
+#CHECK: ldy	%f0, 524287             # encoding: [0xed,0x00,0x0f,0xff,0x7f,0x65]
+#CHECK: ldy	%f0, 0(%r1)             # encoding: [0xed,0x00,0x10,0x00,0x00,0x65]
+#CHECK: ldy	%f0, 0(%r15)            # encoding: [0xed,0x00,0xf0,0x00,0x00,0x65]
+#CHECK: ldy	%f0, 524287(%r1,%r15)   # encoding: [0xed,0x01,0xff,0xff,0x7f,0x65]
+#CHECK: ldy	%f0, 524287(%r15,%r1)   # encoding: [0xed,0x0f,0x1f,0xff,0x7f,0x65]
+#CHECK: ldy	%f15, 0                 # encoding: [0xed,0xf0,0x00,0x00,0x00,0x65]
+
+	ldy	%f0, -524288
+	ldy	%f0, -1
+	ldy	%f0, 0
+	ldy	%f0, 1
+	ldy	%f0, 524287
+	ldy	%f0, 0(%r1)
+	ldy	%f0, 0(%r15)
+	ldy	%f0, 524287(%r1,%r15)
+	ldy	%f0, 524287(%r15,%r1)
+	ldy	%f15, 0
diff --git a/test/MC/SystemZ/insn-ldy-02.s b/test/MC/SystemZ/insn-ldy-02.s
new file mode 100644
index 0000000..b16e014
--- /dev/null
+++ b/test/MC/SystemZ/insn-ldy-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+# Error test: 'ldy' must reject the displacements -524289 and 524288.
+#CHECK: error: invalid operand
+#CHECK: ldy	%f0, -524289
+#CHECK: error: invalid operand
+#CHECK: ldy	%f0, 524288
+
+	ldy	%f0, -524289
+	ldy	%f0, 524288
diff --git a/test/MC/SystemZ/insn-le-01.s b/test/MC/SystemZ/insn-le-01.s
new file mode 100644
index 0000000..15bbce2
--- /dev/null
+++ b/test/MC/SystemZ/insn-le-01.s
@@ -0,0 +1,17 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+# Encoding test: 'le' over displacements 0..4095, with and without base/index registers.
+#CHECK: le	%f0, 0                  # encoding: [0x78,0x00,0x00,0x00]
+#CHECK: le	%f0, 4095               # encoding: [0x78,0x00,0x0f,0xff]
+#CHECK: le	%f0, 0(%r1)             # encoding: [0x78,0x00,0x10,0x00]
+#CHECK: le	%f0, 0(%r15)            # encoding: [0x78,0x00,0xf0,0x00]
+#CHECK: le	%f0, 4095(%r1,%r15)     # encoding: [0x78,0x01,0xff,0xff]
+#CHECK: le	%f0, 4095(%r15,%r1)     # encoding: [0x78,0x0f,0x1f,0xff]
+#CHECK: le	%f15, 0                 # encoding: [0x78,0xf0,0x00,0x00]
+
+	le	%f0, 0
+	le	%f0, 4095
+	le	%f0, 0(%r1)
+	le	%f0, 0(%r15)
+	le	%f0, 4095(%r1,%r15)
+	le	%f0, 4095(%r15,%r1)
+	le	%f15, 0
diff --git a/test/MC/SystemZ/insn-le-02.s b/test/MC/SystemZ/insn-le-02.s
new file mode 100644
index 0000000..f784ea1
--- /dev/null
+++ b/test/MC/SystemZ/insn-le-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+# Error test: 'le' must reject the displacements -1 and 4096.
+#CHECK: error: invalid operand
+#CHECK: le	%f0, -1
+#CHECK: error: invalid operand
+#CHECK: le	%f0, 4096
+
+	le	%f0, -1
+	le	%f0, 4096
diff --git a/test/MC/SystemZ/insn-ledbr-01.s b/test/MC/SystemZ/insn-ledbr-01.s
new file mode 100644
index 0000000..6582d63
--- /dev/null
+++ b/test/MC/SystemZ/insn-ledbr-01.s
@@ -0,0 +1,13 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+# Encoding test: 'ledbr' with %f0/%f15 in every combination plus a mixed %f7,%f8 pair.
+#CHECK: ledbr	%f0, %f0                # encoding: [0xb3,0x44,0x00,0x00]
+#CHECK: ledbr	%f0, %f15               # encoding: [0xb3,0x44,0x00,0x0f]
+#CHECK: ledbr	%f7, %f8                # encoding: [0xb3,0x44,0x00,0x78]
+#CHECK: ledbr	%f15, %f0               # encoding: [0xb3,0x44,0x00,0xf0]
+#CHECK: ledbr	%f15, %f15              # encoding: [0xb3,0x44,0x00,0xff]
+
+	ledbr	%f0, %f0
+	ledbr	%f0, %f15
+	ledbr	%f7, %f8
+	ledbr	%f15, %f0
+	ledbr	%f15, %f15
diff --git a/test/MC/SystemZ/insn-ler-01.s b/test/MC/SystemZ/insn-ler-01.s
new file mode 100644
index 0000000..775e523
--- /dev/null
+++ b/test/MC/SystemZ/insn-ler-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+# Encoding test: two-byte 'ler' register-to-register forms using %f0, %f9 and %f15.
+#CHECK: ler	%f0, %f9                # encoding: [0x38,0x09]
+#CHECK: ler	%f0, %f15               # encoding: [0x38,0x0f]
+#CHECK: ler	%f15, %f0               # encoding: [0x38,0xf0]
+#CHECK: ler	%f15, %f9               # encoding: [0x38,0xf9]
+
+	ler	%f0,%f9
+	ler	%f0,%f15
+	ler	%f15,%f0
+	ler	%f15,%f9
diff --git a/test/MC/SystemZ/insn-lexbr-01.s b/test/MC/SystemZ/insn-lexbr-01.s
new file mode 100644
index 0000000..ce32103
--- /dev/null
+++ b/test/MC/SystemZ/insn-lexbr-01.s
@@ -0,0 +1,13 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+# Encoding test: 'lexbr' with %f0, %f8, %f12 and %f13 as operands.
+#CHECK: lexbr	%f0, %f0                # encoding: [0xb3,0x46,0x00,0x00]
+#CHECK: lexbr	%f0, %f13               # encoding: [0xb3,0x46,0x00,0x0d]
+#CHECK: lexbr	%f8, %f12               # encoding: [0xb3,0x46,0x00,0x8c]
+#CHECK: lexbr	%f13, %f0               # encoding: [0xb3,0x46,0x00,0xd0]
+#CHECK: lexbr	%f13, %f13              # encoding: [0xb3,0x46,0x00,0xdd]
+
+	lexbr	%f0, %f0
+	lexbr	%f0, %f13
+	lexbr	%f8, %f12
+	lexbr	%f13, %f0
+	lexbr	%f13, %f13
diff --git a/test/MC/SystemZ/insn-lexbr-02.s b/test/MC/SystemZ/insn-lexbr-02.s
new file mode 100644
index 0000000..8c9bb9e
--- /dev/null
+++ b/test/MC/SystemZ/insn-lexbr-02.s
@@ -0,0 +1,16 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+# Error test: %f2 and %f14 must be rejected in either operand position of 'lexbr'.
+#CHECK: error: invalid register
+#CHECK: lexbr	%f0, %f2
+#CHECK: error: invalid register
+#CHECK: lexbr	%f0, %f14
+#CHECK: error: invalid register
+#CHECK: lexbr	%f2, %f0
+#CHECK: error: invalid register
+#CHECK: lexbr	%f14, %f0
+
+	lexbr	%f0, %f2
+	lexbr	%f0, %f14
+	lexbr	%f2, %f0
+	lexbr	%f14, %f0
diff --git a/test/MC/SystemZ/insn-ley-01.s b/test/MC/SystemZ/insn-ley-01.s
new file mode 100644
index 0000000..b854dc1
--- /dev/null
+++ b/test/MC/SystemZ/insn-ley-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+# Encoding test: 'ley' over displacements -524288..524287, with and without base/index registers.
+#CHECK: ley	%f0, -524288            # encoding: [0xed,0x00,0x00,0x00,0x80,0x64]
+#CHECK: ley	%f0, -1                 # encoding: [0xed,0x00,0x0f,0xff,0xff,0x64]
+#CHECK: ley	%f0, 0                  # encoding: [0xed,0x00,0x00,0x00,0x00,0x64]
+#CHECK: ley	%f0, 1                  # encoding: [0xed,0x00,0x00,0x01,0x00,0x64]
+#CHECK: ley	%f0, 524287             # encoding: [0xed,0x00,0x0f,0xff,0x7f,0x64]
+#CHECK: ley	%f0, 0(%r1)             # encoding: [0xed,0x00,0x10,0x00,0x00,0x64]
+#CHECK: ley	%f0, 0(%r15)            # encoding: [0xed,0x00,0xf0,0x00,0x00,0x64]
+#CHECK: ley	%f0, 524287(%r1,%r15)   # encoding: [0xed,0x01,0xff,0xff,0x7f,0x64]
+#CHECK: ley	%f0, 524287(%r15,%r1)   # encoding: [0xed,0x0f,0x1f,0xff,0x7f,0x64]
+#CHECK: ley	%f15, 0                 # encoding: [0xed,0xf0,0x00,0x00,0x00,0x64]
+
+	ley	%f0, -524288
+	ley	%f0, -1
+	ley	%f0, 0
+	ley	%f0, 1
+	ley	%f0, 524287
+	ley	%f0, 0(%r1)
+	ley	%f0, 0(%r15)
+	ley	%f0, 524287(%r1,%r15)
+	ley	%f0, 524287(%r15,%r1)
+	ley	%f15, 0
diff --git a/test/MC/SystemZ/insn-ley-02.s b/test/MC/SystemZ/insn-ley-02.s
new file mode 100644
index 0000000..98bbd94
--- /dev/null
+++ b/test/MC/SystemZ/insn-ley-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+# Error test: 'ley' must reject the displacements -524289 and 524288.
+#CHECK: error: invalid operand
+#CHECK: ley	%f0, -524289
+#CHECK: error: invalid operand
+#CHECK: ley	%f0, 524288
+
+	ley	%f0, -524289
+	ley	%f0, 524288
diff --git a/test/MC/SystemZ/insn-lg-01.s b/test/MC/SystemZ/insn-lg-01.s
new file mode 100644
index 0000000..10a95cc
--- /dev/null
+++ b/test/MC/SystemZ/insn-lg-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+# Encoding test: 'lg' over displacements -524288..524287, with and without base/index registers.
+#CHECK: lg	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x04]
+#CHECK: lg	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x04]
+#CHECK: lg	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x04]
+#CHECK: lg	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x04]
+#CHECK: lg	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x04]
+#CHECK: lg	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x04]
+#CHECK: lg	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x04]
+#CHECK: lg	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x04]
+#CHECK: lg	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x04]
+#CHECK: lg	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x04]
+
+	lg	%r0, -524288
+	lg	%r0, -1
+	lg	%r0, 0
+	lg	%r0, 1
+	lg	%r0, 524287
+	lg	%r0, 0(%r1)
+	lg	%r0, 0(%r15)
+	lg	%r0, 524287(%r1,%r15)
+	lg	%r0, 524287(%r15,%r1)
+	lg	%r15, 0
diff --git a/test/MC/SystemZ/insn-lg-02.s b/test/MC/SystemZ/insn-lg-02.s
new file mode 100644
index 0000000..85b29ff
--- /dev/null
+++ b/test/MC/SystemZ/insn-lg-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+# Error test: 'lg' must reject the displacements -524289 and 524288.
+#CHECK: error: invalid operand
+#CHECK: lg	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: lg	%r0, 524288
+
+	lg	%r0, -524289
+	lg	%r0, 524288
diff --git a/test/MC/SystemZ/insn-lgb-01.s b/test/MC/SystemZ/insn-lgb-01.s
new file mode 100644
index 0000000..82b92f1
--- /dev/null
+++ b/test/MC/SystemZ/insn-lgb-01.s
@@ -0,0 +1,24 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+# Encoding test: 'lgb' over displacements -524288..524287, with and without base/index registers.
+#CHECK: lgb	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x77]
+#CHECK: lgb	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x77]
+#CHECK: lgb	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x77]
+#CHECK: lgb	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x77]
+#CHECK: lgb	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x77]
+#CHECK: lgb	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x77]
+#CHECK: lgb	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x77]
+#CHECK: lgb	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x77]
+#CHECK: lgb	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x77]
+#CHECK: lgb	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x77]
+
+	lgb	%r0, -524288
+	lgb	%r0, -1
+	lgb	%r0, 0
+	lgb	%r0, 1
+	lgb	%r0, 524287
+	lgb	%r0, 0(%r1)
+	lgb	%r0, 0(%r15)
+	lgb	%r0, 524287(%r1,%r15)
+	lgb	%r0, 524287(%r15,%r1)
+	lgb	%r15, 0
+
diff --git a/test/MC/SystemZ/insn-lgb-02.s b/test/MC/SystemZ/insn-lgb-02.s
new file mode 100644
index 0000000..7acc176
--- /dev/null
+++ b/test/MC/SystemZ/insn-lgb-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+# Error test: 'lgb' must reject the displacements -524289 and 524288.
+#CHECK: error: invalid operand
+#CHECK: lgb	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: lgb	%r0, 524288
+
+	lgb	%r0, -524289
+	lgb	%r0, 524288
diff --git a/test/MC/SystemZ/insn-lgbr-01.s b/test/MC/SystemZ/insn-lgbr-01.s
new file mode 100644
index 0000000..ec2e622
--- /dev/null
+++ b/test/MC/SystemZ/insn-lgbr-01.s
@@ -0,0 +1,9 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+# Encoding test: 'lgbr' with %r0/%r15 at either end and a mixed %r7,%r8 pair.
+#CHECK: lgbr	%r0, %r15               # encoding: [0xb9,0x06,0x00,0x0f]
+#CHECK: lgbr	%r7, %r8                # encoding: [0xb9,0x06,0x00,0x78]
+#CHECK: lgbr	%r15, %r0               # encoding: [0xb9,0x06,0x00,0xf0]
+
+	lgbr	%r0, %r15
+	lgbr	%r7, %r8
+	lgbr	%r15, %r0
diff --git a/test/MC/SystemZ/insn-lgdr-01.s b/test/MC/SystemZ/insn-lgdr-01.s
new file mode 100644
index 0000000..56d1e03
--- /dev/null
+++ b/test/MC/SystemZ/insn-lgdr-01.s
@@ -0,0 +1,13 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+# Encoding test: 'lgdr' with a %r destination and a %f source, using the lowest and highest register numbers.
+#CHECK: lgdr	%r0, %f0                # encoding: [0xb3,0xcd,0x00,0x00]
+#CHECK: lgdr	%r0, %f15               # encoding: [0xb3,0xcd,0x00,0x0f]
+#CHECK: lgdr	%r15, %f0               # encoding: [0xb3,0xcd,0x00,0xf0]
+#CHECK: lgdr	%r8, %f8                # encoding: [0xb3,0xcd,0x00,0x88]
+#CHECK: lgdr	%r15, %f15              # encoding: [0xb3,0xcd,0x00,0xff]
+
+	lgdr	%r0,%f0
+	lgdr	%r0,%f15
+	lgdr	%r15,%f0
+	lgdr	%r8,%f8
+	lgdr	%r15,%f15
diff --git a/test/MC/SystemZ/insn-lgdr-02.s b/test/MC/SystemZ/insn-lgdr-02.s
new file mode 100644
index 0000000..3bf014d
--- /dev/null
+++ b/test/MC/SystemZ/insn-lgdr-02.s
@@ -0,0 +1,16 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+# Error test: 'lgdr' must reject a %r or %a source and a %f or %a destination.
+#CHECK: error: invalid register
+#CHECK: lgdr	%f0, %f0
+#CHECK: error: invalid register
+#CHECK: lgdr	%r0, %r0
+#CHECK: error: invalid register
+#CHECK: lgdr	%r0, %a0
+#CHECK: error: invalid register
+#CHECK: lgdr	%a0, %f0
+
+	lgdr	%f0, %f0
+	lgdr	%r0, %r0
+	lgdr	%r0, %a0
+	lgdr	%a0, %f0
diff --git a/test/MC/SystemZ/insn-lgf-01.s b/test/MC/SystemZ/insn-lgf-01.s
new file mode 100644
index 0000000..9ed9172
--- /dev/null
+++ b/test/MC/SystemZ/insn-lgf-01.s
@@ -0,0 +1,24 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+# Encoding test: 'lgf' over displacements -524288..524287, with and without base/index registers.
+#CHECK: lgf	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x14]
+#CHECK: lgf	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x14]
+#CHECK: lgf	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x14]
+#CHECK: lgf	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x14]
+#CHECK: lgf	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x14]
+#CHECK: lgf	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x14]
+#CHECK: lgf	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x14]
+#CHECK: lgf	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x14]
+#CHECK: lgf	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x14]
+#CHECK: lgf	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x14]
+
+	lgf	%r0, -524288
+	lgf	%r0, -1
+	lgf	%r0, 0
+	lgf	%r0, 1
+	lgf	%r0, 524287
+	lgf	%r0, 0(%r1)
+	lgf	%r0, 0(%r15)
+	lgf	%r0, 524287(%r1,%r15)
+	lgf	%r0, 524287(%r15,%r1)
+	lgf	%r15, 0
+
diff --git a/test/MC/SystemZ/insn-lgf-02.s b/test/MC/SystemZ/insn-lgf-02.s
new file mode 100644
index 0000000..32095a8
--- /dev/null
+++ b/test/MC/SystemZ/insn-lgf-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+# Error test: 'lgf' must reject the displacements -524289 and 524288.
+#CHECK: error: invalid operand
+#CHECK: lgf	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: lgf	%r0, 524288
+
+	lgf	%r0, -524289
+	lgf	%r0, 524288
diff --git a/test/MC/SystemZ/insn-lgfi-01.s b/test/MC/SystemZ/insn-lgfi-01.s
new file mode 100644
index 0000000..a5ca7a5
--- /dev/null
+++ b/test/MC/SystemZ/insn-lgfi-01.s
@@ -0,0 +1,15 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+# Encoding test: 'lgfi' immediates from -2147483648 to 2147483647; the extremes are written as shift expressions.
+#CHECK: lgfi	%r0, -2147483648        # encoding: [0xc0,0x01,0x80,0x00,0x00,0x00]
+#CHECK: lgfi	%r0, -1                 # encoding: [0xc0,0x01,0xff,0xff,0xff,0xff]
+#CHECK: lgfi	%r0, 0                  # encoding: [0xc0,0x01,0x00,0x00,0x00,0x00]
+#CHECK: lgfi	%r0, 1                  # encoding: [0xc0,0x01,0x00,0x00,0x00,0x01]
+#CHECK: lgfi	%r0, 2147483647         # encoding: [0xc0,0x01,0x7f,0xff,0xff,0xff]
+#CHECK: lgfi	%r15, 0                 # encoding: [0xc0,0xf1,0x00,0x00,0x00,0x00]
+
+	lgfi	%r0, -1 << 31
+	lgfi	%r0, -1
+	lgfi	%r0, 0
+	lgfi	%r0, 1
+	lgfi	%r0, (1 << 31) - 1
+	lgfi	%r15, 0
diff --git a/test/MC/SystemZ/insn-lgfi-02.s b/test/MC/SystemZ/insn-lgfi-02.s
new file mode 100644
index 0000000..a45cfeb
--- /dev/null
+++ b/test/MC/SystemZ/insn-lgfi-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+# Error test: 'lgfi' must reject the immediates (-1 << 31) - 1 and (1 << 31).
+#CHECK: error: invalid operand
+#CHECK: lgfi	%r0, (-1 << 31) - 1
+#CHECK: error: invalid operand
+#CHECK: lgfi	%r0, (1 << 31)
+
+	lgfi	%r0, (-1 << 31) - 1
+	lgfi	%r0, (1 << 31)
diff --git a/test/MC/SystemZ/insn-lgfr-01.s b/test/MC/SystemZ/insn-lgfr-01.s
new file mode 100644
index 0000000..bc375a6
--- /dev/null
+++ b/test/MC/SystemZ/insn-lgfr-01.s
@@ -0,0 +1,9 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+# Encoding test: 'lgfr' with %r0/%r15 at either end and a mixed %r7,%r8 pair.
+#CHECK: lgfr	%r0, %r15               # encoding: [0xb9,0x14,0x00,0x0f]
+#CHECK: lgfr	%r7, %r8                # encoding: [0xb9,0x14,0x00,0x78]
+#CHECK: lgfr	%r15, %r0               # encoding: [0xb9,0x14,0x00,0xf0]
+
+	lgfr	%r0, %r15
+	lgfr	%r7, %r8
+	lgfr	%r15, %r0
diff --git a/test/MC/SystemZ/insn-lgfrl-01.s b/test/MC/SystemZ/insn-lgfrl-01.s
new file mode 100644
index 0000000..85c9ea7
--- /dev/null
+++ b/test/MC/SystemZ/insn-lgfrl-01.s
@@ -0,0 +1,31 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+# Encoding test: 'lgfrl' with an absolute address, a symbol, symbol+offset and @PLT; fixup records are matched too.
+#CHECK: lgfrl	%r0, 2864434397         # encoding: [0xc4,0x0c,0x55,0x5d,0xe6,0x6e]
+#CHECK: lgfrl	%r15, 2864434397        # encoding: [0xc4,0xfc,0x55,0x5d,0xe6,0x6e]
+
+	lgfrl	%r0,0xaabbccdd
+	lgfrl	%r15,0xaabbccdd
+
+#CHECK: lgfrl	%r0, foo                # encoding: [0xc4,0x0c,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: lgfrl	%r15, foo               # encoding: [0xc4,0xfc,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+
+	lgfrl	%r0,foo
+	lgfrl	%r15,foo
+
+#CHECK: lgfrl	%r3, bar+100            # encoding: [0xc4,0x3c,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+#CHECK: lgfrl	%r4, bar+100            # encoding: [0xc4,0x4c,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+
+	lgfrl	%r3,bar+100
+	lgfrl	%r4,bar+100
+
+#CHECK: lgfrl	%r7, frob@PLT           # encoding: [0xc4,0x7c,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+#CHECK: lgfrl	%r8, frob@PLT           # encoding: [0xc4,0x8c,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+
+	lgfrl	%r7,frob@PLT
+	lgfrl	%r8,frob@PLT
diff --git a/test/MC/SystemZ/insn-lgh-01.s b/test/MC/SystemZ/insn-lgh-01.s
new file mode 100644
index 0000000..9dae621
--- /dev/null
+++ b/test/MC/SystemZ/insn-lgh-01.s
@@ -0,0 +1,24 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+# Encoding test: 'lgh' over displacements -524288..524287, with and without base/index registers.
+#CHECK: lgh	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x15]
+#CHECK: lgh	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x15]
+#CHECK: lgh	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x15]
+#CHECK: lgh	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x15]
+#CHECK: lgh	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x15]
+#CHECK: lgh	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x15]
+#CHECK: lgh	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x15]
+#CHECK: lgh	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x15]
+#CHECK: lgh	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x15]
+#CHECK: lgh	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x15]
+
+	lgh	%r0, -524288
+	lgh	%r0, -1
+	lgh	%r0, 0
+	lgh	%r0, 1
+	lgh	%r0, 524287
+	lgh	%r0, 0(%r1)
+	lgh	%r0, 0(%r15)
+	lgh	%r0, 524287(%r1,%r15)
+	lgh	%r0, 524287(%r15,%r1)
+	lgh	%r15, 0
+
diff --git a/test/MC/SystemZ/insn-lgh-02.s b/test/MC/SystemZ/insn-lgh-02.s
new file mode 100644
index 0000000..62b7341
--- /dev/null
+++ b/test/MC/SystemZ/insn-lgh-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+# Error test: 'lgh' must reject the displacements -524289 and 524288.
+#CHECK: error: invalid operand
+#CHECK: lgh	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: lgh	%r0, 524288
+
+	lgh	%r0, -524289
+	lgh	%r0, 524288
diff --git a/test/MC/SystemZ/insn-lghi-01.s b/test/MC/SystemZ/insn-lghi-01.s
new file mode 100644
index 0000000..d9d12ae
--- /dev/null
+++ b/test/MC/SystemZ/insn-lghi-01.s
@@ -0,0 +1,15 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+# Encoding test: 'lghi' immediates from -32768 to 32767, plus the %r15 destination.
+#CHECK: lghi	%r0, -32768             # encoding: [0xa7,0x09,0x80,0x00]
+#CHECK: lghi	%r0, -1                 # encoding: [0xa7,0x09,0xff,0xff]
+#CHECK: lghi	%r0, 0                  # encoding: [0xa7,0x09,0x00,0x00]
+#CHECK: lghi	%r0, 1                  # encoding: [0xa7,0x09,0x00,0x01]
+#CHECK: lghi	%r0, 32767              # encoding: [0xa7,0x09,0x7f,0xff]
+#CHECK: lghi	%r15, 0                 # encoding: [0xa7,0xf9,0x00,0x00]
+
+	lghi	%r0, -32768
+	lghi	%r0, -1
+	lghi	%r0, 0
+	lghi	%r0, 1
+	lghi	%r0, 32767
+	lghi	%r15, 0
diff --git a/test/MC/SystemZ/insn-lghi-02.s b/test/MC/SystemZ/insn-lghi-02.s
new file mode 100644
index 0000000..b1af7a0
--- /dev/null
+++ b/test/MC/SystemZ/insn-lghi-02.s
@@ -0,0 +1,13 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+# Error test: 'lghi' must reject the immediates -32769 and 32768 and the symbol operand 'foo'.
+#CHECK: error: invalid operand
+#CHECK: lghi	%r0, -32769
+#CHECK: error: invalid operand
+#CHECK: lghi	%r0, 32768
+#CHECK: error: invalid operand
+#CHECK: lghi	%r0, foo
+
+	lghi	%r0, -32769
+	lghi	%r0, 32768
+	lghi	%r0, foo
diff --git a/test/MC/SystemZ/insn-lghr-01.s b/test/MC/SystemZ/insn-lghr-01.s
new file mode 100644
index 0000000..a1dc842
--- /dev/null
+++ b/test/MC/SystemZ/insn-lghr-01.s
@@ -0,0 +1,9 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+# Encoding test: 'lghr' with %r0/%r15 at either end and a mixed %r7,%r8 pair.
+#CHECK: lghr	%r0, %r15               # encoding: [0xb9,0x07,0x00,0x0f]
+#CHECK: lghr	%r7, %r8                # encoding: [0xb9,0x07,0x00,0x78]
+#CHECK: lghr	%r15, %r0               # encoding: [0xb9,0x07,0x00,0xf0]
+
+	lghr	%r0, %r15
+	lghr	%r7, %r8
+	lghr	%r15, %r0
diff --git a/test/MC/SystemZ/insn-lghrl-01.s b/test/MC/SystemZ/insn-lghrl-01.s
new file mode 100644
index 0000000..34992e6
--- /dev/null
+++ b/test/MC/SystemZ/insn-lghrl-01.s
@@ -0,0 +1,31 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+# Encoding test: 'lghrl' with an absolute address, a symbol, symbol+offset and @PLT; fixup records are matched too.
+#CHECK: lghrl	%r0, 2864434397         # encoding: [0xc4,0x04,0x55,0x5d,0xe6,0x6e]
+#CHECK: lghrl	%r15, 2864434397        # encoding: [0xc4,0xf4,0x55,0x5d,0xe6,0x6e]
+
+	lghrl	%r0,0xaabbccdd
+	lghrl	%r15,0xaabbccdd
+
+#CHECK: lghrl	%r0, foo                # encoding: [0xc4,0x04,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: lghrl	%r15, foo               # encoding: [0xc4,0xf4,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+
+	lghrl	%r0,foo
+	lghrl	%r15,foo
+
+#CHECK: lghrl	%r3, bar+100            # encoding: [0xc4,0x34,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+#CHECK: lghrl	%r4, bar+100            # encoding: [0xc4,0x44,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+
+	lghrl	%r3,bar+100
+	lghrl	%r4,bar+100
+
+#CHECK: lghrl	%r7, frob@PLT           # encoding: [0xc4,0x74,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+#CHECK: lghrl	%r8, frob@PLT           # encoding: [0xc4,0x84,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+
+	lghrl	%r7,frob@PLT
+	lghrl	%r8,frob@PLT
diff --git a/test/MC/SystemZ/insn-lgr-01.s b/test/MC/SystemZ/insn-lgr-01.s
new file mode 100644
index 0000000..e502956
--- /dev/null
+++ b/test/MC/SystemZ/insn-lgr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+# Encoding test: 'lgr' register-to-register forms using %r0, %r9 and %r15.
+#CHECK: lgr	%r0, %r9                # encoding: [0xb9,0x04,0x00,0x09]
+#CHECK: lgr	%r0, %r15               # encoding: [0xb9,0x04,0x00,0x0f]
+#CHECK: lgr	%r15, %r0               # encoding: [0xb9,0x04,0x00,0xf0]
+#CHECK: lgr	%r15, %r9               # encoding: [0xb9,0x04,0x00,0xf9]
+
+	lgr	%r0,%r9
+	lgr	%r0,%r15
+	lgr	%r15,%r0
+	lgr	%r15,%r9
diff --git a/test/MC/SystemZ/insn-lgrl-01.s b/test/MC/SystemZ/insn-lgrl-01.s
new file mode 100644
index 0000000..7a18908
--- /dev/null
+++ b/test/MC/SystemZ/insn-lgrl-01.s
@@ -0,0 +1,31 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+# Encoding test: 'lgrl' with an absolute address, a symbol, symbol+offset and @PLT; fixup records are matched too.
+#CHECK: lgrl	%r0, 2864434397         # encoding: [0xc4,0x08,0x55,0x5d,0xe6,0x6e]
+#CHECK: lgrl	%r15, 2864434397        # encoding: [0xc4,0xf8,0x55,0x5d,0xe6,0x6e]
+
+	lgrl	%r0,0xaabbccdd
+	lgrl	%r15,0xaabbccdd
+
+#CHECK: lgrl	%r0, foo                # encoding: [0xc4,0x08,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: lgrl	%r15, foo               # encoding: [0xc4,0xf8,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+
+	lgrl	%r0,foo
+	lgrl	%r15,foo
+
+#CHECK: lgrl	%r3, bar+100            # encoding: [0xc4,0x38,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+#CHECK: lgrl	%r4, bar+100            # encoding: [0xc4,0x48,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+
+	lgrl	%r3,bar+100
+	lgrl	%r4,bar+100
+
+#CHECK: lgrl	%r7, frob@PLT           # encoding: [0xc4,0x78,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+#CHECK: lgrl	%r8, frob@PLT           # encoding: [0xc4,0x88,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+
+	lgrl	%r7,frob@PLT
+	lgrl	%r8,frob@PLT
diff --git a/test/MC/SystemZ/insn-lh-01.s b/test/MC/SystemZ/insn-lh-01.s
new file mode 100644
index 0000000..07be012
--- /dev/null
+++ b/test/MC/SystemZ/insn-lh-01.s
@@ -0,0 +1,17 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+# Encoding test: 'lh' over displacements 0..4095, with and without base/index registers.
+#CHECK: lh	%r0, 0                  # encoding: [0x48,0x00,0x00,0x00]
+#CHECK: lh	%r0, 4095               # encoding: [0x48,0x00,0x0f,0xff]
+#CHECK: lh	%r0, 0(%r1)             # encoding: [0x48,0x00,0x10,0x00]
+#CHECK: lh	%r0, 0(%r15)            # encoding: [0x48,0x00,0xf0,0x00]
+#CHECK: lh	%r0, 4095(%r1,%r15)     # encoding: [0x48,0x01,0xff,0xff]
+#CHECK: lh	%r0, 4095(%r15,%r1)     # encoding: [0x48,0x0f,0x1f,0xff]
+#CHECK: lh	%r15, 0                 # encoding: [0x48,0xf0,0x00,0x00]
+
+	lh	%r0, 0
+	lh	%r0, 4095
+	lh	%r0, 0(%r1)
+	lh	%r0, 0(%r15)
+	lh	%r0, 4095(%r1,%r15)
+	lh	%r0, 4095(%r15,%r1)
+	lh	%r15, 0
diff --git a/test/MC/SystemZ/insn-lh-02.s b/test/MC/SystemZ/insn-lh-02.s
new file mode 100644
index 0000000..80566d8
--- /dev/null
+++ b/test/MC/SystemZ/insn-lh-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+# Error test: 'lh' must reject the displacements -1 and 4096.
+#CHECK: error: invalid operand
+#CHECK: lh	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: lh	%r0, 4096
+
+	lh	%r0, -1
+	lh	%r0, 4096
diff --git a/test/MC/SystemZ/insn-lhi-01.s b/test/MC/SystemZ/insn-lhi-01.s
new file mode 100644
index 0000000..43b7df0
--- /dev/null
+++ b/test/MC/SystemZ/insn-lhi-01.s
@@ -0,0 +1,15 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+# Encoding test: 'lhi' immediates from -32768 to 32767, plus the %r15 destination.
+#CHECK: lhi	%r0, -32768             # encoding: [0xa7,0x08,0x80,0x00]
+#CHECK: lhi	%r0, -1                 # encoding: [0xa7,0x08,0xff,0xff]
+#CHECK: lhi	%r0, 0                  # encoding: [0xa7,0x08,0x00,0x00]
+#CHECK: lhi	%r0, 1                  # encoding: [0xa7,0x08,0x00,0x01]
+#CHECK: lhi	%r0, 32767              # encoding: [0xa7,0x08,0x7f,0xff]
+#CHECK: lhi	%r15, 0                 # encoding: [0xa7,0xf8,0x00,0x00]
+
+	lhi	%r0, -32768
+	lhi	%r0, -1
+	lhi	%r0, 0
+	lhi	%r0, 1
+	lhi	%r0, 32767
+	lhi	%r15, 0
diff --git a/test/MC/SystemZ/insn-lhi-02.s b/test/MC/SystemZ/insn-lhi-02.s
new file mode 100644
index 0000000..8e38464
--- /dev/null
+++ b/test/MC/SystemZ/insn-lhi-02.s
@@ -0,0 +1,13 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+# Error test: 'lhi' must reject the immediates -32769 and 32768 and the symbol operand 'foo'.
+#CHECK: error: invalid operand
+#CHECK: lhi	%r0, -32769
+#CHECK: error: invalid operand
+#CHECK: lhi	%r0, 32768
+#CHECK: error: invalid operand
+#CHECK: lhi	%r0, foo
+
+	lhi	%r0, -32769
+	lhi	%r0, 32768
+	lhi	%r0, foo
diff --git a/test/MC/SystemZ/insn-lhr-01.s b/test/MC/SystemZ/insn-lhr-01.s
new file mode 100644
index 0000000..a31cbc6
--- /dev/null
+++ b/test/MC/SystemZ/insn-lhr-01.s
@@ -0,0 +1,9 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+# Encoding test: 'lhr' with %r0/%r15 at either end and a mixed %r7,%r8 pair.
+#CHECK: lhr	%r0, %r15               # encoding: [0xb9,0x27,0x00,0x0f]
+#CHECK: lhr	%r7, %r8                # encoding: [0xb9,0x27,0x00,0x78]
+#CHECK: lhr	%r15, %r0               # encoding: [0xb9,0x27,0x00,0xf0]
+
+	lhr	%r0, %r15
+	lhr	%r7, %r8
+	lhr	%r15, %r0
diff --git a/test/MC/SystemZ/insn-lhrl-01.s b/test/MC/SystemZ/insn-lhrl-01.s
new file mode 100644
index 0000000..87925fe
--- /dev/null
+++ b/test/MC/SystemZ/insn-lhrl-01.s
@@ -0,0 +1,31 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+# Encoding test: 'lhrl' with an absolute address, a symbol, symbol+offset and @PLT; fixup records are matched too.
+#CHECK: lhrl	%r0, 2864434397         # encoding: [0xc4,0x05,0x55,0x5d,0xe6,0x6e]
+#CHECK: lhrl	%r15, 2864434397        # encoding: [0xc4,0xf5,0x55,0x5d,0xe6,0x6e]
+
+	lhrl	%r0,0xaabbccdd
+	lhrl	%r15,0xaabbccdd
+
+#CHECK: lhrl	%r0, foo                # encoding: [0xc4,0x05,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: lhrl	%r15, foo               # encoding: [0xc4,0xf5,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+
+	lhrl	%r0,foo
+	lhrl	%r15,foo
+
+#CHECK: lhrl	%r3, bar+100            # encoding: [0xc4,0x35,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+#CHECK: lhrl	%r4, bar+100            # encoding: [0xc4,0x45,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+
+	lhrl	%r3,bar+100
+	lhrl	%r4,bar+100
+
+#CHECK: lhrl	%r7, frob@PLT           # encoding: [0xc4,0x75,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+#CHECK: lhrl	%r8, frob@PLT           # encoding: [0xc4,0x85,A,A,A,A]
+#CHECK: fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+
+	lhrl	%r7,frob@PLT
+	lhrl	%r8,frob@PLT
diff --git a/test/MC/SystemZ/insn-lhy-01.s b/test/MC/SystemZ/insn-lhy-01.s
new file mode 100644
index 0000000..db811a4
--- /dev/null
+++ b/test/MC/SystemZ/insn-lhy-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+# Encoding test: 'lhy' over displacements -524288..524287, with and without base/index registers.
+#CHECK: lhy	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x78]
+#CHECK: lhy	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x78]
+#CHECK: lhy	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x78]
+#CHECK: lhy	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x78]
+#CHECK: lhy	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x78]
+#CHECK: lhy	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x78]
+#CHECK: lhy	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x78]
+#CHECK: lhy	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x78]
+#CHECK: lhy	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x78]
+#CHECK: lhy	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x78]
+
+	lhy	%r0, -524288
+	lhy	%r0, -1
+	lhy	%r0, 0
+	lhy	%r0, 1
+	lhy	%r0, 524287
+	lhy	%r0, 0(%r1)
+	lhy	%r0, 0(%r15)
+	lhy	%r0, 524287(%r1,%r15)
+	lhy	%r0, 524287(%r15,%r1)
+	lhy	%r15, 0
diff --git a/test/MC/SystemZ/insn-lhy-02.s b/test/MC/SystemZ/insn-lhy-02.s
new file mode 100644
index 0000000..6f1caa5
--- /dev/null
+++ b/test/MC/SystemZ/insn-lhy-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: lhy	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: lhy	%r0, 524288
+
+	lhy	%r0, -524289
+	lhy	%r0, 524288
diff --git a/test/MC/SystemZ/insn-llc-01.s b/test/MC/SystemZ/insn-llc-01.s
new file mode 100644
index 0000000..74a819b
--- /dev/null
+++ b/test/MC/SystemZ/insn-llc-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: llc	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x94]
+#CHECK: llc	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x94]
+#CHECK: llc	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x94]
+#CHECK: llc	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x94]
+#CHECK: llc	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x94]
+#CHECK: llc	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x94]
+#CHECK: llc	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x94]
+#CHECK: llc	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x94]
+#CHECK: llc	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x94]
+#CHECK: llc	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x94]
+
+	llc	%r0, -524288
+	llc	%r0, -1
+	llc	%r0, 0
+	llc	%r0, 1
+	llc	%r0, 524287
+	llc	%r0, 0(%r1)
+	llc	%r0, 0(%r15)
+	llc	%r0, 524287(%r1,%r15)
+	llc	%r0, 524287(%r15,%r1)
+	llc	%r15, 0
diff --git a/test/MC/SystemZ/insn-llc-02.s b/test/MC/SystemZ/insn-llc-02.s
new file mode 100644
index 0000000..4a65f6c
--- /dev/null
+++ b/test/MC/SystemZ/insn-llc-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: llc	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: llc	%r0, 524288
+
+	llc	%r0, -524289
+	llc	%r0, 524288
diff --git a/test/MC/SystemZ/insn-llcr-01.s b/test/MC/SystemZ/insn-llcr-01.s
new file mode 100644
index 0000000..72a695c
--- /dev/null
+++ b/test/MC/SystemZ/insn-llcr-01.s
@@ -0,0 +1,9 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: llcr	%r0, %r15               # encoding: [0xb9,0x94,0x00,0x0f]
+#CHECK: llcr	%r7, %r8                # encoding: [0xb9,0x94,0x00,0x78]
+#CHECK: llcr	%r15, %r0               # encoding: [0xb9,0x94,0x00,0xf0]
+
+	llcr	%r0, %r15
+	llcr	%r7, %r8
+	llcr	%r15, %r0
diff --git a/test/MC/SystemZ/insn-llgc-01.s b/test/MC/SystemZ/insn-llgc-01.s
new file mode 100644
index 0000000..297c6d6
--- /dev/null
+++ b/test/MC/SystemZ/insn-llgc-01.s
@@ -0,0 +1,24 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: llgc	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x90]
+#CHECK: llgc	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x90]
+#CHECK: llgc	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x90]
+#CHECK: llgc	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x90]
+#CHECK: llgc	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x90]
+#CHECK: llgc	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x90]
+#CHECK: llgc	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x90]
+#CHECK: llgc	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x90]
+#CHECK: llgc	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x90]
+#CHECK: llgc	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x90]
+
+	llgc	%r0, -524288
+	llgc	%r0, -1
+	llgc	%r0, 0
+	llgc	%r0, 1
+	llgc	%r0, 524287
+	llgc	%r0, 0(%r1)
+	llgc	%r0, 0(%r15)
+	llgc	%r0, 524287(%r1,%r15)
+	llgc	%r0, 524287(%r15,%r1)
+	llgc	%r15, 0
+
diff --git a/test/MC/SystemZ/insn-llgc-02.s b/test/MC/SystemZ/insn-llgc-02.s
new file mode 100644
index 0000000..76fca0f
--- /dev/null
+++ b/test/MC/SystemZ/insn-llgc-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: llgc	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: llgc	%r0, 524288
+
+	llgc	%r0, -524289
+	llgc	%r0, 524288
diff --git a/test/MC/SystemZ/insn-llgcr-01.s b/test/MC/SystemZ/insn-llgcr-01.s
new file mode 100644
index 0000000..5d653bf
--- /dev/null
+++ b/test/MC/SystemZ/insn-llgcr-01.s
@@ -0,0 +1,9 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: llgcr	%r0, %r15               # encoding: [0xb9,0x84,0x00,0x0f]
+#CHECK: llgcr	%r7, %r8                # encoding: [0xb9,0x84,0x00,0x78]
+#CHECK: llgcr	%r15, %r0               # encoding: [0xb9,0x84,0x00,0xf0]
+
+	llgcr	%r0, %r15
+	llgcr	%r7, %r8
+	llgcr	%r15, %r0
diff --git a/test/MC/SystemZ/insn-llgf-01.s b/test/MC/SystemZ/insn-llgf-01.s
new file mode 100644
index 0000000..0394140
--- /dev/null
+++ b/test/MC/SystemZ/insn-llgf-01.s
@@ -0,0 +1,24 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: llgf	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x16]
+#CHECK: llgf	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x16]
+#CHECK: llgf	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x16]
+#CHECK: llgf	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x16]
+#CHECK: llgf	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x16]
+#CHECK: llgf	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x16]
+#CHECK: llgf	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x16]
+#CHECK: llgf	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x16]
+#CHECK: llgf	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x16]
+#CHECK: llgf	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x16]
+
+	llgf	%r0, -524288
+	llgf	%r0, -1
+	llgf	%r0, 0
+	llgf	%r0, 1
+	llgf	%r0, 524287
+	llgf	%r0, 0(%r1)
+	llgf	%r0, 0(%r15)
+	llgf	%r0, 524287(%r1,%r15)
+	llgf	%r0, 524287(%r15,%r1)
+	llgf	%r15, 0
+
diff --git a/test/MC/SystemZ/insn-llgf-02.s b/test/MC/SystemZ/insn-llgf-02.s
new file mode 100644
index 0000000..0b2fab0
--- /dev/null
+++ b/test/MC/SystemZ/insn-llgf-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: llgf	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: llgf	%r0, 524288
+
+	llgf	%r0, -524289
+	llgf	%r0, 524288
diff --git a/test/MC/SystemZ/insn-llgfr-01.s b/test/MC/SystemZ/insn-llgfr-01.s
new file mode 100644
index 0000000..74f1074
--- /dev/null
+++ b/test/MC/SystemZ/insn-llgfr-01.s
@@ -0,0 +1,9 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: llgfr	%r0, %r15               # encoding: [0xb9,0x16,0x00,0x0f]
+#CHECK: llgfr	%r7, %r8                # encoding: [0xb9,0x16,0x00,0x78]
+#CHECK: llgfr	%r15, %r0               # encoding: [0xb9,0x16,0x00,0xf0]
+
+	llgfr	%r0, %r15
+	llgfr	%r7, %r8
+	llgfr	%r15, %r0
diff --git a/test/MC/SystemZ/insn-llgfrl-01.s b/test/MC/SystemZ/insn-llgfrl-01.s
new file mode 100644
index 0000000..85fc9f4
--- /dev/null
+++ b/test/MC/SystemZ/insn-llgfrl-01.s
@@ -0,0 +1,31 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: llgfrl	%r0, 2864434397         # encoding: [0xc4,0x0e,0x55,0x5d,0xe6,0x6e]
+#CHECK: llgfrl	%r15, 2864434397        # encoding: [0xc4,0xfe,0x55,0x5d,0xe6,0x6e]
+
+	llgfrl	%r0,0xaabbccdd
+	llgfrl	%r15,0xaabbccdd
+
+#CHECK: llgfrl	%r0, foo                # encoding: [0xc4,0x0e,A,A,A,A]
+# fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: llgfrl	%r15, foo               # encoding: [0xc4,0xfe,A,A,A,A]
+# fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+
+	llgfrl	%r0,foo
+	llgfrl	%r15,foo
+
+#CHECK: llgfrl	%r3, bar+100            # encoding: [0xc4,0x3e,A,A,A,A]
+# fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+#CHECK: llgfrl	%r4, bar+100            # encoding: [0xc4,0x4e,A,A,A,A]
+# fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+
+	llgfrl	%r3,bar+100
+	llgfrl	%r4,bar+100
+
+#CHECK: llgfrl	%r7, frob@PLT           # encoding: [0xc4,0x7e,A,A,A,A]
+# fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+#CHECK: llgfrl	%r8, frob@PLT           # encoding: [0xc4,0x8e,A,A,A,A]
+# fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+
+	llgfrl	%r7,frob@PLT
+	llgfrl	%r8,frob@PLT
diff --git a/test/MC/SystemZ/insn-llgh-01.s b/test/MC/SystemZ/insn-llgh-01.s
new file mode 100644
index 0000000..acbab00
--- /dev/null
+++ b/test/MC/SystemZ/insn-llgh-01.s
@@ -0,0 +1,24 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: llgh	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x91]
+#CHECK: llgh	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x91]
+#CHECK: llgh	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x91]
+#CHECK: llgh	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x91]
+#CHECK: llgh	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x91]
+#CHECK: llgh	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x91]
+#CHECK: llgh	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x91]
+#CHECK: llgh	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x91]
+#CHECK: llgh	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x91]
+#CHECK: llgh	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x91]
+
+	llgh	%r0, -524288
+	llgh	%r0, -1
+	llgh	%r0, 0
+	llgh	%r0, 1
+	llgh	%r0, 524287
+	llgh	%r0, 0(%r1)
+	llgh	%r0, 0(%r15)
+	llgh	%r0, 524287(%r1,%r15)
+	llgh	%r0, 524287(%r15,%r1)
+	llgh	%r15, 0
+
diff --git a/test/MC/SystemZ/insn-llgh-02.s b/test/MC/SystemZ/insn-llgh-02.s
new file mode 100644
index 0000000..95b6b12
--- /dev/null
+++ b/test/MC/SystemZ/insn-llgh-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: llgh	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: llgh	%r0, 524288
+
+	llgh	%r0, -524289
+	llgh	%r0, 524288
diff --git a/test/MC/SystemZ/insn-llghr-01.s b/test/MC/SystemZ/insn-llghr-01.s
new file mode 100644
index 0000000..3e2f6de
--- /dev/null
+++ b/test/MC/SystemZ/insn-llghr-01.s
@@ -0,0 +1,9 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: llghr	%r0, %r15               # encoding: [0xb9,0x85,0x00,0x0f]
+#CHECK: llghr	%r7, %r8                # encoding: [0xb9,0x85,0x00,0x78]
+#CHECK: llghr	%r15, %r0               # encoding: [0xb9,0x85,0x00,0xf0]
+
+	llghr	%r0, %r15
+	llghr	%r7, %r8
+	llghr	%r15, %r0
diff --git a/test/MC/SystemZ/insn-llghrl-01.s b/test/MC/SystemZ/insn-llghrl-01.s
new file mode 100644
index 0000000..af3fa8b
--- /dev/null
+++ b/test/MC/SystemZ/insn-llghrl-01.s
@@ -0,0 +1,31 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: llghrl	%r0, 2864434397         # encoding: [0xc4,0x06,0x55,0x5d,0xe6,0x6e]
+#CHECK: llghrl	%r15, 2864434397        # encoding: [0xc4,0xf6,0x55,0x5d,0xe6,0x6e]
+
+	llghrl	%r0,0xaabbccdd
+	llghrl	%r15,0xaabbccdd
+
+#CHECK: llghrl	%r0, foo                # encoding: [0xc4,0x06,A,A,A,A]
+# fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: llghrl	%r15, foo               # encoding: [0xc4,0xf6,A,A,A,A]
+# fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+
+	llghrl	%r0,foo
+	llghrl	%r15,foo
+
+#CHECK: llghrl	%r3, bar+100            # encoding: [0xc4,0x36,A,A,A,A]
+# fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+#CHECK: llghrl	%r4, bar+100            # encoding: [0xc4,0x46,A,A,A,A]
+# fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+
+	llghrl	%r3,bar+100
+	llghrl	%r4,bar+100
+
+#CHECK: llghrl	%r7, frob@PLT           # encoding: [0xc4,0x76,A,A,A,A]
+# fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+#CHECK: llghrl	%r8, frob@PLT           # encoding: [0xc4,0x86,A,A,A,A]
+# fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+
+	llghrl	%r7,frob@PLT
+	llghrl	%r8,frob@PLT
diff --git a/test/MC/SystemZ/insn-llh-01.s b/test/MC/SystemZ/insn-llh-01.s
new file mode 100644
index 0000000..7e15f3f
--- /dev/null
+++ b/test/MC/SystemZ/insn-llh-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: llh	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x95]
+#CHECK: llh	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x95]
+#CHECK: llh	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x95]
+#CHECK: llh	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x95]
+#CHECK: llh	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x95]
+#CHECK: llh	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x95]
+#CHECK: llh	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x95]
+#CHECK: llh	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x95]
+#CHECK: llh	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x95]
+#CHECK: llh	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x95]
+
+	llh	%r0, -524288
+	llh	%r0, -1
+	llh	%r0, 0
+	llh	%r0, 1
+	llh	%r0, 524287
+	llh	%r0, 0(%r1)
+	llh	%r0, 0(%r15)
+	llh	%r0, 524287(%r1,%r15)
+	llh	%r0, 524287(%r15,%r1)
+	llh	%r15, 0
diff --git a/test/MC/SystemZ/insn-llh-02.s b/test/MC/SystemZ/insn-llh-02.s
new file mode 100644
index 0000000..92c49bb
--- /dev/null
+++ b/test/MC/SystemZ/insn-llh-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: llh	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: llh	%r0, 524288
+
+	llh	%r0, -524289
+	llh	%r0, 524288
diff --git a/test/MC/SystemZ/insn-llhr-01.s b/test/MC/SystemZ/insn-llhr-01.s
new file mode 100644
index 0000000..bb1d3b5
--- /dev/null
+++ b/test/MC/SystemZ/insn-llhr-01.s
@@ -0,0 +1,9 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: llhr	%r0, %r15               # encoding: [0xb9,0x95,0x00,0x0f]
+#CHECK: llhr	%r7, %r8                # encoding: [0xb9,0x95,0x00,0x78]
+#CHECK: llhr	%r15, %r0               # encoding: [0xb9,0x95,0x00,0xf0]
+
+	llhr	%r0, %r15
+	llhr	%r7, %r8
+	llhr	%r15, %r0
diff --git a/test/MC/SystemZ/insn-llhrl-01.s b/test/MC/SystemZ/insn-llhrl-01.s
new file mode 100644
index 0000000..30ed4f9
--- /dev/null
+++ b/test/MC/SystemZ/insn-llhrl-01.s
@@ -0,0 +1,31 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: llhrl	%r0, 2864434397         # encoding: [0xc4,0x02,0x55,0x5d,0xe6,0x6e]
+#CHECK: llhrl	%r15, 2864434397        # encoding: [0xc4,0xf2,0x55,0x5d,0xe6,0x6e]
+
+	llhrl	%r0,0xaabbccdd
+	llhrl	%r15,0xaabbccdd
+
+#CHECK: llhrl	%r0, foo                # encoding: [0xc4,0x02,A,A,A,A]
+# fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: llhrl	%r15, foo               # encoding: [0xc4,0xf2,A,A,A,A]
+# fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+
+	llhrl	%r0,foo
+	llhrl	%r15,foo
+
+#CHECK: llhrl	%r3, bar+100            # encoding: [0xc4,0x32,A,A,A,A]
+# fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+#CHECK: llhrl	%r4, bar+100            # encoding: [0xc4,0x42,A,A,A,A]
+# fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+
+	llhrl	%r3,bar+100
+	llhrl	%r4,bar+100
+
+#CHECK: llhrl	%r7, frob@PLT           # encoding: [0xc4,0x72,A,A,A,A]
+# fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+#CHECK: llhrl	%r8, frob@PLT           # encoding: [0xc4,0x82,A,A,A,A]
+# fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+
+	llhrl	%r7,frob@PLT
+	llhrl	%r8,frob@PLT
diff --git a/test/MC/SystemZ/insn-llihf-01.s b/test/MC/SystemZ/insn-llihf-01.s
new file mode 100644
index 0000000..6ddd29f
--- /dev/null
+++ b/test/MC/SystemZ/insn-llihf-01.s
@@ -0,0 +1,9 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: llihf	%r0, 0                  # encoding: [0xc0,0x0e,0x00,0x00,0x00,0x00]
+#CHECK: llihf	%r0, 4294967295         # encoding: [0xc0,0x0e,0xff,0xff,0xff,0xff]
+#CHECK: llihf	%r15, 0                 # encoding: [0xc0,0xfe,0x00,0x00,0x00,0x00]
+
+	llihf	%r0, 0
+	llihf	%r0, 0xffffffff
+	llihf	%r15, 0
diff --git a/test/MC/SystemZ/insn-llihf-02.s b/test/MC/SystemZ/insn-llihf-02.s
new file mode 100644
index 0000000..e1b4537
--- /dev/null
+++ b/test/MC/SystemZ/insn-llihf-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: llihf	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: llihf	%r0, 1 << 32
+
+	llihf	%r0, -1
+	llihf	%r0, 1 << 32
diff --git a/test/MC/SystemZ/insn-llihh-01.s b/test/MC/SystemZ/insn-llihh-01.s
new file mode 100644
index 0000000..0606076
--- /dev/null
+++ b/test/MC/SystemZ/insn-llihh-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: llihh	%r0, 0                  # encoding: [0xa5,0x0c,0x00,0x00]
+#CHECK: llihh	%r0, 32768              # encoding: [0xa5,0x0c,0x80,0x00]
+#CHECK: llihh	%r0, 65535              # encoding: [0xa5,0x0c,0xff,0xff]
+#CHECK: llihh	%r15, 0                 # encoding: [0xa5,0xfc,0x00,0x00]
+
+	llihh	%r0, 0
+	llihh	%r0, 0x8000
+	llihh	%r0, 0xffff
+	llihh	%r15, 0
diff --git a/test/MC/SystemZ/insn-llihh-02.s b/test/MC/SystemZ/insn-llihh-02.s
new file mode 100644
index 0000000..1309f14
--- /dev/null
+++ b/test/MC/SystemZ/insn-llihh-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: llihh	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: llihh	%r0, 0x10000
+
+	llihh	%r0, -1
+	llihh	%r0, 0x10000
diff --git a/test/MC/SystemZ/insn-llihl-01.s b/test/MC/SystemZ/insn-llihl-01.s
new file mode 100644
index 0000000..6353353
--- /dev/null
+++ b/test/MC/SystemZ/insn-llihl-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: llihl	%r0, 0                  # encoding: [0xa5,0x0d,0x00,0x00]
+#CHECK: llihl	%r0, 32768              # encoding: [0xa5,0x0d,0x80,0x00]
+#CHECK: llihl	%r0, 65535              # encoding: [0xa5,0x0d,0xff,0xff]
+#CHECK: llihl	%r15, 0                 # encoding: [0xa5,0xfd,0x00,0x00]
+
+	llihl	%r0, 0
+	llihl	%r0, 0x8000
+	llihl	%r0, 0xffff
+	llihl	%r15, 0
diff --git a/test/MC/SystemZ/insn-llihl-02.s b/test/MC/SystemZ/insn-llihl-02.s
new file mode 100644
index 0000000..6891c42
--- /dev/null
+++ b/test/MC/SystemZ/insn-llihl-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: llihl	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: llihl	%r0, 0x10000
+
+	llihl	%r0, -1
+	llihl	%r0, 0x10000
diff --git a/test/MC/SystemZ/insn-llilf-01.s b/test/MC/SystemZ/insn-llilf-01.s
new file mode 100644
index 0000000..8166583
--- /dev/null
+++ b/test/MC/SystemZ/insn-llilf-01.s
@@ -0,0 +1,9 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: llilf	%r0, 0                  # encoding: [0xc0,0x0f,0x00,0x00,0x00,0x00]
+#CHECK: llilf	%r0, 4294967295         # encoding: [0xc0,0x0f,0xff,0xff,0xff,0xff]
+#CHECK: llilf	%r15, 0                 # encoding: [0xc0,0xff,0x00,0x00,0x00,0x00]
+
+	llilf	%r0, 0
+	llilf	%r0, 0xffffffff
+	llilf	%r15, 0
diff --git a/test/MC/SystemZ/insn-llilf-02.s b/test/MC/SystemZ/insn-llilf-02.s
new file mode 100644
index 0000000..dc10cc3
--- /dev/null
+++ b/test/MC/SystemZ/insn-llilf-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: llilf	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: llilf	%r0, 1 << 32
+
+	llilf	%r0, -1
+	llilf	%r0, 1 << 32
diff --git a/test/MC/SystemZ/insn-llilh-01.s b/test/MC/SystemZ/insn-llilh-01.s
new file mode 100644
index 0000000..2ec5a79
--- /dev/null
+++ b/test/MC/SystemZ/insn-llilh-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: llilh	%r0, 0                  # encoding: [0xa5,0x0e,0x00,0x00]
+#CHECK: llilh	%r0, 32768              # encoding: [0xa5,0x0e,0x80,0x00]
+#CHECK: llilh	%r0, 65535              # encoding: [0xa5,0x0e,0xff,0xff]
+#CHECK: llilh	%r15, 0                 # encoding: [0xa5,0xfe,0x00,0x00]
+
+	llilh	%r0, 0
+	llilh	%r0, 0x8000
+	llilh	%r0, 0xffff
+	llilh	%r15, 0
diff --git a/test/MC/SystemZ/insn-llilh-02.s b/test/MC/SystemZ/insn-llilh-02.s
new file mode 100644
index 0000000..bdfa1e7
--- /dev/null
+++ b/test/MC/SystemZ/insn-llilh-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: llilh	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: llilh	%r0, 0x10000
+
+	llilh	%r0, -1
+	llilh	%r0, 0x10000
diff --git a/test/MC/SystemZ/insn-llill-01.s b/test/MC/SystemZ/insn-llill-01.s
new file mode 100644
index 0000000..b95841d
--- /dev/null
+++ b/test/MC/SystemZ/insn-llill-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: llill	%r0, 0                  # encoding: [0xa5,0x0f,0x00,0x00]
+#CHECK: llill	%r0, 32768              # encoding: [0xa5,0x0f,0x80,0x00]
+#CHECK: llill	%r0, 65535              # encoding: [0xa5,0x0f,0xff,0xff]
+#CHECK: llill	%r15, 0                 # encoding: [0xa5,0xff,0x00,0x00]
+
+	llill	%r0, 0
+	llill	%r0, 0x8000
+	llill	%r0, 0xffff
+	llill	%r15, 0
diff --git a/test/MC/SystemZ/insn-llill-02.s b/test/MC/SystemZ/insn-llill-02.s
new file mode 100644
index 0000000..2503b53
--- /dev/null
+++ b/test/MC/SystemZ/insn-llill-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: llill	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: llill	%r0, 0x10000
+
+	llill	%r0, -1
+	llill	%r0, 0x10000
diff --git a/test/MC/SystemZ/insn-lmg-01.s b/test/MC/SystemZ/insn-lmg-01.s
new file mode 100644
index 0000000..24a2768
--- /dev/null
+++ b/test/MC/SystemZ/insn-lmg-01.s
@@ -0,0 +1,29 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: lmg	%r0, %r0, 0             # encoding: [0xeb,0x00,0x00,0x00,0x00,0x04]
+#CHECK: lmg	%r0, %r15, 0            # encoding: [0xeb,0x0f,0x00,0x00,0x00,0x04]
+#CHECK: lmg	%r14, %r15, 0           # encoding: [0xeb,0xef,0x00,0x00,0x00,0x04]
+#CHECK: lmg	%r15, %r15, 0           # encoding: [0xeb,0xff,0x00,0x00,0x00,0x04]
+#CHECK: lmg	%r0, %r0, -524288       # encoding: [0xeb,0x00,0x00,0x00,0x80,0x04]
+#CHECK: lmg	%r0, %r0, -1            # encoding: [0xeb,0x00,0x0f,0xff,0xff,0x04]
+#CHECK: lmg	%r0, %r0, 0             # encoding: [0xeb,0x00,0x00,0x00,0x00,0x04]
+#CHECK: lmg	%r0, %r0, 1             # encoding: [0xeb,0x00,0x00,0x01,0x00,0x04]
+#CHECK: lmg	%r0, %r0, 524287        # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0x04]
+#CHECK: lmg	%r0, %r0, 0(%r1)        # encoding: [0xeb,0x00,0x10,0x00,0x00,0x04]
+#CHECK: lmg	%r0, %r0, 0(%r15)       # encoding: [0xeb,0x00,0xf0,0x00,0x00,0x04]
+#CHECK: lmg	%r0, %r0, 524287(%r1)   # encoding: [0xeb,0x00,0x1f,0xff,0x7f,0x04]
+#CHECK: lmg	%r0, %r0, 524287(%r15)  # encoding: [0xeb,0x00,0xff,0xff,0x7f,0x04]
+
+	lmg	%r0,%r0,0
+	lmg	%r0,%r15,0
+	lmg	%r14,%r15,0
+	lmg	%r15,%r15,0
+	lmg	%r0,%r0,-524288
+	lmg	%r0,%r0,-1
+	lmg	%r0,%r0,0
+	lmg	%r0,%r0,1
+	lmg	%r0,%r0,524287
+	lmg	%r0,%r0,0(%r1)
+	lmg	%r0,%r0,0(%r15)
+	lmg	%r0,%r0,524287(%r1)
+	lmg	%r0,%r0,524287(%r15)
diff --git a/test/MC/SystemZ/insn-lmg-02.s b/test/MC/SystemZ/insn-lmg-02.s
new file mode 100644
index 0000000..9a67c08
--- /dev/null
+++ b/test/MC/SystemZ/insn-lmg-02.s
@@ -0,0 +1,13 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: lmg	%r0, %r0, -524289
+#CHECK: error: invalid operand
+#CHECK: lmg	%r0, %r0, 524288
+#CHECK: error: invalid use of indexed addressing
+#CHECK: lmg	%r0, %r0, 0(%r1,%r2)
+
+	lmg	%r0, %r0, -524289
+	lmg	%r0, %r0, 524288
+	lmg	%r0, %r0, 0(%r1,%r2)
diff --git a/test/MC/SystemZ/insn-lndbr-01.s b/test/MC/SystemZ/insn-lndbr-01.s
new file mode 100644
index 0000000..2278623
--- /dev/null
+++ b/test/MC/SystemZ/insn-lndbr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: lndbr	%f0, %f9                # encoding: [0xb3,0x11,0x00,0x09]
+#CHECK: lndbr	%f0, %f15               # encoding: [0xb3,0x11,0x00,0x0f]
+#CHECK: lndbr	%f15, %f0               # encoding: [0xb3,0x11,0x00,0xf0]
+#CHECK: lndbr	%f15, %f9               # encoding: [0xb3,0x11,0x00,0xf9]
+
+	lndbr	%f0,%f9
+	lndbr	%f0,%f15
+	lndbr	%f15,%f0
+	lndbr	%f15,%f9
diff --git a/test/MC/SystemZ/insn-lnebr-01.s b/test/MC/SystemZ/insn-lnebr-01.s
new file mode 100644
index 0000000..cf32734
--- /dev/null
+++ b/test/MC/SystemZ/insn-lnebr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: lnebr	%f0, %f9                # encoding: [0xb3,0x01,0x00,0x09]
+#CHECK: lnebr	%f0, %f15               # encoding: [0xb3,0x01,0x00,0x0f]
+#CHECK: lnebr	%f15, %f0               # encoding: [0xb3,0x01,0x00,0xf0]
+#CHECK: lnebr	%f15, %f9               # encoding: [0xb3,0x01,0x00,0xf9]
+
+	lnebr	%f0,%f9
+	lnebr	%f0,%f15
+	lnebr	%f15,%f0
+	lnebr	%f15,%f9
diff --git a/test/MC/SystemZ/insn-lnxbr-01.s b/test/MC/SystemZ/insn-lnxbr-01.s
new file mode 100644
index 0000000..bf37948
--- /dev/null
+++ b/test/MC/SystemZ/insn-lnxbr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: lnxbr	%f0, %f8                # encoding: [0xb3,0x41,0x00,0x08]
+#CHECK: lnxbr	%f0, %f13               # encoding: [0xb3,0x41,0x00,0x0d]
+#CHECK: lnxbr	%f13, %f0               # encoding: [0xb3,0x41,0x00,0xd0]
+#CHECK: lnxbr	%f13, %f9               # encoding: [0xb3,0x41,0x00,0xd9]
+
+	lnxbr	%f0,%f8
+	lnxbr	%f0,%f13
+	lnxbr	%f13,%f0
+	lnxbr	%f13,%f9
diff --git a/test/MC/SystemZ/insn-lnxbr-02.s b/test/MC/SystemZ/insn-lnxbr-02.s
new file mode 100644
index 0000000..9a69f48
--- /dev/null
+++ b/test/MC/SystemZ/insn-lnxbr-02.s
@@ -0,0 +1,17 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid register
+#CHECK: lnxbr	%f0, %f2
+#CHECK: error: invalid register
+#CHECK: lnxbr	%f0, %f14
+#CHECK: error: invalid register
+#CHECK: lnxbr	%f2, %f0
+#CHECK: error: invalid register
+#CHECK: lnxbr	%f14, %f0
+
+	lnxbr	%f0, %f2
+	lnxbr	%f0, %f14
+	lnxbr	%f2, %f0
+	lnxbr	%f14, %f0
+
diff --git a/test/MC/SystemZ/insn-lpdbr-01.s b/test/MC/SystemZ/insn-lpdbr-01.s
new file mode 100644
index 0000000..869b0c9
--- /dev/null
+++ b/test/MC/SystemZ/insn-lpdbr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: lpdbr	%f0, %f9                # encoding: [0xb3,0x10,0x00,0x09]
+#CHECK: lpdbr	%f0, %f15               # encoding: [0xb3,0x10,0x00,0x0f]
+#CHECK: lpdbr	%f15, %f0               # encoding: [0xb3,0x10,0x00,0xf0]
+#CHECK: lpdbr	%f15, %f9               # encoding: [0xb3,0x10,0x00,0xf9]
+
+	lpdbr	%f0,%f9
+	lpdbr	%f0,%f15
+	lpdbr	%f15,%f0
+	lpdbr	%f15,%f9
diff --git a/test/MC/SystemZ/insn-lpebr-01.s b/test/MC/SystemZ/insn-lpebr-01.s
new file mode 100644
index 0000000..917f26e
--- /dev/null
+++ b/test/MC/SystemZ/insn-lpebr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: lpebr	%f0, %f9                # encoding: [0xb3,0x00,0x00,0x09]
+#CHECK: lpebr	%f0, %f15               # encoding: [0xb3,0x00,0x00,0x0f]
+#CHECK: lpebr	%f15, %f0               # encoding: [0xb3,0x00,0x00,0xf0]
+#CHECK: lpebr	%f15, %f9               # encoding: [0xb3,0x00,0x00,0xf9]
+
+	lpebr	%f0,%f9
+	lpebr	%f0,%f15
+	lpebr	%f15,%f0
+	lpebr	%f15,%f9
diff --git a/test/MC/SystemZ/insn-lpxbr-01.s b/test/MC/SystemZ/insn-lpxbr-01.s
new file mode 100644
index 0000000..56a628a
--- /dev/null
+++ b/test/MC/SystemZ/insn-lpxbr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: lpxbr	%f0, %f8                # encoding: [0xb3,0x40,0x00,0x08]
+#CHECK: lpxbr	%f0, %f13               # encoding: [0xb3,0x40,0x00,0x0d]
+#CHECK: lpxbr	%f13, %f0               # encoding: [0xb3,0x40,0x00,0xd0]
+#CHECK: lpxbr	%f13, %f9               # encoding: [0xb3,0x40,0x00,0xd9]
+
+	lpxbr	%f0,%f8
+	lpxbr	%f0,%f13
+	lpxbr	%f13,%f0
+	lpxbr	%f13,%f9
diff --git a/test/MC/SystemZ/insn-lpxbr-02.s b/test/MC/SystemZ/insn-lpxbr-02.s
new file mode 100644
index 0000000..6fa3697
--- /dev/null
+++ b/test/MC/SystemZ/insn-lpxbr-02.s
@@ -0,0 +1,17 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid register
+#CHECK: lpxbr	%f0, %f2
+#CHECK: error: invalid register
+#CHECK: lpxbr	%f0, %f14
+#CHECK: error: invalid register
+#CHECK: lpxbr	%f2, %f0
+#CHECK: error: invalid register
+#CHECK: lpxbr	%f14, %f0
+
+	lpxbr	%f0, %f2
+	lpxbr	%f0, %f14
+	lpxbr	%f2, %f0
+	lpxbr	%f14, %f0
+
diff --git a/test/MC/SystemZ/insn-lr-01.s b/test/MC/SystemZ/insn-lr-01.s
new file mode 100644
index 0000000..8ce4a00
--- /dev/null
+++ b/test/MC/SystemZ/insn-lr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: lr	%r0, %r9                # encoding: [0x18,0x09]
+#CHECK: lr	%r0, %r15               # encoding: [0x18,0x0f]
+#CHECK: lr	%r15, %r0               # encoding: [0x18,0xf0]
+#CHECK: lr	%r15, %r9               # encoding: [0x18,0xf9]
+
+	lr	%r0,%r9
+	lr	%r0,%r15
+	lr	%r15,%r0
+	lr	%r15,%r9
diff --git a/test/MC/SystemZ/insn-lrl-01.s b/test/MC/SystemZ/insn-lrl-01.s
new file mode 100644
index 0000000..32d0eeb
--- /dev/null
+++ b/test/MC/SystemZ/insn-lrl-01.s
@@ -0,0 +1,31 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: lrl	%r0, 2864434397         # encoding: [0xc4,0x0d,0x55,0x5d,0xe6,0x6e]
+#CHECK: lrl	%r15, 2864434397        # encoding: [0xc4,0xfd,0x55,0x5d,0xe6,0x6e]
+
+	lrl	%r0,0xaabbccdd
+	lrl	%r15,0xaabbccdd
+
+#CHECK: lrl	%r0, foo                # encoding: [0xc4,0x0d,A,A,A,A]
+# fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: lrl	%r15, foo               # encoding: [0xc4,0xfd,A,A,A,A]
+# fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+
+	lrl	%r0,foo
+	lrl	%r15,foo
+
+#CHECK: lrl	%r3, bar+100            # encoding: [0xc4,0x3d,A,A,A,A]
+# fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+#CHECK: lrl	%r4, bar+100            # encoding: [0xc4,0x4d,A,A,A,A]
+# fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+
+	lrl	%r3,bar+100
+	lrl	%r4,bar+100
+
+#CHECK: lrl	%r7, frob@PLT           # encoding: [0xc4,0x7d,A,A,A,A]
+# fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+#CHECK: lrl	%r8, frob@PLT           # encoding: [0xc4,0x8d,A,A,A,A]
+# fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+
+	lrl	%r7,frob@PLT
+	lrl	%r8,frob@PLT
diff --git a/test/MC/SystemZ/insn-lrv-01.s b/test/MC/SystemZ/insn-lrv-01.s
new file mode 100644
index 0000000..75b973a
--- /dev/null
+++ b/test/MC/SystemZ/insn-lrv-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: lrv	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x1e]
+#CHECK: lrv	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x1e]
+#CHECK: lrv	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x1e]
+#CHECK: lrv	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x1e]
+#CHECK: lrv	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x1e]
+#CHECK: lrv	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x1e]
+#CHECK: lrv	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x1e]
+#CHECK: lrv	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x1e]
+#CHECK: lrv	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x1e]
+#CHECK: lrv	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x1e]
+
+	lrv	%r0,-524288
+	lrv	%r0,-1
+	lrv	%r0,0
+	lrv	%r0,1
+	lrv	%r0,524287
+	lrv	%r0,0(%r1)
+	lrv	%r0,0(%r15)
+	lrv	%r0,524287(%r1,%r15)
+	lrv	%r0,524287(%r15,%r1)
+	lrv	%r15,0
diff --git a/test/MC/SystemZ/insn-lrv-02.s b/test/MC/SystemZ/insn-lrv-02.s
new file mode 100644
index 0000000..f2dcfa7
--- /dev/null
+++ b/test/MC/SystemZ/insn-lrv-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: lrv	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: lrv	%r0, 524288
+
+	lrv	%r0, -524289
+	lrv	%r0, 524288
diff --git a/test/MC/SystemZ/insn-lrvg-01.s b/test/MC/SystemZ/insn-lrvg-01.s
new file mode 100644
index 0000000..d2d8b2d
--- /dev/null
+++ b/test/MC/SystemZ/insn-lrvg-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: lrvg	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x0f]
+#CHECK: lrvg	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x0f]
+#CHECK: lrvg	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x0f]
+#CHECK: lrvg	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x0f]
+#CHECK: lrvg	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x0f]
+#CHECK: lrvg	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x0f]
+#CHECK: lrvg	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x0f]
+#CHECK: lrvg	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x0f]
+#CHECK: lrvg	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x0f]
+#CHECK: lrvg	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x0f]
+
+	lrvg	%r0,-524288
+	lrvg	%r0,-1
+	lrvg	%r0,0
+	lrvg	%r0,1
+	lrvg	%r0,524287
+	lrvg	%r0,0(%r1)
+	lrvg	%r0,0(%r15)
+	lrvg	%r0,524287(%r1,%r15)
+	lrvg	%r0,524287(%r15,%r1)
+	lrvg	%r15,0
diff --git a/test/MC/SystemZ/insn-lrvg-02.s b/test/MC/SystemZ/insn-lrvg-02.s
new file mode 100644
index 0000000..690fa13
--- /dev/null
+++ b/test/MC/SystemZ/insn-lrvg-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: lrvg	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: lrvg	%r0, 524288
+
+	lrvg	%r0, -524289
+	lrvg	%r0, 524288
diff --git a/test/MC/SystemZ/insn-lrvgr-01.s b/test/MC/SystemZ/insn-lrvgr-01.s
new file mode 100644
index 0000000..1b6e884
--- /dev/null
+++ b/test/MC/SystemZ/insn-lrvgr-01.s
@@ -0,0 +1,13 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: lrvgr	%r0, %r0                # encoding: [0xb9,0x0f,0x00,0x00]
+#CHECK: lrvgr	%r0, %r15               # encoding: [0xb9,0x0f,0x00,0x0f]
+#CHECK: lrvgr	%r15, %r0               # encoding: [0xb9,0x0f,0x00,0xf0]
+#CHECK: lrvgr	%r7, %r8                # encoding: [0xb9,0x0f,0x00,0x78]
+#CHECK: lrvgr	%r15, %r15              # encoding: [0xb9,0x0f,0x00,0xff]
+
+	lrvgr	%r0,%r0
+	lrvgr	%r0,%r15
+	lrvgr	%r15,%r0
+	lrvgr	%r7,%r8
+	lrvgr	%r15,%r15
diff --git a/test/MC/SystemZ/insn-lrvr-01.s b/test/MC/SystemZ/insn-lrvr-01.s
new file mode 100644
index 0000000..c0d5d89
--- /dev/null
+++ b/test/MC/SystemZ/insn-lrvr-01.s
@@ -0,0 +1,13 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: lrvr	%r0, %r0                # encoding: [0xb9,0x1f,0x00,0x00]
+#CHECK: lrvr	%r0, %r15               # encoding: [0xb9,0x1f,0x00,0x0f]
+#CHECK: lrvr	%r15, %r0               # encoding: [0xb9,0x1f,0x00,0xf0]
+#CHECK: lrvr	%r7, %r8                # encoding: [0xb9,0x1f,0x00,0x78]
+#CHECK: lrvr	%r15, %r15              # encoding: [0xb9,0x1f,0x00,0xff]
+
+	lrvr	%r0,%r0
+	lrvr	%r0,%r15
+	lrvr	%r15,%r0
+	lrvr	%r7,%r8
+	lrvr	%r15,%r15
diff --git a/test/MC/SystemZ/insn-lxr-01.s b/test/MC/SystemZ/insn-lxr-01.s
new file mode 100644
index 0000000..a04cdf7
--- /dev/null
+++ b/test/MC/SystemZ/insn-lxr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: lxr	%f0, %f8                # encoding: [0xb3,0x65,0x00,0x08]
+#CHECK: lxr	%f0, %f13               # encoding: [0xb3,0x65,0x00,0x0d]
+#CHECK: lxr	%f13, %f0               # encoding: [0xb3,0x65,0x00,0xd0]
+#CHECK: lxr	%f13, %f9               # encoding: [0xb3,0x65,0x00,0xd9]
+
+	lxr	%f0,%f8
+	lxr	%f0,%f13
+	lxr	%f13,%f0
+	lxr	%f13,%f9
diff --git a/test/MC/SystemZ/insn-lxr-02.s b/test/MC/SystemZ/insn-lxr-02.s
new file mode 100644
index 0000000..b18ad75
--- /dev/null
+++ b/test/MC/SystemZ/insn-lxr-02.s
@@ -0,0 +1,16 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid register
+#CHECK: lxr	%f2, %f0
+#CHECK: error: invalid register
+#CHECK: lxr	%f15, %f0
+#CHECK: error: invalid register
+#CHECK: lxr	%f0, %f2
+#CHECK: error: invalid register
+#CHECK: lxr	%f0, %f15
+
+	lxr	%f2, %f0
+	lxr	%f15, %f0
+	lxr	%f0, %f2
+	lxr	%f0, %f15
diff --git a/test/MC/SystemZ/insn-ly-01.s b/test/MC/SystemZ/insn-ly-01.s
new file mode 100644
index 0000000..25bc3e8
--- /dev/null
+++ b/test/MC/SystemZ/insn-ly-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: ly	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x58]
+#CHECK: ly	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x58]
+#CHECK: ly	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x58]
+#CHECK: ly	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x58]
+#CHECK: ly	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x58]
+#CHECK: ly	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x58]
+#CHECK: ly	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x58]
+#CHECK: ly	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x58]
+#CHECK: ly	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x58]
+#CHECK: ly	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x58]
+
+	ly	%r0, -524288
+	ly	%r0, -1
+	ly	%r0, 0
+	ly	%r0, 1
+	ly	%r0, 524287
+	ly	%r0, 0(%r1)
+	ly	%r0, 0(%r15)
+	ly	%r0, 524287(%r1,%r15)
+	ly	%r0, 524287(%r15,%r1)
+	ly	%r15, 0
diff --git a/test/MC/SystemZ/insn-ly-02.s b/test/MC/SystemZ/insn-ly-02.s
new file mode 100644
index 0000000..b2d424e
--- /dev/null
+++ b/test/MC/SystemZ/insn-ly-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: ly	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: ly	%r0, 524288
+
+	ly	%r0, -524289
+	ly	%r0, 524288
diff --git a/test/MC/SystemZ/insn-lzdr-01.s b/test/MC/SystemZ/insn-lzdr-01.s
new file mode 100644
index 0000000..c95082f
--- /dev/null
+++ b/test/MC/SystemZ/insn-lzdr-01.s
@@ -0,0 +1,9 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: lzdr	%f0                     # encoding: [0xb3,0x75,0x00,0x00]
+#CHECK: lzdr	%f7                     # encoding: [0xb3,0x75,0x00,0x70]
+#CHECK: lzdr	%f15                    # encoding: [0xb3,0x75,0x00,0xf0]
+
+	lzdr	%f0
+	lzdr	%f7
+	lzdr	%f15
diff --git a/test/MC/SystemZ/insn-lzer-01.s b/test/MC/SystemZ/insn-lzer-01.s
new file mode 100644
index 0000000..0944047
--- /dev/null
+++ b/test/MC/SystemZ/insn-lzer-01.s
@@ -0,0 +1,9 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: lzer	%f0                     # encoding: [0xb3,0x74,0x00,0x00]
+#CHECK: lzer	%f7                     # encoding: [0xb3,0x74,0x00,0x70]
+#CHECK: lzer	%f15                    # encoding: [0xb3,0x74,0x00,0xf0]
+
+	lzer	%f0
+	lzer	%f7
+	lzer	%f15
diff --git a/test/MC/SystemZ/insn-lzxr-01.s b/test/MC/SystemZ/insn-lzxr-01.s
new file mode 100644
index 0000000..bd5a5c2
--- /dev/null
+++ b/test/MC/SystemZ/insn-lzxr-01.s
@@ -0,0 +1,9 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: lzxr	%f0                     # encoding: [0xb3,0x76,0x00,0x00]
+#CHECK: lzxr	%f8                     # encoding: [0xb3,0x76,0x00,0x80]
+#CHECK: lzxr	%f13                    # encoding: [0xb3,0x76,0x00,0xd0]
+
+	lzxr	%f0
+	lzxr	%f8
+	lzxr	%f13
diff --git a/test/MC/SystemZ/insn-lzxr-02.s b/test/MC/SystemZ/insn-lzxr-02.s
new file mode 100644
index 0000000..4ce2ad0
--- /dev/null
+++ b/test/MC/SystemZ/insn-lzxr-02.s
@@ -0,0 +1,13 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid register
+#CHECK: lzxr	%f2
+#CHECK: error: invalid register
+#CHECK: lzxr	%f14
+#CHECK: error: invalid register
+#CHECK: lzxr	%f15
+
+	lzxr	%f2
+	lzxr	%f14
+	lzxr	%f15
diff --git a/test/MC/SystemZ/insn-madb-01.s b/test/MC/SystemZ/insn-madb-01.s
new file mode 100644
index 0000000..6eec4be
--- /dev/null
+++ b/test/MC/SystemZ/insn-madb-01.s
@@ -0,0 +1,21 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: madb	%f0, %f0, 0             # encoding: [0xed,0x00,0x00,0x00,0x00,0x1e]
+#CHECK: madb	%f0, %f0, 4095          # encoding: [0xed,0x00,0x0f,0xff,0x00,0x1e]
+#CHECK: madb	%f0, %f0, 0(%r1)        # encoding: [0xed,0x00,0x10,0x00,0x00,0x1e]
+#CHECK: madb	%f0, %f0, 0(%r15)       # encoding: [0xed,0x00,0xf0,0x00,0x00,0x1e]
+#CHECK: madb	%f0, %f0, 4095(%r1,%r15) # encoding: [0xed,0x01,0xff,0xff,0x00,0x1e]
+#CHECK: madb	%f0, %f0, 4095(%r15,%r1) # encoding: [0xed,0x0f,0x1f,0xff,0x00,0x1e]
+#CHECK: madb	%f0, %f15, 0            # encoding: [0xed,0xf0,0x00,0x00,0x00,0x1e]
+#CHECK: madb	%f15, %f0, 0            # encoding: [0xed,0x00,0x00,0x00,0xf0,0x1e]
+#CHECK: madb	%f15, %f15, 0           # encoding: [0xed,0xf0,0x00,0x00,0xf0,0x1e]
+
+	madb	%f0, %f0, 0
+	madb	%f0, %f0, 4095
+	madb	%f0, %f0, 0(%r1)
+	madb	%f0, %f0, 0(%r15)
+	madb	%f0, %f0, 4095(%r1,%r15)
+	madb	%f0, %f0, 4095(%r15,%r1)
+	madb	%f0, %f15, 0
+	madb	%f15, %f0, 0
+	madb	%f15, %f15, 0
diff --git a/test/MC/SystemZ/insn-madb-02.s b/test/MC/SystemZ/insn-madb-02.s
new file mode 100644
index 0000000..f7fdee9
--- /dev/null
+++ b/test/MC/SystemZ/insn-madb-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: madb	%f0, %f0, -1
+#CHECK: error: invalid operand
+#CHECK: madb	%f0, %f0, 4096
+
+	madb	%f0, %f0, -1
+	madb	%f0, %f0, 4096
diff --git a/test/MC/SystemZ/insn-madbr-01.s b/test/MC/SystemZ/insn-madbr-01.s
new file mode 100644
index 0000000..42142be
--- /dev/null
+++ b/test/MC/SystemZ/insn-madbr-01.s
@@ -0,0 +1,15 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: madbr	%f0, %f0, %f0           # encoding: [0xb3,0x1e,0x00,0x00]
+#CHECK: madbr	%f0, %f0, %f15          # encoding: [0xb3,0x1e,0x00,0x0f]
+#CHECK: madbr	%f0, %f15, %f0          # encoding: [0xb3,0x1e,0x00,0xf0]
+#CHECK: madbr	%f15, %f0, %f0          # encoding: [0xb3,0x1e,0xf0,0x00]
+#CHECK: madbr	%f7, %f8, %f9           # encoding: [0xb3,0x1e,0x70,0x89]
+#CHECK: madbr	%f15, %f15, %f15        # encoding: [0xb3,0x1e,0xf0,0xff]
+
+	madbr	%f0, %f0, %f0
+	madbr	%f0, %f0, %f15
+	madbr	%f0, %f15, %f0
+	madbr	%f15, %f0, %f0
+	madbr	%f7, %f8, %f9
+	madbr	%f15, %f15, %f15
diff --git a/test/MC/SystemZ/insn-maeb-01.s b/test/MC/SystemZ/insn-maeb-01.s
new file mode 100644
index 0000000..7a998fd
--- /dev/null
+++ b/test/MC/SystemZ/insn-maeb-01.s
@@ -0,0 +1,21 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: maeb	%f0, %f0, 0             # encoding: [0xed,0x00,0x00,0x00,0x00,0x0e]
+#CHECK: maeb	%f0, %f0, 4095          # encoding: [0xed,0x00,0x0f,0xff,0x00,0x0e]
+#CHECK: maeb	%f0, %f0, 0(%r1)        # encoding: [0xed,0x00,0x10,0x00,0x00,0x0e]
+#CHECK: maeb	%f0, %f0, 0(%r15)       # encoding: [0xed,0x00,0xf0,0x00,0x00,0x0e]
+#CHECK: maeb	%f0, %f0, 4095(%r1,%r15) # encoding: [0xed,0x01,0xff,0xff,0x00,0x0e]
+#CHECK: maeb	%f0, %f0, 4095(%r15,%r1) # encoding: [0xed,0x0f,0x1f,0xff,0x00,0x0e]
+#CHECK: maeb	%f0, %f15, 0            # encoding: [0xed,0xf0,0x00,0x00,0x00,0x0e]
+#CHECK: maeb	%f15, %f0, 0            # encoding: [0xed,0x00,0x00,0x00,0xf0,0x0e]
+#CHECK: maeb	%f15, %f15, 0           # encoding: [0xed,0xf0,0x00,0x00,0xf0,0x0e]
+
+	maeb	%f0, %f0, 0
+	maeb	%f0, %f0, 4095
+	maeb	%f0, %f0, 0(%r1)
+	maeb	%f0, %f0, 0(%r15)
+	maeb	%f0, %f0, 4095(%r1,%r15)
+	maeb	%f0, %f0, 4095(%r15,%r1)
+	maeb	%f0, %f15, 0
+	maeb	%f15, %f0, 0
+	maeb	%f15, %f15, 0
diff --git a/test/MC/SystemZ/insn-maeb-02.s b/test/MC/SystemZ/insn-maeb-02.s
new file mode 100644
index 0000000..e12407a
--- /dev/null
+++ b/test/MC/SystemZ/insn-maeb-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: maeb	%f0, %f0, -1
+#CHECK: error: invalid operand
+#CHECK: maeb	%f0, %f0, 4096
+
+	maeb	%f0, %f0, -1
+	maeb	%f0, %f0, 4096
diff --git a/test/MC/SystemZ/insn-maebr-01.s b/test/MC/SystemZ/insn-maebr-01.s
new file mode 100644
index 0000000..be92aaf
--- /dev/null
+++ b/test/MC/SystemZ/insn-maebr-01.s
@@ -0,0 +1,15 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: maebr	%f0, %f0, %f0           # encoding: [0xb3,0x0e,0x00,0x00]
+#CHECK: maebr	%f0, %f0, %f15          # encoding: [0xb3,0x0e,0x00,0x0f]
+#CHECK: maebr	%f0, %f15, %f0          # encoding: [0xb3,0x0e,0x00,0xf0]
+#CHECK: maebr	%f15, %f0, %f0          # encoding: [0xb3,0x0e,0xf0,0x00]
+#CHECK: maebr	%f7, %f8, %f9           # encoding: [0xb3,0x0e,0x70,0x89]
+#CHECK: maebr	%f15, %f15, %f15        # encoding: [0xb3,0x0e,0xf0,0xff]
+
+	maebr	%f0, %f0, %f0
+	maebr	%f0, %f0, %f15
+	maebr	%f0, %f15, %f0
+	maebr	%f15, %f0, %f0
+	maebr	%f7, %f8, %f9
+	maebr	%f15, %f15, %f15
diff --git a/test/MC/SystemZ/insn-mdb-01.s b/test/MC/SystemZ/insn-mdb-01.s
new file mode 100644
index 0000000..58be977
--- /dev/null
+++ b/test/MC/SystemZ/insn-mdb-01.s
@@ -0,0 +1,17 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: mdb	%f0, 0                  # encoding: [0xed,0x00,0x00,0x00,0x00,0x1c]
+#CHECK: mdb	%f0, 4095               # encoding: [0xed,0x00,0x0f,0xff,0x00,0x1c]
+#CHECK: mdb	%f0, 0(%r1)             # encoding: [0xed,0x00,0x10,0x00,0x00,0x1c]
+#CHECK: mdb	%f0, 0(%r15)            # encoding: [0xed,0x00,0xf0,0x00,0x00,0x1c]
+#CHECK: mdb	%f0, 4095(%r1,%r15)     # encoding: [0xed,0x01,0xff,0xff,0x00,0x1c]
+#CHECK: mdb	%f0, 4095(%r15,%r1)     # encoding: [0xed,0x0f,0x1f,0xff,0x00,0x1c]
+#CHECK: mdb	%f15, 0                 # encoding: [0xed,0xf0,0x00,0x00,0x00,0x1c]
+
+	mdb	%f0, 0
+	mdb	%f0, 4095
+	mdb	%f0, 0(%r1)
+	mdb	%f0, 0(%r15)
+	mdb	%f0, 4095(%r1,%r15)
+	mdb	%f0, 4095(%r15,%r1)
+	mdb	%f15, 0
diff --git a/test/MC/SystemZ/insn-mdb-02.s b/test/MC/SystemZ/insn-mdb-02.s
new file mode 100644
index 0000000..f1bdab0
--- /dev/null
+++ b/test/MC/SystemZ/insn-mdb-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: mdb	%f0, -1
+#CHECK: error: invalid operand
+#CHECK: mdb	%f0, 4096
+
+	mdb	%f0, -1
+	mdb	%f0, 4096
diff --git a/test/MC/SystemZ/insn-mdbr-01.s b/test/MC/SystemZ/insn-mdbr-01.s
new file mode 100644
index 0000000..4ff16b9
--- /dev/null
+++ b/test/MC/SystemZ/insn-mdbr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: mdbr	%f0, %f0                # encoding: [0xb3,0x1c,0x00,0x00]
+#CHECK: mdbr	%f0, %f15               # encoding: [0xb3,0x1c,0x00,0x0f]
+#CHECK: mdbr	%f7, %f8                # encoding: [0xb3,0x1c,0x00,0x78]
+#CHECK: mdbr	%f15, %f0               # encoding: [0xb3,0x1c,0x00,0xf0]
+
+	mdbr	%f0, %f0
+	mdbr	%f0, %f15
+	mdbr	%f7, %f8
+	mdbr	%f15, %f0
diff --git a/test/MC/SystemZ/insn-mdeb-01.s b/test/MC/SystemZ/insn-mdeb-01.s
new file mode 100644
index 0000000..5d85c07
--- /dev/null
+++ b/test/MC/SystemZ/insn-mdeb-01.s
@@ -0,0 +1,17 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: mdeb	%f0, 0                  # encoding: [0xed,0x00,0x00,0x00,0x00,0x0c]
+#CHECK: mdeb	%f0, 4095               # encoding: [0xed,0x00,0x0f,0xff,0x00,0x0c]
+#CHECK: mdeb	%f0, 0(%r1)             # encoding: [0xed,0x00,0x10,0x00,0x00,0x0c]
+#CHECK: mdeb	%f0, 0(%r15)            # encoding: [0xed,0x00,0xf0,0x00,0x00,0x0c]
+#CHECK: mdeb	%f0, 4095(%r1,%r15)     # encoding: [0xed,0x01,0xff,0xff,0x00,0x0c]
+#CHECK: mdeb	%f0, 4095(%r15,%r1)     # encoding: [0xed,0x0f,0x1f,0xff,0x00,0x0c]
+#CHECK: mdeb	%f15, 0                 # encoding: [0xed,0xf0,0x00,0x00,0x00,0x0c]
+
+	mdeb	%f0, 0
+	mdeb	%f0, 4095
+	mdeb	%f0, 0(%r1)
+	mdeb	%f0, 0(%r15)
+	mdeb	%f0, 4095(%r1,%r15)
+	mdeb	%f0, 4095(%r15,%r1)
+	mdeb	%f15, 0
diff --git a/test/MC/SystemZ/insn-mdeb-02.s b/test/MC/SystemZ/insn-mdeb-02.s
new file mode 100644
index 0000000..87ec676
--- /dev/null
+++ b/test/MC/SystemZ/insn-mdeb-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: mdeb	%f0, -1
+#CHECK: error: invalid operand
+#CHECK: mdeb	%f0, 4096
+
+	mdeb	%f0, -1
+	mdeb	%f0, 4096
diff --git a/test/MC/SystemZ/insn-mdebr-01.s b/test/MC/SystemZ/insn-mdebr-01.s
new file mode 100644
index 0000000..17c4955
--- /dev/null
+++ b/test/MC/SystemZ/insn-mdebr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: mdebr	%f0, %f0                # encoding: [0xb3,0x0c,0x00,0x00]
+#CHECK: mdebr	%f0, %f15               # encoding: [0xb3,0x0c,0x00,0x0f]
+#CHECK: mdebr	%f7, %f8                # encoding: [0xb3,0x0c,0x00,0x78]
+#CHECK: mdebr	%f15, %f0               # encoding: [0xb3,0x0c,0x00,0xf0]
+
+	mdebr	%f0, %f0
+	mdebr	%f0, %f15
+	mdebr	%f7, %f8
+	mdebr	%f15, %f0
diff --git a/test/MC/SystemZ/insn-meeb-01.s b/test/MC/SystemZ/insn-meeb-01.s
new file mode 100644
index 0000000..bb14d04
--- /dev/null
+++ b/test/MC/SystemZ/insn-meeb-01.s
@@ -0,0 +1,17 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: meeb	%f0, 0                  # encoding: [0xed,0x00,0x00,0x00,0x00,0x17]
+#CHECK: meeb	%f0, 4095               # encoding: [0xed,0x00,0x0f,0xff,0x00,0x17]
+#CHECK: meeb	%f0, 0(%r1)             # encoding: [0xed,0x00,0x10,0x00,0x00,0x17]
+#CHECK: meeb	%f0, 0(%r15)            # encoding: [0xed,0x00,0xf0,0x00,0x00,0x17]
+#CHECK: meeb	%f0, 4095(%r1,%r15)     # encoding: [0xed,0x01,0xff,0xff,0x00,0x17]
+#CHECK: meeb	%f0, 4095(%r15,%r1)     # encoding: [0xed,0x0f,0x1f,0xff,0x00,0x17]
+#CHECK: meeb	%f15, 0                 # encoding: [0xed,0xf0,0x00,0x00,0x00,0x17]
+
+	meeb	%f0, 0
+	meeb	%f0, 4095
+	meeb	%f0, 0(%r1)
+	meeb	%f0, 0(%r15)
+	meeb	%f0, 4095(%r1,%r15)
+	meeb	%f0, 4095(%r15,%r1)
+	meeb	%f15, 0
diff --git a/test/MC/SystemZ/insn-meeb-02.s b/test/MC/SystemZ/insn-meeb-02.s
new file mode 100644
index 0000000..ba5e3b2
--- /dev/null
+++ b/test/MC/SystemZ/insn-meeb-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: meeb	%f0, -1
+#CHECK: error: invalid operand
+#CHECK: meeb	%f0, 4096
+
+	meeb	%f0, -1
+	meeb	%f0, 4096
diff --git a/test/MC/SystemZ/insn-meebr-01.s b/test/MC/SystemZ/insn-meebr-01.s
new file mode 100644
index 0000000..99cd8a3
--- /dev/null
+++ b/test/MC/SystemZ/insn-meebr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: meebr	%f0, %f0                # encoding: [0xb3,0x17,0x00,0x00]
+#CHECK: meebr	%f0, %f15               # encoding: [0xb3,0x17,0x00,0x0f]
+#CHECK: meebr	%f7, %f8                # encoding: [0xb3,0x17,0x00,0x78]
+#CHECK: meebr	%f15, %f0               # encoding: [0xb3,0x17,0x00,0xf0]
+
+	meebr	%f0, %f0
+	meebr	%f0, %f15
+	meebr	%f7, %f8
+	meebr	%f15, %f0
diff --git a/test/MC/SystemZ/insn-mghi-01.s b/test/MC/SystemZ/insn-mghi-01.s
new file mode 100644
index 0000000..d07278f
--- /dev/null
+++ b/test/MC/SystemZ/insn-mghi-01.s
@@ -0,0 +1,15 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: mghi	%r0, -32768             # encoding: [0xa7,0x0d,0x80,0x00]
+#CHECK: mghi	%r0, -1                 # encoding: [0xa7,0x0d,0xff,0xff]
+#CHECK: mghi	%r0, 0                  # encoding: [0xa7,0x0d,0x00,0x00]
+#CHECK: mghi	%r0, 1                  # encoding: [0xa7,0x0d,0x00,0x01]
+#CHECK: mghi	%r0, 32767              # encoding: [0xa7,0x0d,0x7f,0xff]
+#CHECK: mghi	%r15, 0                 # encoding: [0xa7,0xfd,0x00,0x00]
+
+	mghi	%r0, -32768
+	mghi	%r0, -1
+	mghi	%r0, 0
+	mghi	%r0, 1
+	mghi	%r0, 32767
+	mghi	%r15, 0
diff --git a/test/MC/SystemZ/insn-mghi-02.s b/test/MC/SystemZ/insn-mghi-02.s
new file mode 100644
index 0000000..860fa3b
--- /dev/null
+++ b/test/MC/SystemZ/insn-mghi-02.s
@@ -0,0 +1,13 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: mghi	%r0, -32769
+#CHECK: error: invalid operand
+#CHECK: mghi	%r0, 32768
+#CHECK: error: invalid operand
+#CHECK: mghi	%r0, foo
+
+	mghi	%r0, -32769
+	mghi	%r0, 32768
+	mghi	%r0, foo
diff --git a/test/MC/SystemZ/insn-mh-01.s b/test/MC/SystemZ/insn-mh-01.s
new file mode 100644
index 0000000..59d5515
--- /dev/null
+++ b/test/MC/SystemZ/insn-mh-01.s
@@ -0,0 +1,17 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: mh	%r0, 0                  # encoding: [0x4c,0x00,0x00,0x00]
+#CHECK: mh	%r0, 4095               # encoding: [0x4c,0x00,0x0f,0xff]
+#CHECK: mh	%r0, 0(%r1)             # encoding: [0x4c,0x00,0x10,0x00]
+#CHECK: mh	%r0, 0(%r15)            # encoding: [0x4c,0x00,0xf0,0x00]
+#CHECK: mh	%r0, 4095(%r1,%r15)     # encoding: [0x4c,0x01,0xff,0xff]
+#CHECK: mh	%r0, 4095(%r15,%r1)     # encoding: [0x4c,0x0f,0x1f,0xff]
+#CHECK: mh	%r15, 0                 # encoding: [0x4c,0xf0,0x00,0x00]
+
+	mh	%r0, 0
+	mh	%r0, 4095
+	mh	%r0, 0(%r1)
+	mh	%r0, 0(%r15)
+	mh	%r0, 4095(%r1,%r15)
+	mh	%r0, 4095(%r15,%r1)
+	mh	%r15, 0
diff --git a/test/MC/SystemZ/insn-mh-02.s b/test/MC/SystemZ/insn-mh-02.s
new file mode 100644
index 0000000..4ea35fc
--- /dev/null
+++ b/test/MC/SystemZ/insn-mh-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: mh	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: mh	%r0, 4096
+
+	mh	%r0, -1
+	mh	%r0, 4096
diff --git a/test/MC/SystemZ/insn-mhi-01.s b/test/MC/SystemZ/insn-mhi-01.s
new file mode 100644
index 0000000..adf42ae
--- /dev/null
+++ b/test/MC/SystemZ/insn-mhi-01.s
@@ -0,0 +1,15 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: mhi	%r0, -32768             # encoding: [0xa7,0x0c,0x80,0x00]
+#CHECK: mhi	%r0, -1                 # encoding: [0xa7,0x0c,0xff,0xff]
+#CHECK: mhi	%r0, 0                  # encoding: [0xa7,0x0c,0x00,0x00]
+#CHECK: mhi	%r0, 1                  # encoding: [0xa7,0x0c,0x00,0x01]
+#CHECK: mhi	%r0, 32767              # encoding: [0xa7,0x0c,0x7f,0xff]
+#CHECK: mhi	%r15, 0                 # encoding: [0xa7,0xfc,0x00,0x00]
+
+	mhi	%r0, -32768
+	mhi	%r0, -1
+	mhi	%r0, 0
+	mhi	%r0, 1
+	mhi	%r0, 32767
+	mhi	%r15, 0
diff --git a/test/MC/SystemZ/insn-mhi-02.s b/test/MC/SystemZ/insn-mhi-02.s
new file mode 100644
index 0000000..74e8357
--- /dev/null
+++ b/test/MC/SystemZ/insn-mhi-02.s
@@ -0,0 +1,13 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: mhi	%r0, -32769
+#CHECK: error: invalid operand
+#CHECK: mhi	%r0, 32768
+#CHECK: error: invalid operand
+#CHECK: mhi	%r0, foo
+
+	mhi	%r0, -32769
+	mhi	%r0, 32768
+	mhi	%r0, foo
diff --git a/test/MC/SystemZ/insn-mhy-01.s b/test/MC/SystemZ/insn-mhy-01.s
new file mode 100644
index 0000000..89c394b
--- /dev/null
+++ b/test/MC/SystemZ/insn-mhy-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: mhy	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x7c]
+#CHECK: mhy	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x7c]
+#CHECK: mhy	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x7c]
+#CHECK: mhy	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x7c]
+#CHECK: mhy	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x7c]
+#CHECK: mhy	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x7c]
+#CHECK: mhy	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x7c]
+#CHECK: mhy	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x7c]
+#CHECK: mhy	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x7c]
+#CHECK: mhy	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x7c]
+
+	mhy	%r0, -524288
+	mhy	%r0, -1
+	mhy	%r0, 0
+	mhy	%r0, 1
+	mhy	%r0, 524287
+	mhy	%r0, 0(%r1)
+	mhy	%r0, 0(%r15)
+	mhy	%r0, 524287(%r1,%r15)
+	mhy	%r0, 524287(%r15,%r1)
+	mhy	%r15, 0
diff --git a/test/MC/SystemZ/insn-mhy-02.s b/test/MC/SystemZ/insn-mhy-02.s
new file mode 100644
index 0000000..bce62f0
--- /dev/null
+++ b/test/MC/SystemZ/insn-mhy-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: mhy	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: mhy	%r0, 524288
+
+	mhy	%r0, -524289
+	mhy	%r0, 524288
diff --git a/test/MC/SystemZ/insn-mlg-01.s b/test/MC/SystemZ/insn-mlg-01.s
new file mode 100644
index 0000000..e9bd651
--- /dev/null
+++ b/test/MC/SystemZ/insn-mlg-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: mlg	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x86]
+#CHECK: mlg	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x86]
+#CHECK: mlg	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x86]
+#CHECK: mlg	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x86]
+#CHECK: mlg	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x86]
+#CHECK: mlg	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x86]
+#CHECK: mlg	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x86]
+#CHECK: mlg	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x86]
+#CHECK: mlg	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x86]
+#CHECK: mlg	%r14, 0                 # encoding: [0xe3,0xe0,0x00,0x00,0x00,0x86]
+
+	mlg	%r0, -524288
+	mlg	%r0, -1
+	mlg	%r0, 0
+	mlg	%r0, 1
+	mlg	%r0, 524287
+	mlg	%r0, 0(%r1)
+	mlg	%r0, 0(%r15)
+	mlg	%r0, 524287(%r1,%r15)
+	mlg	%r0, 524287(%r15,%r1)
+	mlg	%r14, 0
diff --git a/test/MC/SystemZ/insn-mlg-02.s b/test/MC/SystemZ/insn-mlg-02.s
new file mode 100644
index 0000000..7174bc5
--- /dev/null
+++ b/test/MC/SystemZ/insn-mlg-02.s
@@ -0,0 +1,16 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: mlg	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: mlg	%r0, 524288
+#CHECK: error: invalid register
+#CHECK: mlg	%r1, 0
+#CHECK: error: invalid register
+#CHECK: mlg	%r15, 0
+
+	mlg	%r0, -524289
+	mlg	%r0, 524288
+	mlg	%r1, 0
+	mlg	%r15, 0
diff --git a/test/MC/SystemZ/insn-mlgr-01.s b/test/MC/SystemZ/insn-mlgr-01.s
new file mode 100644
index 0000000..215bde0
--- /dev/null
+++ b/test/MC/SystemZ/insn-mlgr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: mlgr	%r0, %r0                # encoding: [0xb9,0x86,0x00,0x00]
+#CHECK: mlgr	%r0, %r15               # encoding: [0xb9,0x86,0x00,0x0f]
+#CHECK: mlgr	%r14, %r0               # encoding: [0xb9,0x86,0x00,0xe0]
+#CHECK: mlgr	%r6, %r9                # encoding: [0xb9,0x86,0x00,0x69]
+
+	mlgr	%r0,%r0
+	mlgr	%r0,%r15
+	mlgr	%r14,%r0
+	mlgr	%r6,%r9
diff --git a/test/MC/SystemZ/insn-mlgr-02.s b/test/MC/SystemZ/insn-mlgr-02.s
new file mode 100644
index 0000000..30f4259
--- /dev/null
+++ b/test/MC/SystemZ/insn-mlgr-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid register
+#CHECK: mlgr	%r1, %r0
+#CHECK: error: invalid register
+#CHECK: mlgr	%r15, %r0
+
+	mlgr	%r1, %r0
+	mlgr	%r15, %r0
diff --git a/test/MC/SystemZ/insn-ms-01.s b/test/MC/SystemZ/insn-ms-01.s
new file mode 100644
index 0000000..e104e09
--- /dev/null
+++ b/test/MC/SystemZ/insn-ms-01.s
@@ -0,0 +1,17 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: ms	%r0, 0                  # encoding: [0x71,0x00,0x00,0x00]
+#CHECK: ms	%r0, 4095               # encoding: [0x71,0x00,0x0f,0xff]
+#CHECK: ms	%r0, 0(%r1)             # encoding: [0x71,0x00,0x10,0x00]
+#CHECK: ms	%r0, 0(%r15)            # encoding: [0x71,0x00,0xf0,0x00]
+#CHECK: ms	%r0, 4095(%r1,%r15)     # encoding: [0x71,0x01,0xff,0xff]
+#CHECK: ms	%r0, 4095(%r15,%r1)     # encoding: [0x71,0x0f,0x1f,0xff]
+#CHECK: ms	%r15, 0                 # encoding: [0x71,0xf0,0x00,0x00]
+
+	ms	%r0, 0
+	ms	%r0, 4095
+	ms	%r0, 0(%r1)
+	ms	%r0, 0(%r15)
+	ms	%r0, 4095(%r1,%r15)
+	ms	%r0, 4095(%r15,%r1)
+	ms	%r15, 0
diff --git a/test/MC/SystemZ/insn-ms-02.s b/test/MC/SystemZ/insn-ms-02.s
new file mode 100644
index 0000000..9cc7ecd
--- /dev/null
+++ b/test/MC/SystemZ/insn-ms-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: ms	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: ms	%r0, 4096
+
+	ms	%r0, -1
+	ms	%r0, 4096
diff --git a/test/MC/SystemZ/insn-msdb-01.s b/test/MC/SystemZ/insn-msdb-01.s
new file mode 100644
index 0000000..50ef45b
--- /dev/null
+++ b/test/MC/SystemZ/insn-msdb-01.s
@@ -0,0 +1,21 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: msdb	%f0, %f0, 0             # encoding: [0xed,0x00,0x00,0x00,0x00,0x1f]
+#CHECK: msdb	%f0, %f0, 4095          # encoding: [0xed,0x00,0x0f,0xff,0x00,0x1f]
+#CHECK: msdb	%f0, %f0, 0(%r1)        # encoding: [0xed,0x00,0x10,0x00,0x00,0x1f]
+#CHECK: msdb	%f0, %f0, 0(%r15)       # encoding: [0xed,0x00,0xf0,0x00,0x00,0x1f]
+#CHECK: msdb	%f0, %f0, 4095(%r1,%r15) # encoding: [0xed,0x01,0xff,0xff,0x00,0x1f]
+#CHECK: msdb	%f0, %f0, 4095(%r15,%r1) # encoding: [0xed,0x0f,0x1f,0xff,0x00,0x1f]
+#CHECK: msdb	%f0, %f15, 0            # encoding: [0xed,0xf0,0x00,0x00,0x00,0x1f]
+#CHECK: msdb	%f15, %f0, 0            # encoding: [0xed,0x00,0x00,0x00,0xf0,0x1f]
+#CHECK: msdb	%f15, %f15, 0           # encoding: [0xed,0xf0,0x00,0x00,0xf0,0x1f]
+
+	msdb	%f0, %f0, 0
+	msdb	%f0, %f0, 4095
+	msdb	%f0, %f0, 0(%r1)
+	msdb	%f0, %f0, 0(%r15)
+	msdb	%f0, %f0, 4095(%r1,%r15)
+	msdb	%f0, %f0, 4095(%r15,%r1)
+	msdb	%f0, %f15, 0
+	msdb	%f15, %f0, 0
+	msdb	%f15, %f15, 0
diff --git a/test/MC/SystemZ/insn-msdb-02.s b/test/MC/SystemZ/insn-msdb-02.s
new file mode 100644
index 0000000..552fc72
--- /dev/null
+++ b/test/MC/SystemZ/insn-msdb-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: msdb	%f0, %f0, -1
+#CHECK: error: invalid operand
+#CHECK: msdb	%f0, %f0, 4096
+
+	msdb	%f0, %f0, -1
+	msdb	%f0, %f0, 4096
diff --git a/test/MC/SystemZ/insn-msdbr-01.s b/test/MC/SystemZ/insn-msdbr-01.s
new file mode 100644
index 0000000..0c81657
--- /dev/null
+++ b/test/MC/SystemZ/insn-msdbr-01.s
@@ -0,0 +1,15 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: msdbr	%f0, %f0, %f0           # encoding: [0xb3,0x1f,0x00,0x00]
+#CHECK: msdbr	%f0, %f0, %f15          # encoding: [0xb3,0x1f,0x00,0x0f]
+#CHECK: msdbr	%f0, %f15, %f0          # encoding: [0xb3,0x1f,0x00,0xf0]
+#CHECK: msdbr	%f15, %f0, %f0          # encoding: [0xb3,0x1f,0xf0,0x00]
+#CHECK: msdbr	%f7, %f8, %f9           # encoding: [0xb3,0x1f,0x70,0x89]
+#CHECK: msdbr	%f15, %f15, %f15        # encoding: [0xb3,0x1f,0xf0,0xff]
+
+	msdbr	%f0, %f0, %f0
+	msdbr	%f0, %f0, %f15
+	msdbr	%f0, %f15, %f0
+	msdbr	%f15, %f0, %f0
+	msdbr	%f7, %f8, %f9
+	msdbr	%f15, %f15, %f15
diff --git a/test/MC/SystemZ/insn-mseb-01.s b/test/MC/SystemZ/insn-mseb-01.s
new file mode 100644
index 0000000..4464cfb
--- /dev/null
+++ b/test/MC/SystemZ/insn-mseb-01.s
@@ -0,0 +1,21 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: mseb	%f0, %f0, 0             # encoding: [0xed,0x00,0x00,0x00,0x00,0x0f]
+#CHECK: mseb	%f0, %f0, 4095          # encoding: [0xed,0x00,0x0f,0xff,0x00,0x0f]
+#CHECK: mseb	%f0, %f0, 0(%r1)        # encoding: [0xed,0x00,0x10,0x00,0x00,0x0f]
+#CHECK: mseb	%f0, %f0, 0(%r15)       # encoding: [0xed,0x00,0xf0,0x00,0x00,0x0f]
+#CHECK: mseb	%f0, %f0, 4095(%r1,%r15) # encoding: [0xed,0x01,0xff,0xff,0x00,0x0f]
+#CHECK: mseb	%f0, %f0, 4095(%r15,%r1) # encoding: [0xed,0x0f,0x1f,0xff,0x00,0x0f]
+#CHECK: mseb	%f0, %f15, 0            # encoding: [0xed,0xf0,0x00,0x00,0x00,0x0f]
+#CHECK: mseb	%f15, %f0, 0            # encoding: [0xed,0x00,0x00,0x00,0xf0,0x0f]
+#CHECK: mseb	%f15, %f15, 0           # encoding: [0xed,0xf0,0x00,0x00,0xf0,0x0f]
+
+	mseb	%f0, %f0, 0
+	mseb	%f0, %f0, 4095
+	mseb	%f0, %f0, 0(%r1)
+	mseb	%f0, %f0, 0(%r15)
+	mseb	%f0, %f0, 4095(%r1,%r15)
+	mseb	%f0, %f0, 4095(%r15,%r1)
+	mseb	%f0, %f15, 0
+	mseb	%f15, %f0, 0
+	mseb	%f15, %f15, 0
diff --git a/test/MC/SystemZ/insn-mseb-02.s b/test/MC/SystemZ/insn-mseb-02.s
new file mode 100644
index 0000000..03aaa0f
--- /dev/null
+++ b/test/MC/SystemZ/insn-mseb-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: mseb	%f0, %f0, -1
+#CHECK: error: invalid operand
+#CHECK: mseb	%f0, %f0, 4096
+
+	mseb	%f0, %f0, -1
+	mseb	%f0, %f0, 4096
diff --git a/test/MC/SystemZ/insn-msebr-01.s b/test/MC/SystemZ/insn-msebr-01.s
new file mode 100644
index 0000000..f936cb6
--- /dev/null
+++ b/test/MC/SystemZ/insn-msebr-01.s
@@ -0,0 +1,15 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: msebr	%f0, %f0, %f0           # encoding: [0xb3,0x0f,0x00,0x00]
+#CHECK: msebr	%f0, %f0, %f15          # encoding: [0xb3,0x0f,0x00,0x0f]
+#CHECK: msebr	%f0, %f15, %f0          # encoding: [0xb3,0x0f,0x00,0xf0]
+#CHECK: msebr	%f15, %f0, %f0          # encoding: [0xb3,0x0f,0xf0,0x00]
+#CHECK: msebr	%f7, %f8, %f9           # encoding: [0xb3,0x0f,0x70,0x89]
+#CHECK: msebr	%f15, %f15, %f15        # encoding: [0xb3,0x0f,0xf0,0xff]
+
+	msebr	%f0, %f0, %f0
+	msebr	%f0, %f0, %f15
+	msebr	%f0, %f15, %f0
+	msebr	%f15, %f0, %f0
+	msebr	%f7, %f8, %f9
+	msebr	%f15, %f15, %f15
diff --git a/test/MC/SystemZ/insn-msfi-01.s b/test/MC/SystemZ/insn-msfi-01.s
new file mode 100644
index 0000000..629260e
--- /dev/null
+++ b/test/MC/SystemZ/insn-msfi-01.s
@@ -0,0 +1,15 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: msfi	%r0, -2147483648        # encoding: [0xc2,0x01,0x80,0x00,0x00,0x00]
+#CHECK: msfi	%r0, -1                 # encoding: [0xc2,0x01,0xff,0xff,0xff,0xff]
+#CHECK: msfi	%r0, 0                  # encoding: [0xc2,0x01,0x00,0x00,0x00,0x00]
+#CHECK: msfi	%r0, 1                  # encoding: [0xc2,0x01,0x00,0x00,0x00,0x01]
+#CHECK: msfi	%r0, 2147483647         # encoding: [0xc2,0x01,0x7f,0xff,0xff,0xff]
+#CHECK: msfi	%r15, 0                 # encoding: [0xc2,0xf1,0x00,0x00,0x00,0x00]
+
+	msfi	%r0, -1 << 31
+	msfi	%r0, -1
+	msfi	%r0, 0
+	msfi	%r0, 1
+	msfi	%r0, (1 << 31) - 1
+	msfi	%r15, 0
diff --git a/test/MC/SystemZ/insn-msfi-02.s b/test/MC/SystemZ/insn-msfi-02.s
new file mode 100644
index 0000000..2700ce7
--- /dev/null
+++ b/test/MC/SystemZ/insn-msfi-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: msfi	%r0, (-1 << 31) - 1
+#CHECK: error: invalid operand
+#CHECK: msfi	%r0, (1 << 31)
+
+	msfi	%r0, (-1 << 31) - 1
+	msfi	%r0, (1 << 31)
diff --git a/test/MC/SystemZ/insn-msg-01.s b/test/MC/SystemZ/insn-msg-01.s
new file mode 100644
index 0000000..298811c
--- /dev/null
+++ b/test/MC/SystemZ/insn-msg-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: msg	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x0c]
+#CHECK: msg	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x0c]
+#CHECK: msg	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x0c]
+#CHECK: msg	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x0c]
+#CHECK: msg	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x0c]
+#CHECK: msg	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x0c]
+#CHECK: msg	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x0c]
+#CHECK: msg	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x0c]
+#CHECK: msg	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x0c]
+#CHECK: msg	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x0c]
+
+	msg	%r0, -524288
+	msg	%r0, -1
+	msg	%r0, 0
+	msg	%r0, 1
+	msg	%r0, 524287
+	msg	%r0, 0(%r1)
+	msg	%r0, 0(%r15)
+	msg	%r0, 524287(%r1,%r15)
+	msg	%r0, 524287(%r15,%r1)
+	msg	%r15, 0
diff --git a/test/MC/SystemZ/insn-msg-02.s b/test/MC/SystemZ/insn-msg-02.s
new file mode 100644
index 0000000..3326f40
--- /dev/null
+++ b/test/MC/SystemZ/insn-msg-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: msg	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: msg	%r0, 524288
+
+	msg	%r0, -524289
+	msg	%r0, 524288
diff --git a/test/MC/SystemZ/insn-msgf-01.s b/test/MC/SystemZ/insn-msgf-01.s
new file mode 100644
index 0000000..9812bcc
--- /dev/null
+++ b/test/MC/SystemZ/insn-msgf-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: msgf	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x1c]
+#CHECK: msgf	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x1c]
+#CHECK: msgf	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x1c]
+#CHECK: msgf	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x1c]
+#CHECK: msgf	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x1c]
+#CHECK: msgf	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x1c]
+#CHECK: msgf	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x1c]
+#CHECK: msgf	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x1c]
+#CHECK: msgf	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x1c]
+#CHECK: msgf	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x1c]
+
+	msgf	%r0, -524288
+	msgf	%r0, -1
+	msgf	%r0, 0
+	msgf	%r0, 1
+	msgf	%r0, 524287
+	msgf	%r0, 0(%r1)
+	msgf	%r0, 0(%r15)
+	msgf	%r0, 524287(%r1,%r15)
+	msgf	%r0, 524287(%r15,%r1)
+	msgf	%r15, 0
diff --git a/test/MC/SystemZ/insn-msgf-02.s b/test/MC/SystemZ/insn-msgf-02.s
new file mode 100644
index 0000000..03983b3
--- /dev/null
+++ b/test/MC/SystemZ/insn-msgf-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: msgf	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: msgf	%r0, 524288
+
+	msgf	%r0, -524289
+	msgf	%r0, 524288
diff --git a/test/MC/SystemZ/insn-msgfi-01.s b/test/MC/SystemZ/insn-msgfi-01.s
new file mode 100644
index 0000000..802ad14
--- /dev/null
+++ b/test/MC/SystemZ/insn-msgfi-01.s
@@ -0,0 +1,15 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: msgfi	%r0, -2147483648        # encoding: [0xc2,0x00,0x80,0x00,0x00,0x00]
+#CHECK: msgfi	%r0, -1                 # encoding: [0xc2,0x00,0xff,0xff,0xff,0xff]
+#CHECK: msgfi	%r0, 0                  # encoding: [0xc2,0x00,0x00,0x00,0x00,0x00]
+#CHECK: msgfi	%r0, 1                  # encoding: [0xc2,0x00,0x00,0x00,0x00,0x01]
+#CHECK: msgfi	%r0, 2147483647         # encoding: [0xc2,0x00,0x7f,0xff,0xff,0xff]
+#CHECK: msgfi	%r15, 0                 # encoding: [0xc2,0xf0,0x00,0x00,0x00,0x00]
+
+	msgfi	%r0, -1 << 31
+	msgfi	%r0, -1
+	msgfi	%r0, 0
+	msgfi	%r0, 1
+	msgfi	%r0, (1 << 31) - 1
+	msgfi	%r15, 0
diff --git a/test/MC/SystemZ/insn-msgfi-02.s b/test/MC/SystemZ/insn-msgfi-02.s
new file mode 100644
index 0000000..82e1f8f
--- /dev/null
+++ b/test/MC/SystemZ/insn-msgfi-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: msgfi	%r0, (-1 << 31) - 1
+#CHECK: error: invalid operand
+#CHECK: msgfi	%r0, (1 << 31)
+
+	msgfi	%r0, (-1 << 31) - 1
+	msgfi	%r0, (1 << 31)
diff --git a/test/MC/SystemZ/insn-msgfr-01.s b/test/MC/SystemZ/insn-msgfr-01.s
new file mode 100644
index 0000000..e25f630
--- /dev/null
+++ b/test/MC/SystemZ/insn-msgfr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: msgfr	%r0, %r0                # encoding: [0xb9,0x1c,0x00,0x00]
+#CHECK: msgfr	%r0, %r15               # encoding: [0xb9,0x1c,0x00,0x0f]
+#CHECK: msgfr	%r15, %r0               # encoding: [0xb9,0x1c,0x00,0xf0]
+#CHECK: msgfr	%r7, %r8                # encoding: [0xb9,0x1c,0x00,0x78]
+
+	msgfr	%r0,%r0
+	msgfr	%r0,%r15
+	msgfr	%r15,%r0
+	msgfr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-msgr-01.s b/test/MC/SystemZ/insn-msgr-01.s
new file mode 100644
index 0000000..0b9cd36
--- /dev/null
+++ b/test/MC/SystemZ/insn-msgr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: msgr	%r0, %r0                # encoding: [0xb9,0x0c,0x00,0x00]
+#CHECK: msgr	%r0, %r15               # encoding: [0xb9,0x0c,0x00,0x0f]
+#CHECK: msgr	%r15, %r0               # encoding: [0xb9,0x0c,0x00,0xf0]
+#CHECK: msgr	%r7, %r8                # encoding: [0xb9,0x0c,0x00,0x78]
+
+	msgr	%r0,%r0
+	msgr	%r0,%r15
+	msgr	%r15,%r0
+	msgr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-msr-01.s b/test/MC/SystemZ/insn-msr-01.s
new file mode 100644
index 0000000..6f7d917
--- /dev/null
+++ b/test/MC/SystemZ/insn-msr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: msr	%r0, %r0                # encoding: [0xb2,0x52,0x00,0x00]
+#CHECK: msr	%r0, %r15               # encoding: [0xb2,0x52,0x00,0x0f]
+#CHECK: msr	%r15, %r0               # encoding: [0xb2,0x52,0x00,0xf0]
+#CHECK: msr	%r7, %r8                # encoding: [0xb2,0x52,0x00,0x78]
+
+	msr	%r0,%r0
+	msr	%r0,%r15
+	msr	%r15,%r0
+	msr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-msy-01.s b/test/MC/SystemZ/insn-msy-01.s
new file mode 100644
index 0000000..aed9318
--- /dev/null
+++ b/test/MC/SystemZ/insn-msy-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: msy	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x51]
+#CHECK: msy	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x51]
+#CHECK: msy	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x51]
+#CHECK: msy	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x51]
+#CHECK: msy	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x51]
+#CHECK: msy	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x51]
+#CHECK: msy	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x51]
+#CHECK: msy	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x51]
+#CHECK: msy	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x51]
+#CHECK: msy	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x51]
+
+	msy	%r0, -524288
+	msy	%r0, -1
+	msy	%r0, 0
+	msy	%r0, 1
+	msy	%r0, 524287
+	msy	%r0, 0(%r1)
+	msy	%r0, 0(%r15)
+	msy	%r0, 524287(%r1,%r15)
+	msy	%r0, 524287(%r15,%r1)
+	msy	%r15, 0
diff --git a/test/MC/SystemZ/insn-msy-02.s b/test/MC/SystemZ/insn-msy-02.s
new file mode 100644
index 0000000..6f10069
--- /dev/null
+++ b/test/MC/SystemZ/insn-msy-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: msy	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: msy	%r0, 524288
+
+	msy	%r0, -524289
+	msy	%r0, 524288
diff --git a/test/MC/SystemZ/insn-mvghi-01.s b/test/MC/SystemZ/insn-mvghi-01.s
new file mode 100644
index 0000000..191aa49
--- /dev/null
+++ b/test/MC/SystemZ/insn-mvghi-01.s
@@ -0,0 +1,25 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: mvghi	0, 0                    # encoding: [0xe5,0x48,0x00,0x00,0x00,0x00]
+#CHECK: mvghi	4095, 0                 # encoding: [0xe5,0x48,0x0f,0xff,0x00,0x00]
+#CHECK: mvghi	0, -32768               # encoding: [0xe5,0x48,0x00,0x00,0x80,0x00]
+#CHECK: mvghi	0, -1                   # encoding: [0xe5,0x48,0x00,0x00,0xff,0xff]
+#CHECK: mvghi	0, 0                    # encoding: [0xe5,0x48,0x00,0x00,0x00,0x00]
+#CHECK: mvghi	0, 1                    # encoding: [0xe5,0x48,0x00,0x00,0x00,0x01]
+#CHECK: mvghi	0, 32767                # encoding: [0xe5,0x48,0x00,0x00,0x7f,0xff]
+#CHECK: mvghi	0(%r1), 42              # encoding: [0xe5,0x48,0x10,0x00,0x00,0x2a]
+#CHECK: mvghi	0(%r15), 42             # encoding: [0xe5,0x48,0xf0,0x00,0x00,0x2a]
+#CHECK: mvghi	4095(%r1), 42           # encoding: [0xe5,0x48,0x1f,0xff,0x00,0x2a]
+#CHECK: mvghi	4095(%r15), 42          # encoding: [0xe5,0x48,0xff,0xff,0x00,0x2a]
+
+	mvghi	0, 0
+	mvghi	4095, 0
+	mvghi	0, -32768
+	mvghi	0, -1
+	mvghi	0, 0
+	mvghi	0, 1
+	mvghi	0, 32767
+	mvghi	0(%r1), 42
+	mvghi	0(%r15), 42
+	mvghi	4095(%r1), 42
+	mvghi	4095(%r15), 42
diff --git a/test/MC/SystemZ/insn-mvghi-02.s b/test/MC/SystemZ/insn-mvghi-02.s
new file mode 100644
index 0000000..38b38a5
--- /dev/null
+++ b/test/MC/SystemZ/insn-mvghi-02.s
@@ -0,0 +1,19 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: mvghi	-1, 0
+#CHECK: error: invalid operand
+#CHECK: mvghi	4096, 0
+#CHECK: error: invalid use of indexed addressing
+#CHECK: mvghi	0(%r1,%r2), 0
+#CHECK: error: invalid operand
+#CHECK: mvghi	0, -32769
+#CHECK: error: invalid operand
+#CHECK: mvghi	0, 32768
+
+	mvghi	-1, 0
+	mvghi	4096, 0
+	mvghi	0(%r1,%r2), 0
+	mvghi	0, -32769
+	mvghi	0, 32768
diff --git a/test/MC/SystemZ/insn-mvhhi-01.s b/test/MC/SystemZ/insn-mvhhi-01.s
new file mode 100644
index 0000000..63574a4
--- /dev/null
+++ b/test/MC/SystemZ/insn-mvhhi-01.s
@@ -0,0 +1,25 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: mvhhi	0, 0                    # encoding: [0xe5,0x44,0x00,0x00,0x00,0x00]
+#CHECK: mvhhi	4095, 0                 # encoding: [0xe5,0x44,0x0f,0xff,0x00,0x00]
+#CHECK: mvhhi	0, -32768               # encoding: [0xe5,0x44,0x00,0x00,0x80,0x00]
+#CHECK: mvhhi	0, -1                   # encoding: [0xe5,0x44,0x00,0x00,0xff,0xff]
+#CHECK: mvhhi	0, 0                    # encoding: [0xe5,0x44,0x00,0x00,0x00,0x00]
+#CHECK: mvhhi	0, 1                    # encoding: [0xe5,0x44,0x00,0x00,0x00,0x01]
+#CHECK: mvhhi	0, 32767                # encoding: [0xe5,0x44,0x00,0x00,0x7f,0xff]
+#CHECK: mvhhi	0(%r1), 42              # encoding: [0xe5,0x44,0x10,0x00,0x00,0x2a]
+#CHECK: mvhhi	0(%r15), 42             # encoding: [0xe5,0x44,0xf0,0x00,0x00,0x2a]
+#CHECK: mvhhi	4095(%r1), 42           # encoding: [0xe5,0x44,0x1f,0xff,0x00,0x2a]
+#CHECK: mvhhi	4095(%r15), 42          # encoding: [0xe5,0x44,0xff,0xff,0x00,0x2a]
+
+	mvhhi	0, 0
+	mvhhi	4095, 0
+	mvhhi	0, -32768
+	mvhhi	0, -1
+	mvhhi	0, 0
+	mvhhi	0, 1
+	mvhhi	0, 32767
+	mvhhi	0(%r1), 42
+	mvhhi	0(%r15), 42
+	mvhhi	4095(%r1), 42
+	mvhhi	4095(%r15), 42
diff --git a/test/MC/SystemZ/insn-mvhhi-02.s b/test/MC/SystemZ/insn-mvhhi-02.s
new file mode 100644
index 0000000..58abb02
--- /dev/null
+++ b/test/MC/SystemZ/insn-mvhhi-02.s
@@ -0,0 +1,19 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: mvhhi	-1, 0
+#CHECK: error: invalid operand
+#CHECK: mvhhi	4096, 0
+#CHECK: error: invalid use of indexed addressing
+#CHECK: mvhhi	0(%r1,%r2), 0
+#CHECK: error: invalid operand
+#CHECK: mvhhi	0, -32769
+#CHECK: error: invalid operand
+#CHECK: mvhhi	0, 32768
+
+	mvhhi	-1, 0
+	mvhhi	4096, 0
+	mvhhi	0(%r1,%r2), 0
+	mvhhi	0, -32769
+	mvhhi	0, 32768
diff --git a/test/MC/SystemZ/insn-mvhi-01.s b/test/MC/SystemZ/insn-mvhi-01.s
new file mode 100644
index 0000000..5bf9fd3
--- /dev/null
+++ b/test/MC/SystemZ/insn-mvhi-01.s
@@ -0,0 +1,25 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: mvhi	0, 0                    # encoding: [0xe5,0x4c,0x00,0x00,0x00,0x00]
+#CHECK: mvhi	4095, 0                 # encoding: [0xe5,0x4c,0x0f,0xff,0x00,0x00]
+#CHECK: mvhi	0, -32768               # encoding: [0xe5,0x4c,0x00,0x00,0x80,0x00]
+#CHECK: mvhi	0, -1                   # encoding: [0xe5,0x4c,0x00,0x00,0xff,0xff]
+#CHECK: mvhi	0, 0                    # encoding: [0xe5,0x4c,0x00,0x00,0x00,0x00]
+#CHECK: mvhi	0, 1                    # encoding: [0xe5,0x4c,0x00,0x00,0x00,0x01]
+#CHECK: mvhi	0, 32767                # encoding: [0xe5,0x4c,0x00,0x00,0x7f,0xff]
+#CHECK: mvhi	0(%r1), 42              # encoding: [0xe5,0x4c,0x10,0x00,0x00,0x2a]
+#CHECK: mvhi	0(%r15), 42             # encoding: [0xe5,0x4c,0xf0,0x00,0x00,0x2a]
+#CHECK: mvhi	4095(%r1), 42           # encoding: [0xe5,0x4c,0x1f,0xff,0x00,0x2a]
+#CHECK: mvhi	4095(%r15), 42          # encoding: [0xe5,0x4c,0xff,0xff,0x00,0x2a]
+
+	mvhi	0, 0
+	mvhi	4095, 0
+	mvhi	0, -32768
+	mvhi	0, -1
+	mvhi	0, 0
+	mvhi	0, 1
+	mvhi	0, 32767
+	mvhi	0(%r1), 42
+	mvhi	0(%r15), 42
+	mvhi	4095(%r1), 42
+	mvhi	4095(%r15), 42
diff --git a/test/MC/SystemZ/insn-mvhi-02.s b/test/MC/SystemZ/insn-mvhi-02.s
new file mode 100644
index 0000000..517301c
--- /dev/null
+++ b/test/MC/SystemZ/insn-mvhi-02.s
@@ -0,0 +1,19 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: mvhi	-1, 0
+#CHECK: error: invalid operand
+#CHECK: mvhi	4096, 0
+#CHECK: error: invalid use of indexed addressing
+#CHECK: mvhi	0(%r1,%r2), 0
+#CHECK: error: invalid operand
+#CHECK: mvhi	0, -32769
+#CHECK: error: invalid operand
+#CHECK: mvhi	0, 32768
+
+	mvhi	-1, 0
+	mvhi	4096, 0
+	mvhi	0(%r1,%r2), 0
+	mvhi	0, -32769
+	mvhi	0, 32768
diff --git a/test/MC/SystemZ/insn-mvi-01.s b/test/MC/SystemZ/insn-mvi-01.s
new file mode 100644
index 0000000..83e3090
--- /dev/null
+++ b/test/MC/SystemZ/insn-mvi-01.s
@@ -0,0 +1,17 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: mvi	0, 0                    # encoding: [0x92,0x00,0x00,0x00]
+#CHECK: mvi	4095, 0                 # encoding: [0x92,0x00,0x0f,0xff]
+#CHECK: mvi	0, 255                  # encoding: [0x92,0xff,0x00,0x00]
+#CHECK: mvi	0(%r1), 42              # encoding: [0x92,0x2a,0x10,0x00]
+#CHECK: mvi	0(%r15), 42             # encoding: [0x92,0x2a,0xf0,0x00]
+#CHECK: mvi	4095(%r1), 42           # encoding: [0x92,0x2a,0x1f,0xff]
+#CHECK: mvi	4095(%r15), 42          # encoding: [0x92,0x2a,0xff,0xff]
+
+	mvi	0, 0
+	mvi	4095, 0
+	mvi	0, 255
+	mvi	0(%r1), 42
+	mvi	0(%r15), 42
+	mvi	4095(%r1), 42
+	mvi	4095(%r15), 42
diff --git a/test/MC/SystemZ/insn-mvi-02.s b/test/MC/SystemZ/insn-mvi-02.s
new file mode 100644
index 0000000..ddd5909
--- /dev/null
+++ b/test/MC/SystemZ/insn-mvi-02.s
@@ -0,0 +1,19 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: mvi	-1, 0
+#CHECK: error: invalid operand
+#CHECK: mvi	4096, 0
+#CHECK: error: invalid use of indexed addressing
+#CHECK: mvi	0(%r1,%r2), 0
+#CHECK: error: invalid operand
+#CHECK: mvi	0, -1
+#CHECK: error: invalid operand
+#CHECK: mvi	0, 256
+
+	mvi	-1, 0
+	mvi	4096, 0
+	mvi	0(%r1,%r2), 0
+	mvi	0, -1
+	mvi	0, 256
diff --git a/test/MC/SystemZ/insn-mviy-01.s b/test/MC/SystemZ/insn-mviy-01.s
new file mode 100644
index 0000000..8bd6979
--- /dev/null
+++ b/test/MC/SystemZ/insn-mviy-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: mviy	-524288, 0              # encoding: [0xeb,0x00,0x00,0x00,0x80,0x52]
+#CHECK: mviy	-1, 0                   # encoding: [0xeb,0x00,0x0f,0xff,0xff,0x52]
+#CHECK: mviy	0, 0                    # encoding: [0xeb,0x00,0x00,0x00,0x00,0x52]
+#CHECK: mviy	1, 0                    # encoding: [0xeb,0x00,0x00,0x01,0x00,0x52]
+#CHECK: mviy	524287, 0               # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0x52]
+#CHECK: mviy	0, 255                  # encoding: [0xeb,0xff,0x00,0x00,0x00,0x52]
+#CHECK: mviy	0(%r1), 42              # encoding: [0xeb,0x2a,0x10,0x00,0x00,0x52]
+#CHECK: mviy	0(%r15), 42             # encoding: [0xeb,0x2a,0xf0,0x00,0x00,0x52]
+#CHECK: mviy	524287(%r1), 42         # encoding: [0xeb,0x2a,0x1f,0xff,0x7f,0x52]
+#CHECK: mviy	524287(%r15), 42        # encoding: [0xeb,0x2a,0xff,0xff,0x7f,0x52]
+
+	mviy	-524288, 0
+	mviy	-1, 0
+	mviy	0, 0
+	mviy	1, 0
+	mviy	524287, 0
+	mviy	0, 255
+	mviy	0(%r1), 42
+	mviy	0(%r15), 42
+	mviy	524287(%r1), 42
+	mviy	524287(%r15), 42
diff --git a/test/MC/SystemZ/insn-mviy-02.s b/test/MC/SystemZ/insn-mviy-02.s
new file mode 100644
index 0000000..ab78dab
--- /dev/null
+++ b/test/MC/SystemZ/insn-mviy-02.s
@@ -0,0 +1,19 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: mviy	-524289, 0
+#CHECK: error: invalid operand
+#CHECK: mviy	524288, 0
+#CHECK: error: invalid use of indexed addressing
+#CHECK: mviy	0(%r1,%r2), 0
+#CHECK: error: invalid operand
+#CHECK: mviy	0, -1
+#CHECK: error: invalid operand
+#CHECK: mviy	0, 256
+
+	mviy	-524289, 0
+	mviy	524288, 0
+	mviy	0(%r1,%r2), 0
+	mviy	0, -1
+	mviy	0, 256
diff --git a/test/MC/SystemZ/insn-mxbr-01.s b/test/MC/SystemZ/insn-mxbr-01.s
new file mode 100644
index 0000000..60c8ebad
--- /dev/null
+++ b/test/MC/SystemZ/insn-mxbr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: mxbr	%f0, %f0                # encoding: [0xb3,0x4c,0x00,0x00]
+#CHECK: mxbr	%f0, %f13               # encoding: [0xb3,0x4c,0x00,0x0d]
+#CHECK: mxbr	%f8, %f5                # encoding: [0xb3,0x4c,0x00,0x85]
+#CHECK: mxbr	%f13, %f13              # encoding: [0xb3,0x4c,0x00,0xdd]
+
+	mxbr	%f0, %f0
+	mxbr	%f0, %f13
+	mxbr	%f8, %f5
+	mxbr	%f13, %f13
diff --git a/test/MC/SystemZ/insn-mxbr-02.s b/test/MC/SystemZ/insn-mxbr-02.s
new file mode 100644
index 0000000..9282023
--- /dev/null
+++ b/test/MC/SystemZ/insn-mxbr-02.s
@@ -0,0 +1,17 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid register
+#CHECK: mxbr	%f0, %f2
+#CHECK: error: invalid register
+#CHECK: mxbr	%f0, %f14
+#CHECK: error: invalid register
+#CHECK: mxbr	%f2, %f0
+#CHECK: error: invalid register
+#CHECK: mxbr	%f14, %f0
+
+	mxbr	%f0, %f2
+	mxbr	%f0, %f14
+	mxbr	%f2, %f0
+	mxbr	%f14, %f0
+
diff --git a/test/MC/SystemZ/insn-mxdb-01.s b/test/MC/SystemZ/insn-mxdb-01.s
new file mode 100644
index 0000000..46a723e
--- /dev/null
+++ b/test/MC/SystemZ/insn-mxdb-01.s
@@ -0,0 +1,17 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: mxdb	%f0, 0                  # encoding: [0xed,0x00,0x00,0x00,0x00,0x07]
+#CHECK: mxdb	%f0, 4095               # encoding: [0xed,0x00,0x0f,0xff,0x00,0x07]
+#CHECK: mxdb	%f0, 0(%r1)             # encoding: [0xed,0x00,0x10,0x00,0x00,0x07]
+#CHECK: mxdb	%f0, 0(%r15)            # encoding: [0xed,0x00,0xf0,0x00,0x00,0x07]
+#CHECK: mxdb	%f0, 4095(%r1,%r15)     # encoding: [0xed,0x01,0xff,0xff,0x00,0x07]
+#CHECK: mxdb	%f0, 4095(%r15,%r1)     # encoding: [0xed,0x0f,0x1f,0xff,0x00,0x07]
+#CHECK: mxdb	%f13, 0                 # encoding: [0xed,0xd0,0x00,0x00,0x00,0x07]
+
+	mxdb	%f0, 0
+	mxdb	%f0, 4095
+	mxdb	%f0, 0(%r1)
+	mxdb	%f0, 0(%r15)
+	mxdb	%f0, 4095(%r1,%r15)
+	mxdb	%f0, 4095(%r15,%r1)
+	mxdb	%f13, 0
diff --git a/test/MC/SystemZ/insn-mxdb-02.s b/test/MC/SystemZ/insn-mxdb-02.s
new file mode 100644
index 0000000..44c821ca
--- /dev/null
+++ b/test/MC/SystemZ/insn-mxdb-02.s
@@ -0,0 +1,16 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid register
+#CHECK: mxdb	%f2, 0
+#CHECK: error: invalid register
+#CHECK: mxdb	%f15, 0
+#CHECK: error: invalid operand
+#CHECK: mxdb	%f0, -1
+#CHECK: error: invalid operand
+#CHECK: mxdb	%f0, 4096
+
+	mxdb	%f2, 0
+	mxdb	%f15, 0
+	mxdb	%f0, -1
+	mxdb	%f0, 4096
diff --git a/test/MC/SystemZ/insn-mxdbr-01.s b/test/MC/SystemZ/insn-mxdbr-01.s
new file mode 100644
index 0000000..dfb898f
--- /dev/null
+++ b/test/MC/SystemZ/insn-mxdbr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: mxdbr	%f0, %f0                # encoding: [0xb3,0x07,0x00,0x00]
+#CHECK: mxdbr	%f0, %f15               # encoding: [0xb3,0x07,0x00,0x0f]
+#CHECK: mxdbr	%f8, %f8                # encoding: [0xb3,0x07,0x00,0x88]
+#CHECK: mxdbr	%f13, %f0               # encoding: [0xb3,0x07,0x00,0xd0]
+
+	mxdbr	%f0, %f0
+	mxdbr	%f0, %f15
+	mxdbr	%f8, %f8
+	mxdbr	%f13, %f0
diff --git a/test/MC/SystemZ/insn-mxdbr-02.s b/test/MC/SystemZ/insn-mxdbr-02.s
new file mode 100644
index 0000000..9026048
--- /dev/null
+++ b/test/MC/SystemZ/insn-mxdbr-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid register
+#CHECK: mxdbr	%f2, %f0
+#CHECK: error: invalid register
+#CHECK: mxdbr	%f15, %f0
+
+	mxdbr	%f2, %f0
+	mxdbr	%f15, %f0
diff --git a/test/MC/SystemZ/insn-n-01.s b/test/MC/SystemZ/insn-n-01.s
new file mode 100644
index 0000000..75fa141
--- /dev/null
+++ b/test/MC/SystemZ/insn-n-01.s
@@ -0,0 +1,17 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: n	%r0, 0                  # encoding: [0x54,0x00,0x00,0x00]
+#CHECK: n	%r0, 4095               # encoding: [0x54,0x00,0x0f,0xff]
+#CHECK: n	%r0, 0(%r1)             # encoding: [0x54,0x00,0x10,0x00]
+#CHECK: n	%r0, 0(%r15)            # encoding: [0x54,0x00,0xf0,0x00]
+#CHECK: n	%r0, 4095(%r1,%r15)     # encoding: [0x54,0x01,0xff,0xff]
+#CHECK: n	%r0, 4095(%r15,%r1)     # encoding: [0x54,0x0f,0x1f,0xff]
+#CHECK: n	%r15, 0                 # encoding: [0x54,0xf0,0x00,0x00]
+
+	n	%r0, 0
+	n	%r0, 4095
+	n	%r0, 0(%r1)
+	n	%r0, 0(%r15)
+	n	%r0, 4095(%r1,%r15)
+	n	%r0, 4095(%r15,%r1)
+	n	%r15, 0
diff --git a/test/MC/SystemZ/insn-n-02.s b/test/MC/SystemZ/insn-n-02.s
new file mode 100644
index 0000000..7c14b1f
--- /dev/null
+++ b/test/MC/SystemZ/insn-n-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: n	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: n	%r0, 4096
+
+	n	%r0, -1
+	n	%r0, 4096
diff --git a/test/MC/SystemZ/insn-ng-01.s b/test/MC/SystemZ/insn-ng-01.s
new file mode 100644
index 0000000..bf71a21
--- /dev/null
+++ b/test/MC/SystemZ/insn-ng-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: ng	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x80]
+#CHECK: ng	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x80]
+#CHECK: ng	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x80]
+#CHECK: ng	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x80]
+#CHECK: ng	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x80]
+#CHECK: ng	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x80]
+#CHECK: ng	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x80]
+#CHECK: ng	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x80]
+#CHECK: ng	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x80]
+#CHECK: ng	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x80]
+
+	ng	%r0, -524288
+	ng	%r0, -1
+	ng	%r0, 0
+	ng	%r0, 1
+	ng	%r0, 524287
+	ng	%r0, 0(%r1)
+	ng	%r0, 0(%r15)
+	ng	%r0, 524287(%r1,%r15)
+	ng	%r0, 524287(%r15,%r1)
+	ng	%r15, 0
diff --git a/test/MC/SystemZ/insn-ng-02.s b/test/MC/SystemZ/insn-ng-02.s
new file mode 100644
index 0000000..a6f3260
--- /dev/null
+++ b/test/MC/SystemZ/insn-ng-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: ng	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: ng	%r0, 524288
+
+	ng	%r0, -524289
+	ng	%r0, 524288
diff --git a/test/MC/SystemZ/insn-ngr-01.s b/test/MC/SystemZ/insn-ngr-01.s
new file mode 100644
index 0000000..714b9fa
--- /dev/null
+++ b/test/MC/SystemZ/insn-ngr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: ngr	%r0, %r0                # encoding: [0xb9,0x80,0x00,0x00]
+#CHECK: ngr	%r0, %r15               # encoding: [0xb9,0x80,0x00,0x0f]
+#CHECK: ngr	%r15, %r0               # encoding: [0xb9,0x80,0x00,0xf0]
+#CHECK: ngr	%r7, %r8                # encoding: [0xb9,0x80,0x00,0x78]
+
+	ngr	%r0,%r0
+	ngr	%r0,%r15
+	ngr	%r15,%r0
+	ngr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-ni-01.s b/test/MC/SystemZ/insn-ni-01.s
new file mode 100644
index 0000000..d075674
--- /dev/null
+++ b/test/MC/SystemZ/insn-ni-01.s
@@ -0,0 +1,17 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: ni	0, 0                    # encoding: [0x94,0x00,0x00,0x00]
+#CHECK: ni	4095, 0                 # encoding: [0x94,0x00,0x0f,0xff]
+#CHECK: ni	0, 255                  # encoding: [0x94,0xff,0x00,0x00]
+#CHECK: ni	0(%r1), 42              # encoding: [0x94,0x2a,0x10,0x00]
+#CHECK: ni	0(%r15), 42             # encoding: [0x94,0x2a,0xf0,0x00]
+#CHECK: ni	4095(%r1), 42           # encoding: [0x94,0x2a,0x1f,0xff]
+#CHECK: ni	4095(%r15), 42          # encoding: [0x94,0x2a,0xff,0xff]
+
+	ni	0, 0
+	ni	4095, 0
+	ni	0, 255
+	ni	0(%r1), 42
+	ni	0(%r15), 42
+	ni	4095(%r1), 42
+	ni	4095(%r15), 42
diff --git a/test/MC/SystemZ/insn-ni-02.s b/test/MC/SystemZ/insn-ni-02.s
new file mode 100644
index 0000000..1b9a6a7
--- /dev/null
+++ b/test/MC/SystemZ/insn-ni-02.s
@@ -0,0 +1,19 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: ni	-1, 0
+#CHECK: error: invalid operand
+#CHECK: ni	4096, 0
+#CHECK: error: invalid use of indexed addressing
+#CHECK: ni	0(%r1,%r2), 0
+#CHECK: error: invalid operand
+#CHECK: ni	0, -1
+#CHECK: error: invalid operand
+#CHECK: ni	0, 256
+
+	ni	-1, 0
+	ni	4096, 0
+	ni	0(%r1,%r2), 0
+	ni	0, -1
+	ni	0, 256
diff --git a/test/MC/SystemZ/insn-nihf-01.s b/test/MC/SystemZ/insn-nihf-01.s
new file mode 100644
index 0000000..dceb8d1
--- /dev/null
+++ b/test/MC/SystemZ/insn-nihf-01.s
@@ -0,0 +1,9 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: nihf	%r0, 0                  # encoding: [0xc0,0x0a,0x00,0x00,0x00,0x00]
+#CHECK: nihf	%r0, 4294967295         # encoding: [0xc0,0x0a,0xff,0xff,0xff,0xff]
+#CHECK: nihf	%r15, 0                 # encoding: [0xc0,0xfa,0x00,0x00,0x00,0x00]
+
+	nihf	%r0, 0
+	nihf	%r0, 0xffffffff
+	nihf	%r15, 0
diff --git a/test/MC/SystemZ/insn-nihf-02.s b/test/MC/SystemZ/insn-nihf-02.s
new file mode 100644
index 0000000..5f7f10a
--- /dev/null
+++ b/test/MC/SystemZ/insn-nihf-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: nihf	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: nihf	%r0, 1 << 32
+
+	nihf	%r0, -1
+	nihf	%r0, 1 << 32
diff --git a/test/MC/SystemZ/insn-nihh-01.s b/test/MC/SystemZ/insn-nihh-01.s
new file mode 100644
index 0000000..a87540d
--- /dev/null
+++ b/test/MC/SystemZ/insn-nihh-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: nihh	%r0, 0                  # encoding: [0xa5,0x04,0x00,0x00]
+#CHECK: nihh	%r0, 32768              # encoding: [0xa5,0x04,0x80,0x00]
+#CHECK: nihh	%r0, 65535              # encoding: [0xa5,0x04,0xff,0xff]
+#CHECK: nihh	%r15, 0                 # encoding: [0xa5,0xf4,0x00,0x00]
+
+	nihh	%r0, 0
+	nihh	%r0, 0x8000
+	nihh	%r0, 0xffff
+	nihh	%r15, 0
diff --git a/test/MC/SystemZ/insn-nihh-02.s b/test/MC/SystemZ/insn-nihh-02.s
new file mode 100644
index 0000000..3df88e4
--- /dev/null
+++ b/test/MC/SystemZ/insn-nihh-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: nihh	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: nihh	%r0, 0x10000
+
+	nihh	%r0, -1
+	nihh	%r0, 0x10000
diff --git a/test/MC/SystemZ/insn-nihl-01.s b/test/MC/SystemZ/insn-nihl-01.s
new file mode 100644
index 0000000..6eab58c
--- /dev/null
+++ b/test/MC/SystemZ/insn-nihl-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: nihl	%r0, 0                  # encoding: [0xa5,0x05,0x00,0x00]
+#CHECK: nihl	%r0, 32768              # encoding: [0xa5,0x05,0x80,0x00]
+#CHECK: nihl	%r0, 65535              # encoding: [0xa5,0x05,0xff,0xff]
+#CHECK: nihl	%r15, 0                 # encoding: [0xa5,0xf5,0x00,0x00]
+
+	nihl	%r0, 0
+	nihl	%r0, 0x8000
+	nihl	%r0, 0xffff
+	nihl	%r15, 0
diff --git a/test/MC/SystemZ/insn-nihl-02.s b/test/MC/SystemZ/insn-nihl-02.s
new file mode 100644
index 0000000..6e2d52f
--- /dev/null
+++ b/test/MC/SystemZ/insn-nihl-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: nihl	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: nihl	%r0, 0x10000
+
+	nihl	%r0, -1
+	nihl	%r0, 0x10000
diff --git a/test/MC/SystemZ/insn-nilf-01.s b/test/MC/SystemZ/insn-nilf-01.s
new file mode 100644
index 0000000..0b3a13e
--- /dev/null
+++ b/test/MC/SystemZ/insn-nilf-01.s
@@ -0,0 +1,9 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: nilf	%r0, 0                  # encoding: [0xc0,0x0b,0x00,0x00,0x00,0x00]
+#CHECK: nilf	%r0, 4294967295         # encoding: [0xc0,0x0b,0xff,0xff,0xff,0xff]
+#CHECK: nilf	%r15, 0                 # encoding: [0xc0,0xfb,0x00,0x00,0x00,0x00]
+
+	nilf	%r0, 0
+	nilf	%r0, 0xffffffff
+	nilf	%r15, 0
diff --git a/test/MC/SystemZ/insn-nilf-02.s b/test/MC/SystemZ/insn-nilf-02.s
new file mode 100644
index 0000000..87b65e4
--- /dev/null
+++ b/test/MC/SystemZ/insn-nilf-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: nilf	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: nilf	%r0, 1 << 32
+
+	nilf	%r0, -1
+	nilf	%r0, 1 << 32
diff --git a/test/MC/SystemZ/insn-nilh-01.s b/test/MC/SystemZ/insn-nilh-01.s
new file mode 100644
index 0000000..4bc9353
--- /dev/null
+++ b/test/MC/SystemZ/insn-nilh-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: nilh	%r0, 0                  # encoding: [0xa5,0x06,0x00,0x00]
+#CHECK: nilh	%r0, 32768              # encoding: [0xa5,0x06,0x80,0x00]
+#CHECK: nilh	%r0, 65535              # encoding: [0xa5,0x06,0xff,0xff]
+#CHECK: nilh	%r15, 0                 # encoding: [0xa5,0xf6,0x00,0x00]
+
+	nilh	%r0, 0
+	nilh	%r0, 0x8000
+	nilh	%r0, 0xffff
+	nilh	%r15, 0
diff --git a/test/MC/SystemZ/insn-nilh-02.s b/test/MC/SystemZ/insn-nilh-02.s
new file mode 100644
index 0000000..ae5a852
--- /dev/null
+++ b/test/MC/SystemZ/insn-nilh-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: nilh	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: nilh	%r0, 0x10000
+
+	nilh	%r0, -1
+	nilh	%r0, 0x10000
diff --git a/test/MC/SystemZ/insn-nill-01.s b/test/MC/SystemZ/insn-nill-01.s
new file mode 100644
index 0000000..5f4f877
--- /dev/null
+++ b/test/MC/SystemZ/insn-nill-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: nill	%r0, 0                  # encoding: [0xa5,0x07,0x00,0x00]
+#CHECK: nill	%r0, 32768              # encoding: [0xa5,0x07,0x80,0x00]
+#CHECK: nill	%r0, 65535              # encoding: [0xa5,0x07,0xff,0xff]
+#CHECK: nill	%r15, 0                 # encoding: [0xa5,0xf7,0x00,0x00]
+
+	nill	%r0, 0
+	nill	%r0, 0x8000
+	nill	%r0, 0xffff
+	nill	%r15, 0
diff --git a/test/MC/SystemZ/insn-nill-02.s b/test/MC/SystemZ/insn-nill-02.s
new file mode 100644
index 0000000..27fbc4a
--- /dev/null
+++ b/test/MC/SystemZ/insn-nill-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: nill	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: nill	%r0, 0x10000
+
+	nill	%r0, -1
+	nill	%r0, 0x10000
diff --git a/test/MC/SystemZ/insn-niy-01.s b/test/MC/SystemZ/insn-niy-01.s
new file mode 100644
index 0000000..4c007e9
--- /dev/null
+++ b/test/MC/SystemZ/insn-niy-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: niy	-524288, 0              # encoding: [0xeb,0x00,0x00,0x00,0x80,0x54]
+#CHECK: niy	-1, 0                   # encoding: [0xeb,0x00,0x0f,0xff,0xff,0x54]
+#CHECK: niy	0, 0                    # encoding: [0xeb,0x00,0x00,0x00,0x00,0x54]
+#CHECK: niy	1, 0                    # encoding: [0xeb,0x00,0x00,0x01,0x00,0x54]
+#CHECK: niy	524287, 0               # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0x54]
+#CHECK: niy	0, 255                  # encoding: [0xeb,0xff,0x00,0x00,0x00,0x54]
+#CHECK: niy	0(%r1), 42              # encoding: [0xeb,0x2a,0x10,0x00,0x00,0x54]
+#CHECK: niy	0(%r15), 42             # encoding: [0xeb,0x2a,0xf0,0x00,0x00,0x54]
+#CHECK: niy	524287(%r1), 42         # encoding: [0xeb,0x2a,0x1f,0xff,0x7f,0x54]
+#CHECK: niy	524287(%r15), 42        # encoding: [0xeb,0x2a,0xff,0xff,0x7f,0x54]
+
+	niy	-524288, 0
+	niy	-1, 0
+	niy	0, 0
+	niy	1, 0
+	niy	524287, 0
+	niy	0, 255
+	niy	0(%r1), 42
+	niy	0(%r15), 42
+	niy	524287(%r1), 42
+	niy	524287(%r15), 42
diff --git a/test/MC/SystemZ/insn-niy-02.s b/test/MC/SystemZ/insn-niy-02.s
new file mode 100644
index 0000000..ca398e6
--- /dev/null
+++ b/test/MC/SystemZ/insn-niy-02.s
@@ -0,0 +1,19 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: niy	-524289, 0
+#CHECK: error: invalid operand
+#CHECK: niy	524288, 0
+#CHECK: error: invalid use of indexed addressing
+#CHECK: niy	0(%r1,%r2), 0
+#CHECK: error: invalid operand
+#CHECK: niy	0, -1
+#CHECK: error: invalid operand
+#CHECK: niy	0, 256
+
+	niy	-524289, 0
+	niy	524288, 0
+	niy	0(%r1,%r2), 0
+	niy	0, -1
+	niy	0, 256
diff --git a/test/MC/SystemZ/insn-nr-01.s b/test/MC/SystemZ/insn-nr-01.s
new file mode 100644
index 0000000..c10216d
--- /dev/null
+++ b/test/MC/SystemZ/insn-nr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: nr	%r0, %r0                # encoding: [0x14,0x00]
+#CHECK: nr	%r0, %r15               # encoding: [0x14,0x0f]
+#CHECK: nr	%r15, %r0               # encoding: [0x14,0xf0]
+#CHECK: nr	%r7, %r8                # encoding: [0x14,0x78]
+
+	nr	%r0,%r0
+	nr	%r0,%r15
+	nr	%r15,%r0
+	nr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-ny-01.s b/test/MC/SystemZ/insn-ny-01.s
new file mode 100644
index 0000000..a12bb67
--- /dev/null
+++ b/test/MC/SystemZ/insn-ny-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: ny	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x54]
+#CHECK: ny	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x54]
+#CHECK: ny	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x54]
+#CHECK: ny	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x54]
+#CHECK: ny	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x54]
+#CHECK: ny	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x54]
+#CHECK: ny	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x54]
+#CHECK: ny	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x54]
+#CHECK: ny	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x54]
+#CHECK: ny	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x54]
+
+	ny	%r0, -524288
+	ny	%r0, -1
+	ny	%r0, 0
+	ny	%r0, 1
+	ny	%r0, 524287
+	ny	%r0, 0(%r1)
+	ny	%r0, 0(%r15)
+	ny	%r0, 524287(%r1,%r15)
+	ny	%r0, 524287(%r15,%r1)
+	ny	%r15, 0
diff --git a/test/MC/SystemZ/insn-ny-02.s b/test/MC/SystemZ/insn-ny-02.s
new file mode 100644
index 0000000..5f53ebd3
--- /dev/null
+++ b/test/MC/SystemZ/insn-ny-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: ny	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: ny	%r0, 524288
+
+	ny	%r0, -524289
+	ny	%r0, 524288
diff --git a/test/MC/SystemZ/insn-o-01.s b/test/MC/SystemZ/insn-o-01.s
new file mode 100644
index 0000000..0c74e9c
--- /dev/null
+++ b/test/MC/SystemZ/insn-o-01.s
@@ -0,0 +1,17 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: o	%r0, 0                  # encoding: [0x56,0x00,0x00,0x00]
+#CHECK: o	%r0, 4095               # encoding: [0x56,0x00,0x0f,0xff]
+#CHECK: o	%r0, 0(%r1)             # encoding: [0x56,0x00,0x10,0x00]
+#CHECK: o	%r0, 0(%r15)            # encoding: [0x56,0x00,0xf0,0x00]
+#CHECK: o	%r0, 4095(%r1,%r15)     # encoding: [0x56,0x01,0xff,0xff]
+#CHECK: o	%r0, 4095(%r15,%r1)     # encoding: [0x56,0x0f,0x1f,0xff]
+#CHECK: o	%r15, 0                 # encoding: [0x56,0xf0,0x00,0x00]
+
+	o	%r0, 0
+	o	%r0, 4095
+	o	%r0, 0(%r1)
+	o	%r0, 0(%r15)
+	o	%r0, 4095(%r1,%r15)
+	o	%r0, 4095(%r15,%r1)
+	o	%r15, 0
diff --git a/test/MC/SystemZ/insn-o-02.s b/test/MC/SystemZ/insn-o-02.s
new file mode 100644
index 0000000..34b7418
--- /dev/null
+++ b/test/MC/SystemZ/insn-o-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: o	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: o	%r0, 4096
+
+	o	%r0, -1
+	o	%r0, 4096
diff --git a/test/MC/SystemZ/insn-og-01.s b/test/MC/SystemZ/insn-og-01.s
new file mode 100644
index 0000000..3c9811b
--- /dev/null
+++ b/test/MC/SystemZ/insn-og-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: og	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x81]
+#CHECK: og	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x81]
+#CHECK: og	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x81]
+#CHECK: og	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x81]
+#CHECK: og	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x81]
+#CHECK: og	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x81]
+#CHECK: og	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x81]
+#CHECK: og	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x81]
+#CHECK: og	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x81]
+#CHECK: og	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x81]
+
+	og	%r0, -524288
+	og	%r0, -1
+	og	%r0, 0
+	og	%r0, 1
+	og	%r0, 524287
+	og	%r0, 0(%r1)
+	og	%r0, 0(%r15)
+	og	%r0, 524287(%r1,%r15)
+	og	%r0, 524287(%r15,%r1)
+	og	%r15, 0
diff --git a/test/MC/SystemZ/insn-og-02.s b/test/MC/SystemZ/insn-og-02.s
new file mode 100644
index 0000000..7f4e453
--- /dev/null
+++ b/test/MC/SystemZ/insn-og-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: og	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: og	%r0, 524288
+
+	og	%r0, -524289
+	og	%r0, 524288
diff --git a/test/MC/SystemZ/insn-ogr-01.s b/test/MC/SystemZ/insn-ogr-01.s
new file mode 100644
index 0000000..25ba913
--- /dev/null
+++ b/test/MC/SystemZ/insn-ogr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: ogr	%r0, %r0                # encoding: [0xb9,0x81,0x00,0x00]
+#CHECK: ogr	%r0, %r15               # encoding: [0xb9,0x81,0x00,0x0f]
+#CHECK: ogr	%r15, %r0               # encoding: [0xb9,0x81,0x00,0xf0]
+#CHECK: ogr	%r7, %r8                # encoding: [0xb9,0x81,0x00,0x78]
+
+	ogr	%r0,%r0
+	ogr	%r0,%r15
+	ogr	%r15,%r0
+	ogr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-oi-01.s b/test/MC/SystemZ/insn-oi-01.s
new file mode 100644
index 0000000..5d52fd2
--- /dev/null
+++ b/test/MC/SystemZ/insn-oi-01.s
@@ -0,0 +1,17 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: oi	0, 0                    # encoding: [0x96,0x00,0x00,0x00]
+#CHECK: oi	4095, 0                 # encoding: [0x96,0x00,0x0f,0xff]
+#CHECK: oi	0, 255                  # encoding: [0x96,0xff,0x00,0x00]
+#CHECK: oi	0(%r1), 42              # encoding: [0x96,0x2a,0x10,0x00]
+#CHECK: oi	0(%r15), 42             # encoding: [0x96,0x2a,0xf0,0x00]
+#CHECK: oi	4095(%r1), 42           # encoding: [0x96,0x2a,0x1f,0xff]
+#CHECK: oi	4095(%r15), 42          # encoding: [0x96,0x2a,0xff,0xff]
+
+	oi	0, 0
+	oi	4095, 0
+	oi	0, 255
+	oi	0(%r1), 42
+	oi	0(%r15), 42
+	oi	4095(%r1), 42
+	oi	4095(%r15), 42
diff --git a/test/MC/SystemZ/insn-oi-02.s b/test/MC/SystemZ/insn-oi-02.s
new file mode 100644
index 0000000..330a290
--- /dev/null
+++ b/test/MC/SystemZ/insn-oi-02.s
@@ -0,0 +1,19 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: oi	-1, 0
+#CHECK: error: invalid operand
+#CHECK: oi	4096, 0
+#CHECK: error: invalid use of indexed addressing
+#CHECK: oi	0(%r1,%r2), 0
+#CHECK: error: invalid operand
+#CHECK: oi	0, -1
+#CHECK: error: invalid operand
+#CHECK: oi	0, 256
+
+	oi	-1, 0
+	oi	4096, 0
+	oi	0(%r1,%r2), 0
+	oi	0, -1
+	oi	0, 256
diff --git a/test/MC/SystemZ/insn-oihf-01.s b/test/MC/SystemZ/insn-oihf-01.s
new file mode 100644
index 0000000..627820d
--- /dev/null
+++ b/test/MC/SystemZ/insn-oihf-01.s
@@ -0,0 +1,9 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: oihf	%r0, 0                  # encoding: [0xc0,0x0c,0x00,0x00,0x00,0x00]
+#CHECK: oihf	%r0, 4294967295         # encoding: [0xc0,0x0c,0xff,0xff,0xff,0xff]
+#CHECK: oihf	%r15, 0                 # encoding: [0xc0,0xfc,0x00,0x00,0x00,0x00]
+
+	oihf	%r0, 0
+	oihf	%r0, 0xffffffff
+	oihf	%r15, 0
diff --git a/test/MC/SystemZ/insn-oihf-02.s b/test/MC/SystemZ/insn-oihf-02.s
new file mode 100644
index 0000000..a944cb0
--- /dev/null
+++ b/test/MC/SystemZ/insn-oihf-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: oihf	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: oihf	%r0, 1 << 32
+
+	oihf	%r0, -1
+	oihf	%r0, 1 << 32
diff --git a/test/MC/SystemZ/insn-oihh-01.s b/test/MC/SystemZ/insn-oihh-01.s
new file mode 100644
index 0000000..f62f61f
--- /dev/null
+++ b/test/MC/SystemZ/insn-oihh-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: oihh	%r0, 0                  # encoding: [0xa5,0x08,0x00,0x00]
+#CHECK: oihh	%r0, 32768              # encoding: [0xa5,0x08,0x80,0x00]
+#CHECK: oihh	%r0, 65535              # encoding: [0xa5,0x08,0xff,0xff]
+#CHECK: oihh	%r15, 0                 # encoding: [0xa5,0xf8,0x00,0x00]
+
+	oihh	%r0, 0
+	oihh	%r0, 0x8000
+	oihh	%r0, 0xffff
+	oihh	%r15, 0
diff --git a/test/MC/SystemZ/insn-oihh-02.s b/test/MC/SystemZ/insn-oihh-02.s
new file mode 100644
index 0000000..6bf7e23
--- /dev/null
+++ b/test/MC/SystemZ/insn-oihh-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: oihh	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: oihh	%r0, 0x10000
+
+	oihh	%r0, -1
+	oihh	%r0, 0x10000
diff --git a/test/MC/SystemZ/insn-oihl-01.s b/test/MC/SystemZ/insn-oihl-01.s
new file mode 100644
index 0000000..437b15c
--- /dev/null
+++ b/test/MC/SystemZ/insn-oihl-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: oihl	%r0, 0                  # encoding: [0xa5,0x09,0x00,0x00]
+#CHECK: oihl	%r0, 32768              # encoding: [0xa5,0x09,0x80,0x00]
+#CHECK: oihl	%r0, 65535              # encoding: [0xa5,0x09,0xff,0xff]
+#CHECK: oihl	%r15, 0                 # encoding: [0xa5,0xf9,0x00,0x00]
+
+	oihl	%r0, 0
+	oihl	%r0, 0x8000
+	oihl	%r0, 0xffff
+	oihl	%r15, 0
diff --git a/test/MC/SystemZ/insn-oihl-02.s b/test/MC/SystemZ/insn-oihl-02.s
new file mode 100644
index 0000000..f4f7a59
--- /dev/null
+++ b/test/MC/SystemZ/insn-oihl-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: oihl	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: oihl	%r0, 0x10000
+
+	oihl	%r0, -1
+	oihl	%r0, 0x10000
diff --git a/test/MC/SystemZ/insn-oilf-01.s b/test/MC/SystemZ/insn-oilf-01.s
new file mode 100644
index 0000000..6f0c071
--- /dev/null
+++ b/test/MC/SystemZ/insn-oilf-01.s
@@ -0,0 +1,9 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: oilf	%r0, 0                  # encoding: [0xc0,0x0d,0x00,0x00,0x00,0x00]
+#CHECK: oilf	%r0, 4294967295         # encoding: [0xc0,0x0d,0xff,0xff,0xff,0xff]
+#CHECK: oilf	%r15, 0                 # encoding: [0xc0,0xfd,0x00,0x00,0x00,0x00]
+
+	oilf	%r0, 0
+	oilf	%r0, 0xffffffff
+	oilf	%r15, 0
diff --git a/test/MC/SystemZ/insn-oilf-02.s b/test/MC/SystemZ/insn-oilf-02.s
new file mode 100644
index 0000000..5501724
--- /dev/null
+++ b/test/MC/SystemZ/insn-oilf-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: oilf	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: oilf	%r0, 1 << 32
+
+	oilf	%r0, -1
+	oilf	%r0, 1 << 32
diff --git a/test/MC/SystemZ/insn-oilh-01.s b/test/MC/SystemZ/insn-oilh-01.s
new file mode 100644
index 0000000..0140500
--- /dev/null
+++ b/test/MC/SystemZ/insn-oilh-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: oilh	%r0, 0                  # encoding: [0xa5,0x0a,0x00,0x00]
+#CHECK: oilh	%r0, 32768              # encoding: [0xa5,0x0a,0x80,0x00]
+#CHECK: oilh	%r0, 65535              # encoding: [0xa5,0x0a,0xff,0xff]
+#CHECK: oilh	%r15, 0                 # encoding: [0xa5,0xfa,0x00,0x00]
+
+	oilh	%r0, 0
+	oilh	%r0, 0x8000
+	oilh	%r0, 0xffff
+	oilh	%r15, 0
diff --git a/test/MC/SystemZ/insn-oilh-02.s b/test/MC/SystemZ/insn-oilh-02.s
new file mode 100644
index 0000000..d2f180d
--- /dev/null
+++ b/test/MC/SystemZ/insn-oilh-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: oilh	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: oilh	%r0, 0x10000
+
+	oilh	%r0, -1
+	oilh	%r0, 0x10000
diff --git a/test/MC/SystemZ/insn-oill-01.s b/test/MC/SystemZ/insn-oill-01.s
new file mode 100644
index 0000000..ef95d2d
--- /dev/null
+++ b/test/MC/SystemZ/insn-oill-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: oill	%r0, 0                  # encoding: [0xa5,0x0b,0x00,0x00]
+#CHECK: oill	%r0, 32768              # encoding: [0xa5,0x0b,0x80,0x00]
+#CHECK: oill	%r0, 65535              # encoding: [0xa5,0x0b,0xff,0xff]
+#CHECK: oill	%r15, 0                 # encoding: [0xa5,0xfb,0x00,0x00]
+
+	oill	%r0, 0
+	oill	%r0, 0x8000
+	oill	%r0, 0xffff
+	oill	%r15, 0
diff --git a/test/MC/SystemZ/insn-oill-02.s b/test/MC/SystemZ/insn-oill-02.s
new file mode 100644
index 0000000..01321db
--- /dev/null
+++ b/test/MC/SystemZ/insn-oill-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: oill	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: oill	%r0, 0x10000
+
+	oill	%r0, -1
+	oill	%r0, 0x10000
diff --git a/test/MC/SystemZ/insn-oiy-01.s b/test/MC/SystemZ/insn-oiy-01.s
new file mode 100644
index 0000000..ba060ca
--- /dev/null
+++ b/test/MC/SystemZ/insn-oiy-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: oiy	-524288, 0              # encoding: [0xeb,0x00,0x00,0x00,0x80,0x56]
+#CHECK: oiy	-1, 0                   # encoding: [0xeb,0x00,0x0f,0xff,0xff,0x56]
+#CHECK: oiy	0, 0                    # encoding: [0xeb,0x00,0x00,0x00,0x00,0x56]
+#CHECK: oiy	1, 0                    # encoding: [0xeb,0x00,0x00,0x01,0x00,0x56]
+#CHECK: oiy	524287, 0               # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0x56]
+#CHECK: oiy	0, 255                  # encoding: [0xeb,0xff,0x00,0x00,0x00,0x56]
+#CHECK: oiy	0(%r1), 42              # encoding: [0xeb,0x2a,0x10,0x00,0x00,0x56]
+#CHECK: oiy	0(%r15), 42             # encoding: [0xeb,0x2a,0xf0,0x00,0x00,0x56]
+#CHECK: oiy	524287(%r1), 42         # encoding: [0xeb,0x2a,0x1f,0xff,0x7f,0x56]
+#CHECK: oiy	524287(%r15), 42        # encoding: [0xeb,0x2a,0xff,0xff,0x7f,0x56]
+
+	oiy	-524288, 0
+	oiy	-1, 0
+	oiy	0, 0
+	oiy	1, 0
+	oiy	524287, 0
+	oiy	0, 255
+	oiy	0(%r1), 42
+	oiy	0(%r15), 42
+	oiy	524287(%r1), 42
+	oiy	524287(%r15), 42
diff --git a/test/MC/SystemZ/insn-oiy-02.s b/test/MC/SystemZ/insn-oiy-02.s
new file mode 100644
index 0000000..c1c5569
--- /dev/null
+++ b/test/MC/SystemZ/insn-oiy-02.s
@@ -0,0 +1,19 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: oiy	-524289, 0
+#CHECK: error: invalid operand
+#CHECK: oiy	524288, 0
+#CHECK: error: invalid use of indexed addressing
+#CHECK: oiy	0(%r1,%r2), 0
+#CHECK: error: invalid operand
+#CHECK: oiy	0, -1
+#CHECK: error: invalid operand
+#CHECK: oiy	0, 256
+
+	oiy	-524289, 0
+	oiy	524288, 0
+	oiy	0(%r1,%r2), 0
+	oiy	0, -1
+	oiy	0, 256
diff --git a/test/MC/SystemZ/insn-or-01.s b/test/MC/SystemZ/insn-or-01.s
new file mode 100644
index 0000000..8ac366d
--- /dev/null
+++ b/test/MC/SystemZ/insn-or-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: or	%r0, %r0                # encoding: [0x16,0x00]
+#CHECK: or	%r0, %r15               # encoding: [0x16,0x0f]
+#CHECK: or	%r15, %r0               # encoding: [0x16,0xf0]
+#CHECK: or	%r7, %r8                # encoding: [0x16,0x78]
+
+	or	%r0,%r0
+	or	%r0,%r15
+	or	%r15,%r0
+	or	%r7,%r8
diff --git a/test/MC/SystemZ/insn-oy-01.s b/test/MC/SystemZ/insn-oy-01.s
new file mode 100644
index 0000000..58013d0
--- /dev/null
+++ b/test/MC/SystemZ/insn-oy-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: oy	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x56]
+#CHECK: oy	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x56]
+#CHECK: oy	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x56]
+#CHECK: oy	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x56]
+#CHECK: oy	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x56]
+#CHECK: oy	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x56]
+#CHECK: oy	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x56]
+#CHECK: oy	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x56]
+#CHECK: oy	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x56]
+#CHECK: oy	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x56]
+
+	oy	%r0, -524288
+	oy	%r0, -1
+	oy	%r0, 0
+	oy	%r0, 1
+	oy	%r0, 524287
+	oy	%r0, 0(%r1)
+	oy	%r0, 0(%r15)
+	oy	%r0, 524287(%r1,%r15)
+	oy	%r0, 524287(%r15,%r1)
+	oy	%r15, 0
diff --git a/test/MC/SystemZ/insn-oy-02.s b/test/MC/SystemZ/insn-oy-02.s
new file mode 100644
index 0000000..a9ae5b2
--- /dev/null
+++ b/test/MC/SystemZ/insn-oy-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: oy	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: oy	%r0, 524288
+
+	oy	%r0, -524289
+	oy	%r0, 524288
diff --git a/test/MC/SystemZ/insn-risbg-01.s b/test/MC/SystemZ/insn-risbg-01.s
new file mode 100644
index 0000000..b50fbe7
--- /dev/null
+++ b/test/MC/SystemZ/insn-risbg-01.s
@@ -0,0 +1,17 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: risbg	%r0, %r0, 0, 0, 0       # encoding: [0xec,0x00,0x00,0x00,0x00,0x55]
+#CHECK: risbg	%r0, %r0, 0, 0, 63      # encoding: [0xec,0x00,0x00,0x00,0x3f,0x55]
+#CHECK: risbg	%r0, %r0, 0, 63, 0      # encoding: [0xec,0x00,0x00,0x3f,0x00,0x55]
+#CHECK: risbg	%r0, %r0, 63, 0, 0      # encoding: [0xec,0x00,0x3f,0x00,0x00,0x55]
+#CHECK: risbg	%r0, %r15, 0, 0, 0      # encoding: [0xec,0x0f,0x00,0x00,0x00,0x55]
+#CHECK: risbg	%r15, %r0, 0, 0, 0      # encoding: [0xec,0xf0,0x00,0x00,0x00,0x55]
+#CHECK: risbg	%r4, %r5, 6, 7, 8       # encoding: [0xec,0x45,0x06,0x07,0x08,0x55]
+
+	risbg	%r0,%r0,0,0,0
+	risbg	%r0,%r0,0,0,63
+	risbg	%r0,%r0,0,63,0
+	risbg	%r0,%r0,63,0,0
+	risbg	%r0,%r15,0,0,0
+	risbg	%r15,%r0,0,0,0
+	risbg	%r4,%r5,6,7,8
diff --git a/test/MC/SystemZ/insn-risbg-02.s b/test/MC/SystemZ/insn-risbg-02.s
new file mode 100644
index 0000000..781cb56
--- /dev/null
+++ b/test/MC/SystemZ/insn-risbg-02.s
@@ -0,0 +1,22 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: risbg	%r0,%r0,0,0,-1
+#CHECK: error: invalid operand
+#CHECK: risbg	%r0,%r0,0,0,64
+#CHECK: error: invalid operand
+#CHECK: risbg	%r0,%r0,0,-1,0
+#CHECK: error: invalid operand
+#CHECK: risbg	%r0,%r0,0,64,0
+#CHECK: error: invalid operand
+#CHECK: risbg	%r0,%r0,-1,0,0
+#CHECK: error: invalid operand
+#CHECK: risbg	%r0,%r0,64,0,0
+
+	risbg	%r0,%r0,0,0,-1
+	risbg	%r0,%r0,0,0,64
+	risbg	%r0,%r0,0,-1,0
+	risbg	%r0,%r0,0,64,0
+	risbg	%r0,%r0,-1,0,0
+	risbg	%r0,%r0,64,0,0
diff --git a/test/MC/SystemZ/insn-rll-01.s b/test/MC/SystemZ/insn-rll-01.s
new file mode 100644
index 0000000..06e3774
--- /dev/null
+++ b/test/MC/SystemZ/insn-rll-01.s
@@ -0,0 +1,27 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: rll	%r0, %r0, 0             # encoding: [0xeb,0x00,0x00,0x00,0x00,0x1d]
+#CHECK: rll	%r15, %r1, 0            # encoding: [0xeb,0xf1,0x00,0x00,0x00,0x1d]
+#CHECK: rll	%r1, %r15, 0            # encoding: [0xeb,0x1f,0x00,0x00,0x00,0x1d]
+#CHECK: rll	%r15, %r15, 0           # encoding: [0xeb,0xff,0x00,0x00,0x00,0x1d]
+#CHECK: rll	%r0, %r0, -524288       # encoding: [0xeb,0x00,0x00,0x00,0x80,0x1d]
+#CHECK: rll	%r0, %r0, -1            # encoding: [0xeb,0x00,0x0f,0xff,0xff,0x1d]
+#CHECK: rll	%r0, %r0, 1             # encoding: [0xeb,0x00,0x00,0x01,0x00,0x1d]
+#CHECK: rll	%r0, %r0, 524287        # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0x1d]
+#CHECK: rll	%r0, %r0, 0(%r1)        # encoding: [0xeb,0x00,0x10,0x00,0x00,0x1d]
+#CHECK: rll	%r0, %r0, 0(%r15)       # encoding: [0xeb,0x00,0xf0,0x00,0x00,0x1d]
+#CHECK: rll	%r0, %r0, 524287(%r1)   # encoding: [0xeb,0x00,0x1f,0xff,0x7f,0x1d]
+#CHECK: rll	%r0, %r0, 524287(%r15)  # encoding: [0xeb,0x00,0xff,0xff,0x7f,0x1d]
+
+	rll	%r0,%r0,0
+	rll	%r15,%r1,0
+	rll	%r1,%r15,0
+	rll	%r15,%r15,0
+	rll	%r0,%r0,-524288
+	rll	%r0,%r0,-1
+	rll	%r0,%r0,1
+	rll	%r0,%r0,524287
+	rll	%r0,%r0,0(%r1)
+	rll	%r0,%r0,0(%r15)
+	rll	%r0,%r0,524287(%r1)
+	rll	%r0,%r0,524287(%r15)
diff --git a/test/MC/SystemZ/insn-rll-02.s b/test/MC/SystemZ/insn-rll-02.s
new file mode 100644
index 0000000..baf1607
--- /dev/null
+++ b/test/MC/SystemZ/insn-rll-02.s
@@ -0,0 +1,16 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: rll	%r0,%r0,-524289
+#CHECK: error: invalid operand
+#CHECK: rll	%r0,%r0,524288
+#CHECK: error: %r0 used in an address
+#CHECK: rll	%r0,%r0,0(%r0)
+#CHECK: error: invalid use of indexed addressing
+#CHECK: rll	%r0,%r0,0(%r1,%r2)
+
+	rll	%r0,%r0,-524289
+	rll	%r0,%r0,524288
+	rll	%r0,%r0,0(%r0)
+	rll	%r0,%r0,0(%r1,%r2)
diff --git a/test/MC/SystemZ/insn-rllg-01.s b/test/MC/SystemZ/insn-rllg-01.s
new file mode 100644
index 0000000..c36dc6d
--- /dev/null
+++ b/test/MC/SystemZ/insn-rllg-01.s
@@ -0,0 +1,27 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: rllg	%r0, %r0, 0             # encoding: [0xeb,0x00,0x00,0x00,0x00,0x1c]
+#CHECK: rllg	%r15, %r1, 0            # encoding: [0xeb,0xf1,0x00,0x00,0x00,0x1c]
+#CHECK: rllg	%r1, %r15, 0            # encoding: [0xeb,0x1f,0x00,0x00,0x00,0x1c]
+#CHECK: rllg	%r15, %r15, 0           # encoding: [0xeb,0xff,0x00,0x00,0x00,0x1c]
+#CHECK: rllg	%r0, %r0, -524288       # encoding: [0xeb,0x00,0x00,0x00,0x80,0x1c]
+#CHECK: rllg	%r0, %r0, -1            # encoding: [0xeb,0x00,0x0f,0xff,0xff,0x1c]
+#CHECK: rllg	%r0, %r0, 1             # encoding: [0xeb,0x00,0x00,0x01,0x00,0x1c]
+#CHECK: rllg	%r0, %r0, 524287        # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0x1c]
+#CHECK: rllg	%r0, %r0, 0(%r1)        # encoding: [0xeb,0x00,0x10,0x00,0x00,0x1c]
+#CHECK: rllg	%r0, %r0, 0(%r15)       # encoding: [0xeb,0x00,0xf0,0x00,0x00,0x1c]
+#CHECK: rllg	%r0, %r0, 524287(%r1)   # encoding: [0xeb,0x00,0x1f,0xff,0x7f,0x1c]
+#CHECK: rllg	%r0, %r0, 524287(%r15)  # encoding: [0xeb,0x00,0xff,0xff,0x7f,0x1c]
+
+	rllg	%r0,%r0,0
+	rllg	%r15,%r1,0
+	rllg	%r1,%r15,0
+	rllg	%r15,%r15,0
+	rllg	%r0,%r0,-524288
+	rllg	%r0,%r0,-1
+	rllg	%r0,%r0,1
+	rllg	%r0,%r0,524287
+	rllg	%r0,%r0,0(%r1)
+	rllg	%r0,%r0,0(%r15)
+	rllg	%r0,%r0,524287(%r1)
+	rllg	%r0,%r0,524287(%r15)
diff --git a/test/MC/SystemZ/insn-rllg-02.s b/test/MC/SystemZ/insn-rllg-02.s
new file mode 100644
index 0000000..7f82845
--- /dev/null
+++ b/test/MC/SystemZ/insn-rllg-02.s
@@ -0,0 +1,16 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: rllg	%r0,%r0,-524289
+#CHECK: error: invalid operand
+#CHECK: rllg	%r0,%r0,524288
+#CHECK: error: %r0 used in an address
+#CHECK: rllg	%r0,%r0,0(%r0)
+#CHECK: error: invalid use of indexed addressing
+#CHECK: rllg	%r0,%r0,0(%r1,%r2)
+
+	rllg	%r0,%r0,-524289
+	rllg	%r0,%r0,524288
+	rllg	%r0,%r0,0(%r0)
+	rllg	%r0,%r0,0(%r1,%r2)
diff --git a/test/MC/SystemZ/insn-s-01.s b/test/MC/SystemZ/insn-s-01.s
new file mode 100644
index 0000000..2effedb
--- /dev/null
+++ b/test/MC/SystemZ/insn-s-01.s
@@ -0,0 +1,17 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: s	%r0, 0                  # encoding: [0x5b,0x00,0x00,0x00]
+#CHECK: s	%r0, 4095               # encoding: [0x5b,0x00,0x0f,0xff]
+#CHECK: s	%r0, 0(%r1)             # encoding: [0x5b,0x00,0x10,0x00]
+#CHECK: s	%r0, 0(%r15)            # encoding: [0x5b,0x00,0xf0,0x00]
+#CHECK: s	%r0, 4095(%r1,%r15)     # encoding: [0x5b,0x01,0xff,0xff]
+#CHECK: s	%r0, 4095(%r15,%r1)     # encoding: [0x5b,0x0f,0x1f,0xff]
+#CHECK: s	%r15, 0                 # encoding: [0x5b,0xf0,0x00,0x00]
+
+	s	%r0, 0
+	s	%r0, 4095
+	s	%r0, 0(%r1)
+	s	%r0, 0(%r15)
+	s	%r0, 4095(%r1,%r15)
+	s	%r0, 4095(%r15,%r1)
+	s	%r15, 0
diff --git a/test/MC/SystemZ/insn-s-02.s b/test/MC/SystemZ/insn-s-02.s
new file mode 100644
index 0000000..f0b4a13
--- /dev/null
+++ b/test/MC/SystemZ/insn-s-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: s	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: s	%r0, 4096
+
+	s	%r0, -1
+	s	%r0, 4096
diff --git a/test/MC/SystemZ/insn-sdb-01.s b/test/MC/SystemZ/insn-sdb-01.s
new file mode 100644
index 0000000..9267796
--- /dev/null
+++ b/test/MC/SystemZ/insn-sdb-01.s
@@ -0,0 +1,17 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: sdb	%f0, 0                  # encoding: [0xed,0x00,0x00,0x00,0x00,0x1b]
+#CHECK: sdb	%f0, 4095               # encoding: [0xed,0x00,0x0f,0xff,0x00,0x1b]
+#CHECK: sdb	%f0, 0(%r1)             # encoding: [0xed,0x00,0x10,0x00,0x00,0x1b]
+#CHECK: sdb	%f0, 0(%r15)            # encoding: [0xed,0x00,0xf0,0x00,0x00,0x1b]
+#CHECK: sdb	%f0, 4095(%r1,%r15)     # encoding: [0xed,0x01,0xff,0xff,0x00,0x1b]
+#CHECK: sdb	%f0, 4095(%r15,%r1)     # encoding: [0xed,0x0f,0x1f,0xff,0x00,0x1b]
+#CHECK: sdb	%f15, 0                 # encoding: [0xed,0xf0,0x00,0x00,0x00,0x1b]
+
+	sdb	%f0, 0
+	sdb	%f0, 4095
+	sdb	%f0, 0(%r1)
+	sdb	%f0, 0(%r15)
+	sdb	%f0, 4095(%r1,%r15)
+	sdb	%f0, 4095(%r15,%r1)
+	sdb	%f15, 0
diff --git a/test/MC/SystemZ/insn-sdb-02.s b/test/MC/SystemZ/insn-sdb-02.s
new file mode 100644
index 0000000..c77284f
--- /dev/null
+++ b/test/MC/SystemZ/insn-sdb-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: sdb	%f0, -1
+#CHECK: error: invalid operand
+#CHECK: sdb	%f0, 4096
+
+	sdb	%f0, -1
+	sdb	%f0, 4096
diff --git a/test/MC/SystemZ/insn-sdbr-01.s b/test/MC/SystemZ/insn-sdbr-01.s
new file mode 100644
index 0000000..b07f5f2
--- /dev/null
+++ b/test/MC/SystemZ/insn-sdbr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: sdbr	%f0, %f0                # encoding: [0xb3,0x1b,0x00,0x00]
+#CHECK: sdbr	%f0, %f15               # encoding: [0xb3,0x1b,0x00,0x0f]
+#CHECK: sdbr	%f7, %f8                # encoding: [0xb3,0x1b,0x00,0x78]
+#CHECK: sdbr	%f15, %f0               # encoding: [0xb3,0x1b,0x00,0xf0]
+
+	sdbr	%f0, %f0
+	sdbr	%f0, %f15
+	sdbr	%f7, %f8
+	sdbr	%f15, %f0
diff --git a/test/MC/SystemZ/insn-seb-01.s b/test/MC/SystemZ/insn-seb-01.s
new file mode 100644
index 0000000..4bf5cfa
--- /dev/null
+++ b/test/MC/SystemZ/insn-seb-01.s
@@ -0,0 +1,17 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: seb	%f0, 0                  # encoding: [0xed,0x00,0x00,0x00,0x00,0x0b]
+#CHECK: seb	%f0, 4095               # encoding: [0xed,0x00,0x0f,0xff,0x00,0x0b]
+#CHECK: seb	%f0, 0(%r1)             # encoding: [0xed,0x00,0x10,0x00,0x00,0x0b]
+#CHECK: seb	%f0, 0(%r15)            # encoding: [0xed,0x00,0xf0,0x00,0x00,0x0b]
+#CHECK: seb	%f0, 4095(%r1,%r15)     # encoding: [0xed,0x01,0xff,0xff,0x00,0x0b]
+#CHECK: seb	%f0, 4095(%r15,%r1)     # encoding: [0xed,0x0f,0x1f,0xff,0x00,0x0b]
+#CHECK: seb	%f15, 0                 # encoding: [0xed,0xf0,0x00,0x00,0x00,0x0b]
+
+	seb	%f0, 0
+	seb	%f0, 4095
+	seb	%f0, 0(%r1)
+	seb	%f0, 0(%r15)
+	seb	%f0, 4095(%r1,%r15)
+	seb	%f0, 4095(%r15,%r1)
+	seb	%f15, 0
diff --git a/test/MC/SystemZ/insn-seb-02.s b/test/MC/SystemZ/insn-seb-02.s
new file mode 100644
index 0000000..e185a20
--- /dev/null
+++ b/test/MC/SystemZ/insn-seb-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: seb	%f0, -1
+#CHECK: error: invalid operand
+#CHECK: seb	%f0, 4096
+
+	seb	%f0, -1
+	seb	%f0, 4096
diff --git a/test/MC/SystemZ/insn-sebr-01.s b/test/MC/SystemZ/insn-sebr-01.s
new file mode 100644
index 0000000..467b57c
--- /dev/null
+++ b/test/MC/SystemZ/insn-sebr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: sebr	%f0, %f0                # encoding: [0xb3,0x0b,0x00,0x00]
+#CHECK: sebr	%f0, %f15               # encoding: [0xb3,0x0b,0x00,0x0f]
+#CHECK: sebr	%f7, %f8                # encoding: [0xb3,0x0b,0x00,0x78]
+#CHECK: sebr	%f15, %f0               # encoding: [0xb3,0x0b,0x00,0xf0]
+
+	sebr	%f0, %f0
+	sebr	%f0, %f15
+	sebr	%f7, %f8
+	sebr	%f15, %f0
diff --git a/test/MC/SystemZ/insn-sg-01.s b/test/MC/SystemZ/insn-sg-01.s
new file mode 100644
index 0000000..153df1a
--- /dev/null
+++ b/test/MC/SystemZ/insn-sg-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: sg	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x09]
+#CHECK: sg	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x09]
+#CHECK: sg	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x09]
+#CHECK: sg	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x09]
+#CHECK: sg	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x09]
+#CHECK: sg	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x09]
+#CHECK: sg	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x09]
+#CHECK: sg	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x09]
+#CHECK: sg	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x09]
+#CHECK: sg	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x09]
+
+	sg	%r0, -524288
+	sg	%r0, -1
+	sg	%r0, 0
+	sg	%r0, 1
+	sg	%r0, 524287
+	sg	%r0, 0(%r1)
+	sg	%r0, 0(%r15)
+	sg	%r0, 524287(%r1,%r15)
+	sg	%r0, 524287(%r15,%r1)
+	sg	%r15, 0
diff --git a/test/MC/SystemZ/insn-sg-02.s b/test/MC/SystemZ/insn-sg-02.s
new file mode 100644
index 0000000..f183e58
--- /dev/null
+++ b/test/MC/SystemZ/insn-sg-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: sg	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: sg	%r0, 524288
+
+	sg	%r0, -524289
+	sg	%r0, 524288
diff --git a/test/MC/SystemZ/insn-sgf-01.s b/test/MC/SystemZ/insn-sgf-01.s
new file mode 100644
index 0000000..844c099
--- /dev/null
+++ b/test/MC/SystemZ/insn-sgf-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: sgf	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x19]
+#CHECK: sgf	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x19]
+#CHECK: sgf	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x19]
+#CHECK: sgf	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x19]
+#CHECK: sgf	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x19]
+#CHECK: sgf	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x19]
+#CHECK: sgf	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x19]
+#CHECK: sgf	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x19]
+#CHECK: sgf	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x19]
+#CHECK: sgf	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x19]
+
+	sgf	%r0, -524288
+	sgf	%r0, -1
+	sgf	%r0, 0
+	sgf	%r0, 1
+	sgf	%r0, 524287
+	sgf	%r0, 0(%r1)
+	sgf	%r0, 0(%r15)
+	sgf	%r0, 524287(%r1,%r15)
+	sgf	%r0, 524287(%r15,%r1)
+	sgf	%r15, 0
diff --git a/test/MC/SystemZ/insn-sgf-02.s b/test/MC/SystemZ/insn-sgf-02.s
new file mode 100644
index 0000000..7eba3ab
--- /dev/null
+++ b/test/MC/SystemZ/insn-sgf-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: sgf	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: sgf	%r0, 524288
+
+	sgf	%r0, -524289
+	sgf	%r0, 524288
diff --git a/test/MC/SystemZ/insn-sgfr-01.s b/test/MC/SystemZ/insn-sgfr-01.s
new file mode 100644
index 0000000..49a1412
--- /dev/null
+++ b/test/MC/SystemZ/insn-sgfr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: sgfr	%r0, %r0                # encoding: [0xb9,0x19,0x00,0x00]
+#CHECK: sgfr	%r0, %r15               # encoding: [0xb9,0x19,0x00,0x0f]
+#CHECK: sgfr	%r15, %r0               # encoding: [0xb9,0x19,0x00,0xf0]
+#CHECK: sgfr	%r7, %r8                # encoding: [0xb9,0x19,0x00,0x78]
+
+	sgfr	%r0,%r0
+	sgfr	%r0,%r15
+	sgfr	%r15,%r0
+	sgfr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-sgr-01.s b/test/MC/SystemZ/insn-sgr-01.s
new file mode 100644
index 0000000..86c59a1
--- /dev/null
+++ b/test/MC/SystemZ/insn-sgr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: sgr	%r0, %r0                # encoding: [0xb9,0x09,0x00,0x00]
+#CHECK: sgr	%r0, %r15               # encoding: [0xb9,0x09,0x00,0x0f]
+#CHECK: sgr	%r15, %r0               # encoding: [0xb9,0x09,0x00,0xf0]
+#CHECK: sgr	%r7, %r8                # encoding: [0xb9,0x09,0x00,0x78]
+
+	sgr	%r0,%r0
+	sgr	%r0,%r15
+	sgr	%r15,%r0
+	sgr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-sl-01.s b/test/MC/SystemZ/insn-sl-01.s
new file mode 100644
index 0000000..c2186da
--- /dev/null
+++ b/test/MC/SystemZ/insn-sl-01.s
@@ -0,0 +1,17 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: sl	%r0, 0                  # encoding: [0x5f,0x00,0x00,0x00]
+#CHECK: sl	%r0, 4095               # encoding: [0x5f,0x00,0x0f,0xff]
+#CHECK: sl	%r0, 0(%r1)             # encoding: [0x5f,0x00,0x10,0x00]
+#CHECK: sl	%r0, 0(%r15)            # encoding: [0x5f,0x00,0xf0,0x00]
+#CHECK: sl	%r0, 4095(%r1,%r15)     # encoding: [0x5f,0x01,0xff,0xff]
+#CHECK: sl	%r0, 4095(%r15,%r1)     # encoding: [0x5f,0x0f,0x1f,0xff]
+#CHECK: sl	%r15, 0                 # encoding: [0x5f,0xf0,0x00,0x00]
+
+	sl	%r0, 0
+	sl	%r0, 4095
+	sl	%r0, 0(%r1)
+	sl	%r0, 0(%r15)
+	sl	%r0, 4095(%r1,%r15)
+	sl	%r0, 4095(%r15,%r1)
+	sl	%r15, 0
diff --git a/test/MC/SystemZ/insn-sl-02.s b/test/MC/SystemZ/insn-sl-02.s
new file mode 100644
index 0000000..8abd99d
--- /dev/null
+++ b/test/MC/SystemZ/insn-sl-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: sl	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: sl	%r0, 4096
+
+	sl	%r0, -1
+	sl	%r0, 4096
diff --git a/test/MC/SystemZ/insn-slb-01.s b/test/MC/SystemZ/insn-slb-01.s
new file mode 100644
index 0000000..4bc79f6
--- /dev/null
+++ b/test/MC/SystemZ/insn-slb-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: slb	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x99]
+#CHECK: slb	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x99]
+#CHECK: slb	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x99]
+#CHECK: slb	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x99]
+#CHECK: slb	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x99]
+#CHECK: slb	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x99]
+#CHECK: slb	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x99]
+#CHECK: slb	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x99]
+#CHECK: slb	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x99]
+#CHECK: slb	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x99]
+
+	slb	%r0, -524288
+	slb	%r0, -1
+	slb	%r0, 0
+	slb	%r0, 1
+	slb	%r0, 524287
+	slb	%r0, 0(%r1)
+	slb	%r0, 0(%r15)
+	slb	%r0, 524287(%r1,%r15)
+	slb	%r0, 524287(%r15,%r1)
+	slb	%r15, 0
diff --git a/test/MC/SystemZ/insn-slb-02.s b/test/MC/SystemZ/insn-slb-02.s
new file mode 100644
index 0000000..ac87128
--- /dev/null
+++ b/test/MC/SystemZ/insn-slb-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: slb	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: slb	%r0, 524288
+
+	slb	%r0, -524289
+	slb	%r0, 524288
diff --git a/test/MC/SystemZ/insn-slbg-01.s b/test/MC/SystemZ/insn-slbg-01.s
new file mode 100644
index 0000000..8878aed
--- /dev/null
+++ b/test/MC/SystemZ/insn-slbg-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: slbg	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x89]
+#CHECK: slbg	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x89]
+#CHECK: slbg	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x89]
+#CHECK: slbg	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x89]
+#CHECK: slbg	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x89]
+#CHECK: slbg	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x89]
+#CHECK: slbg	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x89]
+#CHECK: slbg	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x89]
+#CHECK: slbg	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x89]
+#CHECK: slbg	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x89]
+
+	slbg	%r0, -524288
+	slbg	%r0, -1
+	slbg	%r0, 0
+	slbg	%r0, 1
+	slbg	%r0, 524287
+	slbg	%r0, 0(%r1)
+	slbg	%r0, 0(%r15)
+	slbg	%r0, 524287(%r1,%r15)
+	slbg	%r0, 524287(%r15,%r1)
+	slbg	%r15, 0
diff --git a/test/MC/SystemZ/insn-slbg-02.s b/test/MC/SystemZ/insn-slbg-02.s
new file mode 100644
index 0000000..ce09c8a
--- /dev/null
+++ b/test/MC/SystemZ/insn-slbg-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: slbg	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: slbg	%r0, 524288
+
+	slbg	%r0, -524289
+	slbg	%r0, 524288
diff --git a/test/MC/SystemZ/insn-slbgr-01.s b/test/MC/SystemZ/insn-slbgr-01.s
new file mode 100644
index 0000000..bcc2b53
--- /dev/null
+++ b/test/MC/SystemZ/insn-slbgr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: slbgr	%r0, %r0                # encoding: [0xb9,0x89,0x00,0x00]
+#CHECK: slbgr	%r0, %r15               # encoding: [0xb9,0x89,0x00,0x0f]
+#CHECK: slbgr	%r15, %r0               # encoding: [0xb9,0x89,0x00,0xf0]
+#CHECK: slbgr	%r7, %r8                # encoding: [0xb9,0x89,0x00,0x78]
+
+	slbgr	%r0,%r0
+	slbgr	%r0,%r15
+	slbgr	%r15,%r0
+	slbgr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-slbr-01.s b/test/MC/SystemZ/insn-slbr-01.s
new file mode 100644
index 0000000..9c10dbd
--- /dev/null
+++ b/test/MC/SystemZ/insn-slbr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: slbr	%r0, %r0                # encoding: [0xb9,0x99,0x00,0x00]
+#CHECK: slbr	%r0, %r15               # encoding: [0xb9,0x99,0x00,0x0f]
+#CHECK: slbr	%r15, %r0               # encoding: [0xb9,0x99,0x00,0xf0]
+#CHECK: slbr	%r7, %r8                # encoding: [0xb9,0x99,0x00,0x78]
+
+	slbr	%r0,%r0
+	slbr	%r0,%r15
+	slbr	%r15,%r0
+	slbr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-slfi-01.s b/test/MC/SystemZ/insn-slfi-01.s
new file mode 100644
index 0000000..4c8e5b4
--- /dev/null
+++ b/test/MC/SystemZ/insn-slfi-01.s
@@ -0,0 +1,9 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: slfi	%r0, 0                  # encoding: [0xc2,0x05,0x00,0x00,0x00,0x00]
+#CHECK: slfi	%r0, 4294967295         # encoding: [0xc2,0x05,0xff,0xff,0xff,0xff]
+#CHECK: slfi	%r15, 0                 # encoding: [0xc2,0xf5,0x00,0x00,0x00,0x00]
+
+	slfi	%r0, 0
+	slfi	%r0, (1 << 32) - 1
+	slfi	%r15, 0
diff --git a/test/MC/SystemZ/insn-slfi-02.s b/test/MC/SystemZ/insn-slfi-02.s
new file mode 100644
index 0000000..12e14f6
--- /dev/null
+++ b/test/MC/SystemZ/insn-slfi-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: slfi	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: slfi	%r0, (1 << 32)
+
+	slfi	%r0, -1
+	slfi	%r0, (1 << 32)
diff --git a/test/MC/SystemZ/insn-slg-01.s b/test/MC/SystemZ/insn-slg-01.s
new file mode 100644
index 0000000..0b4f99e
--- /dev/null
+++ b/test/MC/SystemZ/insn-slg-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: slg	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x0b]
+#CHECK: slg	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x0b]
+#CHECK: slg	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x0b]
+#CHECK: slg	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x0b]
+#CHECK: slg	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x0b]
+#CHECK: slg	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x0b]
+#CHECK: slg	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x0b]
+#CHECK: slg	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x0b]
+#CHECK: slg	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x0b]
+#CHECK: slg	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x0b]
+
+	slg	%r0, -524288
+	slg	%r0, -1
+	slg	%r0, 0
+	slg	%r0, 1
+	slg	%r0, 524287
+	slg	%r0, 0(%r1)
+	slg	%r0, 0(%r15)
+	slg	%r0, 524287(%r1,%r15)
+	slg	%r0, 524287(%r15,%r1)
+	slg	%r15, 0
diff --git a/test/MC/SystemZ/insn-slg-02.s b/test/MC/SystemZ/insn-slg-02.s
new file mode 100644
index 0000000..1ad04c6
--- /dev/null
+++ b/test/MC/SystemZ/insn-slg-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: slg	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: slg	%r0, 524288
+
+	slg	%r0, -524289
+	slg	%r0, 524288
diff --git a/test/MC/SystemZ/insn-slgf-01.s b/test/MC/SystemZ/insn-slgf-01.s
new file mode 100644
index 0000000..bca480e
--- /dev/null
+++ b/test/MC/SystemZ/insn-slgf-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: slgf	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x1b]
+#CHECK: slgf	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x1b]
+#CHECK: slgf	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x1b]
+#CHECK: slgf	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x1b]
+#CHECK: slgf	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x1b]
+#CHECK: slgf	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x1b]
+#CHECK: slgf	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x1b]
+#CHECK: slgf	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x1b]
+#CHECK: slgf	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x1b]
+#CHECK: slgf	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x1b]
+
+	slgf	%r0, -524288
+	slgf	%r0, -1
+	slgf	%r0, 0
+	slgf	%r0, 1
+	slgf	%r0, 524287
+	slgf	%r0, 0(%r1)
+	slgf	%r0, 0(%r15)
+	slgf	%r0, 524287(%r1,%r15)
+	slgf	%r0, 524287(%r15,%r1)
+	slgf	%r15, 0
diff --git a/test/MC/SystemZ/insn-slgf-02.s b/test/MC/SystemZ/insn-slgf-02.s
new file mode 100644
index 0000000..71a9aa7
--- /dev/null
+++ b/test/MC/SystemZ/insn-slgf-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: slgf	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: slgf	%r0, 524288
+
+	slgf	%r0, -524289
+	slgf	%r0, 524288
diff --git a/test/MC/SystemZ/insn-slgfi-01.s b/test/MC/SystemZ/insn-slgfi-01.s
new file mode 100644
index 0000000..c9fef18
--- /dev/null
+++ b/test/MC/SystemZ/insn-slgfi-01.s
@@ -0,0 +1,9 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: slgfi	%r0, 0                  # encoding: [0xc2,0x04,0x00,0x00,0x00,0x00]
+#CHECK: slgfi	%r0, 4294967295         # encoding: [0xc2,0x04,0xff,0xff,0xff,0xff]
+#CHECK: slgfi	%r15, 0                 # encoding: [0xc2,0xf4,0x00,0x00,0x00,0x00]
+
+	slgfi	%r0, 0
+	slgfi	%r0, (1 << 32) - 1
+	slgfi	%r15, 0
diff --git a/test/MC/SystemZ/insn-slgfi-02.s b/test/MC/SystemZ/insn-slgfi-02.s
new file mode 100644
index 0000000..696408d
--- /dev/null
+++ b/test/MC/SystemZ/insn-slgfi-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: slgfi	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: slgfi	%r0, (1 << 32)
+
+	slgfi	%r0, -1
+	slgfi	%r0, (1 << 32)
diff --git a/test/MC/SystemZ/insn-slgfr-01.s b/test/MC/SystemZ/insn-slgfr-01.s
new file mode 100644
index 0000000..94c10ed
--- /dev/null
+++ b/test/MC/SystemZ/insn-slgfr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: slgfr	%r0, %r0                # encoding: [0xb9,0x1b,0x00,0x00]
+#CHECK: slgfr	%r0, %r15               # encoding: [0xb9,0x1b,0x00,0x0f]
+#CHECK: slgfr	%r15, %r0               # encoding: [0xb9,0x1b,0x00,0xf0]
+#CHECK: slgfr	%r7, %r8                # encoding: [0xb9,0x1b,0x00,0x78]
+
+	slgfr	%r0,%r0
+	slgfr	%r0,%r15
+	slgfr	%r15,%r0
+	slgfr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-slgr-01.s b/test/MC/SystemZ/insn-slgr-01.s
new file mode 100644
index 0000000..4d226be
--- /dev/null
+++ b/test/MC/SystemZ/insn-slgr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: slgr	%r0, %r0                # encoding: [0xb9,0x0b,0x00,0x00]
+#CHECK: slgr	%r0, %r15               # encoding: [0xb9,0x0b,0x00,0x0f]
+#CHECK: slgr	%r15, %r0               # encoding: [0xb9,0x0b,0x00,0xf0]
+#CHECK: slgr	%r7, %r8                # encoding: [0xb9,0x0b,0x00,0x78]
+
+	slgr	%r0,%r0
+	slgr	%r0,%r15
+	slgr	%r15,%r0
+	slgr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-sll-01.s b/test/MC/SystemZ/insn-sll-01.s
new file mode 100644
index 0000000..5bc1128
--- /dev/null
+++ b/test/MC/SystemZ/insn-sll-01.s
@@ -0,0 +1,19 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: sll	%r0, 0                  # encoding: [0x89,0x00,0x00,0x00]
+#CHECK: sll	%r7, 0                  # encoding: [0x89,0x70,0x00,0x00]
+#CHECK: sll	%r15, 0                 # encoding: [0x89,0xf0,0x00,0x00]
+#CHECK: sll	%r0, 4095               # encoding: [0x89,0x00,0x0f,0xff]
+#CHECK: sll	%r0, 0(%r1)             # encoding: [0x89,0x00,0x10,0x00]
+#CHECK: sll	%r0, 0(%r15)            # encoding: [0x89,0x00,0xf0,0x00]
+#CHECK: sll	%r0, 4095(%r1)          # encoding: [0x89,0x00,0x1f,0xff]
+#CHECK: sll	%r0, 4095(%r15)         # encoding: [0x89,0x00,0xff,0xff]
+
+	sll	%r0,0
+	sll	%r7,0
+	sll	%r15,0
+	sll	%r0,4095
+	sll	%r0,0(%r1)
+	sll	%r0,0(%r15)
+	sll	%r0,4095(%r1)
+	sll	%r0,4095(%r15)
diff --git a/test/MC/SystemZ/insn-sll-02.s b/test/MC/SystemZ/insn-sll-02.s
new file mode 100644
index 0000000..1b951be
--- /dev/null
+++ b/test/MC/SystemZ/insn-sll-02.s
@@ -0,0 +1,16 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: sll	%r0,-1
+#CHECK: error: invalid operand
+#CHECK: sll	%r0,4096
+#CHECK: error: %r0 used in an address
+#CHECK: sll	%r0,0(%r0)
+#CHECK: error: invalid use of indexed addressing
+#CHECK: sll	%r0,0(%r1,%r2)
+
+	sll	%r0,-1
+	sll	%r0,4096
+	sll	%r0,0(%r0)
+	sll	%r0,0(%r1,%r2)
diff --git a/test/MC/SystemZ/insn-sllg-01.s b/test/MC/SystemZ/insn-sllg-01.s
new file mode 100644
index 0000000..1b0f009
--- /dev/null
+++ b/test/MC/SystemZ/insn-sllg-01.s
@@ -0,0 +1,27 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: sllg	%r0, %r0, 0             # encoding: [0xeb,0x00,0x00,0x00,0x00,0x0d]
+#CHECK: sllg	%r15, %r1, 0            # encoding: [0xeb,0xf1,0x00,0x00,0x00,0x0d]
+#CHECK: sllg	%r1, %r15, 0            # encoding: [0xeb,0x1f,0x00,0x00,0x00,0x0d]
+#CHECK: sllg	%r15, %r15, 0           # encoding: [0xeb,0xff,0x00,0x00,0x00,0x0d]
+#CHECK: sllg	%r0, %r0, -524288       # encoding: [0xeb,0x00,0x00,0x00,0x80,0x0d]
+#CHECK: sllg	%r0, %r0, -1            # encoding: [0xeb,0x00,0x0f,0xff,0xff,0x0d]
+#CHECK: sllg	%r0, %r0, 1             # encoding: [0xeb,0x00,0x00,0x01,0x00,0x0d]
+#CHECK: sllg	%r0, %r0, 524287        # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0x0d]
+#CHECK: sllg	%r0, %r0, 0(%r1)        # encoding: [0xeb,0x00,0x10,0x00,0x00,0x0d]
+#CHECK: sllg	%r0, %r0, 0(%r15)       # encoding: [0xeb,0x00,0xf0,0x00,0x00,0x0d]
+#CHECK: sllg	%r0, %r0, 524287(%r1)   # encoding: [0xeb,0x00,0x1f,0xff,0x7f,0x0d]
+#CHECK: sllg	%r0, %r0, 524287(%r15)  # encoding: [0xeb,0x00,0xff,0xff,0x7f,0x0d]
+
+	sllg	%r0,%r0,0
+	sllg	%r15,%r1,0
+	sllg	%r1,%r15,0
+	sllg	%r15,%r15,0
+	sllg	%r0,%r0,-524288
+	sllg	%r0,%r0,-1
+	sllg	%r0,%r0,1
+	sllg	%r0,%r0,524287
+	sllg	%r0,%r0,0(%r1)
+	sllg	%r0,%r0,0(%r15)
+	sllg	%r0,%r0,524287(%r1)
+	sllg	%r0,%r0,524287(%r15)
diff --git a/test/MC/SystemZ/insn-sllg-02.s b/test/MC/SystemZ/insn-sllg-02.s
new file mode 100644
index 0000000..68c3d1d
--- /dev/null
+++ b/test/MC/SystemZ/insn-sllg-02.s
@@ -0,0 +1,16 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: sllg	%r0,%r0,-524289
+#CHECK: error: invalid operand
+#CHECK: sllg	%r0,%r0,524288
+#CHECK: error: %r0 used in an address
+#CHECK: sllg	%r0,%r0,0(%r0)
+#CHECK: error: invalid use of indexed addressing
+#CHECK: sllg	%r0,%r0,0(%r1,%r2)
+
+	sllg	%r0,%r0,-524289
+	sllg	%r0,%r0,524288
+	sllg	%r0,%r0,0(%r0)
+	sllg	%r0,%r0,0(%r1,%r2)
diff --git a/test/MC/SystemZ/insn-slr-01.s b/test/MC/SystemZ/insn-slr-01.s
new file mode 100644
index 0000000..c142407
--- /dev/null
+++ b/test/MC/SystemZ/insn-slr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: slr	%r0, %r0                # encoding: [0x1f,0x00]
+#CHECK: slr	%r0, %r15               # encoding: [0x1f,0x0f]
+#CHECK: slr	%r15, %r0               # encoding: [0x1f,0xf0]
+#CHECK: slr	%r7, %r8                # encoding: [0x1f,0x78]
+
+	slr	%r0,%r0
+	slr	%r0,%r15
+	slr	%r15,%r0
+	slr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-sly-01.s b/test/MC/SystemZ/insn-sly-01.s
new file mode 100644
index 0000000..59d2907
--- /dev/null
+++ b/test/MC/SystemZ/insn-sly-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: sly	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x5f]
+#CHECK: sly	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x5f]
+#CHECK: sly	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x5f]
+#CHECK: sly	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x5f]
+#CHECK: sly	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x5f]
+#CHECK: sly	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x5f]
+#CHECK: sly	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x5f]
+#CHECK: sly	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x5f]
+#CHECK: sly	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x5f]
+#CHECK: sly	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x5f]
+
+	sly	%r0, -524288
+	sly	%r0, -1
+	sly	%r0, 0
+	sly	%r0, 1
+	sly	%r0, 524287
+	sly	%r0, 0(%r1)
+	sly	%r0, 0(%r15)
+	sly	%r0, 524287(%r1,%r15)
+	sly	%r0, 524287(%r15,%r1)
+	sly	%r15, 0
diff --git a/test/MC/SystemZ/insn-sly-02.s b/test/MC/SystemZ/insn-sly-02.s
new file mode 100644
index 0000000..9abd53e
--- /dev/null
+++ b/test/MC/SystemZ/insn-sly-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: sly	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: sly	%r0, 524288
+
+	sly	%r0, -524289
+	sly	%r0, 524288
diff --git a/test/MC/SystemZ/insn-sqdb-01.s b/test/MC/SystemZ/insn-sqdb-01.s
new file mode 100644
index 0000000..b79aae1
--- /dev/null
+++ b/test/MC/SystemZ/insn-sqdb-01.s
@@ -0,0 +1,17 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: sqdb	%f0, 0                  # encoding: [0xed,0x00,0x00,0x00,0x00,0x15]
+#CHECK: sqdb	%f0, 4095               # encoding: [0xed,0x00,0x0f,0xff,0x00,0x15]
+#CHECK: sqdb	%f0, 0(%r1)             # encoding: [0xed,0x00,0x10,0x00,0x00,0x15]
+#CHECK: sqdb	%f0, 0(%r15)            # encoding: [0xed,0x00,0xf0,0x00,0x00,0x15]
+#CHECK: sqdb	%f0, 4095(%r1,%r15)     # encoding: [0xed,0x01,0xff,0xff,0x00,0x15]
+#CHECK: sqdb	%f0, 4095(%r15,%r1)     # encoding: [0xed,0x0f,0x1f,0xff,0x00,0x15]
+#CHECK: sqdb	%f15, 0                 # encoding: [0xed,0xf0,0x00,0x00,0x00,0x15]
+
+	sqdb	%f0, 0
+	sqdb	%f0, 4095
+	sqdb	%f0, 0(%r1)
+	sqdb	%f0, 0(%r15)
+	sqdb	%f0, 4095(%r1,%r15)
+	sqdb	%f0, 4095(%r15,%r1)
+	sqdb	%f15, 0
diff --git a/test/MC/SystemZ/insn-sqdb-02.s b/test/MC/SystemZ/insn-sqdb-02.s
new file mode 100644
index 0000000..68df267
--- /dev/null
+++ b/test/MC/SystemZ/insn-sqdb-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: sqdb	%f0, -1
+#CHECK: error: invalid operand
+#CHECK: sqdb	%f0, 4096
+
+	sqdb	%f0, -1
+	sqdb	%f0, 4096
diff --git a/test/MC/SystemZ/insn-sqdbr-01.s b/test/MC/SystemZ/insn-sqdbr-01.s
new file mode 100644
index 0000000..d66415f
--- /dev/null
+++ b/test/MC/SystemZ/insn-sqdbr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: sqdbr	%f0, %f0                # encoding: [0xb3,0x15,0x00,0x00]
+#CHECK: sqdbr	%f0, %f15               # encoding: [0xb3,0x15,0x00,0x0f]
+#CHECK: sqdbr	%f7, %f8                # encoding: [0xb3,0x15,0x00,0x78]
+#CHECK: sqdbr	%f15, %f0               # encoding: [0xb3,0x15,0x00,0xf0]
+
+	sqdbr	%f0, %f0
+	sqdbr	%f0, %f15
+	sqdbr	%f7, %f8
+	sqdbr	%f15, %f0
diff --git a/test/MC/SystemZ/insn-sqeb-01.s b/test/MC/SystemZ/insn-sqeb-01.s
new file mode 100644
index 0000000..60f6e90
--- /dev/null
+++ b/test/MC/SystemZ/insn-sqeb-01.s
@@ -0,0 +1,17 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: sqeb	%f0, 0                  # encoding: [0xed,0x00,0x00,0x00,0x00,0x14]
+#CHECK: sqeb	%f0, 4095               # encoding: [0xed,0x00,0x0f,0xff,0x00,0x14]
+#CHECK: sqeb	%f0, 0(%r1)             # encoding: [0xed,0x00,0x10,0x00,0x00,0x14]
+#CHECK: sqeb	%f0, 0(%r15)            # encoding: [0xed,0x00,0xf0,0x00,0x00,0x14]
+#CHECK: sqeb	%f0, 4095(%r1,%r15)     # encoding: [0xed,0x01,0xff,0xff,0x00,0x14]
+#CHECK: sqeb	%f0, 4095(%r15,%r1)     # encoding: [0xed,0x0f,0x1f,0xff,0x00,0x14]
+#CHECK: sqeb	%f15, 0                 # encoding: [0xed,0xf0,0x00,0x00,0x00,0x14]
+
+	sqeb	%f0, 0
+	sqeb	%f0, 4095
+	sqeb	%f0, 0(%r1)
+	sqeb	%f0, 0(%r15)
+	sqeb	%f0, 4095(%r1,%r15)
+	sqeb	%f0, 4095(%r15,%r1)
+	sqeb	%f15, 0
diff --git a/test/MC/SystemZ/insn-sqeb-02.s b/test/MC/SystemZ/insn-sqeb-02.s
new file mode 100644
index 0000000..efb09fc
--- /dev/null
+++ b/test/MC/SystemZ/insn-sqeb-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: sqeb	%f0, -1
+#CHECK: error: invalid operand
+#CHECK: sqeb	%f0, 4096
+
+	sqeb	%f0, -1
+	sqeb	%f0, 4096
diff --git a/test/MC/SystemZ/insn-sqebr-01.s b/test/MC/SystemZ/insn-sqebr-01.s
new file mode 100644
index 0000000..2d13dbe
--- /dev/null
+++ b/test/MC/SystemZ/insn-sqebr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: sqebr	%f0, %f0                # encoding: [0xb3,0x14,0x00,0x00]
+#CHECK: sqebr	%f0, %f15               # encoding: [0xb3,0x14,0x00,0x0f]
+#CHECK: sqebr	%f7, %f8                # encoding: [0xb3,0x14,0x00,0x78]
+#CHECK: sqebr	%f15, %f0               # encoding: [0xb3,0x14,0x00,0xf0]
+
+	sqebr	%f0, %f0
+	sqebr	%f0, %f15
+	sqebr	%f7, %f8
+	sqebr	%f15, %f0
diff --git a/test/MC/SystemZ/insn-sqxbr-01.s b/test/MC/SystemZ/insn-sqxbr-01.s
new file mode 100644
index 0000000..78ba908
--- /dev/null
+++ b/test/MC/SystemZ/insn-sqxbr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: sqxbr	%f0, %f0                # encoding: [0xb3,0x16,0x00,0x00]
+#CHECK: sqxbr	%f0, %f13               # encoding: [0xb3,0x16,0x00,0x0d]
+#CHECK: sqxbr	%f8, %f8                # encoding: [0xb3,0x16,0x00,0x88]
+#CHECK: sqxbr	%f13, %f0               # encoding: [0xb3,0x16,0x00,0xd0]
+
+	sqxbr	%f0, %f0
+	sqxbr	%f0, %f13
+	sqxbr	%f8, %f8
+	sqxbr	%f13, %f0
diff --git a/test/MC/SystemZ/insn-sqxbr-02.s b/test/MC/SystemZ/insn-sqxbr-02.s
new file mode 100644
index 0000000..e51e552
--- /dev/null
+++ b/test/MC/SystemZ/insn-sqxbr-02.s
@@ -0,0 +1,17 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid register
+#CHECK: sqxbr	%f0, %f2
+#CHECK: error: invalid register
+#CHECK: sqxbr	%f0, %f14
+#CHECK: error: invalid register
+#CHECK: sqxbr	%f2, %f0
+#CHECK: error: invalid register
+#CHECK: sqxbr	%f14, %f0
+
+	sqxbr	%f0, %f2
+	sqxbr	%f0, %f14
+	sqxbr	%f2, %f0
+	sqxbr	%f14, %f0
+
diff --git a/test/MC/SystemZ/insn-sr-01.s b/test/MC/SystemZ/insn-sr-01.s
new file mode 100644
index 0000000..856bef5
--- /dev/null
+++ b/test/MC/SystemZ/insn-sr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: sr	%r0, %r0                # encoding: [0x1b,0x00]
+#CHECK: sr	%r0, %r15               # encoding: [0x1b,0x0f]
+#CHECK: sr	%r15, %r0               # encoding: [0x1b,0xf0]
+#CHECK: sr	%r7, %r8                # encoding: [0x1b,0x78]
+
+	sr	%r0,%r0
+	sr	%r0,%r15
+	sr	%r15,%r0
+	sr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-sra-01.s b/test/MC/SystemZ/insn-sra-01.s
new file mode 100644
index 0000000..fcdaf5d
--- /dev/null
+++ b/test/MC/SystemZ/insn-sra-01.s
@@ -0,0 +1,19 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: sra	%r0, 0                  # encoding: [0x8a,0x00,0x00,0x00]
+#CHECK: sra	%r7, 0                  # encoding: [0x8a,0x70,0x00,0x00]
+#CHECK: sra	%r15, 0                 # encoding: [0x8a,0xf0,0x00,0x00]
+#CHECK: sra	%r0, 4095               # encoding: [0x8a,0x00,0x0f,0xff]
+#CHECK: sra	%r0, 0(%r1)             # encoding: [0x8a,0x00,0x10,0x00]
+#CHECK: sra	%r0, 0(%r15)            # encoding: [0x8a,0x00,0xf0,0x00]
+#CHECK: sra	%r0, 4095(%r1)          # encoding: [0x8a,0x00,0x1f,0xff]
+#CHECK: sra	%r0, 4095(%r15)         # encoding: [0x8a,0x00,0xff,0xff]
+
+	sra	%r0,0
+	sra	%r7,0
+	sra	%r15,0
+	sra	%r0,4095
+	sra	%r0,0(%r1)
+	sra	%r0,0(%r15)
+	sra	%r0,4095(%r1)
+	sra	%r0,4095(%r15)
diff --git a/test/MC/SystemZ/insn-sra-02.s b/test/MC/SystemZ/insn-sra-02.s
new file mode 100644
index 0000000..7a84f17
--- /dev/null
+++ b/test/MC/SystemZ/insn-sra-02.s
@@ -0,0 +1,16 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: sra	%r0,-1
+#CHECK: error: invalid operand
+#CHECK: sra	%r0,4096
+#CHECK: error: %r0 used in an address
+#CHECK: sra	%r0,0(%r0)
+#CHECK: error: invalid use of indexed addressing
+#CHECK: sra	%r0,0(%r1,%r2)
+
+	sra	%r0,-1
+	sra	%r0,4096
+	sra	%r0,0(%r0)
+	sra	%r0,0(%r1,%r2)
diff --git a/test/MC/SystemZ/insn-srag-01.s b/test/MC/SystemZ/insn-srag-01.s
new file mode 100644
index 0000000..9271db2
--- /dev/null
+++ b/test/MC/SystemZ/insn-srag-01.s
@@ -0,0 +1,27 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: srag	%r0, %r0, 0             # encoding: [0xeb,0x00,0x00,0x00,0x00,0x0a]
+#CHECK: srag	%r15, %r1, 0            # encoding: [0xeb,0xf1,0x00,0x00,0x00,0x0a]
+#CHECK: srag	%r1, %r15, 0            # encoding: [0xeb,0x1f,0x00,0x00,0x00,0x0a]
+#CHECK: srag	%r15, %r15, 0           # encoding: [0xeb,0xff,0x00,0x00,0x00,0x0a]
+#CHECK: srag	%r0, %r0, -524288       # encoding: [0xeb,0x00,0x00,0x00,0x80,0x0a]
+#CHECK: srag	%r0, %r0, -1            # encoding: [0xeb,0x00,0x0f,0xff,0xff,0x0a]
+#CHECK: srag	%r0, %r0, 1             # encoding: [0xeb,0x00,0x00,0x01,0x00,0x0a]
+#CHECK: srag	%r0, %r0, 524287        # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0x0a]
+#CHECK: srag	%r0, %r0, 0(%r1)        # encoding: [0xeb,0x00,0x10,0x00,0x00,0x0a]
+#CHECK: srag	%r0, %r0, 0(%r15)       # encoding: [0xeb,0x00,0xf0,0x00,0x00,0x0a]
+#CHECK: srag	%r0, %r0, 524287(%r1)   # encoding: [0xeb,0x00,0x1f,0xff,0x7f,0x0a]
+#CHECK: srag	%r0, %r0, 524287(%r15)  # encoding: [0xeb,0x00,0xff,0xff,0x7f,0x0a]
+
+	srag	%r0,%r0,0
+	srag	%r15,%r1,0
+	srag	%r1,%r15,0
+	srag	%r15,%r15,0
+	srag	%r0,%r0,-524288
+	srag	%r0,%r0,-1
+	srag	%r0,%r0,1
+	srag	%r0,%r0,524287
+	srag	%r0,%r0,0(%r1)
+	srag	%r0,%r0,0(%r15)
+	srag	%r0,%r0,524287(%r1)
+	srag	%r0,%r0,524287(%r15)
diff --git a/test/MC/SystemZ/insn-srag-02.s b/test/MC/SystemZ/insn-srag-02.s
new file mode 100644
index 0000000..7413cba
--- /dev/null
+++ b/test/MC/SystemZ/insn-srag-02.s
@@ -0,0 +1,16 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: srag	%r0,%r0,-524289
+#CHECK: error: invalid operand
+#CHECK: srag	%r0,%r0,524288
+#CHECK: error: %r0 used in an address
+#CHECK: srag	%r0,%r0,0(%r0)
+#CHECK: error: invalid use of indexed addressing
+#CHECK: srag	%r0,%r0,0(%r1,%r2)
+
+	srag	%r0,%r0,-524289
+	srag	%r0,%r0,524288
+	srag	%r0,%r0,0(%r0)
+	srag	%r0,%r0,0(%r1,%r2)
diff --git a/test/MC/SystemZ/insn-srl-01.s b/test/MC/SystemZ/insn-srl-01.s
new file mode 100644
index 0000000..a3a5df8
--- /dev/null
+++ b/test/MC/SystemZ/insn-srl-01.s
@@ -0,0 +1,19 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: srl	%r0, 0                  # encoding: [0x88,0x00,0x00,0x00]
+#CHECK: srl	%r7, 0                  # encoding: [0x88,0x70,0x00,0x00]
+#CHECK: srl	%r15, 0                 # encoding: [0x88,0xf0,0x00,0x00]
+#CHECK: srl	%r0, 4095               # encoding: [0x88,0x00,0x0f,0xff]
+#CHECK: srl	%r0, 0(%r1)             # encoding: [0x88,0x00,0x10,0x00]
+#CHECK: srl	%r0, 0(%r15)            # encoding: [0x88,0x00,0xf0,0x00]
+#CHECK: srl	%r0, 4095(%r1)          # encoding: [0x88,0x00,0x1f,0xff]
+#CHECK: srl	%r0, 4095(%r15)         # encoding: [0x88,0x00,0xff,0xff]
+
+	srl	%r0,0
+	srl	%r7,0
+	srl	%r15,0
+	srl	%r0,4095
+	srl	%r0,0(%r1)
+	srl	%r0,0(%r15)
+	srl	%r0,4095(%r1)
+	srl	%r0,4095(%r15)
diff --git a/test/MC/SystemZ/insn-srl-02.s b/test/MC/SystemZ/insn-srl-02.s
new file mode 100644
index 0000000..212d16b
--- /dev/null
+++ b/test/MC/SystemZ/insn-srl-02.s
@@ -0,0 +1,16 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: srl	%r0,-1
+#CHECK: error: invalid operand
+#CHECK: srl	%r0,4096
+#CHECK: error: %r0 used in an address
+#CHECK: srl	%r0,0(%r0)
+#CHECK: error: invalid use of indexed addressing
+#CHECK: srl	%r0,0(%r1,%r2)
+
+	srl	%r0,-1
+	srl	%r0,4096
+	srl	%r0,0(%r0)
+	srl	%r0,0(%r1,%r2)
diff --git a/test/MC/SystemZ/insn-srlg-01.s b/test/MC/SystemZ/insn-srlg-01.s
new file mode 100644
index 0000000..0087fef
--- /dev/null
+++ b/test/MC/SystemZ/insn-srlg-01.s
@@ -0,0 +1,27 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: srlg	%r0, %r0, 0             # encoding: [0xeb,0x00,0x00,0x00,0x00,0x0c]
+#CHECK: srlg	%r15, %r1, 0            # encoding: [0xeb,0xf1,0x00,0x00,0x00,0x0c]
+#CHECK: srlg	%r1, %r15, 0            # encoding: [0xeb,0x1f,0x00,0x00,0x00,0x0c]
+#CHECK: srlg	%r15, %r15, 0           # encoding: [0xeb,0xff,0x00,0x00,0x00,0x0c]
+#CHECK: srlg	%r0, %r0, -524288       # encoding: [0xeb,0x00,0x00,0x00,0x80,0x0c]
+#CHECK: srlg	%r0, %r0, -1            # encoding: [0xeb,0x00,0x0f,0xff,0xff,0x0c]
+#CHECK: srlg	%r0, %r0, 1             # encoding: [0xeb,0x00,0x00,0x01,0x00,0x0c]
+#CHECK: srlg	%r0, %r0, 524287        # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0x0c]
+#CHECK: srlg	%r0, %r0, 0(%r1)        # encoding: [0xeb,0x00,0x10,0x00,0x00,0x0c]
+#CHECK: srlg	%r0, %r0, 0(%r15)       # encoding: [0xeb,0x00,0xf0,0x00,0x00,0x0c]
+#CHECK: srlg	%r0, %r0, 524287(%r1)   # encoding: [0xeb,0x00,0x1f,0xff,0x7f,0x0c]
+#CHECK: srlg	%r0, %r0, 524287(%r15)  # encoding: [0xeb,0x00,0xff,0xff,0x7f,0x0c]
+
+	srlg	%r0,%r0,0
+	srlg	%r15,%r1,0
+	srlg	%r1,%r15,0
+	srlg	%r15,%r15,0
+	srlg	%r0,%r0,-524288
+	srlg	%r0,%r0,-1
+	srlg	%r0,%r0,1
+	srlg	%r0,%r0,524287
+	srlg	%r0,%r0,0(%r1)
+	srlg	%r0,%r0,0(%r15)
+	srlg	%r0,%r0,524287(%r1)
+	srlg	%r0,%r0,524287(%r15)
diff --git a/test/MC/SystemZ/insn-srlg-02.s b/test/MC/SystemZ/insn-srlg-02.s
new file mode 100644
index 0000000..1e24d0e
--- /dev/null
+++ b/test/MC/SystemZ/insn-srlg-02.s
@@ -0,0 +1,16 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: srlg	%r0,%r0,-524289
+#CHECK: error: invalid operand
+#CHECK: srlg	%r0,%r0,524288
+#CHECK: error: %r0 used in an address
+#CHECK: srlg	%r0,%r0,0(%r0)
+#CHECK: error: invalid use of indexed addressing
+#CHECK: srlg	%r0,%r0,0(%r1,%r2)
+
+	srlg	%r0,%r0,-524289
+	srlg	%r0,%r0,524288
+	srlg	%r0,%r0,0(%r0)
+	srlg	%r0,%r0,0(%r1,%r2)
diff --git a/test/MC/SystemZ/insn-st-01.s b/test/MC/SystemZ/insn-st-01.s
new file mode 100644
index 0000000..0b5fdb6
--- /dev/null
+++ b/test/MC/SystemZ/insn-st-01.s
@@ -0,0 +1,17 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: st	%r0, 0                  # encoding: [0x50,0x00,0x00,0x00]
+#CHECK: st	%r0, 4095               # encoding: [0x50,0x00,0x0f,0xff]
+#CHECK: st	%r0, 0(%r1)             # encoding: [0x50,0x00,0x10,0x00]
+#CHECK: st	%r0, 0(%r15)            # encoding: [0x50,0x00,0xf0,0x00]
+#CHECK: st	%r0, 4095(%r1,%r15)     # encoding: [0x50,0x01,0xff,0xff]
+#CHECK: st	%r0, 4095(%r15,%r1)     # encoding: [0x50,0x0f,0x1f,0xff]
+#CHECK: st	%r15, 0                 # encoding: [0x50,0xf0,0x00,0x00]
+
+	st	%r0, 0
+	st	%r0, 4095
+	st	%r0, 0(%r1)
+	st	%r0, 0(%r15)
+	st	%r0, 4095(%r1,%r15)
+	st	%r0, 4095(%r15,%r1)
+	st	%r15, 0
diff --git a/test/MC/SystemZ/insn-st-02.s b/test/MC/SystemZ/insn-st-02.s
new file mode 100644
index 0000000..63e547a
--- /dev/null
+++ b/test/MC/SystemZ/insn-st-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: st	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: st	%r0, 4096
+
+	st	%r0, -1
+	st	%r0, 4096
diff --git a/test/MC/SystemZ/insn-stc-01.s b/test/MC/SystemZ/insn-stc-01.s
new file mode 100644
index 0000000..563f891
--- /dev/null
+++ b/test/MC/SystemZ/insn-stc-01.s
@@ -0,0 +1,17 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: stc	%r0, 0                  # encoding: [0x42,0x00,0x00,0x00]
+#CHECK: stc	%r0, 4095               # encoding: [0x42,0x00,0x0f,0xff]
+#CHECK: stc	%r0, 0(%r1)             # encoding: [0x42,0x00,0x10,0x00]
+#CHECK: stc	%r0, 0(%r15)            # encoding: [0x42,0x00,0xf0,0x00]
+#CHECK: stc	%r0, 4095(%r1,%r15)     # encoding: [0x42,0x01,0xff,0xff]
+#CHECK: stc	%r0, 4095(%r15,%r1)     # encoding: [0x42,0x0f,0x1f,0xff]
+#CHECK: stc	%r15, 0                 # encoding: [0x42,0xf0,0x00,0x00]
+
+	stc	%r0, 0
+	stc	%r0, 4095
+	stc	%r0, 0(%r1)
+	stc	%r0, 0(%r15)
+	stc	%r0, 4095(%r1,%r15)
+	stc	%r0, 4095(%r15,%r1)
+	stc	%r15, 0
diff --git a/test/MC/SystemZ/insn-stc-02.s b/test/MC/SystemZ/insn-stc-02.s
new file mode 100644
index 0000000..aa7dcb2
--- /dev/null
+++ b/test/MC/SystemZ/insn-stc-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: stc	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: stc	%r0, 4096
+
+	stc	%r0, -1
+	stc	%r0, 4096
diff --git a/test/MC/SystemZ/insn-stcy-01.s b/test/MC/SystemZ/insn-stcy-01.s
new file mode 100644
index 0000000..acc7ac5
--- /dev/null
+++ b/test/MC/SystemZ/insn-stcy-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: stcy	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x72]
+#CHECK: stcy	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x72]
+#CHECK: stcy	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x72]
+#CHECK: stcy	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x72]
+#CHECK: stcy	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x72]
+#CHECK: stcy	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x72]
+#CHECK: stcy	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x72]
+#CHECK: stcy	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x72]
+#CHECK: stcy	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x72]
+#CHECK: stcy	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x72]
+
+	stcy	%r0, -524288
+	stcy	%r0, -1
+	stcy	%r0, 0
+	stcy	%r0, 1
+	stcy	%r0, 524287
+	stcy	%r0, 0(%r1)
+	stcy	%r0, 0(%r15)
+	stcy	%r0, 524287(%r1,%r15)
+	stcy	%r0, 524287(%r15,%r1)
+	stcy	%r15, 0
diff --git a/test/MC/SystemZ/insn-stcy-02.s b/test/MC/SystemZ/insn-stcy-02.s
new file mode 100644
index 0000000..cbd7f7a
--- /dev/null
+++ b/test/MC/SystemZ/insn-stcy-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: stcy	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: stcy	%r0, 524288
+
+	stcy	%r0, -524289
+	stcy	%r0, 524288
diff --git a/test/MC/SystemZ/insn-std-01.s b/test/MC/SystemZ/insn-std-01.s
new file mode 100644
index 0000000..6867df8
--- /dev/null
+++ b/test/MC/SystemZ/insn-std-01.s
@@ -0,0 +1,17 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: std	%f0, 0                  # encoding: [0x60,0x00,0x00,0x00]
+#CHECK: std	%f0, 4095               # encoding: [0x60,0x00,0x0f,0xff]
+#CHECK: std	%f0, 0(%r1)             # encoding: [0x60,0x00,0x10,0x00]
+#CHECK: std	%f0, 0(%r15)            # encoding: [0x60,0x00,0xf0,0x00]
+#CHECK: std	%f0, 4095(%r1,%r15)     # encoding: [0x60,0x01,0xff,0xff]
+#CHECK: std	%f0, 4095(%r15,%r1)     # encoding: [0x60,0x0f,0x1f,0xff]
+#CHECK: std	%f15, 0                 # encoding: [0x60,0xf0,0x00,0x00]
+
+	std	%f0, 0
+	std	%f0, 4095
+	std	%f0, 0(%r1)
+	std	%f0, 0(%r15)
+	std	%f0, 4095(%r1,%r15)
+	std	%f0, 4095(%r15,%r1)
+	std	%f15, 0
diff --git a/test/MC/SystemZ/insn-std-02.s b/test/MC/SystemZ/insn-std-02.s
new file mode 100644
index 0000000..62bb9eb
--- /dev/null
+++ b/test/MC/SystemZ/insn-std-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: std	%f0, -1
+#CHECK: error: invalid operand
+#CHECK: std	%f0, 4096
+
+	std	%f0, -1
+	std	%f0, 4096
diff --git a/test/MC/SystemZ/insn-stdy-01.s b/test/MC/SystemZ/insn-stdy-01.s
new file mode 100644
index 0000000..1ae9a7d
--- /dev/null
+++ b/test/MC/SystemZ/insn-stdy-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: stdy	%f0, -524288            # encoding: [0xed,0x00,0x00,0x00,0x80,0x67]
+#CHECK: stdy	%f0, -1                 # encoding: [0xed,0x00,0x0f,0xff,0xff,0x67]
+#CHECK: stdy	%f0, 0                  # encoding: [0xed,0x00,0x00,0x00,0x00,0x67]
+#CHECK: stdy	%f0, 1                  # encoding: [0xed,0x00,0x00,0x01,0x00,0x67]
+#CHECK: stdy	%f0, 524287             # encoding: [0xed,0x00,0x0f,0xff,0x7f,0x67]
+#CHECK: stdy	%f0, 0(%r1)             # encoding: [0xed,0x00,0x10,0x00,0x00,0x67]
+#CHECK: stdy	%f0, 0(%r15)            # encoding: [0xed,0x00,0xf0,0x00,0x00,0x67]
+#CHECK: stdy	%f0, 524287(%r1,%r15)   # encoding: [0xed,0x01,0xff,0xff,0x7f,0x67]
+#CHECK: stdy	%f0, 524287(%r15,%r1)   # encoding: [0xed,0x0f,0x1f,0xff,0x7f,0x67]
+#CHECK: stdy	%f15, 0                 # encoding: [0xed,0xf0,0x00,0x00,0x00,0x67]
+
+	stdy	%f0, -524288
+	stdy	%f0, -1
+	stdy	%f0, 0
+	stdy	%f0, 1
+	stdy	%f0, 524287
+	stdy	%f0, 0(%r1)
+	stdy	%f0, 0(%r15)
+	stdy	%f0, 524287(%r1,%r15)
+	stdy	%f0, 524287(%r15,%r1)
+	stdy	%f15, 0
diff --git a/test/MC/SystemZ/insn-stdy-02.s b/test/MC/SystemZ/insn-stdy-02.s
new file mode 100644
index 0000000..f9a09a5
--- /dev/null
+++ b/test/MC/SystemZ/insn-stdy-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: stdy	%f0, -524289
+#CHECK: error: invalid operand
+#CHECK: stdy	%f0, 524288
+
+	stdy	%f0, -524289
+	stdy	%f0, 524288
diff --git a/test/MC/SystemZ/insn-ste-01.s b/test/MC/SystemZ/insn-ste-01.s
new file mode 100644
index 0000000..8e245df
--- /dev/null
+++ b/test/MC/SystemZ/insn-ste-01.s
@@ -0,0 +1,17 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: ste	%f0, 0                  # encoding: [0x70,0x00,0x00,0x00]
+#CHECK: ste	%f0, 4095               # encoding: [0x70,0x00,0x0f,0xff]
+#CHECK: ste	%f0, 0(%r1)             # encoding: [0x70,0x00,0x10,0x00]
+#CHECK: ste	%f0, 0(%r15)            # encoding: [0x70,0x00,0xf0,0x00]
+#CHECK: ste	%f0, 4095(%r1,%r15)     # encoding: [0x70,0x01,0xff,0xff]
+#CHECK: ste	%f0, 4095(%r15,%r1)     # encoding: [0x70,0x0f,0x1f,0xff]
+#CHECK: ste	%f15, 0                 # encoding: [0x70,0xf0,0x00,0x00]
+
+	ste	%f0, 0
+	ste	%f0, 4095
+	ste	%f0, 0(%r1)
+	ste	%f0, 0(%r15)
+	ste	%f0, 4095(%r1,%r15)
+	ste	%f0, 4095(%r15,%r1)
+	ste	%f15, 0
diff --git a/test/MC/SystemZ/insn-ste-02.s b/test/MC/SystemZ/insn-ste-02.s
new file mode 100644
index 0000000..acc50ea
--- /dev/null
+++ b/test/MC/SystemZ/insn-ste-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: ste	%f0, -1
+#CHECK: error: invalid operand
+#CHECK: ste	%f0, 4096
+
+	ste	%f0, -1
+	ste	%f0, 4096
diff --git a/test/MC/SystemZ/insn-stey-01.s b/test/MC/SystemZ/insn-stey-01.s
new file mode 100644
index 0000000..1f82593
--- /dev/null
+++ b/test/MC/SystemZ/insn-stey-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: stey	%f0, -524288            # encoding: [0xed,0x00,0x00,0x00,0x80,0x66]
+#CHECK: stey	%f0, -1                 # encoding: [0xed,0x00,0x0f,0xff,0xff,0x66]
+#CHECK: stey	%f0, 0                  # encoding: [0xed,0x00,0x00,0x00,0x00,0x66]
+#CHECK: stey	%f0, 1                  # encoding: [0xed,0x00,0x00,0x01,0x00,0x66]
+#CHECK: stey	%f0, 524287             # encoding: [0xed,0x00,0x0f,0xff,0x7f,0x66]
+#CHECK: stey	%f0, 0(%r1)             # encoding: [0xed,0x00,0x10,0x00,0x00,0x66]
+#CHECK: stey	%f0, 0(%r15)            # encoding: [0xed,0x00,0xf0,0x00,0x00,0x66]
+#CHECK: stey	%f0, 524287(%r1,%r15)   # encoding: [0xed,0x01,0xff,0xff,0x7f,0x66]
+#CHECK: stey	%f0, 524287(%r15,%r1)   # encoding: [0xed,0x0f,0x1f,0xff,0x7f,0x66]
+#CHECK: stey	%f15, 0                 # encoding: [0xed,0xf0,0x00,0x00,0x00,0x66]
+
+	stey	%f0, -524288
+	stey	%f0, -1
+	stey	%f0, 0
+	stey	%f0, 1
+	stey	%f0, 524287
+	stey	%f0, 0(%r1)
+	stey	%f0, 0(%r15)
+	stey	%f0, 524287(%r1,%r15)
+	stey	%f0, 524287(%r15,%r1)
+	stey	%f15, 0
diff --git a/test/MC/SystemZ/insn-stey-02.s b/test/MC/SystemZ/insn-stey-02.s
new file mode 100644
index 0000000..203b016
--- /dev/null
+++ b/test/MC/SystemZ/insn-stey-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: stey	%f0, -524289
+#CHECK: error: invalid operand
+#CHECK: stey	%f0, 524288
+
+	stey	%f0, -524289
+	stey	%f0, 524288
diff --git a/test/MC/SystemZ/insn-stg-01.s b/test/MC/SystemZ/insn-stg-01.s
new file mode 100644
index 0000000..e8508d9
--- /dev/null
+++ b/test/MC/SystemZ/insn-stg-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: stg	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x24]
+#CHECK: stg	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x24]
+#CHECK: stg	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x24]
+#CHECK: stg	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x24]
+#CHECK: stg	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x24]
+#CHECK: stg	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x24]
+#CHECK: stg	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x24]
+#CHECK: stg	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x24]
+#CHECK: stg	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x24]
+#CHECK: stg	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x24]
+
+	stg	%r0, -524288
+	stg	%r0, -1
+	stg	%r0, 0
+	stg	%r0, 1
+	stg	%r0, 524287
+	stg	%r0, 0(%r1)
+	stg	%r0, 0(%r15)
+	stg	%r0, 524287(%r1,%r15)
+	stg	%r0, 524287(%r15,%r1)
+	stg	%r15, 0
diff --git a/test/MC/SystemZ/insn-stg-02.s b/test/MC/SystemZ/insn-stg-02.s
new file mode 100644
index 0000000..1214ad1
--- /dev/null
+++ b/test/MC/SystemZ/insn-stg-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: stg	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: stg	%r0, 524288
+
+	stg	%r0, -524289
+	stg	%r0, 524288
diff --git a/test/MC/SystemZ/insn-stgrl-01.s b/test/MC/SystemZ/insn-stgrl-01.s
new file mode 100644
index 0000000..729b01d
--- /dev/null
+++ b/test/MC/SystemZ/insn-stgrl-01.s
@@ -0,0 +1,31 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: stgrl	%r0, 2864434397         # encoding: [0xc4,0x0b,0x55,0x5d,0xe6,0x6e]
+#CHECK: stgrl	%r15, 2864434397        # encoding: [0xc4,0xfb,0x55,0x5d,0xe6,0x6e]
+
+	stgrl	%r0,0xaabbccdd
+	stgrl	%r15,0xaabbccdd
+
+#CHECK: stgrl	%r0, foo                # encoding: [0xc4,0x0b,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: stgrl	%r15, foo               # encoding: [0xc4,0xfb,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+
+	stgrl	%r0,foo
+	stgrl	%r15,foo
+
+#CHECK: stgrl	%r3, bar+100            # encoding: [0xc4,0x3b,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+#CHECK: stgrl	%r4, bar+100            # encoding: [0xc4,0x4b,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+
+	stgrl	%r3,bar+100
+	stgrl	%r4,bar+100
+
+#CHECK: stgrl	%r7, frob@PLT           # encoding: [0xc4,0x7b,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+#CHECK: stgrl	%r8, frob@PLT           # encoding: [0xc4,0x8b,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+
+	stgrl	%r7,frob@PLT
+	stgrl	%r8,frob@PLT
diff --git a/test/MC/SystemZ/insn-sth-01.s b/test/MC/SystemZ/insn-sth-01.s
new file mode 100644
index 0000000..0dabe34
--- /dev/null
+++ b/test/MC/SystemZ/insn-sth-01.s
@@ -0,0 +1,17 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: sth	%r0, 0                  # encoding: [0x40,0x00,0x00,0x00]
+#CHECK: sth	%r0, 4095               # encoding: [0x40,0x00,0x0f,0xff]
+#CHECK: sth	%r0, 0(%r1)             # encoding: [0x40,0x00,0x10,0x00]
+#CHECK: sth	%r0, 0(%r15)            # encoding: [0x40,0x00,0xf0,0x00]
+#CHECK: sth	%r0, 4095(%r1,%r15)     # encoding: [0x40,0x01,0xff,0xff]
+#CHECK: sth	%r0, 4095(%r15,%r1)     # encoding: [0x40,0x0f,0x1f,0xff]
+#CHECK: sth	%r15, 0                 # encoding: [0x40,0xf0,0x00,0x00]
+
+	sth	%r0, 0
+	sth	%r0, 4095
+	sth	%r0, 0(%r1)
+	sth	%r0, 0(%r15)
+	sth	%r0, 4095(%r1,%r15)
+	sth	%r0, 4095(%r15,%r1)
+	sth	%r15, 0
diff --git a/test/MC/SystemZ/insn-sth-02.s b/test/MC/SystemZ/insn-sth-02.s
new file mode 100644
index 0000000..e73c289
--- /dev/null
+++ b/test/MC/SystemZ/insn-sth-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: sth	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: sth	%r0, 4096
+
+	sth	%r0, -1
+	sth	%r0, 4096
diff --git a/test/MC/SystemZ/insn-sthrl-01.s b/test/MC/SystemZ/insn-sthrl-01.s
new file mode 100644
index 0000000..0bcdbd4
--- /dev/null
+++ b/test/MC/SystemZ/insn-sthrl-01.s
@@ -0,0 +1,31 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: sthrl	%r0, 2864434397         # encoding: [0xc4,0x07,0x55,0x5d,0xe6,0x6e]
+#CHECK: sthrl	%r15, 2864434397        # encoding: [0xc4,0xf7,0x55,0x5d,0xe6,0x6e]
+
+	sthrl	%r0,0xaabbccdd
+	sthrl	%r15,0xaabbccdd
+
+#CHECK: sthrl	%r0, foo                # encoding: [0xc4,0x07,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: sthrl	%r15, foo               # encoding: [0xc4,0xf7,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+
+	sthrl	%r0,foo
+	sthrl	%r15,foo
+
+#CHECK: sthrl	%r3, bar+100            # encoding: [0xc4,0x37,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+#CHECK: sthrl	%r4, bar+100            # encoding: [0xc4,0x47,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+
+	sthrl	%r3,bar+100
+	sthrl	%r4,bar+100
+
+#CHECK: sthrl	%r7, frob@PLT           # encoding: [0xc4,0x77,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+#CHECK: sthrl	%r8, frob@PLT           # encoding: [0xc4,0x87,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+
+	sthrl	%r7,frob@PLT
+	sthrl	%r8,frob@PLT
diff --git a/test/MC/SystemZ/insn-sthy-01.s b/test/MC/SystemZ/insn-sthy-01.s
new file mode 100644
index 0000000..259c5e1c
--- /dev/null
+++ b/test/MC/SystemZ/insn-sthy-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: sthy	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x70]
+#CHECK: sthy	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x70]
+#CHECK: sthy	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x70]
+#CHECK: sthy	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x70]
+#CHECK: sthy	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x70]
+#CHECK: sthy	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x70]
+#CHECK: sthy	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x70]
+#CHECK: sthy	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x70]
+#CHECK: sthy	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x70]
+#CHECK: sthy	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x70]
+
+	sthy	%r0, -524288
+	sthy	%r0, -1
+	sthy	%r0, 0
+	sthy	%r0, 1
+	sthy	%r0, 524287
+	sthy	%r0, 0(%r1)
+	sthy	%r0, 0(%r15)
+	sthy	%r0, 524287(%r1,%r15)
+	sthy	%r0, 524287(%r15,%r1)
+	sthy	%r15, 0
diff --git a/test/MC/SystemZ/insn-sthy-02.s b/test/MC/SystemZ/insn-sthy-02.s
new file mode 100644
index 0000000..0ad547b
--- /dev/null
+++ b/test/MC/SystemZ/insn-sthy-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: sthy	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: sthy	%r0, 524288
+
+	sthy	%r0, -524289
+	sthy	%r0, 524288
diff --git a/test/MC/SystemZ/insn-stmg-01.s b/test/MC/SystemZ/insn-stmg-01.s
new file mode 100644
index 0000000..d189014
--- /dev/null
+++ b/test/MC/SystemZ/insn-stmg-01.s
@@ -0,0 +1,29 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: stmg	%r0, %r0, 0             # encoding: [0xeb,0x00,0x00,0x00,0x00,0x24]
+#CHECK: stmg	%r0, %r15, 0            # encoding: [0xeb,0x0f,0x00,0x00,0x00,0x24]
+#CHECK: stmg	%r14, %r15, 0           # encoding: [0xeb,0xef,0x00,0x00,0x00,0x24]
+#CHECK: stmg	%r15, %r15, 0           # encoding: [0xeb,0xff,0x00,0x00,0x00,0x24]
+#CHECK: stmg	%r0, %r0, -524288       # encoding: [0xeb,0x00,0x00,0x00,0x80,0x24]
+#CHECK: stmg	%r0, %r0, -1            # encoding: [0xeb,0x00,0x0f,0xff,0xff,0x24]
+#CHECK: stmg	%r0, %r0, 0             # encoding: [0xeb,0x00,0x00,0x00,0x00,0x24]
+#CHECK: stmg	%r0, %r0, 1             # encoding: [0xeb,0x00,0x00,0x01,0x00,0x24]
+#CHECK: stmg	%r0, %r0, 524287        # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0x24]
+#CHECK: stmg	%r0, %r0, 0(%r1)        # encoding: [0xeb,0x00,0x10,0x00,0x00,0x24]
+#CHECK: stmg	%r0, %r0, 0(%r15)       # encoding: [0xeb,0x00,0xf0,0x00,0x00,0x24]
+#CHECK: stmg	%r0, %r0, 524287(%r1)   # encoding: [0xeb,0x00,0x1f,0xff,0x7f,0x24]
+#CHECK: stmg	%r0, %r0, 524287(%r15)  # encoding: [0xeb,0x00,0xff,0xff,0x7f,0x24]
+
+	stmg	%r0,%r0,0
+	stmg	%r0,%r15,0
+	stmg	%r14,%r15,0
+	stmg	%r15,%r15,0
+	stmg	%r0,%r0,-524288
+	stmg	%r0,%r0,-1
+	stmg	%r0,%r0,0
+	stmg	%r0,%r0,1
+	stmg	%r0,%r0,524287
+	stmg	%r0,%r0,0(%r1)
+	stmg	%r0,%r0,0(%r15)
+	stmg	%r0,%r0,524287(%r1)
+	stmg	%r0,%r0,524287(%r15)
diff --git a/test/MC/SystemZ/insn-stmg-02.s b/test/MC/SystemZ/insn-stmg-02.s
new file mode 100644
index 0000000..342c38a
--- /dev/null
+++ b/test/MC/SystemZ/insn-stmg-02.s
@@ -0,0 +1,13 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: stmg	%r0, %r0, -524289
+#CHECK: error: invalid operand
+#CHECK: stmg	%r0, %r0, 524288
+#CHECK: error: invalid use of indexed addressing
+#CHECK: stmg	%r0, %r0, 0(%r1,%r2)
+
+	stmg	%r0, %r0, -524289
+	stmg	%r0, %r0, 524288
+	stmg	%r0, %r0, 0(%r1,%r2)
diff --git a/test/MC/SystemZ/insn-strl-01.s b/test/MC/SystemZ/insn-strl-01.s
new file mode 100644
index 0000000..84bd41f
--- /dev/null
+++ b/test/MC/SystemZ/insn-strl-01.s
@@ -0,0 +1,31 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: strl	%r0, 2864434397         # encoding: [0xc4,0x0f,0x55,0x5d,0xe6,0x6e]
+#CHECK: strl	%r15, 2864434397        # encoding: [0xc4,0xff,0x55,0x5d,0xe6,0x6e]
+
+	strl	%r0,0xaabbccdd
+	strl	%r15,0xaabbccdd
+
+#CHECK: strl	%r0, foo                # encoding: [0xc4,0x0f,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+#CHECK: strl	%r15, foo               # encoding: [0xc4,0xff,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL
+
+	strl	%r0,foo
+	strl	%r15,foo
+
+#CHECK: strl	%r3, bar+100            # encoding: [0xc4,0x3f,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+#CHECK: strl	%r4, bar+100            # encoding: [0xc4,0x4f,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL
+
+	strl	%r3,bar+100
+	strl	%r4,bar+100
+
+#CHECK: strl	%r7, frob@PLT           # encoding: [0xc4,0x7f,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+#CHECK: strl	%r8, frob@PLT           # encoding: [0xc4,0x8f,A,A,A,A]
+#CHECK:  fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL
+
+	strl	%r7,frob@PLT
+	strl	%r8,frob@PLT
diff --git a/test/MC/SystemZ/insn-strv-01.s b/test/MC/SystemZ/insn-strv-01.s
new file mode 100644
index 0000000..6a818a8
--- /dev/null
+++ b/test/MC/SystemZ/insn-strv-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: strv	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x3e]
+#CHECK: strv	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x3e]
+#CHECK: strv	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x3e]
+#CHECK: strv	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x3e]
+#CHECK: strv	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x3e]
+#CHECK: strv	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x3e]
+#CHECK: strv	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x3e]
+#CHECK: strv	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x3e]
+#CHECK: strv	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x3e]
+#CHECK: strv	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x3e]
+
+	strv	%r0,-524288
+	strv	%r0,-1
+	strv	%r0,0
+	strv	%r0,1
+	strv	%r0,524287
+	strv	%r0,0(%r1)
+	strv	%r0,0(%r15)
+	strv	%r0,524287(%r1,%r15)
+	strv	%r0,524287(%r15,%r1)
+	strv	%r15,0
diff --git a/test/MC/SystemZ/insn-strv-02.s b/test/MC/SystemZ/insn-strv-02.s
new file mode 100644
index 0000000..24460ed
--- /dev/null
+++ b/test/MC/SystemZ/insn-strv-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: strv	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: strv	%r0, 524288
+
+	strv	%r0, -524289
+	strv	%r0, 524288
diff --git a/test/MC/SystemZ/insn-strvg-01.s b/test/MC/SystemZ/insn-strvg-01.s
new file mode 100644
index 0000000..6a4d49d
--- /dev/null
+++ b/test/MC/SystemZ/insn-strvg-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: strvg	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x2f]
+#CHECK: strvg	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x2f]
+#CHECK: strvg	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x2f]
+#CHECK: strvg	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x2f]
+#CHECK: strvg	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x2f]
+#CHECK: strvg	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x2f]
+#CHECK: strvg	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x2f]
+#CHECK: strvg	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x2f]
+#CHECK: strvg	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x2f]
+#CHECK: strvg	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x2f]
+
+	strvg	%r0,-524288
+	strvg	%r0,-1
+	strvg	%r0,0
+	strvg	%r0,1
+	strvg	%r0,524287
+	strvg	%r0,0(%r1)
+	strvg	%r0,0(%r15)
+	strvg	%r0,524287(%r1,%r15)
+	strvg	%r0,524287(%r15,%r1)
+	strvg	%r15,0
diff --git a/test/MC/SystemZ/insn-strvg-02.s b/test/MC/SystemZ/insn-strvg-02.s
new file mode 100644
index 0000000..ebb0d5b
--- /dev/null
+++ b/test/MC/SystemZ/insn-strvg-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: strvg	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: strvg	%r0, 524288
+
+	strvg	%r0, -524289
+	strvg	%r0, 524288
diff --git a/test/MC/SystemZ/insn-sty-01.s b/test/MC/SystemZ/insn-sty-01.s
new file mode 100644
index 0000000..1ca2d5c
--- /dev/null
+++ b/test/MC/SystemZ/insn-sty-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: sty	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x50]
+#CHECK: sty	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x50]
+#CHECK: sty	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x50]
+#CHECK: sty	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x50]
+#CHECK: sty	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x50]
+#CHECK: sty	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x50]
+#CHECK: sty	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x50]
+#CHECK: sty	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x50]
+#CHECK: sty	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x50]
+#CHECK: sty	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x50]
+
+	sty	%r0, -524288
+	sty	%r0, -1
+	sty	%r0, 0
+	sty	%r0, 1
+	sty	%r0, 524287
+	sty	%r0, 0(%r1)
+	sty	%r0, 0(%r15)
+	sty	%r0, 524287(%r1,%r15)
+	sty	%r0, 524287(%r15,%r1)
+	sty	%r15, 0
diff --git a/test/MC/SystemZ/insn-sty-02.s b/test/MC/SystemZ/insn-sty-02.s
new file mode 100644
index 0000000..fea7c08
--- /dev/null
+++ b/test/MC/SystemZ/insn-sty-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: sty	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: sty	%r0, 524288
+
+	sty	%r0, -524289
+	sty	%r0, 524288
diff --git a/test/MC/SystemZ/insn-sxbr-01.s b/test/MC/SystemZ/insn-sxbr-01.s
new file mode 100644
index 0000000..e7f4ed2
--- /dev/null
+++ b/test/MC/SystemZ/insn-sxbr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: sxbr	%f0, %f0                # encoding: [0xb3,0x4b,0x00,0x00]
+#CHECK: sxbr	%f0, %f13               # encoding: [0xb3,0x4b,0x00,0x0d]
+#CHECK: sxbr	%f8, %f8                # encoding: [0xb3,0x4b,0x00,0x88]
+#CHECK: sxbr	%f13, %f0               # encoding: [0xb3,0x4b,0x00,0xd0]
+
+	sxbr	%f0, %f0
+	sxbr	%f0, %f13
+	sxbr	%f8, %f8
+	sxbr	%f13, %f0
diff --git a/test/MC/SystemZ/insn-sxbr-02.s b/test/MC/SystemZ/insn-sxbr-02.s
new file mode 100644
index 0000000..397238b
--- /dev/null
+++ b/test/MC/SystemZ/insn-sxbr-02.s
@@ -0,0 +1,17 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid register
+#CHECK: sxbr	%f0, %f2
+#CHECK: error: invalid register
+#CHECK: sxbr	%f0, %f14
+#CHECK: error: invalid register
+#CHECK: sxbr	%f2, %f0
+#CHECK: error: invalid register
+#CHECK: sxbr	%f14, %f0
+
+	sxbr	%f0, %f2
+	sxbr	%f0, %f14
+	sxbr	%f2, %f0
+	sxbr	%f14, %f0
+
diff --git a/test/MC/SystemZ/insn-sy-01.s b/test/MC/SystemZ/insn-sy-01.s
new file mode 100644
index 0000000..bc56bd7
--- /dev/null
+++ b/test/MC/SystemZ/insn-sy-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: sy	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x5b]
+#CHECK: sy	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x5b]
+#CHECK: sy	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x5b]
+#CHECK: sy	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x5b]
+#CHECK: sy	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x5b]
+#CHECK: sy	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x5b]
+#CHECK: sy	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x5b]
+#CHECK: sy	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x5b]
+#CHECK: sy	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x5b]
+#CHECK: sy	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x5b]
+
+	sy	%r0, -524288
+	sy	%r0, -1
+	sy	%r0, 0
+	sy	%r0, 1
+	sy	%r0, 524287
+	sy	%r0, 0(%r1)
+	sy	%r0, 0(%r15)
+	sy	%r0, 524287(%r1,%r15)
+	sy	%r0, 524287(%r15,%r1)
+	sy	%r15, 0
diff --git a/test/MC/SystemZ/insn-sy-02.s b/test/MC/SystemZ/insn-sy-02.s
new file mode 100644
index 0000000..7d64ca9
--- /dev/null
+++ b/test/MC/SystemZ/insn-sy-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: sy	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: sy	%r0, 524288
+
+	sy	%r0, -524289
+	sy	%r0, 524288
diff --git a/test/MC/SystemZ/insn-x-01.s b/test/MC/SystemZ/insn-x-01.s
new file mode 100644
index 0000000..a2e3a26
--- /dev/null
+++ b/test/MC/SystemZ/insn-x-01.s
@@ -0,0 +1,17 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: x	%r0, 0                  # encoding: [0x57,0x00,0x00,0x00]
+#CHECK: x	%r0, 4095               # encoding: [0x57,0x00,0x0f,0xff]
+#CHECK: x	%r0, 0(%r1)             # encoding: [0x57,0x00,0x10,0x00]
+#CHECK: x	%r0, 0(%r15)            # encoding: [0x57,0x00,0xf0,0x00]
+#CHECK: x	%r0, 4095(%r1,%r15)     # encoding: [0x57,0x01,0xff,0xff]
+#CHECK: x	%r0, 4095(%r15,%r1)     # encoding: [0x57,0x0f,0x1f,0xff]
+#CHECK: x	%r15, 0                 # encoding: [0x57,0xf0,0x00,0x00]
+
+	x	%r0, 0
+	x	%r0, 4095
+	x	%r0, 0(%r1)
+	x	%r0, 0(%r15)
+	x	%r0, 4095(%r1,%r15)
+	x	%r0, 4095(%r15,%r1)
+	x	%r15, 0
diff --git a/test/MC/SystemZ/insn-x-02.s b/test/MC/SystemZ/insn-x-02.s
new file mode 100644
index 0000000..3719740
--- /dev/null
+++ b/test/MC/SystemZ/insn-x-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: x	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: x	%r0, 4096
+
+	x	%r0, -1
+	x	%r0, 4096
diff --git a/test/MC/SystemZ/insn-xg-01.s b/test/MC/SystemZ/insn-xg-01.s
new file mode 100644
index 0000000..6cf5e7e
--- /dev/null
+++ b/test/MC/SystemZ/insn-xg-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: xg	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x82]
+#CHECK: xg	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x82]
+#CHECK: xg	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x82]
+#CHECK: xg	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x82]
+#CHECK: xg	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x82]
+#CHECK: xg	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x82]
+#CHECK: xg	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x82]
+#CHECK: xg	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x82]
+#CHECK: xg	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x82]
+#CHECK: xg	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x82]
+
+	xg	%r0, -524288
+	xg	%r0, -1
+	xg	%r0, 0
+	xg	%r0, 1
+	xg	%r0, 524287
+	xg	%r0, 0(%r1)
+	xg	%r0, 0(%r15)
+	xg	%r0, 524287(%r1,%r15)
+	xg	%r0, 524287(%r15,%r1)
+	xg	%r15, 0
diff --git a/test/MC/SystemZ/insn-xg-02.s b/test/MC/SystemZ/insn-xg-02.s
new file mode 100644
index 0000000..0505b9f
--- /dev/null
+++ b/test/MC/SystemZ/insn-xg-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: xg	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: xg	%r0, 524288
+
+	xg	%r0, -524289
+	xg	%r0, 524288
diff --git a/test/MC/SystemZ/insn-xgr-01.s b/test/MC/SystemZ/insn-xgr-01.s
new file mode 100644
index 0000000..1a5a6d6
--- /dev/null
+++ b/test/MC/SystemZ/insn-xgr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: xgr	%r0, %r0                # encoding: [0xb9,0x82,0x00,0x00]
+#CHECK: xgr	%r0, %r15               # encoding: [0xb9,0x82,0x00,0x0f]
+#CHECK: xgr	%r15, %r0               # encoding: [0xb9,0x82,0x00,0xf0]
+#CHECK: xgr	%r7, %r8                # encoding: [0xb9,0x82,0x00,0x78]
+
+	xgr	%r0,%r0
+	xgr	%r0,%r15
+	xgr	%r15,%r0
+	xgr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-xi-01.s b/test/MC/SystemZ/insn-xi-01.s
new file mode 100644
index 0000000..2a7670c
--- /dev/null
+++ b/test/MC/SystemZ/insn-xi-01.s
@@ -0,0 +1,17 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: xi	0, 0                    # encoding: [0x97,0x00,0x00,0x00]
+#CHECK: xi	4095, 0                 # encoding: [0x97,0x00,0x0f,0xff]
+#CHECK: xi	0, 255                  # encoding: [0x97,0xff,0x00,0x00]
+#CHECK: xi	0(%r1), 42              # encoding: [0x97,0x2a,0x10,0x00]
+#CHECK: xi	0(%r15), 42             # encoding: [0x97,0x2a,0xf0,0x00]
+#CHECK: xi	4095(%r1), 42           # encoding: [0x97,0x2a,0x1f,0xff]
+#CHECK: xi	4095(%r15), 42          # encoding: [0x97,0x2a,0xff,0xff]
+
+	xi	0, 0
+	xi	4095, 0
+	xi	0, 255
+	xi	0(%r1), 42
+	xi	0(%r15), 42
+	xi	4095(%r1), 42
+	xi	4095(%r15), 42
diff --git a/test/MC/SystemZ/insn-xi-02.s b/test/MC/SystemZ/insn-xi-02.s
new file mode 100644
index 0000000..a1ce668
--- /dev/null
+++ b/test/MC/SystemZ/insn-xi-02.s
@@ -0,0 +1,19 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: xi	-1, 0
+#CHECK: error: invalid operand
+#CHECK: xi	4096, 0
+#CHECK: error: invalid use of indexed addressing
+#CHECK: xi	0(%r1,%r2), 0
+#CHECK: error: invalid operand
+#CHECK: xi	0, -1
+#CHECK: error: invalid operand
+#CHECK: xi	0, 256
+
+	xi	-1, 0
+	xi	4096, 0
+	xi	0(%r1,%r2), 0
+	xi	0, -1
+	xi	0, 256
diff --git a/test/MC/SystemZ/insn-xihf-01.s b/test/MC/SystemZ/insn-xihf-01.s
new file mode 100644
index 0000000..ad2ec19
--- /dev/null
+++ b/test/MC/SystemZ/insn-xihf-01.s
@@ -0,0 +1,9 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: xihf	%r0, 0                  # encoding: [0xc0,0x06,0x00,0x00,0x00,0x00]
+#CHECK: xihf	%r0, 4294967295         # encoding: [0xc0,0x06,0xff,0xff,0xff,0xff]
+#CHECK: xihf	%r15, 0                 # encoding: [0xc0,0xf6,0x00,0x00,0x00,0x00]
+
+	xihf	%r0, 0
+	xihf	%r0, 0xffffffff
+	xihf	%r15, 0
diff --git a/test/MC/SystemZ/insn-xihf-02.s b/test/MC/SystemZ/insn-xihf-02.s
new file mode 100644
index 0000000..945993b
--- /dev/null
+++ b/test/MC/SystemZ/insn-xihf-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: xihf	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: xihf	%r0, 1 << 32
+
+	xihf	%r0, -1
+	xihf	%r0, 1 << 32
diff --git a/test/MC/SystemZ/insn-xilf-01.s b/test/MC/SystemZ/insn-xilf-01.s
new file mode 100644
index 0000000..475e57332
--- /dev/null
+++ b/test/MC/SystemZ/insn-xilf-01.s
@@ -0,0 +1,9 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: xilf	%r0, 0                  # encoding: [0xc0,0x07,0x00,0x00,0x00,0x00]
+#CHECK: xilf	%r0, 4294967295         # encoding: [0xc0,0x07,0xff,0xff,0xff,0xff]
+#CHECK: xilf	%r15, 0                 # encoding: [0xc0,0xf7,0x00,0x00,0x00,0x00]
+
+	xilf	%r0, 0
+	xilf	%r0, 0xffffffff
+	xilf	%r15, 0
diff --git a/test/MC/SystemZ/insn-xilf-02.s b/test/MC/SystemZ/insn-xilf-02.s
new file mode 100644
index 0000000..df02b7c
--- /dev/null
+++ b/test/MC/SystemZ/insn-xilf-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: xilf	%r0, -1
+#CHECK: error: invalid operand
+#CHECK: xilf	%r0, 1 << 32
+
+	xilf	%r0, -1
+	xilf	%r0, 1 << 32
diff --git a/test/MC/SystemZ/insn-xiy-01.s b/test/MC/SystemZ/insn-xiy-01.s
new file mode 100644
index 0000000..c329ce0
--- /dev/null
+++ b/test/MC/SystemZ/insn-xiy-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: xiy	-524288, 0              # encoding: [0xeb,0x00,0x00,0x00,0x80,0x57]
+#CHECK: xiy	-1, 0                   # encoding: [0xeb,0x00,0x0f,0xff,0xff,0x57]
+#CHECK: xiy	0, 0                    # encoding: [0xeb,0x00,0x00,0x00,0x00,0x57]
+#CHECK: xiy	1, 0                    # encoding: [0xeb,0x00,0x00,0x01,0x00,0x57]
+#CHECK: xiy	524287, 0               # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0x57]
+#CHECK: xiy	0, 255                  # encoding: [0xeb,0xff,0x00,0x00,0x00,0x57]
+#CHECK: xiy	0(%r1), 42              # encoding: [0xeb,0x2a,0x10,0x00,0x00,0x57]
+#CHECK: xiy	0(%r15), 42             # encoding: [0xeb,0x2a,0xf0,0x00,0x00,0x57]
+#CHECK: xiy	524287(%r1), 42         # encoding: [0xeb,0x2a,0x1f,0xff,0x7f,0x57]
+#CHECK: xiy	524287(%r15), 42        # encoding: [0xeb,0x2a,0xff,0xff,0x7f,0x57]
+
+	xiy	-524288, 0
+	xiy	-1, 0
+	xiy	0, 0
+	xiy	1, 0
+	xiy	524287, 0
+	xiy	0, 255
+	xiy	0(%r1), 42
+	xiy	0(%r15), 42
+	xiy	524287(%r1), 42
+	xiy	524287(%r15), 42
diff --git a/test/MC/SystemZ/insn-xiy-02.s b/test/MC/SystemZ/insn-xiy-02.s
new file mode 100644
index 0000000..519c26c
--- /dev/null
+++ b/test/MC/SystemZ/insn-xiy-02.s
@@ -0,0 +1,19 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: xiy	-524289, 0
+#CHECK: error: invalid operand
+#CHECK: xiy	524288, 0
+#CHECK: error: invalid use of indexed addressing
+#CHECK: xiy	0(%r1,%r2), 0
+#CHECK: error: invalid operand
+#CHECK: xiy	0, -1
+#CHECK: error: invalid operand
+#CHECK: xiy	0, 256
+
+	xiy	-524289, 0
+	xiy	524288, 0
+	xiy	0(%r1,%r2), 0
+	xiy	0, -1
+	xiy	0, 256
diff --git a/test/MC/SystemZ/insn-xr-01.s b/test/MC/SystemZ/insn-xr-01.s
new file mode 100644
index 0000000..471e6a6
--- /dev/null
+++ b/test/MC/SystemZ/insn-xr-01.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: xr	%r0, %r0                # encoding: [0x17,0x00]
+#CHECK: xr	%r0, %r15               # encoding: [0x17,0x0f]
+#CHECK: xr	%r15, %r0               # encoding: [0x17,0xf0]
+#CHECK: xr	%r7, %r8                # encoding: [0x17,0x78]
+
+	xr	%r0,%r0
+	xr	%r0,%r15
+	xr	%r15,%r0
+	xr	%r7,%r8
diff --git a/test/MC/SystemZ/insn-xy-01.s b/test/MC/SystemZ/insn-xy-01.s
new file mode 100644
index 0000000..132db04
--- /dev/null
+++ b/test/MC/SystemZ/insn-xy-01.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: xy	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x57]
+#CHECK: xy	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x57]
+#CHECK: xy	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x57]
+#CHECK: xy	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x57]
+#CHECK: xy	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x57]
+#CHECK: xy	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x57]
+#CHECK: xy	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x57]
+#CHECK: xy	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x57]
+#CHECK: xy	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x57]
+#CHECK: xy	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x57]
+
+	xy	%r0, -524288
+	xy	%r0, -1
+	xy	%r0, 0
+	xy	%r0, 1
+	xy	%r0, 524287
+	xy	%r0, 0(%r1)
+	xy	%r0, 0(%r15)
+	xy	%r0, 524287(%r1,%r15)
+	xy	%r0, 524287(%r15,%r1)
+	xy	%r15, 0
diff --git a/test/MC/SystemZ/insn-xy-02.s b/test/MC/SystemZ/insn-xy-02.s
new file mode 100644
index 0000000..6ba3bad
--- /dev/null
+++ b/test/MC/SystemZ/insn-xy-02.s
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid operand
+#CHECK: xy	%r0, -524289
+#CHECK: error: invalid operand
+#CHECK: xy	%r0, 524288
+
+	xy	%r0, -524289
+	xy	%r0, 524288
diff --git a/test/MC/SystemZ/lit.local.cfg b/test/MC/SystemZ/lit.local.cfg
new file mode 100644
index 0000000..abb6974
--- /dev/null
+++ b/test/MC/SystemZ/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp', '.s']
+
+targets = set(config.root.targets_to_build.split())
+if not 'SystemZ' in targets:
+    config.unsupported = True
+
diff --git a/test/MC/SystemZ/regs-01.s b/test/MC/SystemZ/regs-01.s
new file mode 100644
index 0000000..df11fee
--- /dev/null
+++ b/test/MC/SystemZ/regs-01.s
@@ -0,0 +1,19 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: lr	%r0, %r1                # encoding: [0x18,0x01]
+#CHECK: lr	%r2, %r3                # encoding: [0x18,0x23]
+#CHECK: lr	%r4, %r5                # encoding: [0x18,0x45]
+#CHECK: lr	%r6, %r7                # encoding: [0x18,0x67]
+#CHECK: lr	%r8, %r9                # encoding: [0x18,0x89]
+#CHECK: lr	%r10, %r11              # encoding: [0x18,0xab]
+#CHECK: lr	%r12, %r13              # encoding: [0x18,0xcd]
+#CHECK: lr	%r14, %r15              # encoding: [0x18,0xef]
+
+	lr	%r0,%r1
+	lr	%r2,%r3
+	lr	%r4,%r5
+	lr	%r6,%r7
+	lr	%r8,%r9
+	lr	%r10,%r11
+	lr	%r12,%r13
+	lr	%r14,%r15
diff --git a/test/MC/SystemZ/regs-02.s b/test/MC/SystemZ/regs-02.s
new file mode 100644
index 0000000..baaa0f9
--- /dev/null
+++ b/test/MC/SystemZ/regs-02.s
@@ -0,0 +1,19 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: lgr	%r0, %r1                # encoding: [0xb9,0x04,0x00,0x01]
+#CHECK: lgr	%r2, %r3                # encoding: [0xb9,0x04,0x00,0x23]
+#CHECK: lgr	%r4, %r5                # encoding: [0xb9,0x04,0x00,0x45]
+#CHECK: lgr	%r6, %r7                # encoding: [0xb9,0x04,0x00,0x67]
+#CHECK: lgr	%r8, %r9                # encoding: [0xb9,0x04,0x00,0x89]
+#CHECK: lgr	%r10, %r11              # encoding: [0xb9,0x04,0x00,0xab]
+#CHECK: lgr	%r12, %r13              # encoding: [0xb9,0x04,0x00,0xcd]
+#CHECK: lgr	%r14, %r15              # encoding: [0xb9,0x04,0x00,0xef]
+
+	lgr	%r0,%r1
+	lgr	%r2,%r3
+	lgr	%r4,%r5
+	lgr	%r6,%r7
+	lgr	%r8,%r9
+	lgr	%r10,%r11
+	lgr	%r12,%r13
+	lgr	%r14,%r15
diff --git a/test/MC/SystemZ/regs-03.s b/test/MC/SystemZ/regs-03.s
new file mode 100644
index 0000000..6ced415
--- /dev/null
+++ b/test/MC/SystemZ/regs-03.s
@@ -0,0 +1,19 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: dlr	%r0, %r0                # encoding: [0xb9,0x97,0x00,0x00]
+#CHECK: dlr	%r2, %r0                # encoding: [0xb9,0x97,0x00,0x20]
+#CHECK: dlr	%r4, %r0                # encoding: [0xb9,0x97,0x00,0x40]
+#CHECK: dlr	%r6, %r0                # encoding: [0xb9,0x97,0x00,0x60]
+#CHECK: dlr	%r8, %r0                # encoding: [0xb9,0x97,0x00,0x80]
+#CHECK: dlr	%r10, %r0               # encoding: [0xb9,0x97,0x00,0xa0]
+#CHECK: dlr	%r12, %r0               # encoding: [0xb9,0x97,0x00,0xc0]
+#CHECK: dlr	%r14, %r0               # encoding: [0xb9,0x97,0x00,0xe0]
+
+	dlr	%r0,%r0
+	dlr	%r2,%r0
+	dlr	%r4,%r0
+	dlr	%r6,%r0
+	dlr	%r8,%r0
+	dlr	%r10,%r0
+	dlr	%r12,%r0
+	dlr	%r14,%r0
diff --git a/test/MC/SystemZ/regs-04.s b/test/MC/SystemZ/regs-04.s
new file mode 100644
index 0000000..a2da671
--- /dev/null
+++ b/test/MC/SystemZ/regs-04.s
@@ -0,0 +1,19 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: ler	%f0, %f1                # encoding: [0x38,0x01]
+#CHECK: ler	%f2, %f3                # encoding: [0x38,0x23]
+#CHECK: ler	%f4, %f5                # encoding: [0x38,0x45]
+#CHECK: ler	%f6, %f7                # encoding: [0x38,0x67]
+#CHECK: ler	%f8, %f9                # encoding: [0x38,0x89]
+#CHECK: ler	%f10, %f11              # encoding: [0x38,0xab]
+#CHECK: ler	%f12, %f13              # encoding: [0x38,0xcd]
+#CHECK: ler	%f14, %f15              # encoding: [0x38,0xef]
+
+	ler	%f0,%f1
+	ler	%f2,%f3
+	ler	%f4,%f5
+	ler	%f6,%f7
+	ler	%f8,%f9
+	ler	%f10,%f11
+	ler	%f12,%f13
+	ler	%f14,%f15
diff --git a/test/MC/SystemZ/regs-05.s b/test/MC/SystemZ/regs-05.s
new file mode 100644
index 0000000..b5f50b5
--- /dev/null
+++ b/test/MC/SystemZ/regs-05.s
@@ -0,0 +1,19 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: ldr	%f0, %f1                # encoding: [0x28,0x01]
+#CHECK: ldr	%f2, %f3                # encoding: [0x28,0x23]
+#CHECK: ldr	%f4, %f5                # encoding: [0x28,0x45]
+#CHECK: ldr	%f6, %f7                # encoding: [0x28,0x67]
+#CHECK: ldr	%f8, %f9                # encoding: [0x28,0x89]
+#CHECK: ldr	%f10, %f11              # encoding: [0x28,0xab]
+#CHECK: ldr	%f12, %f13              # encoding: [0x28,0xcd]
+#CHECK: ldr	%f14, %f15              # encoding: [0x28,0xef]
+
+	ldr	%f0,%f1
+	ldr	%f2,%f3
+	ldr	%f4,%f5
+	ldr	%f6,%f7
+	ldr	%f8,%f9
+	ldr	%f10,%f11
+	ldr	%f12,%f13
+	ldr	%f14,%f15
diff --git a/test/MC/SystemZ/regs-06.s b/test/MC/SystemZ/regs-06.s
new file mode 100644
index 0000000..43bf38c
--- /dev/null
+++ b/test/MC/SystemZ/regs-06.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple s390x-linux-gnu -show-encoding %s | FileCheck %s
+
+#CHECK: lxr	%f0, %f1                # encoding: [0xb3,0x65,0x00,0x01]
+#CHECK: lxr	%f4, %f5                # encoding: [0xb3,0x65,0x00,0x45]
+#CHECK: lxr	%f8, %f9                # encoding: [0xb3,0x65,0x00,0x89]
+#CHECK: lxr	%f12, %f13              # encoding: [0xb3,0x65,0x00,0xcd]
+
+	lxr	%f0,%f1
+	lxr	%f4,%f5
+	lxr	%f8,%f9
+	lxr	%f12,%f13
diff --git a/test/MC/SystemZ/regs-07.s b/test/MC/SystemZ/regs-07.s
new file mode 100644
index 0000000..d3585a6
--- /dev/null
+++ b/test/MC/SystemZ/regs-07.s
@@ -0,0 +1,28 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid register
+#CHECK: lr	%r16,%r1
+#CHECK: error: invalid register
+#CHECK: lr	%f0,%r1
+#CHECK: error: invalid register
+#CHECK: lr	%a0,%r1
+#CHECK: error: invalid operand for instruction
+#CHECK: lr	%arid,%r1
+#CHECK: error: invalid operand for instruction
+#CHECK: lr	%0,%r1
+#CHECK: error: invalid operand for instruction
+#CHECK: lr	0,%r1
+#CHECK: error: unknown token in expression
+#CHECK: lr	(%r0),%r1
+#CHECK: error: unknown token in expression
+#CHECK: lr	%,%r1
+
+	lr	%r16,%r1
+	lr	%f0,%r1
+	lr	%a0,%r1
+	lr	%arid,%r1
+	lr	%0,%r1
+	lr	0,%r1
+	lr	(%r0),%r1
+	lr	%,%r1
diff --git a/test/MC/SystemZ/regs-08.s b/test/MC/SystemZ/regs-08.s
new file mode 100644
index 0000000..f11c457
--- /dev/null
+++ b/test/MC/SystemZ/regs-08.s
@@ -0,0 +1,28 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid register
+#CHECK: lgr	%r16,%r1
+#CHECK: error: invalid register
+#CHECK: lgr	%f0,%r1
+#CHECK: error: invalid register
+#CHECK: lgr	%a0,%r1
+#CHECK: error: invalid operand for instruction
+#CHECK: lgr	%arid,%r1
+#CHECK: error: invalid operand for instruction
+#CHECK: lgr	%0,%r1
+#CHECK: error: invalid operand for instruction
+#CHECK: lgr	0,%r1
+#CHECK: error: unknown token in expression
+#CHECK: lgr	(%r0),%r1
+#CHECK: error: unknown token in expression
+#CHECK: lgr	%,%r1
+
+	lgr	%r16,%r1
+	lgr	%f0,%r1
+	lgr	%a0,%r1
+	lgr	%arid,%r1
+	lgr	%0,%r1
+	lgr	0,%r1
+	lgr	(%r0),%r1
+	lgr	%,%r1
diff --git a/test/MC/SystemZ/regs-09.s b/test/MC/SystemZ/regs-09.s
new file mode 100644
index 0000000..60f4d39
--- /dev/null
+++ b/test/MC/SystemZ/regs-09.s
@@ -0,0 +1,31 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid register
+#CHECK: dlr	%r1,%r8
+#CHECK: error: invalid register
+#CHECK: dlr	%r16,%r1
+#CHECK: error: invalid register
+#CHECK: dlr	%f0,%r1
+#CHECK: error: invalid register
+#CHECK: dlr	%a0,%r1
+#CHECK: error: invalid operand for instruction
+#CHECK: dlr	%arid,%r1
+#CHECK: error: invalid operand for instruction
+#CHECK: dlr	%0,%r1
+#CHECK: error: invalid operand for instruction
+#CHECK: dlr	0,%r1
+#CHECK: error: unknown token in expression
+#CHECK: dlr	(%r0),%r1
+#CHECK: error: unknown token in expression
+#CHECK: dlr	%,%r1
+
+	dlr	%r1,%r8
+	dlr	%r16,%r1
+	dlr	%f0,%r1
+	dlr	%a0,%r1
+	dlr	%arid,%r1
+	dlr	%0,%r1
+	dlr	0,%r1
+	dlr	(%r0),%r1
+	dlr	%,%r1
diff --git a/test/MC/SystemZ/regs-10.s b/test/MC/SystemZ/regs-10.s
new file mode 100644
index 0000000..865aa82
--- /dev/null
+++ b/test/MC/SystemZ/regs-10.s
@@ -0,0 +1,28 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid register
+#CHECK: ler	%f1,%f16
+#CHECK: error: invalid register
+#CHECK: ler	%f1,%r0
+#CHECK: error: invalid register
+#CHECK: ler	%f1,%a0
+#CHECK: error: invalid operand for instruction
+#CHECK: ler	%f1,%fly
+#CHECK: error: invalid operand for instruction
+#CHECK: ler	%f1,%0
+#CHECK: error: invalid operand for instruction
+#CHECK: ler	%f1,0
+#CHECK: error: unknown token in expression
+#CHECK: ler	%f1,(%f0)
+#CHECK: error: unknown token in expression
+#CHECK: ler	%f1,%
+
+	ler	%f1,%f16
+	ler	%f1,%r0
+	ler	%f1,%a0
+	ler	%f1,%fly
+	ler	%f1,%0
+	ler	%f1,0
+	ler	%f1,(%f0)
+	ler	%f1,%
diff --git a/test/MC/SystemZ/regs-11.s b/test/MC/SystemZ/regs-11.s
new file mode 100644
index 0000000..5d0f04f
--- /dev/null
+++ b/test/MC/SystemZ/regs-11.s
@@ -0,0 +1,28 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid register
+#CHECK: ldr	%f1,%f16
+#CHECK: error: invalid register
+#CHECK: ldr	%f1,%r0
+#CHECK: error: invalid register
+#CHECK: ldr	%f1,%a0
+#CHECK: error: invalid operand for instruction
+#CHECK: ldr	%f1,%fly
+#CHECK: error: invalid operand for instruction
+#CHECK: ldr	%f1,%0
+#CHECK: error: invalid operand for instruction
+#CHECK: ldr	%f1,0
+#CHECK: error: unknown token in expression
+#CHECK: ldr	%f1,(%f0)
+#CHECK: error: unknown token in expression
+#CHECK: ldr	%f1,%
+
+	ldr	%f1,%f16
+	ldr	%f1,%r0
+	ldr	%f1,%a0
+	ldr	%f1,%fly
+	ldr	%f1,%0
+	ldr	%f1,0
+	ldr	%f1,(%f0)
+	ldr	%f1,%
diff --git a/test/MC/SystemZ/regs-12.s b/test/MC/SystemZ/regs-12.s
new file mode 100644
index 0000000..f6cf0e7
--- /dev/null
+++ b/test/MC/SystemZ/regs-12.s
@@ -0,0 +1,31 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid register
+#CHECK: lxr	%f1,%f2
+#CHECK: error: invalid register
+#CHECK: lxr	%f1,%f16
+#CHECK: error: invalid register
+#CHECK: lxr	%f1,%r0
+#CHECK: error: invalid register
+#CHECK: lxr	%f1,%a0
+#CHECK: error: invalid operand for instruction
+#CHECK: lxr	%f1,%fly
+#CHECK: error: invalid operand for instruction
+#CHECK: lxr	%f1,%0
+#CHECK: error: invalid operand for instruction
+#CHECK: lxr	%f1,0
+#CHECK: error: unknown token in expression
+#CHECK: lxr	%f1,(%f0)
+#CHECK: error: unknown token in expression
+#CHECK: lxr	%f1,%
+
+	lxr	%f1,%f2
+	lxr	%f1,%f16
+	lxr	%f1,%r0
+	lxr	%f1,%a0
+	lxr	%f1,%fly
+	lxr	%f1,%0
+	lxr	%f1,0
+	lxr	%f1,(%f0)
+	lxr	%f1,%
diff --git a/test/MC/SystemZ/regs-13.s b/test/MC/SystemZ/regs-13.s
new file mode 100644
index 0000000..88b0c05
--- /dev/null
+++ b/test/MC/SystemZ/regs-13.s
@@ -0,0 +1,69 @@
+# RUN: llvm-mc -triple s390x-linux-gnu < %s | FileCheck %s
+
+#CHECK: .cfi_offset %r0, 0
+#CHECK: .cfi_offset %r1, 8
+#CHECK: .cfi_offset %r2, 16
+#CHECK: .cfi_offset %r3, 24
+#CHECK: .cfi_offset %r4, 32
+#CHECK: .cfi_offset %r5, 40
+#CHECK: .cfi_offset %r6, 48
+#CHECK: .cfi_offset %r7, 56
+#CHECK: .cfi_offset %r8, 64
+#CHECK: .cfi_offset %r9, 72
+#CHECK: .cfi_offset %r10, 80
+#CHECK: .cfi_offset %r11, 88
+#CHECK: .cfi_offset %r12, 96
+#CHECK: .cfi_offset %r13, 104
+#CHECK: .cfi_offset %r14, 112
+#CHECK: .cfi_offset %r15, 120
+#CHECK: .cfi_offset %f0, 128
+#CHECK: .cfi_offset %f1, 136
+#CHECK: .cfi_offset %f2, 144
+#CHECK: .cfi_offset %f3, 152
+#CHECK: .cfi_offset %f4, 160
+#CHECK: .cfi_offset %f5, 168
+#CHECK: .cfi_offset %f6, 176
+#CHECK: .cfi_offset %f7, 184
+#CHECK: .cfi_offset %f8, 192
+#CHECK: .cfi_offset %f9, 200
+#CHECK: .cfi_offset %f10, 208
+#CHECK: .cfi_offset %f11, 216
+#CHECK: .cfi_offset %f12, 224
+#CHECK: .cfi_offset %f13, 232
+#CHECK: .cfi_offset %f14, 240
+#CHECK: .cfi_offset %f15, 248
+
+	.cfi_startproc
+	.cfi_offset %r0,0
+	.cfi_offset %r1,8
+	.cfi_offset %r2,16
+	.cfi_offset %r3,24
+	.cfi_offset %r4,32
+	.cfi_offset %r5,40
+	.cfi_offset %r6,48
+	.cfi_offset %r7,56
+	.cfi_offset %r8,64
+	.cfi_offset %r9,72
+	.cfi_offset %r10,80
+	.cfi_offset %r11,88
+	.cfi_offset %r12,96
+	.cfi_offset %r13,104
+	.cfi_offset %r14,112
+	.cfi_offset %r15,120
+	.cfi_offset %f0,128
+	.cfi_offset %f1,136
+	.cfi_offset %f2,144
+	.cfi_offset %f3,152
+	.cfi_offset %f4,160
+	.cfi_offset %f5,168
+	.cfi_offset %f6,176
+	.cfi_offset %f7,184
+	.cfi_offset %f8,192
+	.cfi_offset %f9,200
+	.cfi_offset %f10,208
+	.cfi_offset %f11,216
+	.cfi_offset %f12,224
+	.cfi_offset %f13,232
+	.cfi_offset %f14,240
+	.cfi_offset %f15,248
+	.cfi_endproc
diff --git a/test/MC/SystemZ/regs-14.s b/test/MC/SystemZ/regs-14.s
new file mode 100644
index 0000000..e22307d
--- /dev/null
+++ b/test/MC/SystemZ/regs-14.s
@@ -0,0 +1,18 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid register
+#CHECK: .cfi_offset %a0,0
+#CHECK: error: register expected
+#CHECK: .cfi_offset %foo,0
+#CHECK: error: register expected
+#CHECK: .cfi_offset %,0
+#CHECK: error: register expected
+#CHECK: .cfi_offset r0,0
+
+	.cfi_startproc
+	.cfi_offset %a0,0
+	.cfi_offset %foo,0
+	.cfi_offset %,0
+	.cfi_offset r0,0
+	.cfi_endproc
diff --git a/test/MC/SystemZ/regs-15.s b/test/MC/SystemZ/regs-15.s
new file mode 100644
index 0000000..baec6a6
--- /dev/null
+++ b/test/MC/SystemZ/regs-15.s
@@ -0,0 +1,19 @@
+# RUN: not llvm-mc -triple s390x-linux-gnu < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: %r0 used in an address
+#CHECK: sll	%r2,8(%r0)
+#CHECK: error: %r0 used in an address
+#CHECK: br	%r0
+#CHECK: error: %r0 used in an address
+#CHECK: l	%r1,8(%r0)
+#CHECK: error: %r0 used in an address
+#CHECK: l	%r1,8(%r0,%r15)
+#CHECK: error: %r0 used in an address
+#CHECK: l	%r1,8(%r15,%r0)
+
+	sll	%r2,8(%r0)
+	br	%r0
+	l	%r1,8(%r0)
+	l	%r1,8(%r0,%r15)
+	l	%r1,8(%r15,%r0)
diff --git a/test/MC/X86/intel-syntax.s b/test/MC/X86/intel-syntax.s
index 8bfa58a..b2f337d 100644
--- a/test/MC/X86/intel-syntax.s
+++ b/test/MC/X86/intel-syntax.s
@@ -247,4 +247,79 @@ _main:
     mov [16][eax][ebx*4], ecx
 // CHECK: movl %ecx, -16(%eax,%ebx,4)
     mov [eax][ebx*4 - 16], ecx
-	ret
+
+// CHECK: prefetchnta 12800(%esi)
+    prefetchnta [esi + (200*64)]
+// CHECK: prefetchnta 32(%esi)
+    prefetchnta [esi + (64/2)]
+// CHECK: prefetchnta 128(%esi)
+    prefetchnta [esi + (64/2*4)]
+// CHECK: prefetchnta 8(%esi)
+    prefetchnta [esi + (64/(2*4))]
+// CHECK: prefetchnta 48(%esi)
+    prefetchnta [esi + (64/(2*4)+40)]
+
+// CHECK: movl %ecx, -16(%eax,%ebx,4)
+    mov [eax][ebx*4 - 2*8], ecx
+// CHECK: movl %ecx, -16(%eax,%ebx,4)
+    mov [eax][4*ebx - 2*8], ecx
+// CHECK: movl %ecx, -16(%eax,%ebx,4)
+    mov [eax + 4*ebx - 2*8], ecx
+// CHECK: movl %ecx, -16(%eax,%ebx,4)
+    mov [12 + eax + (4*ebx) - 2*14], ecx
+// CHECK: movl %ecx, -16(%eax,%ebx,4)
+    mov [eax][ebx*4 - 2*2*2*2], ecx
+// CHECK: movl %ecx, -16(%eax,%ebx,4)
+    mov [eax][ebx*4 - (2*8)], ecx
+// CHECK: movl %ecx, -16(%eax,%ebx,4)
+    mov [eax][ebx*4 - 2 * 8 + 4 - 4], ecx
+// CHECK: movl %ecx, -16(%eax,%ebx,4)
+    mov [eax + ebx*4 - 2 * 8 + 4 - 4], ecx
+// CHECK: movl %ecx, -16(%eax,%ebx,4)
+    mov [eax + ebx*4 - 2 * ((8 + 4) - 4)], ecx
+// CHECK: movl %ecx, -16(%eax,%ebx,4)
+    mov [-2 * ((8 + 4) - 4) + eax + ebx*4], ecx
+// CHECK: movl %ecx, -16(%eax,%ebx,4)
+    mov [((-2) * ((8 + 4) - 4)) + eax + ebx*4], ecx
+// CHECK: movl %ecx, -16(%eax,%ebx,4)
+    mov [eax + ((-2) * ((8 + 4) - 4)) + ebx*4], ecx
+// CHECK: movl %ecx, 96(%eax,%ebx,4)
+    mov [eax + ((-2) * ((8 + 4) * -4)) + ebx*4], ecx
+// CHECK: movl %ecx, -8(%eax,%ebx,4)
+    mov [eax][-8][ebx*4], ecx
+// CHECK: movl %ecx, -2(%eax,%ebx,4)
+    mov [eax][16/-8][ebx*4], ecx
+// CHECK: movl %ecx, -2(%eax,%ebx,4)
+    mov [eax][(16)/-8][ebx*4], ecx
+
+// CHECK: setb %al
+    setc al
+// CHECK: sete %al
+    setz al
+// CHECK: setbe %al
+    setna al
+// CHECK: setae %al
+    setnb al
+// CHECK: setae %al
+    setnc al
+// CHECK: setle %al
+    setng al
+// CHECK: setge %al
+    setnl al
+// CHECK: setne %al
+    setnz al
+// CHECK: setp %al
+    setpe al
+// CHECK: setnp %al
+    setpo al
+// CHECK: setb %al
+    setnae al
+// CHECK: seta %al
+    setnbe al
+// CHECK: setl %al
+    setnge al
+// CHECK: setg %al
+    setnle al
+// CHECK: jne _foo
+    jnz _foo
+    ret
diff --git a/test/MC/X86/x86-64.s b/test/MC/X86/x86-64.s
index c5f1d15..521a077 100644
--- a/test/MC/X86/x86-64.s
+++ b/test/MC/X86/x86-64.s
@@ -1228,3 +1228,11 @@ sysexitl
 // CHECK: sysexitq
 // CHECK: encoding: [0x48,0x0f,0x35]
 sysexitq
+
+// CHECK: clac
+// CHECK: encoding: [0x0f,0x01,0xca]
+clac
+
+// CHECK: stac
+// CHECK: encoding: [0x0f,0x01,0xcb]
+stac
diff --git a/test/Makefile b/test/Makefile
index b476951..88573c5 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -100,13 +100,6 @@ check-local-all:: lit.site.cfg Unit/lit.site.cfg extra-site-cfgs
 clean::
 	$(RM) -rf `find $(LLVM_OBJ_ROOT)/test -name Output -type d -print`
 
-# dsymutil is used on the Darwin to manipulate DWARF debugging information.
-ifeq ($(TARGET_OS),Darwin)
-DSYMUTIL=dsymutil
-else
-DSYMUTIL=true
-endif
-
 ifneq ($(OCAMLOPT),)
 CC_FOR_OCAMLOPT := $(shell $(OCAMLOPT) -config | grep native_c_compiler | sed -e 's/native_c_compiler: //')
 CXX_FOR_OCAMLOPT := $(subst gcc,g++,$(CC_FOR_OCAMLOPT))
@@ -132,7 +125,7 @@ endif
 
 lit.site.cfg: FORCE
 	@echo "Making LLVM 'lit.site.cfg' file..."
-	@$(ECHOPATH) s=@LLVM_HOSTTRIPLE@=$(HOST_TRIPLE)=g > lit.tmp
+	@$(ECHOPATH) s=@LLVM_HOST_TRIPLE@=$(HOST_TRIPLE)=g > lit.tmp
 	@$(ECHOPATH) s=@TARGET_TRIPLE@=$(TARGET_TRIPLE)=g >> lit.tmp
 	@$(ECHOPATH) s=@LLVM_SOURCE_DIR@=$(LLVM_SRC_ROOT)=g >> lit.tmp
 	@$(ECHOPATH) s=@LLVM_BINARY_DIR@=$(LLVM_OBJ_ROOT)=g >> lit.tmp
@@ -148,6 +141,7 @@ lit.site.cfg: FORCE
 	@$(ECHOPATH) s=@LLVM_BINDINGS@=$(BINDINGS_TO_BUILD)=g >> lit.tmp
 	@$(ECHOPATH) s=@HOST_OS@=$(HOST_OS)=g >> lit.tmp
 	@$(ECHOPATH) s=@HOST_ARCH@=$(HOST_ARCH)=g >> lit.tmp
+	@$(ECHOPATH) s=@HAVE_LIBZ@=$(HAVE_LIBZ)=g >> lit.tmp
 	@sed -f lit.tmp $(PROJ_SRC_DIR)/lit.site.cfg.in > $@
 	@-rm -f lit.tmp
 
diff --git a/test/Makefile.tests b/test/Makefile.tests
index aeb5871..c60c90c 100644
--- a/test/Makefile.tests
+++ b/test/Makefile.tests
@@ -38,7 +38,7 @@ LCCFLAGS  += -O2 -Wall
 LCXXFLAGS += -O2 -Wall
 LLCFLAGS =
 TESTRUNR = @echo Running test: $<; \
-             PATH="$(LLVMTOOLCURRENT):$(LLVM_SRC_ROOT)/test/Scripts:$(PATH)" \
+             PATH="$(LLVMTOOLCURRENT):$(PATH)" \
                   $(LLVM_SRC_ROOT)/test/TestRunner.sh
 
 LLCLIBS := $(LLCLIBS) -lm
diff --git a/test/Object/ARM/lit.local.cfg b/test/Object/ARM/lit.local.cfg
new file mode 100644
index 0000000..5fc35d8
--- /dev/null
+++ b/test/Object/ARM/lit.local.cfg
@@ -0,0 +1,3 @@
+targets = set(config.root.targets_to_build.split())
+if not 'ARM' in targets:
+    config.unsupported = True
diff --git a/test/Object/ARM/objdump-thumb.test b/test/Object/ARM/objdump-thumb.test
new file mode 100644
index 0000000..9c92a27
--- /dev/null
+++ b/test/Object/ARM/objdump-thumb.test
@@ -0,0 +1,4 @@
+RUN: llvm-objdump -d -macho -triple=thumbv7-apple-ios \
+RUN: %p/../Inputs/macho-text.thumb | FileCheck %s
+
+CHECK: 0:	00 bf                                        	nop
diff --git a/test/Object/Inputs/COFF/i386.yaml b/test/Object/Inputs/COFF/i386.yaml
index aec7a58..f763182 100644
--- a/test/Object/Inputs/COFF/i386.yaml
+++ b/test/Object/Inputs/COFF/i386.yaml
@@ -5,7 +5,8 @@ header: !Header
 sections:
   - !Section
     Name: .text
-    Characteristics: [IMAGE_SCN_CNT_CODE, IMAGE_SCN_ALIGN_16BYTES, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ, ] # 0x60500020
+    Alignment: 16
+    Characteristics: [IMAGE_SCN_CNT_CODE, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ, ] # 0x60500020
     SectionData:  !hex "83EC0CC744240800000000C7042400000000E800000000E8000000008B44240883C40CC3" # |....D$.......$...............D$.....|
 
     Relocations:
@@ -26,7 +27,8 @@ sections:
 
   - !Section
     Name: .data
-    Characteristics: [IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_ALIGN_1BYTES, IMAGE_SCN_MEM_READ, IMAGE_SCN_MEM_WRITE, ] # 0xc0100040
+    Alignment: 1
+    Characteristics: [IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ, IMAGE_SCN_MEM_WRITE, ] # 0xc0100040
     SectionData:  !hex "48656C6C6F20576F726C642100" # |Hello World!.|
 
 symbols:
@@ -38,7 +40,7 @@ symbols:
     ComplexType: IMAGE_SYM_DTYPE_NULL # (0)
     StorageClass: IMAGE_SYM_CLASS_STATIC # (3)
     NumberOfAuxSymbols: 1
-    AuxillaryData:  !hex "240000000300000000000000010000000000" # |$.................|
+    AuxiliaryData:  !hex "240000000300000000000000010000000000" # |$.................|
 
   - !Symbol
     Name: .data
@@ -48,7 +50,7 @@ symbols:
     ComplexType: IMAGE_SYM_DTYPE_NULL # (0)
     StorageClass: IMAGE_SYM_CLASS_STATIC # (3)
     NumberOfAuxSymbols: 1
-    AuxillaryData:  !hex "0D0000000000000000000000020000000000" # |..................|
+    AuxiliaryData:  !hex "0D0000000000000000000000020000000000" # |..................|
 
   - !Symbol
     Name: _main
diff --git a/test/Object/Inputs/COFF/x86-64.yaml b/test/Object/Inputs/COFF/x86-64.yaml
index 0b1265f..5134071 100644
--- a/test/Object/Inputs/COFF/x86-64.yaml
+++ b/test/Object/Inputs/COFF/x86-64.yaml
@@ -4,7 +4,8 @@ header: !Header
 sections:
   - !Section
     Name: .text
-    Characteristics: [IMAGE_SCN_CNT_CODE, IMAGE_SCN_ALIGN_16BYTES, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ, ] # 0x60500020
+    Alignment: 16
+    Characteristics: [IMAGE_SCN_CNT_CODE, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ, ] # 0x60500020
     SectionData:  !hex "4883EC28C744242400000000488D0D00000000E800000000E8000000008B4424244883C428C3" # |H..(.D$$....H.................D$$H..(.|
 
     Relocations:
@@ -25,7 +26,8 @@ sections:
 
   - !Section
     Name: .data
-    Characteristics: [IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_ALIGN_1BYTES, IMAGE_SCN_MEM_READ, IMAGE_SCN_MEM_WRITE, ] # 0xc0100040
+    Alignment: 1
+    Characteristics: [IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ, IMAGE_SCN_MEM_WRITE, ] # 0xc0100040
     SectionData:  !hex "48656C6C6F20576F726C642100" # |Hello World!.|
 
 symbols:
@@ -37,7 +39,7 @@ symbols:
     ComplexType: IMAGE_SYM_DTYPE_NULL # (0)
     StorageClass: IMAGE_SYM_CLASS_STATIC # (3)
     NumberOfAuxSymbols: 1
-    AuxillaryData:  !hex "260000000300000000000000010000000000" # |&.................|
+    AuxiliaryData:  !hex "260000000300000000000000010000000000" # |&.................|
 
   - !Symbol
     Name: .data
@@ -47,7 +49,7 @@ symbols:
     ComplexType: IMAGE_SYM_DTYPE_NULL # (0)
     StorageClass: IMAGE_SYM_CLASS_STATIC # (3)
     NumberOfAuxSymbols: 1
-    AuxillaryData:  !hex "0D0000000000000000000000020000000000" # |..................|
+    AuxiliaryData:  !hex "0D0000000000000000000000020000000000" # |..................|
 
   - !Symbol
     Name: main
diff --git a/test/Object/Inputs/hello-world.elf-x86-64 b/test/Object/Inputs/hello-world.elf-x86-64
new file mode 100755
index 0000000..16092b8
--- /dev/null
+++ b/test/Object/Inputs/hello-world.elf-x86-64
diff --git a/test/Object/Inputs/macho-text.thumb b/test/Object/Inputs/macho-text.thumb
new file mode 100644
index 0000000..b29428a
--- /dev/null
+++ b/test/Object/Inputs/macho-text.thumb
diff --git a/test/Object/lit.local.cfg b/test/Object/lit.local.cfg
index df9b335..b2439b2 100644
--- a/test/Object/lit.local.cfg
+++ b/test/Object/lit.local.cfg
@@ -1 +1 @@
-config.suffixes = ['.test']
+config.suffixes = ['.test', '.ll']
diff --git a/test/Object/nm-trivial-object.test b/test/Object/nm-trivial-object.test
index 8fd1c04..5c3cc31 100644
--- a/test/Object/nm-trivial-object.test
+++ b/test/Object/nm-trivial-object.test
@@ -1,7 +1,7 @@
 RUN: yaml2obj %p/Inputs/COFF/i386.yaml | llvm-nm \
 RUN:         | FileCheck %s -check-prefix COFF
 RUN: yaml2obj %p/Inputs/COFF/x86-64.yaml | llvm-nm \
-RUN          | FileCheck %s -check-prefix COFF
+RUN:         | FileCheck %s -check-prefix COFF
 RUN: llvm-nm %p/Inputs/trivial-object-test.elf-i386 \
 RUN:         | FileCheck %s -check-prefix ELF
 RUN: llvm-nm %p/Inputs/trivial-object-test.elf-x86-64 \
diff --git a/test/Object/objdump-section-content.test b/test/Object/objdump-section-content.test
index f9c4f43..e0199b3 100644
--- a/test/Object/objdump-section-content.test
+++ b/test/Object/objdump-section-content.test
@@ -1,6 +1,8 @@
 RUN: yaml2obj %p/Inputs/COFF/i386.yaml | llvm-objdump -s - | FileCheck %s -check-prefix COFF-i386
 RUN: llvm-objdump -s %p/Inputs/trivial-object-test.elf-i386 \
 RUN:              | FileCheck %s -check-prefix ELF-i386
+RUN: llvm-objdump -s %p/Inputs/shared-object-test.elf-i386 \
+RUN:              | FileCheck %s -check-prefix BSS
 
 COFF-i386: file format
 COFF-i386: Contents of section .text:
@@ -17,3 +19,6 @@ ELF-i386:  0010 0000e8fc ffffffe8 fcffffff 8b442408  .............D$.
 ELF-i386:  0020 83c40cc3                             ....
 ELF-i386: Contents of section .rodata.str1.1:
 ELF-i386:  0024 48656c6c 6f20576f 726c6421 00        Hello World!.
+
+BSS: Contents of section .bss:
+BSS-NEXT: <skipping contents of bss section at [12c8, 12cc)>
diff --git a/test/Object/relocation-executable.test b/test/Object/relocation-executable.test
new file mode 100644
index 0000000..98f5b4e
--- /dev/null
+++ b/test/Object/relocation-executable.test
@@ -0,0 +1,18 @@
+RUN: llvm-readobj -r -expand-relocs %p/Inputs/hello-world.elf-x86-64 \
+RUN:   | FileCheck %s
+
+// CHECK:     Relocations [
+// CHECK:       Section (11) .plt {
+// CHECK-NEXT:    Relocation {
+// CHECK-NEXT:      Offset: 0x4018F8
+// CHECK-NEXT:      Type: R_X86_64_JUMP_SLOT (7)
+// CHECK-NEXT:      Symbol: __libc_start_main
+// CHECK-NEXT:      Info: 0x0
+// CHECK-NEXT:    }
+// CHECK-NEXT:    Relocation {
+// CHECK-NEXT:      Offset: 0x401900
+// CHECK-NEXT:      Type: R_X86_64_JUMP_SLOT (7)
+// CHECK-NEXT:      Symbol: puts
+// CHECK-NEXT:      Info: 0x0
+// CHECK-NEXT:    }
+// CHECK-NEXT:  }
diff --git a/test/Object/yaml2obj-readobj.test b/test/Object/yaml2obj-readobj.test
index 545ccc4..3031f5e 100644
--- a/test/Object/yaml2obj-readobj.test
+++ b/test/Object/yaml2obj-readobj.test
@@ -1,5 +1,25 @@
-RUN: yaml2obj %p/Inputs/COFF/i386.yaml | llvm-readobj -file-headers - | FileCheck %s --check-prefix COFF-I386
+RUN: yaml2obj %p/Inputs/COFF/i386.yaml | llvm-readobj -file-headers -relocations -expand-relocs - | FileCheck %s --check-prefix COFF-I386
 
 // COFF-I386:  Characteristics [ (0x200)
 // COFF-I386-NEXT:    IMAGE_FILE_DEBUG_STRIPPED (0x200)
 // COFF-I386-NEXT:  ]
+
+// COFF-I386:      Relocations [
+// COFF-I386-NEXT:   Section (1) .text {
+// COFF-I386-NEXT:     Relocation {
+// COFF-I386-NEXT:       Offset: 0xE
+// COFF-I386-NEXT:       Type: IMAGE_REL_I386_DIR32 (6)
+// COFF-I386-NEXT:       Symbol: L_.str
+// COFF-I386-NEXT:     }
+// COFF-I386-NEXT:     Relocation {
+// COFF-I386-NEXT:       Offset: 0x13
+// COFF-I386-NEXT:       Type: IMAGE_REL_I386_REL32 (20)
+// COFF-I386-NEXT:       Symbol: _puts
+// COFF-I386-NEXT:     }
+// COFF-I386-NEXT:     Relocation {
+// COFF-I386-NEXT:       Offset: 0x18
+// COFF-I386-NEXT:       Type: IMAGE_REL_I386_REL32 (20)
+// COFF-I386-NEXT:       Symbol: _SomeOtherFunction
+// COFF-I386-NEXT:     }
+// COFF-I386-NEXT:   }
+// COFF-I386-NEXT: ]
diff --git a/test/Other/attribute-comment.ll b/test/Other/attribute-comment.ll
new file mode 100644
index 0000000..7354e7f
--- /dev/null
+++ b/test/Other/attribute-comment.ll
@@ -0,0 +1,9 @@
+; RUN: opt -S < %s | FileCheck %s -strict-whitespace
+
+; CHECK: {{^}}; Function Attrs: nounwind readnone ssp uwtable{{$}}
+; CHECK-NEXT: define void @test1() #0
+define void @test1() #0 {
+  ret void
+}
+
+attributes #0 = { nounwind ssp "less-precise-fpmad"="false" uwtable "no-frame-pointer-elim"="true" readnone "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/Scripts/README.txt b/test/Scripts/README.txt
deleted file mode 100644
index b0b1105..0000000
--- a/test/Scripts/README.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-This directory contains scripts which are used by the TestRunner style
-tests, which allows them to be simpler and more direct.
diff --git a/test/Scripts/coff-dump.py b/test/Scripts/coff-dump.py
deleted file mode 100755
index 36ec539..0000000
--- a/test/Scripts/coff-dump.py
+++ /dev/null
@@ -1,590 +0,0 @@
-#!/usr/bin/env python
-#===-- coff-dump.py - COFF object file dump utility-------------------------===#
-#
-#                     The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-#===------------------------------------------------------------------------===#
-
-#
-# COFF File Definition
-#
-
-def string_table_entry (offset):
-  return ('ptr', '+ + PointerToSymbolTable * NumberOfSymbols 18 %s' % offset, ('scalar', 'cstr', '%s'))
-
-def secname(value):
-  if value[0] == '/':
-    return string_table_entry(value[1:].rstrip('\0'))
-  else:
-    return '%s'
-
-def symname(value):
-  parts = struct.unpack("<2L", value)
-  if parts[0] == 0:
-    return string_table_entry(parts[1])
-  else:
-    return '%s'
-
-file = ('struct', [
-  ('MachineType', ('enum', '<H', '0x%X', {
-    0x0:    'IMAGE_FILE_MACHINE_UNKNOWN',
-    0x1d3:  'IMAGE_FILE_MACHINE_AM33',
-    0x8664: 'IMAGE_FILE_MACHINE_AMD64',
-    0x1c0:  'IMAGE_FILE_MACHINE_ARM',
-    0xebc:  'IMAGE_FILE_MACHINE_EBC',
-    0x14c:  'IMAGE_FILE_MACHINE_I386',
-    0x200:  'IMAGE_FILE_MACHINE_IA64',
-    0x904:  'IMAGE_FILE_MACHINE_M32R',
-    0x266:  'IMAGE_FILE_MACHINE_MIPS16',
-    0x366:  'IMAGE_FILE_MACHINE_MIPSFPU',
-    0x466:  'IMAGE_FILE_MACHINE_MIPSFPU16',
-    0x1f0:  'IMAGE_FILE_MACHINE_POWERPC',
-    0x1f1:  'IMAGE_FILE_MACHINE_POWERPCFP',
-    0x166:  'IMAGE_FILE_MACHINE_R4000',
-    0x1a2:  'IMAGE_FILE_MACHINE_SH3',
-    0x1a3:  'IMAGE_FILE_MACHINE_SH3DSP',
-    0x1a6:  'IMAGE_FILE_MACHINE_SH4',
-    0x1a8:  'IMAGE_FILE_MACHINE_SH5',
-    0x1c2:  'IMAGE_FILE_MACHINE_THUMB',
-    0x169:  'IMAGE_FILE_MACHINE_WCEMIPSV2',
-  })),
-  ('NumberOfSections',     ('scalar',  '<H', '%d')),
-  ('TimeDateStamp',        ('scalar',  '<L', '%d')),
-  ('PointerToSymbolTable', ('scalar',  '<L', '0x%0X')),
-  ('NumberOfSymbols',      ('scalar',  '<L', '%d')),
-  ('SizeOfOptionalHeader', ('scalar',  '<H', '%d')),
-  ('Characteristics',      ('flags',   '<H', '0x%x', [
-    (0x0001,      'IMAGE_FILE_RELOCS_STRIPPED',         ),
-    (0x0002,      'IMAGE_FILE_EXECUTABLE_IMAGE',        ),
-    (0x0004,      'IMAGE_FILE_LINE_NUMS_STRIPPED',      ),
-    (0x0008,      'IMAGE_FILE_LOCAL_SYMS_STRIPPED',     ),
-    (0x0010,      'IMAGE_FILE_AGGRESSIVE_WS_TRIM',      ),
-    (0x0020,      'IMAGE_FILE_LARGE_ADDRESS_AWARE',     ),
-    (0x0080,      'IMAGE_FILE_BYTES_REVERSED_LO',       ),
-    (0x0100,      'IMAGE_FILE_32BIT_MACHINE',           ),
-    (0x0200,      'IMAGE_FILE_DEBUG_STRIPPED',          ),
-    (0x0400,      'IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP', ),
-    (0x0800,      'IMAGE_FILE_NET_RUN_FROM_SWAP',       ),
-    (0x1000,      'IMAGE_FILE_SYSTEM',                  ),
-    (0x2000,      'IMAGE_FILE_DLL',                     ),
-    (0x4000,      'IMAGE_FILE_UP_SYSTEM_ONLY',          ),
-    (0x8000,      'IMAGE_FILE_BYTES_REVERSED_HI',       ),
-  ])),
-  ('Sections', ('array', '1', 'NumberOfSections', ('struct', [
-    ('Name',                 ('scalar',  '<8s', secname)),
-    ('VirtualSize',          ('scalar',  '<L',  '%d'   )),
-    ('VirtualAddress',       ('scalar',  '<L',  '%d'   )),
-    ('SizeOfRawData',        ('scalar',  '<L',  '%d'   )),
-    ('PointerToRawData',     ('scalar',  '<L',  '0x%X' )),
-    ('PointerToRelocations', ('scalar',  '<L',  '0x%X' )),
-    ('PointerToLineNumbers', ('scalar',  '<L',  '0x%X' )),
-    ('NumberOfRelocations',  ('scalar',  '<H',  '%d'   )),
-    ('NumberOfLineNumbers',  ('scalar',  '<H',  '%d'   )),
-    ('Charateristics',       ('flags',   '<L',  '0x%X', [
-      (0x00000008, 'IMAGE_SCN_TYPE_NO_PAD'),
-      (0x00000020, 'IMAGE_SCN_CNT_CODE'),
-      (0x00000040, 'IMAGE_SCN_CNT_INITIALIZED_DATA'),
-      (0x00000080, 'IMAGE_SCN_CNT_UNINITIALIZED_DATA'),
-      (0x00000100, 'IMAGE_SCN_LNK_OTHER'),
-      (0x00000200, 'IMAGE_SCN_LNK_INFO'),
-      (0x00000800, 'IMAGE_SCN_LNK_REMOVE'),
-      (0x00001000, 'IMAGE_SCN_LNK_COMDAT'),
-      (0x00008000, 'IMAGE_SCN_GPREL'),
-      (0x00020000, 'IMAGE_SCN_MEM_PURGEABLE'),
-      (0x00020000, 'IMAGE_SCN_MEM_16BIT'),
-      (0x00040000, 'IMAGE_SCN_MEM_LOCKED'),
-      (0x00080000, 'IMAGE_SCN_MEM_PRELOAD'),
-      (0x00F00000, 'IMAGE_SCN_ALIGN', {
-        0x00100000: 'IMAGE_SCN_ALIGN_1BYTES',
-        0x00200000: 'IMAGE_SCN_ALIGN_2BYTES',
-        0x00300000: 'IMAGE_SCN_ALIGN_4BYTES',
-        0x00400000: 'IMAGE_SCN_ALIGN_8BYTES',
-        0x00500000: 'IMAGE_SCN_ALIGN_16BYTES',
-        0x00600000: 'IMAGE_SCN_ALIGN_32BYTES',
-        0x00700000: 'IMAGE_SCN_ALIGN_64BYTES',
-        0x00800000: 'IMAGE_SCN_ALIGN_128BYTES',
-        0x00900000: 'IMAGE_SCN_ALIGN_256BYTES',
-        0x00A00000: 'IMAGE_SCN_ALIGN_512BYTES',
-        0x00B00000: 'IMAGE_SCN_ALIGN_1024BYTES',
-        0x00C00000: 'IMAGE_SCN_ALIGN_2048BYTES',
-        0x00D00000: 'IMAGE_SCN_ALIGN_4096BYTES',
-        0x00E00000: 'IMAGE_SCN_ALIGN_8192BYTES',
-      }),
-      (0x01000000, 'IMAGE_SCN_LNK_NRELOC_OVFL'),
-      (0x02000000, 'IMAGE_SCN_MEM_DISCARDABLE'),
-      (0x04000000, 'IMAGE_SCN_MEM_NOT_CACHED'),
-      (0x08000000, 'IMAGE_SCN_MEM_NOT_PAGED'),
-      (0x10000000, 'IMAGE_SCN_MEM_SHARED'),
-      (0x20000000, 'IMAGE_SCN_MEM_EXECUTE'),
-      (0x40000000, 'IMAGE_SCN_MEM_READ'),
-      (0x80000000, 'IMAGE_SCN_MEM_WRITE'),
-    ])),
-    ('SectionData', ('ptr', 'PointerToRawData', ('blob', 'SizeOfRawData'))),
-    ('Relocations', ('ptr', 'PointerToRelocations', ('array', '0', 'NumberOfRelocations', ('struct', [
-      ('VirtualAddress',   ('scalar', '<L', '0x%X')),
-      ('SymbolTableIndex', ('scalar', '<L', '%d'  )),
-      ('Type',             ('enum', '<H', '%d', ('MachineType', {
-        0x14c: {
-          0x0000: 'IMAGE_REL_I386_ABSOLUTE',
-          0x0001: 'IMAGE_REL_I386_DIR16',
-          0x0002: 'IMAGE_REL_I386_REL16',
-          0x0006: 'IMAGE_REL_I386_DIR32',
-          0x0007: 'IMAGE_REL_I386_DIR32NB',
-          0x0009: 'IMAGE_REL_I386_SEG12',
-          0x000A: 'IMAGE_REL_I386_SECTION',
-          0x000B: 'IMAGE_REL_I386_SECREL',
-          0x000C: 'IMAGE_REL_I386_TOKEN',
-          0x000D: 'IMAGE_REL_I386_SECREL7',
-          0x0014: 'IMAGE_REL_I386_REL32',
-        },
-        0x8664: {
-          0x0000: 'IMAGE_REL_AMD64_ABSOLUTE',
-          0x0001: 'IMAGE_REL_AMD64_ADDR64',
-          0x0002: 'IMAGE_REL_AMD64_ADDR32',
-          0x0003: 'IMAGE_REL_AMD64_ADDR32NB',
-          0x0004: 'IMAGE_REL_AMD64_REL32',
-          0x0005: 'IMAGE_REL_AMD64_REL32_1',
-          0x0006: 'IMAGE_REL_AMD64_REL32_2',
-          0x0007: 'IMAGE_REL_AMD64_REL32_3',
-          0x0008: 'IMAGE_REL_AMD64_REL32_4',
-          0x0009: 'IMAGE_REL_AMD64_REL32_5',
-          0x000A: 'IMAGE_REL_AMD64_SECTION',
-          0x000B: 'IMAGE_REL_AMD64_SECREL',
-          0x000C: 'IMAGE_REL_AMD64_SECREL7',
-          0x000D: 'IMAGE_REL_AMD64_TOKEN',
-          0x000E: 'IMAGE_REL_AMD64_SREL32',
-          0x000F: 'IMAGE_REL_AMD64_PAIR',
-          0x0010: 'IMAGE_REL_AMD64_SSPAN32',
-        },
-      }))),
-      ('SymbolName',       ('ptr', '+ PointerToSymbolTable * SymbolTableIndex 18', ('scalar',  '<8s', symname)))
-    ])))),
-  ]))),
-  ('Symbols', ('ptr', 'PointerToSymbolTable', ('byte-array', '18', '* NumberOfSymbols 18',  ('struct', [
-    ('Name',                ('scalar',  '<8s', symname)),
-    ('Value',               ('scalar',  '<L',  '%d'   )),
-    ('SectionNumber',       ('scalar',  '<H',  '%d'   )),
-    ('_Type',               ('scalar',  '<H',  None   )),
-    ('SimpleType',          ('enum',    '& _Type 15',  '%d', {
-      0: 'IMAGE_SYM_TYPE_NULL',
-      1: 'IMAGE_SYM_TYPE_VOID',
-      2: 'IMAGE_SYM_TYPE_CHAR',
-      3: 'IMAGE_SYM_TYPE_SHORT',
-      4: 'IMAGE_SYM_TYPE_INT',
-      5: 'IMAGE_SYM_TYPE_LONG',
-      6: 'IMAGE_SYM_TYPE_FLOAT',
-      7: 'IMAGE_SYM_TYPE_DOUBLE',
-      8: 'IMAGE_SYM_TYPE_STRUCT',
-      9: 'IMAGE_SYM_TYPE_UNION',
-      10: 'IMAGE_SYM_TYPE_ENUM',
-      11: 'IMAGE_SYM_TYPE_MOE',
-      12: 'IMAGE_SYM_TYPE_BYTE',
-      13: 'IMAGE_SYM_TYPE_WORD',
-      14: 'IMAGE_SYM_TYPE_UINT',
-      15: 'IMAGE_SYM_TYPE_DWORD',
-    })),                                # (Type & 0xF0) >> 4
-    ('ComplexType',         ('enum',    '>> & _Type 240 4',  '%d', {
-      0: 'IMAGE_SYM_DTYPE_NULL',
-      1: 'IMAGE_SYM_DTYPE_POINTER',
-      2: 'IMAGE_SYM_DTYPE_FUNCTION',
-      3: 'IMAGE_SYM_DTYPE_ARRAY',
-    })),
-    ('StorageClass',        ('enum',    '<B',  '%d', {
-      -1:  'IMAGE_SYM_CLASS_END_OF_FUNCTION',
-      0: 'IMAGE_SYM_CLASS_NULL',
-      1: 'IMAGE_SYM_CLASS_AUTOMATIC',
-      2: 'IMAGE_SYM_CLASS_EXTERNAL',
-      3: 'IMAGE_SYM_CLASS_STATIC',
-      4: 'IMAGE_SYM_CLASS_REGISTER',
-      5: 'IMAGE_SYM_CLASS_EXTERNAL_DEF',
-      6: 'IMAGE_SYM_CLASS_LABEL',
-      7: 'IMAGE_SYM_CLASS_UNDEFINED_LABEL',
-      8: 'IMAGE_SYM_CLASS_MEMBER_OF_STRUCT',
-      9: 'IMAGE_SYM_CLASS_ARGUMENT',
-      10: 'IMAGE_SYM_CLASS_STRUCT_TAG',
-      11: 'IMAGE_SYM_CLASS_MEMBER_OF_UNION',
-      12: 'IMAGE_SYM_CLASS_UNION_TAG',
-      13: 'IMAGE_SYM_CLASS_TYPE_DEFINITION',
-      14: 'IMAGE_SYM_CLASS_UNDEFINED_STATIC',
-      15: 'IMAGE_SYM_CLASS_ENUM_TAG',
-      16: 'IMAGE_SYM_CLASS_MEMBER_OF_ENUM',
-      17: 'IMAGE_SYM_CLASS_REGISTER_PARAM',
-      18: 'IMAGE_SYM_CLASS_BIT_FIELD',
-      100: 'IMAGE_SYM_CLASS_BLOCK',
-      101: 'IMAGE_SYM_CLASS_FUNCTION',
-      102: 'IMAGE_SYM_CLASS_END_OF_STRUCT',
-      103: 'IMAGE_SYM_CLASS_FILE',
-      104: 'IMAGE_SYM_CLASS_SECTION',
-      105: 'IMAGE_SYM_CLASS_WEAK_EXTERNAL',
-      107: 'IMAGE_SYM_CLASS_CLR_TOKEN',
-    })),
-    ('NumberOfAuxSymbols',  ('scalar',  '<B',  '%d'  )),
-    ('AuxillaryData', ('blob', '* NumberOfAuxSymbols 18')),
-  ])))),
-])
-
-#
-# Definition Interpreter
-#
-
-import sys, types, struct, re
-
-Input = None
-Stack = []
-Fields = {}
-
-Indent = 0
-NewLine = True
-
-def indent():
-  global Indent
-  Indent += 1
-
-def dedent():
-  global Indent
-  Indent -= 1
-
-def write(input):
-  global NewLine
-  output = ""
-
-  for char in input:
-
-    if NewLine:
-      output += Indent * '  '
-      NewLine = False
-
-    output += char
-
-    if char == '\n':
-      NewLine = True
-
-  sys.stdout.write(output)
-
-def read(format):
-  return struct.unpack(format, Input.read(struct.calcsize(format)))
-
-def read_cstr():
-  output = ""
-  while True:
-    char = Input.read(1)
-    if len(char) == 0:
-      raise RuntimeError ("EOF while reading cstr")
-    if char == '\0':
-      break
-    output += char
-  return output
-
-def push_pos(seek_to = None):
-  Stack [0:0] = [Input.tell()]
-  if seek_to:
-    Input.seek(seek_to)
-
-def pop_pos():
-  assert(len(Stack) > 0)
-  Input.seek(Stack[0])
-  del Stack[0]
-
-def print_binary_data(size):
-  value = ""
-  while size > 0:
-    if size >= 16:
-      data = Input.read(16)
-      size -= 16
-    else:
-      data = Input.read(size)
-      size = 0
-    value += data
-    bytes = ""
-    text = ""
-    for index in xrange(16):
-      if index < len(data):
-        if index == 8:
-          bytes += "- "
-        ch = ord(data[index])
-        bytes += "%02X " % ch
-        if ch >= 0x20 and ch <= 0x7F:
-          text += data[index]
-        else:
-          text += "."
-      else:
-        if index == 8:
-          bytes += "  "
-        bytes += "   "
-
-    write("%s|%s|\n" % (bytes, text))
-  return value
-
-idlit = re.compile("[a-zA-Z_][a-zA-Z0-9_-]*")
-numlit = re.compile("[0-9]+")
-
-def read_value(expr):
-
-  input = iter(expr.split())
-
-  def eval():
-
-    token = input.next()
-
-    if expr == 'cstr':
-      return read_cstr()
-    if expr == 'true':
-      return True
-    if expr == 'false':
-      return False
-
-    if token == '+':
-      return eval() + eval()
-    if token == '-':
-      return eval() - eval()
-    if token == '*':
-      return eval() * eval()
-    if token == '/':
-      return eval() / eval()
-    if token == '&':
-      return eval() & eval()
-    if token == '|':
-      return eval() | eval()
-    if token == '>>':
-      return eval() >> eval()
-    if token == '<<':
-      return eval() << eval()
-
-    if len(token) > 1 and token[0] in ('=', '@', '<', '!', '>'):
-      val = read(expr)
-      assert(len(val) == 1)
-      return val[0]
-
-    if idlit.match(token):
-      return Fields[token]
-    if numlit.match(token):
-      return int(token)
-
-    raise RuntimeError("unexpected token %s" % repr(token))
-
-  value = eval()
-
-  try:
-    input.next()
-  except StopIteration:
-    return value
-  raise RuntimeError("unexpected input at end of expression")
-
-def write_value(format,value):
-  format_type = type(format)
-  if format_type is types.StringType:
-    write(format % value)
-  elif format_type is types.FunctionType:
-    write_value(format(value), value)
-  elif format_type is types.TupleType:
-    Fields['this'] = value
-    handle_element(format)
-  elif format_type is types.NoneType:
-    pass
-  else:
-    raise RuntimeError("unexpected type: %s" % repr(format_type))
-
-def handle_scalar(entry):
-  iformat = entry[1]
-  oformat = entry[2]
-
-  value = read_value(iformat)
-
-  write_value(oformat, value)
-
-  return value
-
-def handle_enum(entry):
-  iformat = entry[1]
-  oformat = entry[2]
-  definitions = entry[3]
-
-  value = read_value(iformat)
-
-  if type(definitions) is types.TupleType:
-    selector = read_value(definitions[0])
-    definitions = definitions[1][selector]
-
-  if value in definitions:
-    description = definitions[value]
-  else:
-    description = "unknown"
-
-  write("%s (" % description)
-  write_value(oformat, value)
-  write(")")
-
-  return value
-
-def handle_flags(entry):
-  iformat = entry[1]
-  oformat = entry[2]
-  definitions = entry[3]
-
-  value = read_value(iformat)
-
-  write_value(oformat, value)
-
-  indent()
-  for entry in definitions:
-    mask = entry[0]
-    name = entry[1]
-    if len (entry) == 3:
-      map = entry[2]
-      selection = value & mask
-      if selection in map:
-        write("\n%s" % map[selection])
-      else:
-        write("\n%s <%d>" % (name, selection))
-    elif len(entry) == 2:
-      if value & mask != 0:
-        write("\n%s" % name)
-  dedent()
-
-  return value
-
-def handle_struct(entry):
-  global Fields
-  members = entry[1]
-
-  newFields = {}
-
-  write("{\n");
-  indent()
-
-  for member in members:
-    name = member[0]
-    type = member[1]
-
-    if name[0] != "_":
-      write("%s = " % name.ljust(24))
-
-    value = handle_element(type)
-
-    if name[0] != "_":
-      write("\n")
-
-    Fields[name] = value
-    newFields[name] = value
-
-  dedent()
-  write("}")
-
-  return newFields
-
-def handle_array(entry):
-  start_index = entry[1]
-  length = entry[2]
-  element = entry[3]
-
-  newItems = []
-
-  write("[\n")
-  indent()
-
-  start_index = read_value(start_index)
-  value = read_value(length)
-
-  for index in xrange(value):
-    write("%d = " % (index + start_index))
-    value = handle_element(element)
-    write("\n")
-    newItems.append(value)
-
-  dedent()
-  write("]")
-
-  return newItems
-
-def handle_byte_array(entry):
-  ent_size = entry[1]
-  length = entry[2]
-  element = entry[3]
-
-  newItems = []
-
-  write("[\n")
-  indent()
-
-  item_size = read_value(ent_size)
-  value = read_value(length)
-  end_of_array = Input.tell() + value
-
-  prev_loc = Input.tell()
-  index = 0
-  while Input.tell() < end_of_array:
-    write("%d = " % index)
-    value = handle_element(element)
-    write("\n")
-    newItems.append(value)
-    index += (Input.tell() - prev_loc) / item_size
-    prev_loc = Input.tell()
-
-  dedent()
-  write("]")
-
-  return newItems
-
-def handle_ptr(entry):
-  offset = entry[1]
-  element = entry[2]
-
-  value = None
-  offset = read_value(offset)
-
-  if offset != 0:
-
-    push_pos(offset)
-
-    value = handle_element(element)
-
-    pop_pos()
-
-  else:
-    write("None")
-
-  return value
-
-def handle_blob(entry):
-  length = entry[1]
-
-  write("\n")
-  indent()
-
-  value = print_binary_data(read_value(length))
-
-  dedent()
-
-  return value
-
-def handle_element(entry):
-  handlers = {
-    'struct':      handle_struct,
-    'scalar':      handle_scalar,
-    'enum':        handle_enum,
-    'flags':       handle_flags,
-    'ptr':         handle_ptr,
-    'blob':        handle_blob,
-    'array':       handle_array,
-    'byte-array':  handle_byte_array,
-  }
-
-  if not entry[0] in handlers:
-    raise RuntimeError ("unexpected type '%s'" % str (entry[0]))
-
-  return handlers[entry[0]](entry)
-
-if len(sys.argv) <= 1 or sys.argv[1] == '-':
-  import StringIO
-  Input = StringIO.StringIO(sys.stdin.read())
-else:
-  Input = open (sys.argv[1], "rb")
-
-try:
-  handle_element(file)
-finally:
-  Input.close()
-  Input = None
diff --git a/test/Scripts/coff-dump.py.bat b/test/Scripts/coff-dump.py.bat
deleted file mode 100644
index 56428e1..0000000
--- a/test/Scripts/coff-dump.py.bat
+++ /dev/null
@@ -1,7 +0,0 @@
-@echo off
-
-@rem We need to set -u to treat stdin as binary. Python 3 has support for doing
-@rem this in code, but I haven't found a way to do this in 2.6 yet.
-
-%PYTHON_EXECUTABLE% -u %LLVM_SRC_ROOT%\test\Scripts\coff-dump.py %1 %2 %3 %4 %5 %6 %7 %8 %9
-
diff --git a/test/Scripts/common_dump.py b/test/Scripts/common_dump.py
deleted file mode 100644
index fd58993..0000000
--- a/test/Scripts/common_dump.py
+++ /dev/null
@@ -1,48 +0,0 @@
-def dataToHex(d):
-    """ Convert the raw data in 'd' to an hex string with a space every 4 bytes.
-    """
-    bytes = []
-    for i,c in enumerate(d):
-        byte = ord(c)
-        hex_byte = hex(byte)[2:]
-        if byte <= 0xf:
-            hex_byte = '0' + hex_byte
-        if i % 4 == 3:
-            hex_byte += ' '
-        bytes.append(hex_byte)
-    return ''.join(bytes).strip()
-
-def dataToHexUnified(d):
-    """ Convert the raw data in 'd' to an hex string with a space every 4 bytes.
-    Each 4byte number is prefixed with 0x for easy sed/rx
-    Fixme: convert all MC tests to use this routine instead of the above
-    """
-    bytes = []
-    for i,c in enumerate(d):
-        byte = ord(c)
-        hex_byte = hex(byte)[2:]
-        if byte <= 0xf:
-            hex_byte = '0' + hex_byte
-        if i % 4 == 0:
-            hex_byte = '0x' + hex_byte
-        if i % 4 == 3:
-            hex_byte += ' '
-        bytes.append(hex_byte)
-    return ''.join(bytes).strip()
-
-
-def HexDump(valPair):
-    """
-    1. do not print 'L'
-    2. Handle negatives and large numbers by mod (2^numBits)
-    3. print fixed length, prepend with zeros.
-       Length is exactly 2+(numBits/4)
-    4. Do print 0x Why?
-       so that they can be easily distinguished using sed/rx
-    """
-    val, numBits = valPair
-    assert 0 <= val < (1 << numBits)
-
-    val = val & (( 1 << numBits) - 1)
-    newFmt = "0x%0" + "%d" % (numBits / 4) + "x"
-    return newFmt % val
diff --git a/test/Scripts/elf-dump b/test/Scripts/elf-dump
deleted file mode 100755
index 61342d8..0000000
--- a/test/Scripts/elf-dump
+++ /dev/null
@@ -1,285 +0,0 @@
-#!/usr/bin/env python
-
-import struct
-import sys
-import StringIO
-
-import common_dump
-
-class Reader:
-    def __init__(self, path):
-        if path == "-":
-            # Snarf all the data so we can seek.
-            self.file = StringIO.StringIO(sys.stdin.read())
-        else:
-            self.file = open(path, "rb")
-        self.isLSB = None
-        self.is64Bit = None
-        self.isN64 = False
-
-    def seek(self, pos):
-        self.file.seek(pos)
-
-    def read(self, N):
-        data = self.file.read(N)
-        if len(data) != N:
-            raise ValueError, "Out of data!"
-        return data
-
-    def read8(self):
-        return (ord(self.read(1)), 8)
-
-    def read16(self):
-        return (struct.unpack('><'[self.isLSB] + 'H', self.read(2))[0], 16)
-
-    def read32(self):
-        return (struct.unpack('><'[self.isLSB] + 'I', self.read(4))[0], 32)
-
-    def read64(self):
-        return (struct.unpack('><'[self.isLSB] + 'Q', self.read(8))[0], 64)
-
-    def readWord(self):
-        if self.is64Bit:
-            return self.read64()
-        else:
-            return self.read32()
-
-class StringTable:
-    def __init__(self, strings):
-       self.string_table = strings
-
-    def __getitem__(self, index):
-       end = self.string_table.index('\x00', index)
-       return self.string_table[index:end]
-
-class ProgramHeader:
-    def __init__(self, f):
-        self.p_type = f.read32()
-        if f.is64Bit:
-            self.p_flags = f.read32()
-        self.p_offset = f.readWord()
-        self.p_vaddr = f.readWord()
-        self.p_paddr = f.readWord()
-        self.p_filesz = f.readWord()
-        self.p_memsz = f.readWord()
-        if not f.is64Bit:
-            self.p_flags = f.read32()
-        self.p_align = f.readWord()
-
-    def dump(self):
-        print "  (('p_type', %s)" % common_dump.HexDump(self.p_type) 
-        print "   ('p_flags', %s)" % common_dump.HexDump(self.p_flags) 
-        print "   ('p_offset', %s)" % common_dump.HexDump(self.p_offset) 
-        print "   ('p_vaddr', %s)" % common_dump.HexDump(self.p_vaddr) 
-        print "   ('p_paddr', %s)" % common_dump.HexDump(self.p_paddr) 
-        print "   ('p_filesz', %s)" % common_dump.HexDump(self.p_filesz) 
-        print "   ('p_memsz', %s)" % common_dump.HexDump(self.p_memsz) 
-        print "   ('p_align', %s)" % common_dump.HexDump(self.p_align) 
-        print "  ),"
-
-class Section:
-    def __init__(self, f):
-        self.sh_name = f.read32()
-        self.sh_type = f.read32()
-        self.sh_flags = f.readWord()
-        self.sh_addr = f.readWord()
-        self.sh_offset = f.readWord()
-        self.sh_size = f.readWord()
-        self.sh_link = f.read32()
-        self.sh_info = f.read32()
-        self.sh_addralign = f.readWord()
-        self.sh_entsize = f.readWord()
-
-    def dump(self, shstrtab, f, strtab, dumpdata):
-        print "  (('sh_name', %s)" % common_dump.HexDump(self.sh_name), "# %r" % shstrtab[self.sh_name[0]]
-        print "   ('sh_type', %s)" % common_dump.HexDump(self.sh_type)
-        print "   ('sh_flags', %s)" % common_dump.HexDump(self.sh_flags)
-        print "   ('sh_addr', %s)" % common_dump.HexDump(self.sh_addr)
-        print "   ('sh_offset', %s)" % common_dump.HexDump(self.sh_offset)
-        print "   ('sh_size', %s)" % common_dump.HexDump(self.sh_size)
-        print "   ('sh_link', %s)" % common_dump.HexDump(self.sh_link)
-        print "   ('sh_info', %s)" % common_dump.HexDump(self.sh_info)
-        print "   ('sh_addralign', %s)" % common_dump.HexDump(self.sh_addralign)
-        print "   ('sh_entsize', %s)" % common_dump.HexDump(self.sh_entsize)
-        if self.sh_type[0] == 2: # SHT_SYMTAB
-            print "   ('_symbols', ["
-            dumpSymtab(f, self, strtab)
-            print "   ])"
-        elif self.sh_type[0] == 4 or self.sh_type[0] == 9: # SHT_RELA / SHT_REL
-            print "   ('_relocations', ["
-            dumpRel(f, self, self.sh_type[0] == 4)
-            print "   ])"
-        elif dumpdata:
-            f.seek(self.sh_offset[0])
-            if self.sh_type != 8: # != SHT_NOBITS
-                data = f.read(self.sh_size[0])
-                print "   ('_section_data', '%s')" % common_dump.dataToHex(data)
-            else:
-                print "   ('_section_data', '')" 
-        print "  ),"
-
-def dumpSymtab(f, section, strtab):
-    entries = section.sh_size[0] // section.sh_entsize[0]
-
-    for index in range(entries):
-        f.seek(section.sh_offset[0] + index * section.sh_entsize[0])
-        print "    # Symbol %s" % index
-        name = f.read32()
-        print "    (('st_name', %s)" % common_dump.HexDump(name), "# %r" % strtab[name[0]]
-        if not f.is64Bit:
-            print "     ('st_value', %s)" % common_dump.HexDump(f.read32())
-            print "     ('st_size', %s)" % common_dump.HexDump(f.read32())
-        st_info = f.read8()[0]
-        st_bind = (st_info >> 4, 4)
-        st_type = (st_info & 0xf, 4)
-        print "     ('st_bind', %s)" % common_dump.HexDump(st_bind)
-        print "     ('st_type', %s)" % common_dump.HexDump(st_type)
-        print "     ('st_other', %s)" % common_dump.HexDump(f.read8())
-        print "     ('st_shndx', %s)" % common_dump.HexDump(f.read16())
-        if f.is64Bit:
-            print "     ('st_value', %s)" % common_dump.HexDump(f.read64())
-            print "     ('st_size', %s)" % common_dump.HexDump(f.read64())
-        print "    ),"
-
-def dumpRel(f, section, dumprela = False):
-    entries = section.sh_size[0] // section.sh_entsize[0]
-
-    for index in range(entries):
-        f.seek(section.sh_offset[0] + index * section.sh_entsize[0])
-        print "    # Relocation %s" % index
-        print "    (('r_offset', %s)" % common_dump.HexDump(f.readWord())
-
-        if f.isN64:
-            r_sym =   f.read32()
-            r_ssym =  f.read8()
-            r_type3 = f.read8()
-            r_type2 = f.read8()
-            r_type =  f.read8()
-            print "     ('r_sym', %s)" % common_dump.HexDump(r_sym)
-            print "     ('r_ssym', %s)" % common_dump.HexDump(r_ssym)
-            print "     ('r_type3', %s)" % common_dump.HexDump(r_type3)
-            print "     ('r_type2', %s)" % common_dump.HexDump(r_type2)
-            print "     ('r_type', %s)" % common_dump.HexDump(r_type)
-        else:
-            r_info = f.readWord()[0]
-            if f.is64Bit:
-                r_sym = (r_info >> 32, 32)
-                r_type = (r_info & 0xffffffff, 32)
-            else:
-                r_sym = (r_info >> 8, 24)
-                r_type = (r_info & 0xff, 8)
-            print "     ('r_sym', %s)" % common_dump.HexDump(r_sym)
-            print "     ('r_type', %s)" % common_dump.HexDump(r_type)
-        if dumprela:
-            print "     ('r_addend', %s)" % common_dump.HexDump(f.readWord())
-        print "    ),"
-
-def dumpELF(path, opts):
-    f = Reader(path)
-
-    magic = f.read(4)
-    assert magic == '\x7FELF'
-
-    fileclass = f.read8()
-    if fileclass[0] == 1: # ELFCLASS32
-        f.is64Bit = False
-    elif fileclass[0] == 2: # ELFCLASS64
-        f.is64Bit = True
-    else:
-        raise ValueError, "Unknown file class %s" % common_dump.HexDump(fileclass)
-    print "('e_indent[EI_CLASS]', %s)" % common_dump.HexDump(fileclass)
-
-    byteordering = f.read8()
-    if byteordering[0] == 1: # ELFDATA2LSB
-        f.isLSB = True
-    elif byteordering[0] == 2: # ELFDATA2MSB
-        f.isLSB = False
-    else:
-        raise ValueError, "Unknown byte ordering %s" % common_dump.HexDump(byteordering)
-    print "('e_indent[EI_DATA]', %s)" % common_dump.HexDump(byteordering)
-
-    print "('e_indent[EI_VERSION]', %s)" % common_dump.HexDump(f.read8())
-    print "('e_indent[EI_OSABI]', %s)" % common_dump.HexDump(f.read8())
-    print "('e_indent[EI_ABIVERSION]', %s)" % common_dump.HexDump(f.read8())
-
-    f.seek(16) # Seek to end of e_ident.
-
-    print "('e_type', %s)" % common_dump.HexDump(f.read16())
-
-    # Does any other architecture use N64?
-    e_machine = f.read16()
-    if e_machine[0] == 0x0008 and f.is64Bit: # EM_MIPS && 64 bit
-        f.isN64 = True 
-    
-    print "('e_machine', %s)" % common_dump.HexDump(e_machine)
-    print "('e_version', %s)" % common_dump.HexDump(f.read32())
-    print "('e_entry', %s)" % common_dump.HexDump(f.readWord())
-    e_phoff = f.readWord()
-    print "('e_phoff', %s)" % common_dump.HexDump(e_phoff)
-    e_shoff = f.readWord()
-    print "('e_shoff', %s)" % common_dump.HexDump(e_shoff)
-    print "('e_flags', %s)" % common_dump.HexDump(f.read32())
-    print "('e_ehsize', %s)" % common_dump.HexDump(f.read16())
-    e_phentsize = f.read16()
-    print "('e_phentsize', %s)" % common_dump.HexDump(e_phentsize)
-    e_phnum = f.read16()
-    print "('e_phnum', %s)" % common_dump.HexDump(e_phnum)
-    e_shentsize = f.read16()
-    print "('e_shentsize', %s)" % common_dump.HexDump(e_shentsize)
-    e_shnum = f.read16()
-    print "('e_shnum', %s)" % common_dump.HexDump(e_shnum)
-    e_shstrndx = f.read16()
-    print "('e_shstrndx', %s)" % common_dump.HexDump(e_shstrndx)
-    
-
-    # Read all section headers
-    sections = []
-    for index in range(e_shnum[0]):
-        f.seek(e_shoff[0] + index * e_shentsize[0])
-        s = Section(f)
-        sections.append(s)
-
-    # Read .shstrtab so we can resolve section names
-    f.seek(sections[e_shstrndx[0]].sh_offset[0])
-    shstrtab = StringTable(f.read(sections[e_shstrndx[0]].sh_size[0]))
-
-    # Get the symbol string table
-    strtab = None
-    for section in sections:
-        if shstrtab[section.sh_name[0]] == ".strtab":
-            f.seek(section.sh_offset[0])
-            strtab = StringTable(f.read(section.sh_size[0]))
-            break
-
-    print "('_sections', ["
-    for index in range(e_shnum[0]):
-        print "  # Section %s" % index
-        sections[index].dump(shstrtab, f, strtab, opts.dumpSectionData)
-    print "])"
-
-    # Read all  program headers
-    headers = []
-    for index in range(e_phnum[0]):
-        f.seek(e_phoff[0] + index * e_phentsize[0])
-        h = ProgramHeader(f)
-        headers.append(h)
-
-    print "('_ProgramHeaders', ["
-    for index in range(e_phnum[0]):
-        print "  # Program Header %s" % index
-        headers[index].dump()
-    print "])"
-
-if __name__ == "__main__":
-    from optparse import OptionParser, OptionGroup
-    parser = OptionParser("usage: %prog [options] {files}")
-    parser.add_option("", "--dump-section-data", dest="dumpSectionData",
-                      help="Dump the contents of sections",
-                      action="store_true", default=False)
-    (opts, args) = parser.parse_args()
-
-    if not args:
-        args.append('-')
-
-    for arg in args:
-        dumpELF(arg, opts)
diff --git a/test/Scripts/elf-dump.bat b/test/Scripts/elf-dump.bat
deleted file mode 100644
index 9c70808..0000000
--- a/test/Scripts/elf-dump.bat
+++ /dev/null
@@ -1,7 +0,0 @@
-@echo off
-
-@rem We need to set -u to treat stdin as binary. Python 3 has support for doing
-@rem this in code, but I haven't found a way to do this in 2.6 yet.
-
-%PYTHON_EXECUTABLE% -u %LLVM_SRC_ROOT%\test\Scripts\elf-dump %1 %2 %3 %4 %5 %6 %7 %8 %9
-
diff --git a/test/Scripts/ignore b/test/Scripts/ignore
deleted file mode 100755
index 865ae4d..0000000
--- a/test/Scripts/ignore
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/bin/sh
-#
-# Program: ignore
-#
-# Synopsis: Ignore the result code of the command and always return 0
-#
-# Syntax:   ignore command <arguments>
-
-"$@" || exit 0 && exit 0
-exit 0
diff --git a/test/Scripts/macho-dumpx b/test/Scripts/macho-dumpx
deleted file mode 100755
index 71e06d8..0000000
--- a/test/Scripts/macho-dumpx
+++ /dev/null
@@ -1,294 +0,0 @@
-#!/usr/bin/env python
-
-import struct
-import sys
-import StringIO
-
-import common_dump
-
-class Reader:
-   def __init__(self, path):
-      if path == '-':
-         # Snarf all the data so we can seek.
-         self.file = StringIO.StringIO(sys.stdin.read())
-      else:
-         self.file = open(path,'rb')
-      self.isLSB = None
-      self.is64Bit = None
-
-      self.string_table = None
-
-   def tell(self):
-      return self.file.tell()
-
-   def seek(self, pos):
-      self.file.seek(pos)
-
-   def read(self, N):
-      data = self.file.read(N)
-      if len(data) != N:
-         raise ValueError,"Out of data!"
-      return data
-
-   def read8(self):
-      return ord(self.read(1))
-
-   def read16(self):
-      return struct.unpack('><'[self.isLSB] + 'H', self.read(2))[0]
-
-   def read32(self):
-      # Force to 32-bit, if possible; otherwise these might be long ints on a
-      # big-endian platform. FIXME: Why???
-      Value = struct.unpack('><'[self.isLSB] + 'I', self.read(4))[0]
-      return int(Value)
-
-   def read64(self):
-      Value = struct.unpack('><'[self.isLSB] + 'Q', self.read(8))[0]
-      if Value == int(Value):
-         Value = int(Value)
-      return Value
-
-   def registerStringTable(self, strings):
-      if self.string_table is not None:
-         raise ValueError,"%s: warning: multiple string tables" % sys.argv[0]
-
-      self.string_table = strings
-
-   def getString(self, index):
-      if self.string_table is None:
-         raise ValueError,"%s: warning: no string table registered" % sys.argv[0]
-      
-      end = self.string_table.index('\x00', index)
-      return self.string_table[index:end]
-
-def dumpmacho(path, opts):
-   f = Reader(path)
-
-   magic = f.read(4)
-   if magic == '\xFE\xED\xFA\xCE':
-      f.isLSB, f.is64Bit = False, False
-   elif magic == '\xCE\xFA\xED\xFE':
-      f.isLSB, f.is64Bit = True, False
-   elif magic == '\xFE\xED\xFA\xCF':
-      f.isLSB, f.is64Bit = False, True
-   elif magic == '\xCF\xFA\xED\xFE':
-      f.isLSB, f.is64Bit = True, True
-   else:
-      raise ValueError,"Not a Mach-O object file: %r (bad magic)" % path
-
-   print "('cputype', %r)" % f.read32()
-   print "('cpusubtype', %r)" % f.read32()
-   filetype = f.read32()
-   print "('filetype', %r)" % filetype
-   
-   numLoadCommands = f.read32()
-   print "('num_load_commands', %r)" % numLoadCommands
-
-   loadCommandsSize = f.read32()
-   print "('load_commands_size', %r)" % loadCommandsSize
-
-   print "('flag', %r)" % f.read32()
-
-   if f.is64Bit:
-      print "('reserved', %r)" % f.read32()
-
-   start = f.tell()
-
-   print "('load_commands', ["
-   for i in range(numLoadCommands):
-      dumpLoadCommand(f, i, opts)
-   print "])"
-
-   if f.tell() - start != loadCommandsSize:
-      raise ValueError,"%s: warning: invalid load commands size: %r" % (
-         sys.argv[0], loadCommandsSize)
-
-def dumpLoadCommand(f, i, opts):
-   start = f.tell()
-
-   print "  # Load Command %r" % i
-   cmd = f.read32()
-   print " (('command', %r)" % cmd
-   cmdSize = f.read32()
-   print "  ('size', %r)" % cmdSize
-
-   if cmd == 1:
-      dumpSegmentLoadCommand(f, opts, False)
-   elif cmd == 2:
-      dumpSymtabCommand(f, opts)
-   elif cmd == 11:
-      dumpDysymtabCommand(f, opts)
-   elif cmd == 25:
-      dumpSegmentLoadCommand(f, opts, True)
-   elif cmd == 27:
-      import uuid
-      print "  ('uuid', %s)" % uuid.UUID(bytes=f.read(16))
-   else:
-      print >>sys.stderr,"%s: warning: unknown load command: %r" % (
-         sys.argv[0], cmd)
-      f.read(cmdSize - 8)
-   print " ),"
-
-   if f.tell() - start != cmdSize:
-      raise ValueError,"%s: warning: invalid load command size: %r" % (
-         sys.argv[0], cmdSize)
-
-def dumpSegmentLoadCommand(f, opts, is64Bit):
-   print "  ('segment_name', %r)" % f.read(16) 
-   if is64Bit:
-      print "  ('vm_addr', %r)" % f.read64()
-      print "  ('vm_size', %r)" % f.read64()
-      print "  ('file_offset', %r)" % f.read64()
-      print "  ('file_size', %r)" % f.read64()
-   else:
-      print "  ('vm_addr', %r)" % f.read32()
-      print "  ('vm_size', %r)" % f.read32()
-      print "  ('file_offset', %r)" % f.read32()
-      print "  ('file_size', %r)" % f.read32()
-   print "  ('maxprot', %r)" % f.read32()
-   print "  ('initprot', %r)" % f.read32()
-   numSections = f.read32()
-   print "  ('num_sections', %r)" % numSections
-   print "  ('flags', %r)" % f.read32()
-
-   print "  ('sections', ["
-   for i in range(numSections):
-      dumpSection(f, i, opts, is64Bit)
-   print "  ])"
-
-def dumpSymtabCommand(f, opts):
-   symoff = f.read32()
-   print "  ('symoff', %r)" % symoff
-   nsyms = f.read32()
-   print "  ('nsyms', %r)" % nsyms
-   stroff = f.read32()
-   print "  ('stroff', %r)" % stroff
-   strsize = f.read32()
-   print "  ('strsize', %r)" % strsize
-
-   prev_pos = f.tell()
-
-   f.seek(stroff)
-   string_data = f.read(strsize)
-   print "  ('_string_data', %r)" % string_data
-
-   f.registerStringTable(string_data)
-
-   f.seek(symoff)
-   print "  ('_symbols', ["
-   for i in range(nsyms):
-      dumpNlist32(f, i, opts)
-   print "  ])"
-      
-   f.seek(prev_pos)
-
-def dumpNlist32(f, i, opts):
-   print "    # Symbol %r" % i
-   n_strx = f.read32()
-   print "   (('n_strx', %r)" % n_strx
-   n_type = f.read8()
-   print "    ('n_type', %#x)" % n_type
-   n_sect = f.read8()
-   print "    ('n_sect', %r)" % n_sect
-   n_desc = f.read16()
-   print "    ('n_desc', %r)" % n_desc
-   if f.is64Bit:
-      n_value = f.read64()
-      print "    ('n_value', %r)" % n_value
-   else:
-      n_value = f.read32()
-      print "    ('n_value', %r)" % n_value
-   print "    ('_string', %r)" % f.getString(n_strx)
-   print "   ),"
-
-def dumpDysymtabCommand(f, opts):   
-   print "  ('ilocalsym', %r)" % f.read32()
-   print "  ('nlocalsym', %r)" % f.read32()
-   print "  ('iextdefsym', %r)" % f.read32()
-   print "  ('nextdefsym', %r)" % f.read32()
-   print "  ('iundefsym', %r)" % f.read32()
-   print "  ('nundefsym', %r)" % f.read32()
-   print "  ('tocoff', %r)" % f.read32()
-   print "  ('ntoc', %r)" % f.read32()
-   print "  ('modtaboff', %r)" % f.read32()
-   print "  ('nmodtab', %r)" % f.read32()
-   print "  ('extrefsymoff', %r)" % f.read32()
-   print "  ('nextrefsyms', %r)" % f.read32()
-   indirectsymoff = f.read32()
-   print "  ('indirectsymoff', %r)" % indirectsymoff
-   nindirectsyms = f.read32()
-   print "  ('nindirectsyms', %r)" % nindirectsyms
-   print "  ('extreloff', %r)" % f.read32()
-   print "  ('nextrel', %r)" % f.read32()
-   print "  ('locreloff', %r)" % f.read32()
-   print "  ('nlocrel', %r)" % f.read32()
-
-   prev_pos = f.tell()
-
-   f.seek(indirectsymoff)
-   print "  ('_indirect_symbols', ["
-   for i in range(nindirectsyms):
-      print "    # Indirect Symbol %r" % i
-      print "    (('symbol_index', %#x),)," % f.read32()
-   print "  ])"
-      
-   f.seek(prev_pos)
-
-def dumpSection(f, i, opts, is64Bit):
-   print "    # Section %r" % i
-   print "   (('section_name', %r)" % f.read(16)
-   print "    ('segment_name', %r)" % f.read(16)
-   if is64Bit:
-      print "    ('address', %r)" % f.read64()
-      size = f.read64()
-      print "    ('size', %r)" % size
-   else:
-      print "    ('address', %r)" % f.read32()
-      size = f.read32()
-      print "    ('size', %r)" % size
-   offset = f.read32()
-   print "    ('offset', %r)" % offset
-   print "    ('alignment', %r)" % f.read32()   
-   reloc_offset = f.read32()
-   print "    ('reloc_offset', %r)" % reloc_offset
-   num_reloc = f.read32()
-   print "    ('num_reloc', %r)" % num_reloc
-   print "    ('flags', %#x)" % f.read32()
-   print "    ('reserved1', %r)" % f.read32()
-   print "    ('reserved2', %r)" % f.read32()
-   if is64Bit:
-      print "    ('reserved3', %r)" % f.read32()
-   print "   ),"
-
-   prev_pos = f.tell()
-
-   f.seek(reloc_offset)
-   print "  ('_relocations', ["
-   for i in range(num_reloc):
-      print "    # Relocation %r" % i
-      print "    (('word-0', %#x)," % f.read32()
-      print "     ('word-1', %#x))," % f.read32()
-   print "  ])"
-
-   if opts.dumpSectionData:
-      f.seek(offset)
-      print "  ('_section_data', '%s')" % common_dump.dataToHex(f.read(size))
-      
-   f.seek(prev_pos)
-   
-def main():
-    from optparse import OptionParser, OptionGroup
-    parser = OptionParser("usage: %prog [options] {files}")
-    parser.add_option("", "--dump-section-data", dest="dumpSectionData",
-                      help="Dump the contents of sections",
-                      action="store_true", default=False)    
-    (opts, args) = parser.parse_args()
-
-    if not args:
-       args.append('-')
-
-    for arg in args:
-       dumpmacho(arg, opts)
-
-if __name__ == '__main__':
-   main()
diff --git a/test/Scripts/macho-dumpx.bat b/test/Scripts/macho-dumpx.bat
deleted file mode 100644
index 81484f6..0000000
--- a/test/Scripts/macho-dumpx.bat
+++ /dev/null
@@ -1,7 +0,0 @@
-@echo off
-
-@rem We need to set -u to treat stdin as binary. Python 3 has support for doing
-@rem this in code, but I haven't found a way to do this in 2.6 yet.
-
-%PYTHON_EXECUTABLE% -u %LLVM_SRC_ROOT%\test\Scripts\macho-dump %1 %2 %3 %4 %5 %6 %7 %8 %9
-
diff --git a/test/Transforms/BBVectorize/X86/loop1.ll b/test/Transforms/BBVectorize/X86/loop1.ll
index 493f23b..bbf565d 100644
--- a/test/Transforms/BBVectorize/X86/loop1.ll
+++ b/test/Transforms/BBVectorize/X86/loop1.ll
@@ -34,7 +34,15 @@ for.body:                                         ; preds = %for.body, %entry
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, 10
   br i1 %exitcond, label %for.end, label %for.body
-; CHECK-NOT: <2 x double>
+; CHECK: insertelement
+; CHECK-NEXT: insertelement
+; CHECK-NEXT: fadd <2 x double>
+; CHECK-NEXT: insertelement
+; CHECK-NEXT: insertelement
+; CHECK-NEXT: fadd <2 x double>
+; CHECK-NEXT: insertelement
+; CHECK-NEXT: fmul <2 x double>
+
 ; CHECK-UNRL: %mul = fmul <2 x double> %2, %2
 ; CHECK-UNRL: %mul3 = fmul <2 x double> %2, %3
 ; CHECK-UNRL: %add = fadd <2 x double> %mul, %mul3
diff --git a/test/Transforms/BBVectorize/X86/simple.ll b/test/Transforms/BBVectorize/X86/simple.ll
index 0113e38..8abfa5f 100644
--- a/test/Transforms/BBVectorize/X86/simple.ll
+++ b/test/Transforms/BBVectorize/X86/simple.ll
@@ -12,7 +12,11 @@ define double @test1(double %A1, double %A2, double %B1, double %B2) {
 	%R  = fmul double %Z1, %Z2
 	ret double %R
 ; CHECK: @test1
-; CHECK-NOT: fmul <2 x double>
+; CHECK: fsub <2 x double>
+; CHECK: fmul <2 x double>
+; CHECK: fadd <2 x double>
+; CHECK: extract
+; CHECK: extract
 ; CHECK: ret double %R
 }
 
@@ -63,7 +67,12 @@ define double @test2(double %A1, double %A2, double %B1, double %B2) {
 	%R  = fmul double %Z1, %Z2
 	ret double %R
 ; CHECK: @test2
-; CHECK-NOT: fmul <2 x double>
+; CHECK: insertelement
+; CHECK: insertelement
+; CHECK: insertelement
+; CHECK: insertelement
+; CHECK: fsub <2 x double>
+; CHECK: fmul <2 x double>
 ; CHECK: ret double %R
 }
 
@@ -80,7 +89,15 @@ define double @test4(double %A1, double %A2, double %B1, double %B2) {
 	%R  = fmul double %Z1, %Z2
 	ret double %R
 ; CHECK: @test4
-; CHECK-NOT: fmul <2 x double>
+; CHECK: insertelement
+; CHECK: insertelement
+; CHECK: insertelement
+; CHECK: insertelement
+; CHECK: fsub <2 x double>
+; CHECK: fmul <2 x double>
+; CHECK: insertelement
+; CHECK: insertelement
+; CHECK: fadd <2 x double>
 ; CHECK: ret double %R
 }
 
diff --git a/test/Transforms/ConstantMerge/merge-both.ll b/test/Transforms/ConstantMerge/merge-both.ll
index b003455..3162676 100644
--- a/test/Transforms/ConstantMerge/merge-both.ll
+++ b/test/Transforms/ConstantMerge/merge-both.ll
@@ -26,6 +26,9 @@ declare void @helper([16 x i8]*)
 ; CHECK-NEXT: @var6 = private constant [16 x i8] c"foo1bar2foo3bar\00", align 16
 ; CHECK-NEXT: @var8 = private constant [16 x i8] c"foo1bar2foo3bar\00"
 
+@var4a = alias %struct.foobar* @var4
+@llvm.used = appending global [1 x %struct.foobar*] [%struct.foobar* @var4a], section "llvm.metadata"
+
 define i32 @main() {
 entry:
   call void @zed(%struct.foobar* @var1, %struct.foobar* @var2)
diff --git a/test/Transforms/DeadArgElim/dbginfo.ll b/test/Transforms/DeadArgElim/dbginfo.ll
index 24448b7..d53c19c 100644
--- a/test/Transforms/DeadArgElim/dbginfo.ll
+++ b/test/Transforms/DeadArgElim/dbginfo.ll
@@ -36,7 +36,7 @@ entry:
 
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 4, metadata !6, metadata !"clang version 3.2 (trunk 165305)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/home/samsonov/tmp/clang-di/test.cc] [DW_LANG_C_plus_plus]
+!0 = metadata !{i32 786449, i32 4, metadata !6, metadata !"clang version 3.2 (trunk 165305)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/home/samsonov/tmp/clang-di/test.cc] [DW_LANG_C_plus_plus]
 !1 = metadata !{i32 0}
 !3 = metadata !{metadata !5, metadata !8, metadata !9}
 !5 = metadata !{i32 786478, metadata !6, metadata !"run", metadata !"run", metadata !"", metadata !6, i32 8, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z3runv, null, null, metadata !1, i32 8} ; [ DW_TAG_subprogram ] [line 8] [def] [run]
diff --git a/test/Transforms/DeadStoreElimination/2011-09-06-EndOfFunction.ll b/test/Transforms/DeadStoreElimination/2011-09-06-EndOfFunction.ll
index c5cc101..d114e51 100644
--- a/test/Transforms/DeadStoreElimination/2011-09-06-EndOfFunction.ll
+++ b/test/Transforms/DeadStoreElimination/2011-09-06-EndOfFunction.ll
@@ -11,17 +11,13 @@ _ZNSt8auto_ptrIiED1Ev.exit:
   %temp.lvalue = alloca %"class.std::auto_ptr", align 8
   call void @_Z3barv(%"class.std::auto_ptr"* sret %temp.lvalue)
   %_M_ptr.i.i = getelementptr inbounds %"class.std::auto_ptr"* %temp.lvalue, i64 0, i32 0
-  %tmp.i.i = load i32** %_M_ptr.i.i, align 8, !tbaa !0
+  %tmp.i.i = load i32** %_M_ptr.i.i, align 8
 ; CHECK-NOT: store i32* null
-  store i32* null, i32** %_M_ptr.i.i, align 8, !tbaa !0
+  store i32* null, i32** %_M_ptr.i.i, align 8
   %_M_ptr.i.i4 = getelementptr inbounds %"class.std::auto_ptr"* %agg.result, i64 0, i32 0
-  store i32* %tmp.i.i, i32** %_M_ptr.i.i4, align 8, !tbaa !0
+  store i32* %tmp.i.i, i32** %_M_ptr.i.i4, align 8
 ; CHECK: ret void
   ret void
 }
 
 declare void @_Z3barv(%"class.std::auto_ptr"* sret)
-
-!0 = metadata !{metadata !"any pointer", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/Transforms/GCOVProfiling/linkagename.ll b/test/Transforms/GCOVProfiling/linkagename.ll
index d1bce72..7ce4d86 100644
--- a/test/Transforms/GCOVProfiling/linkagename.ll
+++ b/test/Transforms/GCOVProfiling/linkagename.ll
@@ -14,7 +14,7 @@ entry:
 !llvm.dbg.cu = !{!0}
 !llvm.gcov = !{!9}
 
-!0 = metadata !{i32 786449, i32 4, metadata !1, metadata !"clang version 3.3 (trunk 177323)", i1 false, metadata !"", i32 0, metadata !3, metadata !3, metadata !4, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] [/home/nlewycky/hello.cc] [DW_LANG_C_plus_plus]
+!0 = metadata !{i32 786449, i32 4, metadata !1, metadata !"clang version 3.3 (trunk 177323)", i1 false, metadata !"", i32 0, metadata !3, metadata !3, metadata !4, metadata !3,  metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] [/home/nlewycky/hello.cc] [DW_LANG_C_plus_plus]
 !1 = metadata !{i32 786473, metadata !2}          ; [ DW_TAG_file_type ] [/home/nlewycky/hello.cc]
 !2 = metadata !{metadata !"hello.cc", metadata !"/home/nlewycky"}
 !3 = metadata !{i32 0}
diff --git a/test/Transforms/GVN/unreachable_block_infinite_loop.ll b/test/Transforms/GVN/unreachable_block_infinite_loop.ll
new file mode 100644
index 0000000..fe335ce
--- /dev/null
+++ b/test/Transforms/GVN/unreachable_block_infinite_loop.ll
@@ -0,0 +1,14 @@
+; RUN: opt -memdep -gvn -disable-output < %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0"
+
+define i32 @test2() nounwind ssp {
+entry:
+    ret i32 0
+
+unreachable_block:
+    %a = add i32 %a, 1
+    ret i32 %a
+}
+
diff --git a/test/Transforms/GlobalDCE/complex-constantexpr.ll b/test/Transforms/GlobalDCE/complex-constantexpr.ll
new file mode 100644
index 0000000..4bf1aee
--- /dev/null
+++ b/test/Transforms/GlobalDCE/complex-constantexpr.ll
@@ -0,0 +1,97 @@
+; RUN: opt -O2 -disable-output < %s
+; PR15714
+
+%struct.ham = type { i32 }
+
+@global5 = common global i32 0, align 4
+@global6 = common global i32 0, align 4
+@global7 = common global i32 0, align 4
+@global = common global i32 0, align 4
+@global8 = common global %struct.ham zeroinitializer, align 4
+@global9 = common global i32 0, align 4
+@global10 = common global i32 0, align 4
+@global11 = common global i32 0, align 4
+
+define void @zot12() {
+bb:
+  store i32 0, i32* @global5, align 4
+  store i32 0, i32* @global6, align 4
+  br label %bb2
+
+bb1:                                              ; preds = %bb11
+  %tmp = load i32* @global5, align 4
+  br label %bb2
+
+bb2:                                              ; preds = %bb1, %bb
+  %tmp3 = phi i32 [ %tmp, %bb1 ], [ 0, %bb ]
+  %tmp4 = xor i32 %tmp3, zext (i1 icmp ne (i64 ptrtoint (i32* @global5 to i64), i64 1) to i32)
+  store i32 %tmp4, i32* @global5, align 4
+  %tmp5 = icmp eq i32 %tmp3, zext (i1 icmp ne (i64 ptrtoint (i32* @global5 to i64), i64 1) to i32)
+  br i1 %tmp5, label %bb8, label %bb6
+
+bb6:                                              ; preds = %bb2
+  %tmp7 = tail call i32 @quux13()
+  br label %bb8
+
+bb8:                                              ; preds = %bb6, %bb2
+  %tmp9 = load i32* @global7, align 4
+  %tmp10 = icmp eq i32 %tmp9, 0
+  br i1 %tmp10, label %bb11, label %bb15
+
+bb11:                                             ; preds = %bb8
+  %tmp12 = load i32* @global6, align 4
+  %tmp13 = add nsw i32 %tmp12, 1
+  store i32 %tmp13, i32* @global6, align 4
+  %tmp14 = icmp slt i32 %tmp13, 42
+  br i1 %tmp14, label %bb1, label %bb15
+
+bb15:                                             ; preds = %bb11, %bb8
+  ret void
+}
+
+define i32 @quux13() {
+bb:
+  store i32 1, i32* @global5, align 4
+  ret i32 1
+}
+
+define void @wombat() {
+bb:
+  tail call void @zot12()
+  ret void
+}
+
+define void @wombat14() {
+bb:
+  tail call void @blam()
+  ret void
+}
+
+define void @blam() {
+bb:
+  store i32 ptrtoint (i32* @global to i32), i32* getelementptr inbounds (%struct.ham* @global8, i64 0, i32 0), align 4
+  store i32 0, i32* @global9, align 4
+  %tmp = load i32* getelementptr inbounds (%struct.ham* @global8, i64 0, i32 0), align 4
+  br label %bb1
+
+bb1:                                              ; preds = %bb1, %bb
+  %tmp2 = phi i32 [ 0, %bb ], [ %tmp11, %bb1 ]
+  %tmp3 = phi i32 [ %tmp, %bb ], [ %tmp10, %bb1 ]
+  %tmp4 = icmp sgt i32 %tmp3, 0
+  %tmp5 = zext i1 %tmp4 to i32
+  %tmp6 = urem i32 %tmp5, 5
+  %tmp7 = mul i32 %tmp3, -80
+  %tmp8 = or i32 %tmp7, %tmp6
+  %tmp9 = icmp eq i32 %tmp8, 0
+  %tmp10 = zext i1 %tmp9 to i32
+  %tmp11 = add nsw i32 %tmp2, 1
+  %tmp12 = icmp eq i32 %tmp11, 20
+  br i1 %tmp12, label %bb13, label %bb1
+
+bb13:                                             ; preds = %bb1
+  store i32 %tmp10, i32* getelementptr inbounds (%struct.ham* @global8, i64 0, i32 0), align 4
+  store i32 0, i32* @global10, align 4
+  store i32 %tmp6, i32* @global11, align 4
+  store i32 20, i32* @global9, align 4
+  ret void
+}
diff --git a/test/Transforms/GlobalDCE/indirectbr.ll b/test/Transforms/GlobalDCE/indirectbr.ll
new file mode 100644
index 0000000..90f1ae4
--- /dev/null
+++ b/test/Transforms/GlobalDCE/indirectbr.ll
@@ -0,0 +1,18 @@
+; RUN: opt -S -globaldce < %s | FileCheck %s
+
+@L = internal unnamed_addr constant [3 x i8*] [i8* blockaddress(@test1, %L1), i8* blockaddress(@test1, %L2), i8* null], align 16
+
+; CHECK: @L = internal unnamed_addr constant
+
+define void @test1(i32 %idx) {
+entry:
+  br label %L1
+
+L1:
+  %arrayidx = getelementptr inbounds [3 x i8*]* @L, i32 0, i32 %idx
+  %l = load i8** %arrayidx
+  indirectbr i8* %l, [label %L1, label %L2]
+
+L2:
+  ret void
+}
diff --git a/test/Transforms/GlobalOpt/alias-used.ll b/test/Transforms/GlobalOpt/alias-used.ll
new file mode 100644
index 0000000..f91579b
--- /dev/null
+++ b/test/Transforms/GlobalOpt/alias-used.ll
@@ -0,0 +1,42 @@
+; RUN: opt < %s -globalopt -S | FileCheck %s
+
+@c = global i8 42
+
+@llvm.used = appending global [3 x i8*] [i8* bitcast (void ()* @fa to i8*), i8* bitcast (void ()* @f to i8*), i8* @ca], section "llvm.metadata"
+; CHECK: @llvm.used = appending global [3 x i8*] [i8* bitcast (void ()* @fa to i8*), i8* bitcast (void ()* @f to i8*), i8* @ca], section "llvm.metadata"
+
+@llvm.compiler_used = appending global [2 x i8*] [i8* bitcast (void ()* @fa to i8*), i8* bitcast (void ()* @fa3 to i8*)], section "llvm.metadata"
+
+@sameAsUsed = global [3 x i8*] [i8* bitcast (void ()* @fa to i8*), i8* bitcast (void ()* @f to i8*), i8* @ca]
+; CHECK: @sameAsUsed = global [3 x i8*] [i8* bitcast (void ()* @f to i8*), i8* bitcast (void ()* @f to i8*), i8* @c]
+
+@other = global i32* bitcast (void ()* @fa to i32*)
+; CHECK: @other = global i32* bitcast (void ()* @f to i32*)
+
+@fa = alias internal void ()* @f
+; CHECK: @fa = alias internal void ()* @f
+
+@fa2 = alias internal void ()* @f
+; CHECK-NOT: @fa2
+
+@fa3 = alias internal void ()* @f
+; CHECK: @fa3
+
+@ca = alias internal i8* @c
+; CHECK: @ca = alias internal i8* @c
+
+define void @f() {
+  ret void
+}
+
+define i8* @g() {
+  ret i8* bitcast (void ()* @fa to i8*);
+}
+
+define i8* @g2() {
+  ret i8* bitcast (void ()* @fa2 to i8*);
+}
+
+define i8* @h() {
+  ret i8* @ca
+}
diff --git a/test/Transforms/InstCombine/2012-05-27-Negative-Shift-Crash.ll b/test/Transforms/InstCombine/2012-05-27-Negative-Shift-Crash.ll
index 2ec0a32..ba83fe9 100644
--- a/test/Transforms/InstCombine/2012-05-27-Negative-Shift-Crash.ll
+++ b/test/Transforms/InstCombine/2012-05-27-Negative-Shift-Crash.ll
@@ -20,10 +20,10 @@ entry:
 
 define void @fn4() nounwind uwtable ssp {
 entry:
-  %0 = load i32* @d, align 4, !tbaa !0
+  %0 = load i32* @d, align 4
   %cmp = icmp eq i32 %0, 0
   %conv = zext i1 %cmp to i32
-  store i32 %conv, i32* @c, align 4, !tbaa !0
+  store i32 %conv, i32* @c, align 4
   tail call void @fn3(i32 %conv) nounwind
   ret void
 }
@@ -31,15 +31,15 @@ entry:
 define void @fn3(i32 %p1) nounwind uwtable ssp {
 entry:
   %and = and i32 %p1, 8
-  store i32 %and, i32* @e, align 4, !tbaa !0
+  store i32 %and, i32* @e, align 4
   %sub = add nsw i32 %and, -1
-  store i32 %sub, i32* @f, align 4, !tbaa !0
-  %0 = load i32* @a, align 4, !tbaa !0
+  store i32 %sub, i32* @f, align 4
+  %0 = load i32* @a, align 4
   %tobool = icmp eq i32 %0, 0
   br i1 %tobool, label %if.else, label %if.then
 
 if.then:                                          ; preds = %entry
-  %1 = load i32* @b, align 4, !tbaa !0
+  %1 = load i32* @b, align 4
   %.lobit = lshr i32 %1, 31
   %2 = trunc i32 %.lobit to i8
   %.not = xor i8 %2, 1
@@ -55,7 +55,3 @@ if.end:                                           ; preds = %if.else, %if.then
   store i32 %storemerge, i32* @b, align 4
   ret void
 }
-
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/Transforms/InstCombine/add4.ll b/test/Transforms/InstCombine/add4.ll
new file mode 100644
index 0000000..0fc0a6c
--- /dev/null
+++ b/test/Transforms/InstCombine/add4.ll
@@ -0,0 +1,58 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+;; Target triple for gep raising case below.
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i686-apple-darwin8"
+
+define float @test1(float %A, float %B, i1 %C) {
+EntryBlock:
+  ;; A*(1 - uitofp i1 C) -> select C, 0, A
+  %cf = uitofp i1 %C to float
+  %mc = fsub float 1.000000e+00, %cf
+  %p1 = fmul fast float %A, %mc
+  ret float %p1
+; CHECK: @test1
+; CHECK: select i1 %C, float -0.000000e+00, float %A
+}
+
+define float @test2(float %A, float %B, i1 %C) {
+EntryBlock:
+  ;; B*(uitofp i1 C) -> select C, B, 0
+  %cf = uitofp i1 %C to float
+  %p2 = fmul fast float %B, %cf
+  ret float %p2
+; CHECK: @test2
+; CHECK: select i1 %C, float %B, float -0.000000e+00
+}
+
+define float @test3(float %A, float %B, i1 %C) {
+EntryBlock:
+  ;; A*(1 - uitofp i1 C) + B*(uitofp i1 C) -> select C, B, A
+  %cf = uitofp i1 %C to float
+  %mc = fsub float 1.000000e+00, %cf
+  %p1 = fmul fast float %A, %mc
+  %p2 = fmul fast float %B, %cf
+  %s1 = fadd fast float %p1, %p2
+  ret float %s1
+; CHECK: @test3
+; CHECK: select i1 %C, float %B, float %A
+}
+
+; PR15952
+define float @test4(float %A, float %B, i32 %C) {
+  %cf = uitofp i32 %C to float
+  %mc = fsub float 1.000000e+00, %cf
+  %p1 = fmul fast float %A, %mc
+  ret float %p1
+; CHECK: @test4
+; CHECK: uitofp
+}
+
+define float @test5(float %A, float %B, i32 %C) {
+  %cf = uitofp i32 %C to float
+  %p2 = fmul fast float %B, %cf
+  ret float %p2
+; CHECK: @test5
+; CHECK: uitofp
+}
+
diff --git a/test/Transforms/InstCombine/and-fcmp.ll b/test/Transforms/InstCombine/and-fcmp.ll
index 40c44c0..a398307 100644
--- a/test/Transforms/InstCombine/and-fcmp.ll
+++ b/test/Transforms/InstCombine/and-fcmp.ll
@@ -77,3 +77,24 @@ define zeroext i8 @t7(float %x, float %y) nounwind {
 ; CHECK: fcmp uno
 ; CHECK-NOT: fcmp ult
 }
+
+; PR15737
+define i1 @t8(float %a, double %b) {
+  %cmp = fcmp ord float %a, 0.000000e+00
+  %cmp1 = fcmp ord double %b, 0.000000e+00
+  %and = and i1 %cmp, %cmp1
+  ret i1 %and
+; CHECK: t8
+; CHECK: fcmp ord
+; CHECK: fcmp ord
+}
+
+define <2 x i1> @t9(<2 x float> %a, <2 x double> %b) {
+  %cmp = fcmp ord <2 x float> %a, zeroinitializer
+  %cmp1 = fcmp ord <2 x double> %b, zeroinitializer
+  %and = and <2 x i1> %cmp, %cmp1
+  ret <2 x i1> %and
+; CHECK: t9
+; CHECK: fcmp ord
+; CHECK: fcmp ord
+}
diff --git a/test/Transforms/InstCombine/apint-shift-simplify.ll b/test/Transforms/InstCombine/apint-shift-simplify.ll
index 818ae66..14e895a 100644
--- a/test/Transforms/InstCombine/apint-shift-simplify.ll
+++ b/test/Transforms/InstCombine/apint-shift-simplify.ll
@@ -1,11 +1,14 @@
-; RUN: opt < %s -instcombine -S | \
-; RUN:    egrep "shl|lshr|ashr" | count 3
+; RUN: opt < %s -instcombine -S | FileCheck %s
 
 define i41 @test0(i41 %A, i41 %B, i41 %C) {
 	%X = shl i41 %A, %C
 	%Y = shl i41 %B, %C
 	%Z = and i41 %X, %Y
 	ret i41 %Z
+; CHECK: @test0
+; CHECK-NEXT: and i41 %A, %B
+; CHECK-NEXT: shl i41
+; CHECK-NEXT: ret
 }
 
 define i57 @test1(i57 %A, i57 %B, i57 %C) {
@@ -13,6 +16,10 @@ define i57 @test1(i57 %A, i57 %B, i57 %C) {
 	%Y = lshr i57 %B, %C
 	%Z = or i57 %X, %Y
 	ret i57 %Z
+; CHECK: @test1
+; CHECK-NEXT: or i57 %A, %B
+; CHECK-NEXT: lshr i57
+; CHECK-NEXT: ret
 }
 
 define i49 @test2(i49 %A, i49 %B, i49 %C) {
@@ -20,4 +27,8 @@ define i49 @test2(i49 %A, i49 %B, i49 %C) {
 	%Y = ashr i49 %B, %C
 	%Z = xor i49 %X, %Y
 	ret i49 %Z
+; CHECK: @test2
+; CHECK-NEXT: xor i49 %A, %B
+; CHECK-NEXT: ashr i49
+; CHECK-NEXT: ret
 }
diff --git a/test/Transforms/InstCombine/debuginfo.ll b/test/Transforms/InstCombine/debuginfo.ll
index cdbcd86..a9e3de3 100644
--- a/test/Transforms/InstCombine/debuginfo.ll
+++ b/test/Transforms/InstCombine/debuginfo.ll
@@ -11,18 +11,18 @@ entry:
   %__dest.addr = alloca i8*, align 8
   %__val.addr = alloca i32, align 4
   %__len.addr = alloca i64, align 8
-  store i8* %__dest, i8** %__dest.addr, align 8, !tbaa !1
+  store i8* %__dest, i8** %__dest.addr, align 8
 ; CHECK-NOT: call void @llvm.dbg.declare
 ; CHECK: call void @llvm.dbg.value
   call void @llvm.dbg.declare(metadata !{i8** %__dest.addr}, metadata !0), !dbg !16
-  store i32 %__val, i32* %__val.addr, align 4, !tbaa !17
+  store i32 %__val, i32* %__val.addr, align 4
   call void @llvm.dbg.declare(metadata !{i32* %__val.addr}, metadata !7), !dbg !18
-  store i64 %__len, i64* %__len.addr, align 8, !tbaa !19
+  store i64 %__len, i64* %__len.addr, align 8
   call void @llvm.dbg.declare(metadata !{i64* %__len.addr}, metadata !9), !dbg !20
-  %tmp = load i8** %__dest.addr, align 8, !dbg !21, !tbaa !13
-  %tmp1 = load i32* %__val.addr, align 4, !dbg !21, !tbaa !17
-  %tmp2 = load i64* %__len.addr, align 8, !dbg !21, !tbaa !19
-  %tmp3 = load i8** %__dest.addr, align 8, !dbg !21, !tbaa !13
+  %tmp = load i8** %__dest.addr, align 8, !dbg !21
+  %tmp1 = load i32* %__val.addr, align 4, !dbg !21
+  %tmp2 = load i64* %__len.addr, align 8, !dbg !21
+  %tmp3 = load i8** %__dest.addr, align 8, !dbg !21
   %0 = call i64 @llvm.objectsize.i64(i8* %tmp3, i1 false), !dbg !21
   %call = call i8* @foo(i8* %tmp, i32 %tmp1, i64 %tmp2, i64 %0), !dbg !21
   ret i8* %call, !dbg !21
@@ -43,13 +43,8 @@ entry:
 !10 = metadata !{i32 589846, metadata !3, metadata !"size_t", metadata !2, i32 80, i64 0, i64 0, i64 0, i32 0, metadata !11} ; [ DW_TAG_typedef ]
 !11 = metadata !{i32 589846, metadata !3, metadata !"__darwin_size_t", metadata !2, i32 90, i64 0, i64 0, i64 0, i32 0, metadata !12} ; [ DW_TAG_typedef ]
 !12 = metadata !{i32 786468, metadata !3, metadata !"long unsigned int", null, i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
-!13 = metadata !{metadata !"any pointer", metadata !14}
-!14 = metadata !{metadata !"omnipotent char", metadata !15}
-!15 = metadata !{metadata !"Simple C/C++ TBAA", null}
 !16 = metadata !{i32 78, i32 28, metadata !1, null}
-!17 = metadata !{metadata !"int", metadata !14}
 !18 = metadata !{i32 78, i32 40, metadata !1, null}
-!19 = metadata !{metadata !"long", metadata !14}
 !20 = metadata !{i32 78, i32 54, metadata !1, null}
 !21 = metadata !{i32 80, i32 3, metadata !22, null}
 !22 = metadata !{i32 786443, metadata !23, i32 80, i32 3, metadata !2, i32 7} ; [ DW_TAG_lexical_block ]
diff --git a/test/Transforms/InstCombine/fprintf-1.ll b/test/Transforms/InstCombine/fprintf-1.ll
index 39d86b4..e1dc191 100644
--- a/test/Transforms/InstCombine/fprintf-1.ll
+++ b/test/Transforms/InstCombine/fprintf-1.ll
@@ -78,3 +78,12 @@ define void @test_no_simplify2(%FILE* %fp, double %d) {
   ret void
 ; CHECK-NEXT: ret void
 }
+
+define i32 @test_no_simplify3(%FILE* %fp) {
+; CHECK: @test_no_simplify3
+  %fmt = getelementptr [13 x i8]* @hello_world, i32 0, i32 0
+  %1 = call i32 (%FILE*, i8*, ...)* @fprintf(%FILE* %fp, i8* %fmt)
+; CHECK-NEXT: call i32 (%FILE*, i8*, ...)* @fprintf(%FILE* %fp, i8* getelementptr inbounds ([13 x i8]* @hello_world, i32 0, i32 0))
+  ret i32 %1
+; CHECK-NEXT: ret i32 %1
+}
diff --git a/test/Transforms/InstCombine/icmp.ll b/test/Transforms/InstCombine/icmp.ll
index 446c0e0..c912a57 100644
--- a/test/Transforms/InstCombine/icmp.ll
+++ b/test/Transforms/InstCombine/icmp.ll
@@ -886,3 +886,93 @@ define i1 @icmp_mul0_ne0(i32 %x) {
   %cmp = icmp ne i32 %mul, 0
   ret i1 %cmp
 }
+
+; CHECK: @icmp_sub1_sge
+; CHECK-NEXT: icmp sgt i32 %x, %y
+define i1 @icmp_sub1_sge(i32 %x, i32 %y) {
+  %sub = add nsw i32 %x, -1
+  %cmp = icmp sge i32 %sub, %y
+  ret i1 %cmp
+}
+
+; CHECK: @icmp_add1_sgt
+; CHECK-NEXT: icmp sge i32 %x, %y
+define i1 @icmp_add1_sgt(i32 %x, i32 %y) {
+  %add = add nsw i32 %x, 1
+  %cmp = icmp sgt i32 %add, %y
+  ret i1 %cmp
+}
+
+; CHECK: @icmp_sub1_slt
+; CHECK-NEXT: icmp sle i32 %x, %y
+define i1 @icmp_sub1_slt(i32 %x, i32 %y) {
+  %sub = add nsw i32 %x, -1
+  %cmp = icmp slt i32 %sub, %y
+  ret i1 %cmp
+}
+
+; CHECK: @icmp_add1_sle
+; CHECK-NEXT: icmp slt i32 %x, %y
+define i1 @icmp_add1_sle(i32 %x, i32 %y) {
+  %add = add nsw i32 %x, 1
+  %cmp = icmp sle i32 %add, %y
+  ret i1 %cmp
+}
+
+; CHECK: @icmp_add20_sge_add57
+; CHECK-NEXT: [[ADD:%[a-z0-9]+]] = add nsw i32 %y, 37
+; CHECK-NEXT: icmp sle i32 [[ADD]], %x
+define i1 @icmp_add20_sge_add57(i32 %x, i32 %y) {
+  %1 = add nsw i32 %x, 20
+  %2 = add nsw i32 %y, 57
+  %cmp = icmp sge i32 %1, %2
+  ret i1 %cmp
+}
+
+; CHECK: @icmp_sub57_sge_sub20
+; CHECK-NEXT: [[SUB:%[a-z0-9]+]] = add nsw i32 %x, -37
+; CHECK-NEXT: icmp sge i32 [[SUB]], %y
+define i1 @icmp_sub57_sge_sub20(i32 %x, i32 %y) {
+  %1 = add nsw i32 %x, -57
+  %2 = add nsw i32 %y, -20
+  %cmp = icmp sge i32 %1, %2
+  ret i1 %cmp
+}
+
+; CHECK: @icmp_and_shl_neg_ne_0
+; CHECK-NEXT: [[SHL:%[a-z0-9]+]] = shl i32 1, %B
+; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 [[SHL]], %A
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp eq i32 [[AND]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+define i1 @icmp_and_shl_neg_ne_0(i32 %A, i32 %B) {
+  %neg = xor i32 %A, -1
+  %shl = shl i32 1, %B
+  %and = and i32 %shl, %neg
+  %cmp = icmp ne i32 %and, 0
+  ret i1 %cmp
+}
+
+; CHECK: @icmp_and_shl_neg_eq_0
+; CHECK-NEXT: [[SHL:%[a-z0-9]+]] = shl i32 1, %B
+; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 [[SHL]], %A
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i32 [[AND]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+define i1 @icmp_and_shl_neg_eq_0(i32 %A, i32 %B) {
+  %neg = xor i32 %A, -1
+  %shl = shl i32 1, %B
+  %and = and i32 %shl, %neg
+  %cmp = icmp eq i32 %and, 0
+  ret i1 %cmp
+}
+
+; CHECK: @icmp_add_and_shr_ne_0
+; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 %X, 240
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i32 [[AND]], 224
+; CHECK-NEXT: ret i1 [[CMP]]
+define i1 @icmp_add_and_shr_ne_0(i32 %X) {
+  %shr = lshr i32 %X, 4
+  %and = and i32 %shr, 15
+  %add = add i32 %and, -14
+  %tobool = icmp ne i32 %add, 0
+  ret i1 %tobool
+}
diff --git a/test/Transforms/InstCombine/load-cmp.ll b/test/Transforms/InstCombine/load-cmp.ll
index d88188e..869215c 100644
--- a/test/Transforms/InstCombine/load-cmp.ll
+++ b/test/Transforms/InstCombine/load-cmp.ll
@@ -100,8 +100,8 @@ define i1 @test8(i32 %X) {
   %S = icmp eq i16 %R, 0
   ret i1 %S
 ; CHECK: @test8
-; CHECK-NEXT: add i32 %X, -8
-; CHECK-NEXT: icmp ult i32 {{.*}}, 2
+; CHECK-NEXT: and i32 %X, -2
+; CHECK-NEXT: icmp eq i32 {{.*}}, 8
 ; CHECK-NEXT: ret i1
 }
 
diff --git a/test/Transforms/InstCombine/objsize.ll b/test/Transforms/InstCombine/objsize.ll
index 0ead9d1..122c650 100644
--- a/test/Transforms/InstCombine/objsize.ll
+++ b/test/Transforms/InstCombine/objsize.ll
@@ -257,114 +257,6 @@ return:
   ret i32 7
 }
 
-declare noalias i8* @valloc(i32) nounwind
-
-; CHECK: @test14
-; CHECK: ret i32 6
-define i32 @test14(i32 %a) nounwind {
-  switch i32 %a, label %sw.default [
-    i32 1, label %sw.bb
-    i32 2, label %sw.bb1
-  ]
-
-sw.bb:
-  %call = tail call noalias i8* @malloc(i32 6) nounwind
-  br label %sw.epilog
-
-sw.bb1:
-  %call2 = tail call noalias i8* @calloc(i32 3, i32 2) nounwind
-  br label %sw.epilog
-
-sw.default:
-  %call3 = tail call noalias i8* @valloc(i32 6) nounwind
-  br label %sw.epilog
-
-sw.epilog:
-  %b.0 = phi i8* [ %call3, %sw.default ], [ %call2, %sw.bb1 ], [ %call, %sw.bb ]
-  %1 = tail call i32 @llvm.objectsize.i32(i8* %b.0, i1 false)
-  ret i32 %1
-}
-
-; CHECK: @test15
-; CHECK: llvm.objectsize
-define i32 @test15(i32 %a) nounwind {
-  switch i32 %a, label %sw.default [
-    i32 1, label %sw.bb
-    i32 2, label %sw.bb1
-  ]
-
-sw.bb:
-  %call = tail call noalias i8* @malloc(i32 3) nounwind
-  br label %sw.epilog
-
-sw.bb1:
-  %call2 = tail call noalias i8* @calloc(i32 2, i32 1) nounwind
-  br label %sw.epilog
-
-sw.default:
-  %call3 = tail call noalias i8* @valloc(i32 3) nounwind
-  br label %sw.epilog
-
-sw.epilog:
-  %b.0 = phi i8* [ %call3, %sw.default ], [ %call2, %sw.bb1 ], [ %call, %sw.bb ]
-  %1 = tail call i32 @llvm.objectsize.i32(i8* %b.0, i1 false)
-  ret i32 %1
-}
-
-; CHECK: @test16
-; CHECK: llvm.objectsize
-define i32 @test16(i8* %a, i32 %n) nounwind {
-  %b = alloca [5 x i8], align 1
-  %c = alloca [5 x i8], align 1
-  switch i32 %n, label %sw.default [
-    i32 1, label %sw.bb
-    i32 2, label %sw.bb1
-  ]
-
-sw.bb:
-  %bp = bitcast [5 x i8]* %b to i8*
-  br label %sw.epilog
-
-sw.bb1:
-  %cp = bitcast [5 x i8]* %c to i8*
-  br label %sw.epilog
-
-sw.default:
-  br label %sw.epilog
-
-sw.epilog:
-  %phi = phi i8* [ %a, %sw.default ], [ %cp, %sw.bb1 ], [ %bp, %sw.bb ]
-  %sz = call i32 @llvm.objectsize.i32(i8* %phi, i1 false)
-  ret i32 %sz
-}
-
-; CHECK: @test17
-; CHECK: ret i32 5
-define i32 @test17(i32 %n) nounwind {
-  %b = alloca [5 x i8], align 1
-  %c = alloca [5 x i8], align 1
-  %bp = bitcast [5 x i8]* %b to i8*
-  switch i32 %n, label %sw.default [
-    i32 1, label %sw.bb
-    i32 2, label %sw.bb1
-  ]
-
-sw.bb:
-  br label %sw.epilog
-
-sw.bb1:
-  %cp = bitcast [5 x i8]* %c to i8*
-  br label %sw.epilog
-
-sw.default:
-  br label %sw.epilog
-
-sw.epilog:
-  %phi = phi i8* [ %bp, %sw.default ], [ %cp, %sw.bb1 ], [ %bp, %sw.bb ]
-  %sz = call i32 @llvm.objectsize.i32(i8* %phi, i1 false)
-  ret i32 %sz
-}
-
 @globalalias = alias internal [60 x i8]* @a
 
 ; CHECK: @test18
diff --git a/test/Transforms/InstCombine/or.ll b/test/Transforms/InstCombine/or.ll
index bde2a54..7226bd9 100644
--- a/test/Transforms/InstCombine/or.ll
+++ b/test/Transforms/InstCombine/or.ll
@@ -178,12 +178,12 @@ define i1 @test18(i32 %A) {
 define i1 @test19(i32 %A) {
         %B = icmp eq i32 %A, 50
         %C = icmp eq i32 %A, 51
-        ;; (A-50) < 2
+        ;; (A&-2) == 50
         %D = or i1 %B, %C
         ret i1 %D
 ; CHECK: @test19
-; CHECK: add i32
-; CHECK: icmp ult 
+; CHECK: and i32
+; CHECK: icmp eq 
 ; CHECK: ret i1
 }
 
diff --git a/test/Transforms/InstCombine/select.ll b/test/Transforms/InstCombine/select.ll
index cc3aacd..c72a6f7 100644
--- a/test/Transforms/InstCombine/select.ll
+++ b/test/Transforms/InstCombine/select.ll
@@ -863,3 +863,125 @@ while.body:
 ; CHECK: @test64
 ; CHECK-NOT: select
 }
+
+; CHECK: @select_icmp_eq_and_1_0_or_2
+; CHECK-NEXT: [[SHL:%[a-z0-9]+]] = shl i32 %x, 1
+; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 [[SHL]], 2
+; CHECK-NEXT: [[OR:%[a-z0-9]+]] = or i32 [[AND]], %y
+; CHECK-NEXT: ret i32 [[OR]]
+define i32 @select_icmp_eq_and_1_0_or_2(i32 %x, i32 %y) {
+  %and = and i32 %x, 1
+  %cmp = icmp eq i32 %and, 0
+  %or = or i32 %y, 2
+  %select = select i1 %cmp, i32 %y, i32 %or
+  ret i32 %select
+}
+
+; CHECK: @select_icmp_eq_and_32_0_or_8
+; CHECK-NEXT: [[LSHR:%[a-z0-9]+]] = lshr i32 %x, 2
+; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 [[LSHR]], 8
+; CHECK-NEXT: [[OR:%[a-z0-9]+]] = or i32 [[AND]], %y
+; CHECK-NEXT: ret i32 [[OR]]
+define i32 @select_icmp_eq_and_32_0_or_8(i32 %x, i32 %y) {
+  %and = and i32 %x, 32
+  %cmp = icmp eq i32 %and, 0
+  %or = or i32 %y, 8
+  %select = select i1 %cmp, i32 %y, i32 %or
+  ret i32 %select
+}
+
+; CHECK: @select_icmp_ne_0_and_4096_or_4096
+; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 %x, 4096
+; CHECK-NEXT: [[XOR:%[a-z0-9]+]] = xor i32 [[AND]], 4096
+; CHECK-NEXT: [[OR:%[a-z0-9]+]] = or i32 [[XOR]], %y
+; CHECK-NEXT: ret i32 [[OR]]
+define i32 @select_icmp_ne_0_and_4096_or_4096(i32 %x, i32 %y) {
+  %and = and i32 %x, 4096
+  %cmp = icmp ne i32 0, %and
+  %or = or i32 %y, 4096
+  %select = select i1 %cmp, i32 %y, i32 %or
+  ret i32 %select
+}
+
+; CHECK: @select_icmp_eq_and_4096_0_or_4096
+; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 %x, 4096
+; CHECK-NEXT: [[OR:%[a-z0-9]+]] = or i32 [[AND]], %y
+; CHECK-NEXT: ret i32 [[OR]]
+define i32 @select_icmp_eq_and_4096_0_or_4096(i32 %x, i32 %y) {
+  %and = and i32 %x, 4096
+  %cmp = icmp eq i32 %and, 0
+  %or = or i32 %y, 4096
+  %select = select i1 %cmp, i32 %y, i32 %or
+  ret i32 %select
+}
+
+; CHECK: @select_icmp_eq_0_and_1_or_1
+; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i64 %x, 1
+; CHECK-NEXT: [[TRUNC:%[a-z0-9]+]] = trunc i64 [[AND]] to i32
+; CHECK-NEXT: [[OR:%[a-z0-9]+]] = or i32 [[TRUNC]], %y
+; CHECK-NEXT: ret i32 [[OR]]
+define i32 @select_icmp_eq_0_and_1_or_1(i64 %x, i32 %y) {
+  %and = and i64 %x, 1
+  %cmp = icmp eq i64 %and, 0
+  %or = or i32 %y, 1
+  %select = select i1 %cmp, i32 %y, i32 %or
+  ret i32 %select
+}
+
+; CHECK: @select_icmp_ne_0_and_4096_or_32
+; CHECK-NEXT: [[LSHR:%[a-z0-9]+]] = lshr i32 %x, 7
+; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 [[LSHR]], 32
+; CHECK-NEXT: [[XOR:%[a-z0-9]+]] = xor i32 [[AND]], 32
+; CHECK-NEXT: [[OR:%[a-z0-9]+]] = or i32 [[XOR]], %y
+; CHECK-NEXT: ret i32 [[OR]]
+define i32 @select_icmp_ne_0_and_4096_or_32(i32 %x, i32 %y) {
+  %and = and i32 %x, 4096
+  %cmp = icmp ne i32 0, %and
+  %or = or i32 %y, 32
+  %select = select i1 %cmp, i32 %y, i32 %or
+  ret i32 %select
+}
+
+; CHECK: @select_icmp_ne_0_and_32_or_4096
+; CHECK-NEXT: [[SHL:%[a-z0-9]+]] = shl i32 %x, 7
+; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 [[SHL]], 4096
+; CHECK-NEXT: [[XOR:%[a-z0-9]+]] = xor i32 [[AND]], 4096
+; CHECK-NEXT: [[OR:%[a-z0-9]+]] = or i32 [[XOR]], %y
+; CHECK-NEXT: ret i32 [[OR]]
+define i32 @select_icmp_ne_0_and_32_or_4096(i32 %x, i32 %y) {
+  %and = and i32 %x, 32
+  %cmp = icmp ne i32 0, %and
+  %or = or i32 %y, 4096
+  %select = select i1 %cmp, i32 %y, i32 %or
+  ret i32 %select
+}
+
+; CHECK: @select_icmp_ne_0_and_1073741824_or_8
+; CHECK-NEXT: [[LSHR:%[a-z0-9]+]] = lshr i32 %x, 27
+; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 [[LSHR]], 8
+; CHECK-NEXT: [[TRUNC:%[a-z0-9]+]] = trunc i32 [[AND]] to i8
+; CHECK-NEXT: [[XOR:%[a-z0-9]+]] = xor i8 [[TRUNC]], 8
+; CHECK-NEXT: [[OR:%[a-z0-9]+]] = or i8 [[XOR]], %y
+; CHECK-NEXT: ret i8 [[OR]]
+define i8 @select_icmp_ne_0_and_1073741824_or_8(i32 %x, i8 %y) {
+  %and = and i32 %x, 1073741824
+  %cmp = icmp ne i32 0, %and
+  %or = or i8 %y, 8
+  %select = select i1 %cmp, i8 %y, i8 %or
+  ret i8 %select
+}
+
+; CHECK: @select_icmp_ne_0_and_8_or_1073741824
+; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i8 %x, 8
+; CHECK-NEXT: [[ZEXT:%[a-z0-9]+]] = zext i8 [[AND]] to i32
+; CHECK-NEXT: [[SHL:%[a-z0-9]+]] = shl nuw nsw i32 [[ZEXT]], 27
+; CHECK-NEXT: [[XOR:%[a-z0-9]+]] = xor i32 [[SHL]], 1073741824
+; CHECK-NEXT: [[OR:%[a-z0-9]+]] = or i32 [[XOR]], %y
+; CHECK-NEXT: ret i32 [[OR]]
+define i32 @select_icmp_ne_0_and_8_or_1073741824(i8 %x, i32 %y) {
+  %and = and i8 %x, 8
+  %cmp = icmp ne i8 0, %and
+  %or = or i32 %y, 1073741824
+  %select = select i1 %cmp, i32 %y, i32 %or
+  ret i32 %select
+}
diff --git a/test/Transforms/InstCombine/sub-xor.ll b/test/Transforms/InstCombine/sub-xor.ll
index 279e4ac..1d14852 100644
--- a/test/Transforms/InstCombine/sub-xor.ll
+++ b/test/Transforms/InstCombine/sub-xor.ll
@@ -35,3 +35,13 @@ define i32 @test3(i32 %x) nounwind {
 ; CHECK-NEXT: sub i32 73, %and
 ; CHECK-NEXT: ret
 }
+
+define i32 @test4(i32 %x) nounwind {
+  %sub = xor i32 %x, 2147483648
+  %add = add i32 %sub, 42
+  ret i32 %add
+
+; CHECK: @test4
+; CHECK-NEXT: add i32 %x, -2147483606
+; CHECK-NEXT: ret
+}
diff --git a/test/Transforms/InstCombine/vec_demanded_elts.ll b/test/Transforms/InstCombine/vec_demanded_elts.ll
index 2d90750..0019a57 100644
--- a/test/Transforms/InstCombine/vec_demanded_elts.ll
+++ b/test/Transforms/InstCombine/vec_demanded_elts.ll
@@ -196,7 +196,7 @@ define <4 x float> @test_select(float %f, float %g) {
 ; CHECK-NOT: insertelement
 ; CHECK: %a3 = insertelement <4 x float> %a0, float 3.000000e+00, i32 3
 ; CHECK-NOT: insertelement
-; CHECK: shufflevector <4 x float> %a3, <4 x float> <float undef, float 4.000000e+00, float 5.000000e+00, float undef>, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
+; CHECK: %ret = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x float> %a3, <4 x float> <float undef, float 4.000000e+00, float 5.000000e+00, float undef>
   %a0 = insertelement <4 x float> undef, float %f, i32 0
   %a1 = insertelement <4 x float> %a0, float 1.000000e+00, i32 1
   %a2 = insertelement <4 x float> %a1, float 2.000000e+00, i32 2
diff --git a/test/Transforms/InstCombine/vec_extract_2elts.ll b/test/Transforms/InstCombine/vec_extract_2elts.ll
new file mode 100644
index 0000000..5972340
--- /dev/null
+++ b/test/Transforms/InstCombine/vec_extract_2elts.ll
@@ -0,0 +1,12 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define void @test(<4 x i32> %v, i64 *%r1, i64 *%r2) {
+;CHECK: %1 = extractelement <4 x i32> %v, i32 0
+;CHECK: %2 = zext i32 %1 to i64
+        %1 = zext <4 x i32> %v to <4 x i64>
+        %2 = extractelement <4 x i64> %1, i32 0
+        store i64 %2, i64 *%r1
+        store i64 %2, i64 *%r2
+        ret void
+}
+
diff --git a/test/Transforms/InstCombine/vec_extract_var_elt.ll b/test/Transforms/InstCombine/vec_extract_var_elt.ll
new file mode 100644
index 0000000..3c98287
--- /dev/null
+++ b/test/Transforms/InstCombine/vec_extract_var_elt.ll
@@ -0,0 +1,18 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define void @test (float %b, <8 x float> * %p)  {
+; CHECK: extractelement
+; CHECK: fptosi
+  %1 = load <8 x float> * %p
+  %2 = bitcast <8 x float> %1 to <8 x i32>
+  %3 = bitcast <8 x i32> %2 to <8 x float>
+  %a = fptosi <8 x float> %3 to <8 x i32>
+  %4 = fptosi float %b to i32
+  %5 = add i32 %4, -2
+  %6 = extractelement <8 x i32> %a, i32 %5
+  %7 = insertelement <8 x i32> undef, i32 %6, i32 7
+  %8 = sitofp <8 x i32> %7 to <8 x float>
+  store <8 x float> %8, <8 x float>* %p
+  ret void    
+}
+
diff --git a/test/Transforms/InstCombine/vec_phi_extract.ll b/test/Transforms/InstCombine/vec_phi_extract.ll
new file mode 100644
index 0000000..2f10fc2
--- /dev/null
+++ b/test/Transforms/InstCombine/vec_phi_extract.ll
@@ -0,0 +1,27 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define void @f(i64 %val, i32  %limit, i32 *%ptr) {
+;CHECK: %0 = trunc i64
+;CHECK: %1 = phi i32
+entry:
+  %tempvector = insertelement <16 x i64> undef, i64 %val, i32 0
+  %vector = shufflevector <16 x i64> %tempvector, <16 x i64> undef, <16 x i32> zeroinitializer
+  %0 = add <16 x i64> %vector, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>
+  %1 = trunc <16 x i64> %0 to <16 x i32>
+  br label %loop
+
+loop:
+  %2 = phi <16 x i32> [ %1, %entry ], [ %inc, %loop ]
+  %elt = extractelement <16 x i32> %2, i32 0
+  %end = icmp ult i32 %elt, %limit
+  %3 = add i32 10, %elt
+  %4 = sext i32 %elt to i64
+  %5 = getelementptr i32* %ptr, i64 %4
+  store i32 %3, i32* %5
+  %inc = add <16 x i32> %2, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+  br i1 %end, label %loop, label %ret
+
+ret:
+  ret void
+}
+
diff --git a/test/Transforms/InstCombine/vec_shuffle.ll b/test/Transforms/InstCombine/vec_shuffle.ll
index 14f5321..8f78c2e 100644
--- a/test/Transforms/InstCombine/vec_shuffle.ll
+++ b/test/Transforms/InstCombine/vec_shuffle.ll
@@ -153,46 +153,3 @@ define <8 x i8> @test12a(<8 x i8> %tmp6, <8 x i8> %tmp2) nounwind {
   ret <8 x i8> %tmp3
 }
 
-; We should form a shuffle out of a select with constant condition.
-define <4 x i16> @test13a(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: @test13a
-; CHECK-NEXT: shufflevector <4 x i16> %lhs, <4 x i16> %rhs, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-; CHECK-NEXT: ret
-  %A = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>,
-           <4 x i16> %lhs, <4 x i16> %rhs
-  ret <4 x i16> %A
-}
-
-define <4 x i16> @test13b(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: @test13b
-; CHECK-NEXT: ret <4 x i16> %lhs
-  %A = select <4 x i1> <i1 true, i1 undef, i1 true, i1 true>,
-           <4 x i16> %lhs, <4 x i16> %rhs
-  ret <4 x i16> %A
-}
-
-define <4 x i16> @test13c(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: @test13c
-; CHECK-NEXT: shufflevector <4 x i16> %lhs, <4 x i16> %rhs, <4 x i32> <i32 0, i32 undef, i32 2, i32 7>
-; CHECK-NEXT: ret
-  %A = select <4 x i1> <i1 true, i1 undef, i1 true, i1 false>,
-           <4 x i16> %lhs, <4 x i16> %rhs
-  ret <4 x i16> %A
-}
-
-define <4 x i16> @test13d(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: @test13d
-; CHECK: select
-; CHECK-NEXT: ret
-  %A = select <4 x i1> <i1 true, i1 icmp ugt (<4 x i16>(<4 x i16>, <4 x i16>)* @test13a, <4 x i16>(<4 x i16>, <4 x i16>)* @test13b), i1 true, i1 false>,
-           <4 x i16> %lhs, <4 x i16> %rhs
-  ret <4 x i16> %A
-}
-
-define <4 x i16> @test13e(<4 x i16> %lhs, <4 x i16> %rhs) {
-; CHECK: @test13e
-; CHECK-NEXT: ret <4 x i16> %rhs
-  %A = select <4 x i1> <i1 false, i1 false, i1 false, i1 false>,
-           <4 x i16> %lhs, <4 x i16> %rhs
-  ret <4 x i16> %A
-}
diff --git a/test/Transforms/InstSimplify/2013-04-19-ConstantFoldingCrash.ll b/test/Transforms/InstSimplify/2013-04-19-ConstantFoldingCrash.ll
new file mode 100644
index 0000000..1647517
--- /dev/null
+++ b/test/Transforms/InstSimplify/2013-04-19-ConstantFoldingCrash.ll
@@ -0,0 +1,9 @@
+; RUN: opt < %s -instsimplify
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; PR15791
+define <2 x i64> @test1() {
+  %a = and <2 x i64> undef, bitcast (<4 x i32> <i32 undef, i32 undef, i32 undef, i32 2147483647> to <2 x i64>)
+  ret <2 x i64> %a
+}
diff --git a/test/Transforms/InstSimplify/floating-point-arithmetic.ll b/test/Transforms/InstSimplify/floating-point-arithmetic.ll
index f9c364c..91ce263 100644
--- a/test/Transforms/InstSimplify/floating-point-arithmetic.ll
+++ b/test/Transforms/InstSimplify/floating-point-arithmetic.ll
@@ -14,7 +14,7 @@ define float @fsub_0_0_x(float %a) {
 ; CHECK: @fsub_x_0
 define float @fsub_x_0(float %a) {
   %ret = fsub float %a, 0.0
-; CHECK ret float %a
+; CHECK: ret float %a
   ret float %ret
 }
 
@@ -22,7 +22,7 @@ define float @fsub_x_0(float %a) {
 ; CHECK: @fadd_x_n0
 define float @fadd_x_n0(float %a) {
   %ret = fadd float %a, -0.0
-; CHECK ret float %a
+; CHECK: ret float %a
   ret float %ret
 }
 
diff --git a/test/Transforms/JumpThreading/2011-04-14-InfLoop.ll b/test/Transforms/JumpThreading/2011-04-14-InfLoop.ll
index e80bae5..86a1321 100644
--- a/test/Transforms/JumpThreading/2011-04-14-InfLoop.ll
+++ b/test/Transforms/JumpThreading/2011-04-14-InfLoop.ll
@@ -15,7 +15,7 @@ for.cond1177:
   br i1 %cmp1179, label %for.cond1177, label %land.rhs1320
 
 land.rhs1320:
-  %tmp1324 = load volatile i64* getelementptr inbounds (%0* @g_338, i64 0, i32 2), align 1, !tbaa !0
+  %tmp1324 = load volatile i64* getelementptr inbounds (%0* @g_338, i64 0, i32 2), align 1
   br label %if.end.i
 
 if.end.i:
@@ -25,7 +25,3 @@ if.end.i:
 return:
   ret void
 }
-
-!0 = metadata !{metadata !"long long", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/Transforms/LoopRotate/simplifylatch.ll b/test/Transforms/LoopRotate/simplifylatch.ll
index f422724..037bb20 100644
--- a/test/Transforms/LoopRotate/simplifylatch.ll
+++ b/test/Transforms/LoopRotate/simplifylatch.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S < %s -loop-rotate -verify-dom-info -verify-loop-info | FileCheck %s
+; RUN: opt -S < %s -loop-rotate -licm -verify-dom-info -verify-loop-info | FileCheck %s
 ; PR2624 unroll multiple exits
 
 @mode_table = global [4 x i32] zeroinitializer		; <[4 x i32]*> [#uses=1]
@@ -37,3 +37,40 @@ bb5:		; preds = %bb2
 declare i32 @fegetround()
 
 declare void @raise_exception() noreturn
+
+;CHECK: for.body.lr.ph:
+;CHECK-NEXT:  %arrayidx1 = getelementptr inbounds i8* %CurPtr, i64 0
+;CHECK-NEXT:  %0 = load i8* %arrayidx1, align 1
+;CHECK-NEXT:  %conv2 = sext i8 %0 to i32
+;CHECK-NEXT:  br label %for.body
+
+define i32 @foo(i8* %CurPtr, i32 %a) #0 {
+entry:
+  br label %for.cond
+
+for.cond:					  ; preds = %for.inc, %entry
+  %i.0 = phi i32 [ 1, %entry ], [ %inc, %for.inc ]
+  %cmp = icmp ne i32 %i.0, %a
+  br i1 %cmp, label %for.body, label %return
+
+for.body:					  ; preds = %for.cond
+  %idxprom = zext i32 %i.0 to i64
+  %arrayidx = getelementptr inbounds i8* %CurPtr, i64 %idxprom
+  %0 = load i8* %arrayidx, align 1
+  %conv = sext i8 %0 to i32
+  %arrayidx1 = getelementptr inbounds i8* %CurPtr, i64 0
+  %1 = load i8* %arrayidx1, align 1
+  %conv2 = sext i8 %1 to i32
+  %cmp3 = icmp ne i32 %conv, %conv2
+  br i1 %cmp3, label %return, label %for.inc
+
+for.inc:					  ; preds = %for.body
+  %inc = add i32 %i.0, 1
+  br label %for.cond
+
+return:						  ; preds = %for.cond, %for.body
+  %retval.0 = phi i32 [ 0, %for.body ], [ 1, %for.cond ]
+  ret i32 %retval.0
+}
+
+attributes #0 = { nounwind uwtable }
diff --git a/test/Transforms/LoopStrengthReduce/2012-07-13-ExpandUDiv.ll b/test/Transforms/LoopStrengthReduce/2012-07-13-ExpandUDiv.ll
index a122208..8bac639 100644
--- a/test/Transforms/LoopStrengthReduce/2012-07-13-ExpandUDiv.ll
+++ b/test/Transforms/LoopStrengthReduce/2012-07-13-ExpandUDiv.ll
@@ -18,11 +18,11 @@ define i32 @main() nounwind uwtable ssp {
 entry:
   %l_2 = alloca [1 x i32], align 4
   %arrayidx = getelementptr inbounds [1 x i32]* %l_2, i64 0, i64 0
-  store i32 0, i32* %arrayidx, align 4, !tbaa !0
-  %tmp = load i32* @g_3, align 4, !tbaa !0
+  store i32 0, i32* %arrayidx, align 4
+  %tmp = load i32* @g_3, align 4
   %idxprom = sext i32 %tmp to i64
   %arrayidx1 = getelementptr inbounds [1 x i32]* %l_2, i64 0, i64 %idxprom
-  %tmp1 = load i32* %arrayidx1, align 4, !tbaa !0
+  %tmp1 = load i32* %arrayidx1, align 4
   %conv.i.i = and i32 %tmp1, 65535
   %tobool.i.i.i = icmp ne i32 %tmp, 0
   br label %codeRepl
@@ -48,7 +48,7 @@ for.cond.i.i.us:                                  ; preds = %for.inc.i.i.us, %co
 
 for.inc.i.i.us:                                   ; preds = %for.body.i.i.us
   %add.i.i.us = add nsw i32 %tmp2, 1
-  store i32 %add.i.i.us, i32* @g_752, align 4, !tbaa !0
+  store i32 %add.i.i.us, i32* @g_752, align 4
   br label %for.cond.i.i.us
 
 for.body.i.i.us:                                  ; preds = %codeRepl5.us
@@ -78,13 +78,9 @@ for.body.i.i:                                     ; preds = %codeRepl5
 
 for.inc.i.i:                                      ; preds = %for.body.i.i
   %add.i.i = add nsw i32 %tmp3, 1
-  store i32 %add.i.i, i32* @g_752, align 4, !tbaa !0
+  store i32 %add.i.i, i32* @g_752, align 4
   br label %for.cond.i.i
 
 func_4.exit:                                      ; No predecessors!
   ret i32 0
 }
-
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/Transforms/LoopStrengthReduce/ARM/2012-06-15-lsr-noaddrmode.ll b/test/Transforms/LoopStrengthReduce/ARM/2012-06-15-lsr-noaddrmode.ll
index b5124ea..5d728b5 100644
--- a/test/Transforms/LoopStrengthReduce/ARM/2012-06-15-lsr-noaddrmode.ll
+++ b/test/Transforms/LoopStrengthReduce/ARM/2012-06-15-lsr-noaddrmode.ll
@@ -50,7 +50,7 @@ declare %s* @getstruct() nounwind
 ; CHECK: ldr{{.*}}lsl #2
 define i32 @main() nounwind ssp {
 entry:
-  %v0 = load i32* @ncol, align 4, !tbaa !0
+  %v0 = load i32* @ncol, align 4
   %v1 = tail call i32* @getptr() nounwind
   %cmp10.i = icmp eq i32 %v0, 0
   br label %while.cond.outer
@@ -64,12 +64,12 @@ while.cond:
   br label %while.body
 
 while.body:
-  %v3 = load i32* @ncol, align 4, !tbaa !0
+  %v3 = load i32* @ncol, align 4
   br label %end_of_chain
 
 end_of_chain:
   %state.i = getelementptr inbounds %s* %call18, i32 0, i32 0
-  %v4 = load i32** %state.i, align 4, !tbaa !3
+  %v4 = load i32** %state.i, align 4
   br label %while.cond.i.i
 
 while.cond.i.i:
@@ -80,9 +80,9 @@ while.cond.i.i:
 
 land.rhs.i.i:
   %arrayidx.i.i = getelementptr inbounds i32* %v4, i32 %dec.i.i
-  %v5 = load i32* %arrayidx.i.i, align 4, !tbaa !0
+  %v5 = load i32* %arrayidx.i.i, align 4
   %arrayidx1.i.i = getelementptr inbounds i32* %v1, i32 %dec.i.i
-  %v6 = load i32* %arrayidx1.i.i, align 4, !tbaa !0
+  %v6 = load i32* %arrayidx1.i.i, align 4
   %cmp.i.i = icmp eq i32 %v5, %v6
   br i1 %cmp.i.i, label %while.cond.i.i, label %equal_data.exit.i
 
@@ -95,8 +95,3 @@ where.exit:
 while.end.i:
   ret i32 %v3
 }
-
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
-!3 = metadata !{metadata !"any pointer", metadata !1}
diff --git a/test/Transforms/LoopUnroll/scevunroll.ll b/test/Transforms/LoopUnroll/scevunroll.ll
index 99b3a7d..308a036 100644
--- a/test/Transforms/LoopUnroll/scevunroll.ll
+++ b/test/Transforms/LoopUnroll/scevunroll.ll
@@ -66,13 +66,16 @@ exit2:
 
 ; SCEV properly unrolls multi-exit loops.
 ;
+; SCEV cannot currently unroll this loop.
+; It should ideally detect a trip count of 5.
+; rdar:14038809 [SCEV]: Optimize trip count computation for multi-exit loops.
 ; CHECK: @multiExit
-; CHECK: getelementptr i32* %base, i32 10
-; CHECK-NEXT: load i32*
-; CHECK: br i1 false, label %l2.10, label %exit1
-; CHECK: l2.10:
-; CHECK-NOT: br
-; CHECK: ret i32
+; CHECKFIXME: getelementptr i32* %base, i32 10
+; CHECKFIXME-NEXT: load i32*
+; CHECKFIXME: br i1 false, label %l2.10, label %exit1
+; CHECKFIXME: l2.10:
+; CHECKFIXME-NOT: br
+; CHECKFIXME: ret i32
 define i32 @multiExit(i32* %base) nounwind {
 entry:
   br label %l1
@@ -170,3 +173,38 @@ for.body87:
   br label %for.body87
 }
 
+; PR16130: clang produces incorrect code with loop/expression at -O2
+; rdar:14036816 loop-unroll makes assumptions about undefined behavior
+;
+; The loop latch is assumed to exit after the first iteration because
+; of the induction variable's NSW flag. However, the loop latch's
+; equality test is skipped and the loop exits after the second
+; iteration via the early exit. So loop unrolling cannot assume that
+; the loop latch's exit count of zero is an upper bound on the number
+; of iterations.
+;
+; CHECK: @nsw_latch
+; CHECK: for.body:
+; CHECK: %b.03 = phi i32 [ 0, %entry ], [ %add, %for.cond ]
+; CHECK: return:
+; CHECK: %b.03.lcssa = phi i32 [ %b.03, %for.body ], [ %b.03, %for.cond ]
+define void @nsw_latch(i32* %a) nounwind {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.cond, %entry
+  %b.03 = phi i32 [ 0, %entry ], [ %add, %for.cond ]
+  %tobool = icmp eq i32 %b.03, 0
+  %add = add nsw i32 %b.03, 8
+  br i1 %tobool, label %for.cond, label %return
+
+for.cond:                                         ; preds = %for.body
+  %cmp = icmp eq i32 %add, 13
+  br i1 %cmp, label %return, label %for.body
+
+return:                                           ; preds = %for.body, %for.cond
+  %b.03.lcssa = phi i32 [ %b.03, %for.body ], [ %b.03, %for.cond ]
+  %retval.0 = phi i32 [ 1, %for.body ], [ 0, %for.cond ]
+  store i32 %b.03.lcssa, i32* %a, align 4
+  ret void
+}
diff --git a/test/Transforms/LoopUnroll/unloop.ll b/test/Transforms/LoopUnroll/unloop.ll
index 5a9cacd..9a938cc 100644
--- a/test/Transforms/LoopUnroll/unloop.ll
+++ b/test/Transforms/LoopUnroll/unloop.ll
@@ -21,8 +21,8 @@ outer:
 inner:
   %iv = phi i32 [ 0, %outer ], [ %inc, %tail ]
   %inc = add i32 %iv, 1
-  %wbucond = call zeroext i1 @check()
-  br i1 %wbucond, label %outer.backedge, label %tail
+  call zeroext i1 @check()
+  br i1 true, label %outer.backedge, label %tail
 
 tail:
   br i1 false, label %inner, label %exit
@@ -126,25 +126,27 @@ return:
 ; Ensure that only the middle loop is removed and rely on verify-loopinfo to
 ; check soundness.
 ;
-; CHECK: @unloopDeepNested
+; This test must be disabled until trip count computation can be optimized...
+; rdar:14038809 [SCEV]: Optimize trip count computation for multi-exit loops.
+; CHECKFIXME: @unloopDeepNested
 ; Inner-inner loop control.
-; CHECK: while.cond.us.i:
-; CHECK: br i1 %cmp.us.i, label %next_data.exit, label %while.body.us.i
-; CHECK: if.then.us.i:
-; CHECK: br label %while.cond.us.i
+; CHECKFIXME: while.cond.us.i:
+; CHECKFIXME: br i1 %cmp.us.i, label %next_data.exit, label %while.body.us.i
+; CHECKFIXME: if.then.us.i:
+; CHECKFIXME: br label %while.cond.us.i
 ; Inner loop tail.
-; CHECK: if.else.i:
-; CHECK: br label %while.cond.outer.i
+; CHECKFIXME: if.else.i:
+; CHECKFIXME: br label %while.cond.outer.i
 ; Middle loop control (removed).
-; CHECK: valid_data.exit:
-; CHECK-NOT: br
-; CHECK: %cmp = call zeroext i1 @check()
+; CHECKFIXME: valid_data.exit:
+; CHECKFIXME-NOT: br
+; CHECKFIXME: %cmp = call zeroext i1 @check()
 ; Outer loop control.
-; CHECK: copy_data.exit:
-; CHECK: br i1 %cmp38, label %if.then39, label %while.cond.outer
+; CHECKFIXME: copy_data.exit:
+; CHECKFIXME: br i1 %cmp38, label %if.then39, label %while.cond.outer
 ; Outer-outer loop tail.
-; CHECK: while.cond.outer.outer.backedge:
-; CHECK: br label %while.cond.outer.outer
+; CHECKFIXME: while.cond.outer.outer.backedge:
+; CHECKFIXME: br label %while.cond.outer.outer
 define void @unloopDeepNested() nounwind {
 for.cond8.preheader.i:
   %cmp113.i = call zeroext i1 @check()
diff --git a/test/Transforms/LoopUnswitch/2011-09-26-EHCrash.ll b/test/Transforms/LoopUnswitch/2011-09-26-EHCrash.ll
index 0e3103d..e8feef3 100644
--- a/test/Transforms/LoopUnswitch/2011-09-26-EHCrash.ll
+++ b/test/Transforms/LoopUnswitch/2011-09-26-EHCrash.ll
@@ -24,7 +24,7 @@ if.then:                                          ; preds = %for.body
   %idxprom = sext i32 %inc1 to i64
   %array_ = getelementptr inbounds %class.MyContainer.1.3.19.29* %this, i32 0, i32 0
   %arrayidx = getelementptr inbounds [6 x %class.MyMemVarClass.0.2.18.28*]* %array_, i32 0, i64 %idxprom
-  %tmp4 = load %class.MyMemVarClass.0.2.18.28** %arrayidx, align 8, !tbaa !0
+  %tmp4 = load %class.MyMemVarClass.0.2.18.28** %arrayidx, align 8
   %isnull = icmp eq %class.MyMemVarClass.0.2.18.28* %tmp4, null
   br i1 %isnull, label %for.inc, label %delete.notnull
 
@@ -61,7 +61,3 @@ declare void @_ZN13MyMemVarClassD1Ev(%class.MyMemVarClass.0.2.18.28*)
 declare i32 @__gxx_personality_v0(...)
 
 declare void @_ZdlPv(i8*) nounwind
-
-!0 = metadata !{metadata !"any pointer", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/Transforms/LoopUnswitch/2012-04-30-LoopUnswitch-LPad-Crash.ll b/test/Transforms/LoopUnswitch/2012-04-30-LoopUnswitch-LPad-Crash.ll
index 261876d..a6c0d83 100644
--- a/test/Transforms/LoopUnswitch/2012-04-30-LoopUnswitch-LPad-Crash.ll
+++ b/test/Transforms/LoopUnswitch/2012-04-30-LoopUnswitch-LPad-Crash.ll
@@ -45,10 +45,10 @@ for.end:                                          ; preds = %invoke.cont6
 define void @_ZN1DptEv(%class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379* %this) uwtable ssp align 2 {
 entry:
   %this.addr = alloca %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379*, align 8
-  store %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379* %this, %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379** %this.addr, align 8, !tbaa !0
+  store %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379* %this, %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379** %this.addr, align 8
   %this1 = load %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379** %this.addr
   %px = getelementptr inbounds %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379* %this1, i32 0, i32 0
-  %0 = load %class.C.23.43.67.103.139.159.179.199.239.243.247.251.263.295.303.339.347.376** %px, align 8, !tbaa !0
+  %0 = load %class.C.23.43.67.103.139.159.179.199.239.243.247.251.263.295.303.339.347.376** %px, align 8
   %tobool = icmp ne %class.C.23.43.67.103.139.159.179.199.239.243.247.251.263.295.303.339.347.376* %0, null
   br i1 %tobool, label %cond.end, label %cond.false
 
@@ -95,7 +95,3 @@ entry:
 }
 
 declare void @_Z10__assert13v() noreturn
-
-!0 = metadata !{metadata !"any pointer", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/Transforms/LoopVectorize/12-12-11-if-conv.ll b/test/Transforms/LoopVectorize/12-12-11-if-conv.ll
index 2dd7fe3..bab6300 100644
--- a/test/Transforms/LoopVectorize/12-12-11-if-conv.ll
+++ b/test/Transforms/LoopVectorize/12-12-11-if-conv.ll
@@ -15,7 +15,7 @@ entry:
 for.body:                                         ; preds = %entry, %if.end
   %indvars.iv = phi i64 [ %indvars.iv.next, %if.end ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4, !tbaa !0
+  %0 = load i32* %arrayidx, align 4
   %tobool = icmp eq i32 %0, 0
   br i1 %tobool, label %if.end, label %if.then
 
@@ -29,7 +29,7 @@ if.then:                                          ; preds = %for.body
 
 if.end:                                           ; preds = %for.body, %if.then
   %z.0 = phi i32 [ %add1, %if.then ], [ 9, %for.body ]
-  store i32 %z.0, i32* %arrayidx, align 4, !tbaa !0
+  store i32 %z.0, i32* %arrayidx, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %x
@@ -38,7 +38,3 @@ if.end:                                           ; preds = %for.body, %if.then
 for.end:                                          ; preds = %if.end, %entry
   ret i32 undef
 }
-
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll b/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll
index 405582c..ae9f998 100644
--- a/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll
+++ b/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll
@@ -24,7 +24,7 @@ entry:
   %3 = shl nsw i64 %indvars.iv, 2
   %4 = getelementptr inbounds i8* %1, i64 %3
   %5 = bitcast i8* %4 to float*
-  store float %value, float* %5, align 4, !tbaa !0
+  store float %value, float* %5, align 4
   %indvars.iv.next = add i64 %indvars.iv, %2
   %6 = trunc i64 %indvars.iv.next to i32
   %7 = icmp slt i32 %6, %_n
@@ -43,7 +43,7 @@ entry:
   %0 = shl nsw i64 %indvars.iv, 2
   %1 = getelementptr inbounds i8* bitcast (float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 16000) to i8*), i64 %0
   %2 = bitcast i8* %1 to float*
-  store float -1.000000e+00, float* %2, align 4, !tbaa !0
+  store float -1.000000e+00, float* %2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, 16000
@@ -52,6 +52,3 @@ entry:
 "5":                                              ; preds = %"3"
   ret i32 0
 }
-
-!0 = metadata !{metadata !"alias set 7: float", metadata !1}
-!1 = metadata !{metadata !1}
diff --git a/test/Transforms/LoopVectorize/X86/constant-vector-operand.ll b/test/Transforms/LoopVectorize/X86/constant-vector-operand.ll
index 6c92440..f4c07b4 100644
--- a/test/Transforms/LoopVectorize/X86/constant-vector-operand.ll
+++ b/test/Transforms/LoopVectorize/X86/constant-vector-operand.ll
@@ -1,5 +1,7 @@
 ; RUN: opt -mtriple=x86_64-apple-darwin -mcpu=core2 -loop-vectorize -dce -instcombine -S < %s | FileCheck %s
 
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
 @B = common global [1024 x i32] zeroinitializer, align 16
 @A = common global [1024 x i32] zeroinitializer, align 16
 
diff --git a/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll b/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll
new file mode 100644
index 0000000..47a5e7a
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll
@@ -0,0 +1,56 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+;CHECK: @foo
+;CHECK-NOT: <4 x i32>
+;CHECK: ret void
+
+; Function Attrs: nounwind uwtable 
+define void @foo(i32* nocapture %a, i32* nocapture %b, i32 %k, i32 %m) #0 {
+entry:
+  %cmp27 = icmp sgt i32 %m, 0
+  br i1 %cmp27, label %for.body3.lr.ph.us, label %for.end15
+
+for.end.us:                                       ; preds = %for.body3.us
+  %arrayidx9.us = getelementptr inbounds i32* %b, i64 %indvars.iv33
+  %0 = load i32* %arrayidx9.us, align 4, !llvm.mem.parallel_loop_access !3
+  %add10.us = add nsw i32 %0, 3
+  store i32 %add10.us, i32* %arrayidx9.us, align 4, !llvm.mem.parallel_loop_access !3
+  %indvars.iv.next34 = add i64 %indvars.iv33, 1
+  %lftr.wideiv35 = trunc i64 %indvars.iv.next34 to i32
+  %exitcond36 = icmp eq i32 %lftr.wideiv35, %m
+  br i1 %exitcond36, label %for.end15, label %for.body3.lr.ph.us, !llvm.loop.parallel !5
+
+for.body3.us:                                     ; preds = %for.body3.us, %for.body3.lr.ph.us
+  %indvars.iv29 = phi i64 [ 0, %for.body3.lr.ph.us ], [ %indvars.iv.next30, %for.body3.us ]
+  %1 = trunc i64 %indvars.iv29 to i32
+  %add4.us = add i32 %add.us, %1
+  %idxprom.us = sext i32 %add4.us to i64
+  %arrayidx.us = getelementptr inbounds i32* %a, i64 %idxprom.us
+  %2 = load i32* %arrayidx.us, align 4, !llvm.mem.parallel_loop_access !3
+  %add5.us = add nsw i32 %2, 1
+  store i32 %add5.us, i32* %arrayidx7.us, align 4, !llvm.mem.parallel_loop_access !3
+  %indvars.iv.next30 = add i64 %indvars.iv29, 1
+  %lftr.wideiv31 = trunc i64 %indvars.iv.next30 to i32
+  %exitcond32 = icmp eq i32 %lftr.wideiv31, %m
+  br i1 %exitcond32, label %for.end.us, label %for.body3.us, !llvm.loop.parallel !4
+
+for.body3.lr.ph.us:                               ; preds = %for.end.us, %entry
+  %indvars.iv33 = phi i64 [ %indvars.iv.next34, %for.end.us ], [ 0, %entry ]
+  %3 = trunc i64 %indvars.iv33 to i32
+  %add.us = add i32 %3, %k
+  %arrayidx7.us = getelementptr inbounds i32* %a, i64 %indvars.iv33
+  br label %for.body3.us
+
+for.end15:                                        ; preds = %for.end.us, %entry
+  ret void
+}
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!3 = metadata !{metadata !4, metadata !5}
+!4 = metadata !{metadata !4}
+!5 = metadata !{metadata !5}
+
diff --git a/test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll b/test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll
index 186fba8..8716cff 100644
--- a/test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll
+++ b/test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll
@@ -11,9 +11,9 @@ entry:
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %arrayidx = getelementptr inbounds float* %a, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %0 = load float* %arrayidx, align 4
   %add = fadd float %0, 1.000000e+00
-  store float %add, float* %arrayidx, align 4, !tbaa !0
+  store float %add, float* %arrayidx, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, 8
@@ -22,7 +22,3 @@ for.body:                                         ; preds = %for.body, %entry
 for.end:                                          ; preds = %for.body
   ret void
 }
-
-!0 = metadata !{metadata !"float", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll b/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll
index 452d0df..f904a8e 100644
--- a/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll
+++ b/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll
@@ -19,19 +19,19 @@ entry:
 for.body:                                         ; preds = %for.body.for.body_crit_edge, %entry
   %indvars.iv.reload = load i64* %indvars.iv.reg2mem
   %arrayidx = getelementptr inbounds i32* %b, i64 %indvars.iv.reload
-  %0 = load i32* %arrayidx, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3
+  %0 = load i32* %arrayidx, align 4, !llvm.mem.parallel_loop_access !3
   %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv.reload
-  %1 = load i32* %arrayidx2, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3
+  %1 = load i32* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !3
   %idxprom3 = sext i32 %1 to i64
   %arrayidx4 = getelementptr inbounds i32* %a, i64 %idxprom3
-  store i32 %0, i32* %arrayidx4, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3
+  store i32 %0, i32* %arrayidx4, align 4, !llvm.mem.parallel_loop_access !3
   %indvars.iv.next = add i64 %indvars.iv.reload, 1
   ; A new store without the parallel metadata here:
   store i64 %indvars.iv.next, i64* %indvars.iv.next.reg2mem
   %indvars.iv.next.reload1 = load i64* %indvars.iv.next.reg2mem
   %arrayidx6 = getelementptr inbounds i32* %b, i64 %indvars.iv.next.reload1
-  %2 = load i32* %arrayidx6, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3
-  store i32 %2, i32* %arrayidx2, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3
+  %2 = load i32* %arrayidx6, align 4, !llvm.mem.parallel_loop_access !3
+  store i32 %2, i32* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !3
   %indvars.iv.next.reload = load i64* %indvars.iv.next.reg2mem
   %lftr.wideiv = trunc i64 %indvars.iv.next.reload to i32
   %exitcond = icmp eq i32 %lftr.wideiv, 512
@@ -46,7 +46,4 @@ for.end:                                          ; preds = %for.body
   ret void
 }
 
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
 !3 = metadata !{metadata !3}
diff --git a/test/Transforms/LoopVectorize/X86/parallel-loops.ll b/test/Transforms/LoopVectorize/X86/parallel-loops.ll
index f648722..3f1a071 100644
--- a/test/Transforms/LoopVectorize/X86/parallel-loops.ll
+++ b/test/Transforms/LoopVectorize/X86/parallel-loops.ll
@@ -21,16 +21,16 @@ entry:
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %arrayidx = getelementptr inbounds i32* %b, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4, !tbaa !0
+  %0 = load i32* %arrayidx, align 4
   %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv
-  %1 = load i32* %arrayidx2, align 4, !tbaa !0
+  %1 = load i32* %arrayidx2, align 4
   %idxprom3 = sext i32 %1 to i64
   %arrayidx4 = getelementptr inbounds i32* %a, i64 %idxprom3
-  store i32 %0, i32* %arrayidx4, align 4, !tbaa !0
+  store i32 %0, i32* %arrayidx4, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %arrayidx6 = getelementptr inbounds i32* %b, i64 %indvars.iv.next
-  %2 = load i32* %arrayidx6, align 4, !tbaa !0
-  store i32 %2, i32* %arrayidx2, align 4, !tbaa !0
+  %2 = load i32* %arrayidx6, align 4
+  store i32 %2, i32* %arrayidx2, align 4
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, 512
   br i1 %exitcond, label %for.end, label %for.body
@@ -51,18 +51,18 @@ entry:
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %arrayidx = getelementptr inbounds i32* %b, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3
+  %0 = load i32* %arrayidx, align 4, !llvm.mem.parallel_loop_access !3
   %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv
-  %1 = load i32* %arrayidx2, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3
+  %1 = load i32* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !3
   %idxprom3 = sext i32 %1 to i64
   %arrayidx4 = getelementptr inbounds i32* %a, i64 %idxprom3
   ; This store might have originated from inlining a function with a parallel
   ; loop. Refers to a list with the "original loop reference" (!4) also included.
-  store i32 %0, i32* %arrayidx4, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !5
+  store i32 %0, i32* %arrayidx4, align 4, !llvm.mem.parallel_loop_access !5
   %indvars.iv.next = add i64 %indvars.iv, 1
   %arrayidx6 = getelementptr inbounds i32* %b, i64 %indvars.iv.next
-  %2 = load i32* %arrayidx6, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3
-  store i32 %2, i32* %arrayidx2, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !3
+  %2 = load i32* %arrayidx6, align 4, !llvm.mem.parallel_loop_access !3
+  store i32 %2, i32* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !3
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, 512
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop.parallel !3
@@ -84,18 +84,18 @@ entry:
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %arrayidx = getelementptr inbounds i32* %b, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !6
+  %0 = load i32* %arrayidx, align 4, !llvm.mem.parallel_loop_access !6
   %arrayidx2 = getelementptr inbounds i32* %a, i64 %indvars.iv
-  %1 = load i32* %arrayidx2, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !6
+  %1 = load i32* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !6
   %idxprom3 = sext i32 %1 to i64
   %arrayidx4 = getelementptr inbounds i32* %a, i64 %idxprom3
   ; This refers to the loop marked with !7 which we are not in at the moment.
   ; It should prevent detecting as a parallel loop.
-  store i32 %0, i32* %arrayidx4, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !7
+  store i32 %0, i32* %arrayidx4, align 4, !llvm.mem.parallel_loop_access !7
   %indvars.iv.next = add i64 %indvars.iv, 1
   %arrayidx6 = getelementptr inbounds i32* %b, i64 %indvars.iv.next
-  %2 = load i32* %arrayidx6, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !6
-  store i32 %2, i32* %arrayidx2, align 4, !tbaa !0, !llvm.mem.parallel_loop_access !6
+  %2 = load i32* %arrayidx6, align 4, !llvm.mem.parallel_loop_access !6
+  store i32 %2, i32* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !6
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, 512
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop.parallel !6
@@ -104,9 +104,6 @@ for.end:                                          ; preds = %for.body
   ret void
 }
 
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
 !3 = metadata !{metadata !3}
 !4 = metadata !{metadata !4}
 !5 = metadata !{metadata !3, metadata !4}
diff --git a/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll b/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll
new file mode 100644
index 0000000..b66119f
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll
@@ -0,0 +1,29 @@
+; RUN: opt -O3 -loop-vectorize -force-vector-unroll=1 -force-vector-width=2 -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.7.0"
+
+@x = common global [1024 x x86_fp80] zeroinitializer, align 16
+
+;CHECK: @example
+;CHECK-NOT: bitcast x86_fp80* {{%[^ ]+}} to <{{[2-9][0-9]*}} x x86_fp80>*
+;CHECK: store
+;CHECK: ret void
+
+define void @example() nounwind ssp uwtable {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %conv = sitofp i32 1 to x86_fp80
+  %arrayidx = getelementptr inbounds [1024 x x86_fp80]* @x, i64 0, i64 %indvars.iv
+  store x86_fp80 %conv, x86_fp80* %arrayidx, align 16
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
diff --git a/test/Transforms/LoopVectorize/bsd_regex.ll b/test/Transforms/LoopVectorize/bsd_regex.ll
new file mode 100644
index 0000000..a14b92d
--- /dev/null
+++ b/test/Transforms/LoopVectorize/bsd_regex.ll
@@ -0,0 +1,38 @@
+; RUN: opt -S -loop-vectorize -dce -instcombine -force-vector-width=2 -force-vector-unroll=2 < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+;PR 15830.
+
+;CHECK: foo
+; When scalarizing stores we need to preserve the original order.
+; Make sure that we are extracting in the correct order (0101, and not 0011).
+;CHECK: extractelement <2 x i64> {{.*}}, i32 0
+;CHECK: extractelement <2 x i64> {{.*}}, i32 1
+;CHECK: extractelement <2 x i64> {{.*}}, i32 0
+;CHECK: extractelement <2 x i64> {{.*}}, i32 1
+;CHECK: store
+;CHECK: store
+;CHECK: store
+;CHECK: store
+;CHECK: ret
+
+define i32 @foo(i32* nocapture %A) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %0 = shl nsw i64 %indvars.iv, 2
+  %arrayidx = getelementptr inbounds i32* %A, i64 %0
+  store i32 4, i32* %arrayidx, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 10000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret i32 undef
+}
+
+
diff --git a/test/Transforms/LoopVectorize/bzip_reverse_loops.ll b/test/Transforms/LoopVectorize/bzip_reverse_loops.ll
index 431e422..2648bbe 100644
--- a/test/Transforms/LoopVectorize/bzip_reverse_loops.ll
+++ b/test/Transforms/LoopVectorize/bzip_reverse_loops.ll
@@ -17,7 +17,7 @@ do.body:                                          ; preds = %cond.end, %entry
   %n.addr.0 = phi i32 [ %n, %entry ], [ %dec, %cond.end ]
   %p.addr.0 = phi i16* [ %p, %entry ], [ %incdec.ptr, %cond.end ]
   %incdec.ptr = getelementptr inbounds i16* %p.addr.0, i64 -1
-  %0 = load i16* %incdec.ptr, align 2, !tbaa !0
+  %0 = load i16* %incdec.ptr, align 2
   %conv = zext i16 %0 to i32
   %cmp = icmp ult i32 %conv, %size
   br i1 %cmp, label %cond.end, label %cond.true
@@ -29,7 +29,7 @@ cond.true:                                        ; preds = %do.body
 
 cond.end:                                         ; preds = %do.body, %cond.true
   %cond = phi i16 [ %phitmp, %cond.true ], [ 0, %do.body ]
-  store i16 %cond, i16* %incdec.ptr, align 2, !tbaa !0
+  store i16 %cond, i16* %incdec.ptr, align 2
   %dec = add i32 %n.addr.0, -1
   %tobool = icmp eq i32 %dec, 0
   br i1 %tobool, label %do.end, label %do.body
@@ -52,11 +52,11 @@ do.body:                                          ; preds = %do.body, %entry
   %n.addr.0 = phi i32 [ %n, %entry ], [ %dec, %do.body ]
   %p.0 = phi i32* [ %a, %entry ], [ %incdec.ptr, %do.body ]
   %incdec.ptr = getelementptr inbounds i32* %p.0, i64 -1
-  %0 = load i32* %incdec.ptr, align 4, !tbaa !3
+  %0 = load i32* %incdec.ptr, align 4
   %cmp = icmp slt i32 %0, %wsize
   %sub = sub nsw i32 %0, %wsize
   %cond = select i1 %cmp, i32 0, i32 %sub
-  store i32 %cond, i32* %incdec.ptr, align 4, !tbaa !3
+  store i32 %cond, i32* %incdec.ptr, align 4
   %dec = add nsw i32 %n.addr.0, -1
   %tobool = icmp eq i32 %dec, 0
   br i1 %tobool, label %do.end, label %do.body
@@ -64,8 +64,3 @@ do.body:                                          ; preds = %do.body, %entry
 do.end:                                           ; preds = %do.body
   ret void
 }
-
-!0 = metadata !{metadata !"short", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
-!3 = metadata !{metadata !"int", metadata !1}
diff --git a/test/Transforms/LoopVectorize/calloc.ll b/test/Transforms/LoopVectorize/calloc.ll
index 08c84ef..7e79916 100644
--- a/test/Transforms/LoopVectorize/calloc.ll
+++ b/test/Transforms/LoopVectorize/calloc.ll
@@ -23,7 +23,7 @@ for.body:                                         ; preds = %for.body, %for.body
   %i.030 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
   %shr = lshr i64 %i.030, 1
   %arrayidx = getelementptr inbounds i8* %bytes, i64 %shr
-  %1 = load i8* %arrayidx, align 1, !tbaa !0
+  %1 = load i8* %arrayidx, align 1
   %conv = zext i8 %1 to i32
   %and = shl i64 %i.030, 2
   %neg = and i64 %and, 4
@@ -38,7 +38,7 @@ for.body:                                         ; preds = %for.body, %for.body
   %add17 = add nsw i32 %cond, %shr11
   %conv18 = trunc i32 %add17 to i8
   %arrayidx19 = getelementptr inbounds i8* %call, i64 %i.030
-  store i8 %conv18, i8* %arrayidx19, align 1, !tbaa !0
+  store i8 %conv18, i8* %arrayidx19, align 1
   %inc = add i64 %i.030, 1
   %exitcond = icmp eq i64 %inc, %0
   br i1 %exitcond, label %for.end, label %for.body
@@ -48,6 +48,3 @@ for.end:                                          ; preds = %for.body, %entry
 }
 
 declare noalias i8* @calloc(i64, i64) nounwind
-
-!0 = metadata !{metadata !"omnipotent char", metadata !1}
-!1 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/Transforms/LoopVectorize/dbg.value.ll b/test/Transforms/LoopVectorize/dbg.value.ll
index a2ea951..127d479 100644
--- a/test/Transforms/LoopVectorize/dbg.value.ll
+++ b/test/Transforms/LoopVectorize/dbg.value.ll
@@ -18,12 +18,12 @@ for.body:
   ;CHECK: load <4 x i32>
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %arrayidx = getelementptr inbounds [1024 x i32]* @B, i64 0, i64 %indvars.iv, !dbg !19
-  %0 = load i32* %arrayidx, align 4, !dbg !19, !tbaa !21
+  %0 = load i32* %arrayidx, align 4, !dbg !19
   %arrayidx2 = getelementptr inbounds [1024 x i32]* @C, i64 0, i64 %indvars.iv, !dbg !19
-  %1 = load i32* %arrayidx2, align 4, !dbg !19, !tbaa !21
+  %1 = load i32* %arrayidx2, align 4, !dbg !19
   %add = add nsw i32 %1, %0, !dbg !19
   %arrayidx4 = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv, !dbg !19
-  store i32 %add, i32* %arrayidx4, align 4, !dbg !19, !tbaa !21
+  store i32 %add, i32* %arrayidx4, align 4, !dbg !19
   %indvars.iv.next = add i64 %indvars.iv, 1, !dbg !18
   tail call void @llvm.dbg.value(metadata !{null}, i64 0, metadata !9), !dbg !18
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !18
@@ -64,7 +64,4 @@ attributes #1 = { nounwind readnone }
 !18 = metadata !{i32 6, i32 0, metadata !10, null}
 !19 = metadata !{i32 7, i32 0, metadata !20, null}
 !20 = metadata !{i32 786443, metadata !10, i32 6, i32 0, metadata !4, i32 1}
-!21 = metadata !{metadata !"int", metadata !22}
-!22 = metadata !{metadata !"omnipotent char", metadata !23}
-!23 = metadata !{metadata !"Simple C/C++ TBAA"}
 !24 = metadata !{i32 9, i32 0, metadata !3, null}
diff --git a/test/Transforms/LoopVectorize/float-reduction.ll b/test/Transforms/LoopVectorize/float-reduction.ll
index 565684c..54ca172 100644
--- a/test/Transforms/LoopVectorize/float-reduction.ll
+++ b/test/Transforms/LoopVectorize/float-reduction.ll
@@ -13,7 +13,7 @@ for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %sum.04 = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ]
   %arrayidx = getelementptr inbounds float* %A, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %0 = load float* %arrayidx, align 4
   %add = fadd fast float %sum.04, %0
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
@@ -23,7 +23,3 @@ for.body:                                         ; preds = %for.body, %entry
 for.end:                                          ; preds = %for.body
   ret float %add
 }
-
-!0 = metadata !{metadata !"float", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/Transforms/LoopVectorize/i8-induction.ll b/test/Transforms/LoopVectorize/i8-induction.ll
index 7759b70..2a0e826 100644
--- a/test/Transforms/LoopVectorize/i8-induction.ll
+++ b/test/Transforms/LoopVectorize/i8-induction.ll
@@ -8,8 +8,8 @@ target triple = "x86_64-apple-macosx10.8.0"
 
 define void @f() nounwind uwtable ssp {
 scalar.ph:
-  store i8 0, i8* inttoptr (i64 1 to i8*), align 1, !tbaa !0
-  %0 = load i8* @a, align 1, !tbaa !0
+  store i8 0, i8* inttoptr (i64 1 to i8*), align 1
+  %0 = load i8* @a, align 1
   br label %for.body
 
 for.body:
@@ -26,10 +26,6 @@ for.body:
   br i1 %phitmp14, label %for.body, label %for.end
 
 for.end:                                          ; preds = %for.body
-  store i8 %mul, i8* @b, align 1, !tbaa !0
+  store i8 %mul, i8* @b, align 1
   ret void
 }
-
-!0 = metadata !{metadata !"omnipotent char", metadata !1}
-!1 = metadata !{metadata !"Simple C/C++ TBAA"}
-
diff --git a/test/Transforms/LoopVectorize/if-conversion-nest.ll b/test/Transforms/LoopVectorize/if-conversion-nest.ll
new file mode 100644
index 0000000..f44862a
--- /dev/null
+++ b/test/Transforms/LoopVectorize/if-conversion-nest.ll
@@ -0,0 +1,48 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+;CHECK: @foo
+;CHECK: icmp sgt
+;CHECK: icmp sgt
+;CHECK: icmp slt
+;CHECK: select <4 x i1>
+;CHECK: %[[P1:.*]] = select <4 x i1>
+;CHECK: xor <4 x i1>
+;CHECK: and <4 x i1>
+;CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %[[P1]]
+;CHECK: ret
+define i32 @foo(i32* nocapture %A, i32* nocapture %B, i32 %n) {  ; per element i: A[i] = (A[i] > B[i]) ? ((A[i] > 19) ? 3 : ((B[i] < 4) ? 4 : 5)) : 9
+entry:
+  %cmp26 = icmp sgt i32 %n, 0  ; guard: the loop is skipped entirely when n <= 0
+  br i1 %cmp26, label %for.body, label %for.end
+
+for.body:
+  %indvars.iv = phi i64 [ %indvars.iv.next, %if.end14 ], [ 0, %entry ]  ; induction variable, starts at 0
+  %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4  ; %0 = A[i]
+  %arrayidx2 = getelementptr inbounds i32* %B, i64 %indvars.iv
+  %1 = load i32* %arrayidx2, align 4  ; %1 = B[i]
+  %cmp3 = icmp sgt i32 %0, %1  ; outer condition: A[i] > B[i]
+  br i1 %cmp3, label %if.then, label %if.end14
+
+if.then:
+  %cmp6 = icmp sgt i32 %0, 19  ; nested condition: A[i] > 19
+  br i1 %cmp6, label %if.end14, label %if.else
+
+if.else:
+  %cmp10 = icmp slt i32 %1, 4  ; innermost condition: B[i] < 4
+  %. = select i1 %cmp10, i32 4, i32 5
+  br label %if.end14
+
+if.end14:
+  %x.0 = phi i32 [ 9, %for.body ], [ 3, %if.then ], [ %., %if.else ]  ; <------------- A PHI with 3 entries that we can still vectorize.
+  store i32 %x.0, i32* %arrayidx, align 4  ; result is written back to A[i], the same address loaded above
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n  ; exit once the truncated next index equals n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret i32 undef  ; return value is undef; the function's visible effect is the stores to %A
+}
diff --git a/test/Transforms/LoopVectorize/intrinsic.ll b/test/Transforms/LoopVectorize/intrinsic.ll
index e79d78d..defbb5b 100644
--- a/test/Transforms/LoopVectorize/intrinsic.ll
+++ b/test/Transforms/LoopVectorize/intrinsic.ll
@@ -14,10 +14,10 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %0 = load float* %arrayidx, align 4
   %call = tail call float @llvm.sqrt.f32(float %0) nounwind readnone
   %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
-  store float %call, float* %arrayidx2, align 4, !tbaa !0
+  store float %call, float* %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -40,10 +40,10 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
-  %0 = load double* %arrayidx, align 8, !tbaa !3
+  %0 = load double* %arrayidx, align 8
   %call = tail call double @llvm.sqrt.f64(double %0) nounwind readnone
   %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
-  store double %call, double* %arrayidx2, align 8, !tbaa !3
+  store double %call, double* %arrayidx2, align 8
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -66,10 +66,10 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %0 = load float* %arrayidx, align 4
   %call = tail call float @llvm.sin.f32(float %0) nounwind readnone
   %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
-  store float %call, float* %arrayidx2, align 4, !tbaa !0
+  store float %call, float* %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -92,10 +92,10 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
-  %0 = load double* %arrayidx, align 8, !tbaa !3
+  %0 = load double* %arrayidx, align 8
   %call = tail call double @llvm.sin.f64(double %0) nounwind readnone
   %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
-  store double %call, double* %arrayidx2, align 8, !tbaa !3
+  store double %call, double* %arrayidx2, align 8
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -118,10 +118,10 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %0 = load float* %arrayidx, align 4
   %call = tail call float @llvm.cos.f32(float %0) nounwind readnone
   %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
-  store float %call, float* %arrayidx2, align 4, !tbaa !0
+  store float %call, float* %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -144,10 +144,10 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
-  %0 = load double* %arrayidx, align 8, !tbaa !3
+  %0 = load double* %arrayidx, align 8
   %call = tail call double @llvm.cos.f64(double %0) nounwind readnone
   %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
-  store double %call, double* %arrayidx2, align 8, !tbaa !3
+  store double %call, double* %arrayidx2, align 8
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -170,10 +170,10 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %0 = load float* %arrayidx, align 4
   %call = tail call float @llvm.exp.f32(float %0) nounwind readnone
   %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
-  store float %call, float* %arrayidx2, align 4, !tbaa !0
+  store float %call, float* %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -196,10 +196,10 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
-  %0 = load double* %arrayidx, align 8, !tbaa !3
+  %0 = load double* %arrayidx, align 8
   %call = tail call double @llvm.exp.f64(double %0) nounwind readnone
   %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
-  store double %call, double* %arrayidx2, align 8, !tbaa !3
+  store double %call, double* %arrayidx2, align 8
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -222,10 +222,10 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %0 = load float* %arrayidx, align 4
   %call = tail call float @llvm.exp2.f32(float %0) nounwind readnone
   %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
-  store float %call, float* %arrayidx2, align 4, !tbaa !0
+  store float %call, float* %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -248,10 +248,10 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
-  %0 = load double* %arrayidx, align 8, !tbaa !3
+  %0 = load double* %arrayidx, align 8
   %call = tail call double @llvm.exp2.f64(double %0) nounwind readnone
   %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
-  store double %call, double* %arrayidx2, align 8, !tbaa !3
+  store double %call, double* %arrayidx2, align 8
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -274,10 +274,10 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %0 = load float* %arrayidx, align 4
   %call = tail call float @llvm.log.f32(float %0) nounwind readnone
   %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
-  store float %call, float* %arrayidx2, align 4, !tbaa !0
+  store float %call, float* %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -300,10 +300,10 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
-  %0 = load double* %arrayidx, align 8, !tbaa !3
+  %0 = load double* %arrayidx, align 8
   %call = tail call double @llvm.log.f64(double %0) nounwind readnone
   %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
-  store double %call, double* %arrayidx2, align 8, !tbaa !3
+  store double %call, double* %arrayidx2, align 8
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -326,10 +326,10 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %0 = load float* %arrayidx, align 4
   %call = tail call float @llvm.log10.f32(float %0) nounwind readnone
   %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
-  store float %call, float* %arrayidx2, align 4, !tbaa !0
+  store float %call, float* %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -352,10 +352,10 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
-  %0 = load double* %arrayidx, align 8, !tbaa !3
+  %0 = load double* %arrayidx, align 8
   %call = tail call double @llvm.log10.f64(double %0) nounwind readnone
   %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
-  store double %call, double* %arrayidx2, align 8, !tbaa !3
+  store double %call, double* %arrayidx2, align 8
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -378,10 +378,10 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %0 = load float* %arrayidx, align 4
   %call = tail call float @llvm.log2.f32(float %0) nounwind readnone
   %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
-  store float %call, float* %arrayidx2, align 4, !tbaa !0
+  store float %call, float* %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -404,10 +404,10 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
-  %0 = load double* %arrayidx, align 8, !tbaa !3
+  %0 = load double* %arrayidx, align 8
   %call = tail call double @llvm.log2.f64(double %0) nounwind readnone
   %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
-  store double %call, double* %arrayidx2, align 8, !tbaa !3
+  store double %call, double* %arrayidx2, align 8
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -430,10 +430,10 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %0 = load float* %arrayidx, align 4
   %call = tail call float @llvm.fabs.f32(float %0) nounwind readnone
   %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
-  store float %call, float* %arrayidx2, align 4, !tbaa !0
+  store float %call, float* %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -453,10 +453,10 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
-  %0 = load double* %arrayidx, align 8, !tbaa !3
+  %0 = load double* %arrayidx, align 8
   %call = tail call double @llvm.fabs(double %0) nounwind readnone
   %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
-  store double %call, double* %arrayidx2, align 8, !tbaa !3
+  store double %call, double* %arrayidx2, align 8
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -479,10 +479,10 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %0 = load float* %arrayidx, align 4
   %call = tail call float @llvm.floor.f32(float %0) nounwind readnone
   %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
-  store float %call, float* %arrayidx2, align 4, !tbaa !0
+  store float %call, float* %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -505,10 +505,10 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
-  %0 = load double* %arrayidx, align 8, !tbaa !3
+  %0 = load double* %arrayidx, align 8
   %call = tail call double @llvm.floor.f64(double %0) nounwind readnone
   %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
-  store double %call, double* %arrayidx2, align 8, !tbaa !3
+  store double %call, double* %arrayidx2, align 8
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -531,10 +531,10 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %0 = load float* %arrayidx, align 4
   %call = tail call float @llvm.ceil.f32(float %0) nounwind readnone
   %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
-  store float %call, float* %arrayidx2, align 4, !tbaa !0
+  store float %call, float* %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -557,10 +557,10 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
-  %0 = load double* %arrayidx, align 8, !tbaa !3
+  %0 = load double* %arrayidx, align 8
   %call = tail call double @llvm.ceil.f64(double %0) nounwind readnone
   %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
-  store double %call, double* %arrayidx2, align 8, !tbaa !3
+  store double %call, double* %arrayidx2, align 8
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -583,10 +583,10 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %0 = load float* %arrayidx, align 4
   %call = tail call float @llvm.trunc.f32(float %0) nounwind readnone
   %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
-  store float %call, float* %arrayidx2, align 4, !tbaa !0
+  store float %call, float* %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -609,10 +609,10 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
-  %0 = load double* %arrayidx, align 8, !tbaa !3
+  %0 = load double* %arrayidx, align 8
   %call = tail call double @llvm.trunc.f64(double %0) nounwind readnone
   %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
-  store double %call, double* %arrayidx2, align 8, !tbaa !3
+  store double %call, double* %arrayidx2, align 8
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -635,10 +635,10 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %0 = load float* %arrayidx, align 4
   %call = tail call float @llvm.rint.f32(float %0) nounwind readnone
   %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
-  store float %call, float* %arrayidx2, align 4, !tbaa !0
+  store float %call, float* %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -661,10 +661,10 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
-  %0 = load double* %arrayidx, align 8, !tbaa !3
+  %0 = load double* %arrayidx, align 8
   %call = tail call double @llvm.rint.f64(double %0) nounwind readnone
   %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
-  store double %call, double* %arrayidx2, align 8, !tbaa !3
+  store double %call, double* %arrayidx2, align 8
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -687,10 +687,10 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %0 = load float* %arrayidx, align 4
   %call = tail call float @llvm.nearbyint.f32(float %0) nounwind readnone
   %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
-  store float %call, float* %arrayidx2, align 4, !tbaa !0
+  store float %call, float* %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -713,10 +713,10 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
-  %0 = load double* %arrayidx, align 8, !tbaa !3
+  %0 = load double* %arrayidx, align 8
   %call = tail call double @llvm.nearbyint.f64(double %0) nounwind readnone
   %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
-  store double %call, double* %arrayidx2, align 8, !tbaa !3
+  store double %call, double* %arrayidx2, align 8
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -739,14 +739,14 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %0 = load float* %arrayidx, align 4
   %arrayidx2 = getelementptr inbounds float* %w, i64 %indvars.iv
-  %1 = load float* %arrayidx2, align 4, !tbaa !0
+  %1 = load float* %arrayidx2, align 4
   %arrayidx4 = getelementptr inbounds float* %z, i64 %indvars.iv
-  %2 = load float* %arrayidx4, align 4, !tbaa !0
+  %2 = load float* %arrayidx4, align 4
   %3 = tail call float @llvm.fma.f32(float %0, float %2, float %1)
   %arrayidx6 = getelementptr inbounds float* %x, i64 %indvars.iv
-  store float %3, float* %arrayidx6, align 4, !tbaa !0
+  store float %3, float* %arrayidx6, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -769,14 +769,14 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
-  %0 = load double* %arrayidx, align 8, !tbaa !3
+  %0 = load double* %arrayidx, align 8
   %arrayidx2 = getelementptr inbounds double* %w, i64 %indvars.iv
-  %1 = load double* %arrayidx2, align 8, !tbaa !3
+  %1 = load double* %arrayidx2, align 8
   %arrayidx4 = getelementptr inbounds double* %z, i64 %indvars.iv
-  %2 = load double* %arrayidx4, align 8, !tbaa !3
+  %2 = load double* %arrayidx4, align 8
   %3 = tail call double @llvm.fma.f64(double %0, double %2, double %1)
   %arrayidx6 = getelementptr inbounds double* %x, i64 %indvars.iv
-  store double %3, double* %arrayidx6, align 8, !tbaa !3
+  store double %3, double* %arrayidx6, align 8
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -799,14 +799,14 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %0 = load float* %arrayidx, align 4
   %arrayidx2 = getelementptr inbounds float* %w, i64 %indvars.iv
-  %1 = load float* %arrayidx2, align 4, !tbaa !0
+  %1 = load float* %arrayidx2, align 4
   %arrayidx4 = getelementptr inbounds float* %z, i64 %indvars.iv
-  %2 = load float* %arrayidx4, align 4, !tbaa !0
+  %2 = load float* %arrayidx4, align 4
   %3 = tail call float @llvm.fmuladd.f32(float %0, float %2, float %1)
   %arrayidx6 = getelementptr inbounds float* %x, i64 %indvars.iv
-  store float %3, float* %arrayidx6, align 4, !tbaa !0
+  store float %3, float* %arrayidx6, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -829,14 +829,14 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
-  %0 = load double* %arrayidx, align 8, !tbaa !3
+  %0 = load double* %arrayidx, align 8
   %arrayidx2 = getelementptr inbounds double* %w, i64 %indvars.iv
-  %1 = load double* %arrayidx2, align 8, !tbaa !3
+  %1 = load double* %arrayidx2, align 8
   %arrayidx4 = getelementptr inbounds double* %z, i64 %indvars.iv
-  %2 = load double* %arrayidx4, align 8, !tbaa !3
+  %2 = load double* %arrayidx4, align 8
   %3 = tail call double @llvm.fmuladd.f64(double %0, double %2, double %1)
   %arrayidx6 = getelementptr inbounds double* %x, i64 %indvars.iv
-  store double %3, double* %arrayidx6, align 8, !tbaa !3
+  store double %3, double* %arrayidx6, align 8
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -859,12 +859,12 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %0 = load float* %arrayidx, align 4
   %arrayidx2 = getelementptr inbounds float* %z, i64 %indvars.iv
-  %1 = load float* %arrayidx2, align 4, !tbaa !0
+  %1 = load float* %arrayidx2, align 4
   %call = tail call float @llvm.pow.f32(float %0, float %1) nounwind readnone
   %arrayidx4 = getelementptr inbounds float* %x, i64 %indvars.iv
-  store float %call, float* %arrayidx4, align 4, !tbaa !0
+  store float %call, float* %arrayidx4, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -887,12 +887,12 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
-  %0 = load double* %arrayidx, align 8, !tbaa !3
+  %0 = load double* %arrayidx, align 8
   %arrayidx2 = getelementptr inbounds double* %z, i64 %indvars.iv
-  %1 = load double* %arrayidx2, align 8, !tbaa !3
+  %1 = load double* %arrayidx2, align 8
   %call = tail call double @llvm.pow.f64(double %0, double %1) nounwind readnone
   %arrayidx4 = getelementptr inbounds double* %x, i64 %indvars.iv
-  store double %call, double* %arrayidx4, align 8, !tbaa !3
+  store double %call, double* %arrayidx4, align 8
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -927,9 +927,3 @@ for.end:                                          ; preds = %for.body
 declare float @fabsf(float) nounwind readnone
 
 declare double @llvm.pow.f64(double, double) nounwind readnone
-
-!0 = metadata !{metadata !"float", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
-!3 = metadata !{metadata !"double", metadata !1}
-!4 = metadata !{metadata !"int", metadata !1}
diff --git a/test/Transforms/LoopVectorize/lcssa-crash.ll b/test/Transforms/LoopVectorize/lcssa-crash.ll
index 06b3b08..de6be54 100644
--- a/test/Transforms/LoopVectorize/lcssa-crash.ll
+++ b/test/Transforms/LoopVectorize/lcssa-crash.ll
@@ -27,3 +27,14 @@ for.end.i.i.i:
   unreachable
 }
 
+; PR16139
+define void @test2(i8* %x) {
+entry:
+  indirectbr i8* %x, [ label %L0, label %L1 ]
+
+L0:
+  br label %L0
+
+L1:
+  ret void
+}
diff --git a/test/Transforms/LoopVectorize/minmax_reduction.ll b/test/Transforms/LoopVectorize/minmax_reduction.ll
new file mode 100644
index 0000000..502fd8b
--- /dev/null
+++ b/test/Transforms/LoopVectorize/minmax_reduction.ll
@@ -0,0 +1,885 @@
+; RUN: opt -S -loop-vectorize -dce -instcombine -force-vector-width=2 -force-vector-unroll=1  < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+@A = common global [1024 x i32] zeroinitializer, align 16
+@fA = common global [1024 x float] zeroinitializer, align 16
+@dA = common global [1024 x double] zeroinitializer, align 16
+
+; Signed tests.
+
+; Turn this into a max reduction. Make sure we use a splat to initialize the
+; vector for the reduction.
+; CHECK: @max_red
+; CHECK: %[[VAR:.*]] = insertelement <2 x i32> undef, i32 %max, i32 0
+; CHECK: {{.*}} = shufflevector <2 x i32> %[[VAR]], <2 x i32> undef, <2 x i32> zeroinitializer
+; CHECK: icmp sgt <2 x i32>
+; CHECK: select <2 x i1>
+; CHECK: middle.block
+; CHECK: icmp sgt <2 x i32>
+; CHECK: select <2 x i1>
+
+define i32 @max_red(i32 %max) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %cmp3 = icmp sgt i32 %0, %max.red.08
+  %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret i32 %max.red.0
+}
+
+; Turn this into a max reduction. The compare has its operands swapped and its
+; predicate inverted (slt instead of sgt), so this is still a max reduction.
+; CHECK: @max_red_inverse_select
+; CHECK: icmp slt <2 x i32>
+; CHECK: select <2 x i1>
+; CHECK: middle.block
+; CHECK: icmp sgt <2 x i32>
+; CHECK: select <2 x i1>
+
+define i32 @max_red_inverse_select(i32 %max) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %cmp3 = icmp slt i32 %max.red.08, %0
+  %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret i32 %max.red.0
+}
+
+; Turn this into a min reduction.
+; CHECK: @min_red
+; CHECK: icmp slt <2 x i32>
+; CHECK: select <2 x i1>
+; CHECK: middle.block
+; CHECK: icmp slt <2 x i32>
+; CHECK: select <2 x i1>
+
+define i32 @min_red(i32 %max) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %cmp3 = icmp slt i32 %0, %max.red.08
+  %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret i32 %max.red.0
+}
+
+; Turn this into a min reduction. The compare has its operands swapped and its
+; predicate inverted (sgt instead of slt), so this is still a min reduction.
+; CHECK: @min_red_inverse_select
+; CHECK: icmp sgt <2 x i32>
+; CHECK: select <2 x i1>
+; CHECK: middle.block
+; CHECK: icmp slt <2 x i32>
+; CHECK: select <2 x i1>
+
+define i32 @min_red_inverse_select(i32 %max) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %cmp3 = icmp sgt i32 %max.red.08, %0
+  %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret i32 %max.red.0
+}
+
+; Unsigned tests.
+
+; Turn this into a max reduction.
+; CHECK: @umax_red
+; CHECK: icmp ugt <2 x i32>
+; CHECK: select <2 x i1>
+; CHECK: middle.block
+; CHECK: icmp ugt <2 x i32>
+; CHECK: select <2 x i1>
+
+define i32 @umax_red(i32 %max) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %cmp3 = icmp ugt i32 %0, %max.red.08
+  %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret i32 %max.red.0
+}
+
+; Turn this into a max reduction. The compare has its operands swapped and its
+; predicate inverted (ult instead of ugt), so this is still a max reduction.
+; CHECK: @umax_red_inverse_select
+; CHECK: icmp ult <2 x i32>
+; CHECK: select <2 x i1>
+; CHECK: middle.block
+; CHECK: icmp ugt <2 x i32>
+; CHECK: select <2 x i1>
+
+define i32 @umax_red_inverse_select(i32 %max) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %cmp3 = icmp ult i32 %max.red.08, %0
+  %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret i32 %max.red.0
+}
+
+; Turn this into a min reduction.
+; CHECK: @umin_red
+; CHECK: icmp ult <2 x i32>
+; CHECK: select <2 x i1>
+; CHECK: middle.block
+; CHECK: icmp ult <2 x i32>
+; CHECK: select <2 x i1>
+
+define i32 @umin_red(i32 %max) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %cmp3 = icmp ult i32 %0, %max.red.08
+  %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret i32 %max.red.0
+}
+
+; Turn this into a min reduction. The compare has its operands swapped and its
+; predicate inverted (ugt instead of ult), so this is still a min reduction.
+; CHECK: @umin_red_inverse_select
+; CHECK: icmp ugt <2 x i32>
+; CHECK: select <2 x i1>
+; CHECK: middle.block
+; CHECK: icmp ult <2 x i32>
+; CHECK: select <2 x i1>
+
+define i32 @umin_red_inverse_select(i32 %max) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %cmp3 = icmp ugt i32 %max.red.08, %0
+  %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret i32 %max.red.0
+}
+
+; SGE -> SLT
+; Turn this into a min reduction (select inputs are reversed).
+; CHECK: @sge_min_red
+; CHECK: icmp sge <2 x i32>
+; CHECK: select <2 x i1>
+; CHECK: middle.block
+; CHECK: icmp slt <2 x i32>
+; CHECK: select <2 x i1>
+
+define i32 @sge_min_red(i32 %max) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %cmp3 = icmp sge i32 %0, %max.red.08
+  %max.red.0 = select i1 %cmp3, i32 %max.red.08, i32 %0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret i32 %max.red.0
+}
+
+; SLE -> SGT
+; Turn this into a max reduction (select inputs are reversed).
+; CHECK: @sle_min_red
+; CHECK: icmp sle <2 x i32>
+; CHECK: select <2 x i1>
+; CHECK: middle.block
+; CHECK: icmp sgt <2 x i32>
+; CHECK: select <2 x i1>
+
+define i32 @sle_min_red(i32 %max) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %cmp3 = icmp sle i32 %0, %max.red.08
+  %max.red.0 = select i1 %cmp3, i32 %max.red.08, i32 %0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret i32 %max.red.0
+}
+
+; UGE -> ULT
+; Turn this into a min reduction (select inputs are reversed).
+; CHECK: @uge_min_red
+; CHECK: icmp uge <2 x i32>
+; CHECK: select <2 x i1>
+; CHECK: middle.block
+; CHECK: icmp ult <2 x i32>
+; CHECK: select <2 x i1>
+
+define i32 @uge_min_red(i32 %max) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %cmp3 = icmp uge i32 %0, %max.red.08
+  %max.red.0 = select i1 %cmp3, i32 %max.red.08, i32 %0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret i32 %max.red.0
+}
+
+; ULE -> UGT
+; Turn this into a max reduction (select inputs are reversed).
+; CHECK: @ule_min_red
+; CHECK: icmp ule <2 x i32>
+; CHECK: select <2 x i1>
+; CHECK: middle.block
+; CHECK: icmp ugt <2 x i32>
+; CHECK: select <2 x i1>
+
+define i32 @ule_min_red(i32 %max) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %cmp3 = icmp ule i32 %0, %max.red.08
+  %max.red.0 = select i1 %cmp3, i32 %max.red.08, i32 %0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret i32 %max.red.0
+}
+
+; No reduction.
+; CHECK: @no_red_1
+; CHECK-NOT: icmp {{[a-z]+}} <2 x i32>
+define i32 @no_red_1(i32 %max) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv
+  %arrayidx1 = getelementptr inbounds [1024 x i32]* @A, i64 1, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %1 = load i32* %arrayidx1, align 4
+  %cmp3 = icmp sgt i32 %0, %1
+  %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret i32 %max.red.0
+}
+
+; CHECK: @no_red_2
+; CHECK-NOT: icmp {{[a-z]+}} <2 x i32>
+define i32 @no_red_2(i32 %max) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv
+  %arrayidx1 = getelementptr inbounds [1024 x i32]* @A, i64 1, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %1 = load i32* %arrayidx1, align 4
+  %cmp3 = icmp sgt i32 %0, %max.red.08
+  %max.red.0 = select i1 %cmp3, i32 %0, i32 %1
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret i32 %max.red.0
+}
+
+; Float tests.
+
+; Maximum.
+
+; Turn this into a max reduction in the presence of a no-nans-fp-math attribute.
+; CHECK: @max_red_float
+; CHECK: fcmp ogt <2 x float>
+; CHECK: select <2 x i1>
+; CHECK: middle.block
+; CHECK: fcmp ogt <2 x float>
+; CHECK: select <2 x i1>
+
+define float @max_red_float(float %max) #0 {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4
+  %cmp3 = fcmp ogt float %0, %max.red.08
+  %max.red.0 = select i1 %cmp3, float %0, float %max.red.08
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret float %max.red.0
+}
+
+; CHECK: @max_red_float_ge
+; CHECK: fcmp oge <2 x float>
+; CHECK: select <2 x i1>
+; CHECK: middle.block
+; CHECK: fcmp ogt <2 x float>
+; CHECK: select <2 x i1>
+
+define float @max_red_float_ge(float %max) #0 {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4
+  %cmp3 = fcmp oge float %0, %max.red.08
+  %max.red.0 = select i1 %cmp3, float %0, float %max.red.08
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret float %max.red.0
+}
+
+; CHECK: @inverted_max_red_float
+; CHECK: fcmp olt <2 x float>
+; CHECK: select <2 x i1>
+; CHECK: middle.block
+; CHECK: fcmp ogt <2 x float>
+; CHECK: select <2 x i1>
+
+define float @inverted_max_red_float(float %max) #0 {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4
+  %cmp3 = fcmp olt float %0, %max.red.08
+  %max.red.0 = select i1 %cmp3, float %max.red.08, float %0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret float %max.red.0
+}
+
+; CHECK: @inverted_max_red_float_le
+; CHECK: fcmp ole <2 x float>
+; CHECK: select <2 x i1>
+; CHECK: middle.block
+; CHECK: fcmp ogt <2 x float>
+; CHECK: select <2 x i1>
+
+define float @inverted_max_red_float_le(float %max) #0 {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4
+  %cmp3 = fcmp ole float %0, %max.red.08
+  %max.red.0 = select i1 %cmp3, float %max.red.08, float %0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret float %max.red.0
+}
+
+; CHECK: @unordered_max_red
+; CHECK: fcmp ugt <2 x float>
+; CHECK: select <2 x i1>
+; CHECK: middle.block
+; CHECK: fcmp ogt <2 x float>
+; CHECK: select <2 x i1>
+
+define float @unordered_max_red_float(float %max) #0 {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4
+  %cmp3 = fcmp ugt float %0, %max.red.08
+  %max.red.0 = select i1 %cmp3, float %0, float %max.red.08
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret float %max.red.0
+}
+
+; CHECK: @unordered_max_red_float_ge
+; CHECK: fcmp uge <2 x float>
+; CHECK: select <2 x i1>
+; CHECK: middle.block
+; CHECK: fcmp ogt <2 x float>
+; CHECK: select <2 x i1>
+
+define float @unordered_max_red_float_ge(float %max) #0 {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4
+  %cmp3 = fcmp uge float %0, %max.red.08
+  %max.red.0 = select i1 %cmp3, float %0, float %max.red.08
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret float %max.red.0
+}
+
+; CHECK: @inverted_unordered_max_red
+; CHECK: fcmp ult <2 x float>
+; CHECK: select <2 x i1>
+; CHECK: middle.block
+; CHECK: fcmp ogt <2 x float>
+; CHECK: select <2 x i1>
+
+define float @inverted_unordered_max_red_float(float %max) #0 {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4
+  %cmp3 = fcmp ult float %0, %max.red.08
+  %max.red.0 = select i1 %cmp3, float %max.red.08, float %0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret float %max.red.0
+}
+
+; CHECK: @inverted_unordered_max_red_float_le
+; CHECK: fcmp ule <2 x float>
+; CHECK: select <2 x i1>
+; CHECK: middle.block
+; CHECK: fcmp ogt <2 x float>
+; CHECK: select <2 x i1>
+
+define float @inverted_unordered_max_red_float_le(float %max) #0 {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4
+  %cmp3 = fcmp ule float %0, %max.red.08
+  %max.red.0 = select i1 %cmp3, float %max.red.08, float %0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret float %max.red.0
+}
+
+; Minimum.
+
+; Turn this into a min reduction in the presence of a no-nans-fp-math attribute.
+; CHECK: @min_red_float
+; CHECK: fcmp olt <2 x float>
+; CHECK: select <2 x i1>
+; CHECK: middle.block
+; CHECK: fcmp olt <2 x float>
+; CHECK: select <2 x i1>
+
+define float @min_red_float(float %min) #0 {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4
+  %cmp3 = fcmp olt float %0, %min.red.08
+  %min.red.0 = select i1 %cmp3, float %0, float %min.red.08
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret float %min.red.0
+}
+
+; CHECK: @min_red_float_le
+; CHECK: fcmp ole <2 x float>
+; CHECK: select <2 x i1>
+; CHECK: middle.block
+; CHECK: fcmp olt <2 x float>
+; CHECK: select <2 x i1>
+
+define float @min_red_float_le(float %min) #0 {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4
+  %cmp3 = fcmp ole float %0, %min.red.08
+  %min.red.0 = select i1 %cmp3, float %0, float %min.red.08
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret float %min.red.0
+}
+
+; CHECK: @inverted_min_red_float
+; CHECK: fcmp ogt <2 x float>
+; CHECK: select <2 x i1>
+; CHECK: middle.block
+; CHECK: fcmp olt <2 x float>
+; CHECK: select <2 x i1>
+
+define float @inverted_min_red_float(float %min) #0 {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4
+  %cmp3 = fcmp ogt float %0, %min.red.08
+  %min.red.0 = select i1 %cmp3, float %min.red.08, float %0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret float %min.red.0
+}
+
+; CHECK: @inverted_min_red_float_ge
+; CHECK: fcmp oge <2 x float>
+; CHECK: select <2 x i1>
+; CHECK: middle.block
+; CHECK: fcmp olt <2 x float>
+; CHECK: select <2 x i1>
+
+define float @inverted_min_red_float_ge(float %min) #0 {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4
+  %cmp3 = fcmp oge float %0, %min.red.08
+  %min.red.0 = select i1 %cmp3, float %min.red.08, float %0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret float %min.red.0
+}
+
+; CHECK: @unordered_min_red
+; CHECK: fcmp ult <2 x float>
+; CHECK: select <2 x i1>
+; CHECK: middle.block
+; CHECK: fcmp olt <2 x float>
+; CHECK: select <2 x i1>
+
+define float @unordered_min_red_float(float %min) #0 {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4
+  %cmp3 = fcmp ult float %0, %min.red.08
+  %min.red.0 = select i1 %cmp3, float %0, float %min.red.08
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret float %min.red.0
+}
+
+; CHECK: @unordered_min_red_float_le
+; CHECK: fcmp ule <2 x float>
+; CHECK: select <2 x i1>
+; CHECK: middle.block
+; CHECK: fcmp olt <2 x float>
+; CHECK: select <2 x i1>
+
+define float @unordered_min_red_float_le(float %min) #0 {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4
+  %cmp3 = fcmp ule float %0, %min.red.08
+  %min.red.0 = select i1 %cmp3, float %0, float %min.red.08
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret float %min.red.0
+}
+
+; CHECK: @inverted_unordered_min_red
+; CHECK: fcmp ugt <2 x float>
+; CHECK: select <2 x i1>
+; CHECK: middle.block
+; CHECK: fcmp olt <2 x float>
+; CHECK: select <2 x i1>
+
+define float @inverted_unordered_min_red_float(float %min) #0 {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4
+  %cmp3 = fcmp ugt float %0, %min.red.08
+  %min.red.0 = select i1 %cmp3, float %min.red.08, float %0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret float %min.red.0
+}
+
+; CHECK: @inverted_unordered_min_red_float_ge
+; CHECK: fcmp uge <2 x float>
+; CHECK: select <2 x i1>
+; CHECK: middle.block
+; CHECK: fcmp olt <2 x float>
+; CHECK: select <2 x i1>
+
+define float @inverted_unordered_min_red_float_ge(float %min) #0 {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4
+  %cmp3 = fcmp uge float %0, %min.red.08
+  %min.red.0 = select i1 %cmp3, float %min.red.08, float %0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret float %min.red.0
+}
+
+; Make sure we handle doubles, too.
+; CHECK: @min_red_double
+; CHECK: fcmp olt <2 x double>
+; CHECK: select <2 x i1>
+; CHECK: middle.block
+; CHECK: fcmp olt <2 x double>
+; CHECK: select <2 x i1>
+
+define double @min_red_double(double %min) #0 {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %min.red.08 = phi double [ %min, %entry ], [ %min.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x double]* @dA, i64 0, i64 %indvars.iv
+  %0 = load double* %arrayidx, align 4
+  %cmp3 = fcmp olt double %0, %min.red.08
+  %min.red.0 = select i1 %cmp3, double %0, double %min.red.08
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret double %min.red.0
+}
+
+
+; Don't turn this into a max reduction. The no-nans-fp-math attribute is missing.
+; CHECK: @max_red_float_nans
+; CHECK-NOT: <2 x float>
+
+define float @max_red_float_nans(float %max) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
+  %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4
+  %cmp3 = fcmp ogt float %0, %max.red.08
+  %max.red.0 = select i1 %cmp3, float %0, float %max.red.08
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret float %max.red.0
+}
+
+
+attributes #0 = { "no-nans-fp-math"="true" } 
diff --git a/test/Transforms/LoopVectorize/no_idiv_reduction.ll b/test/Transforms/LoopVectorize/no_idiv_reduction.ll
new file mode 100644
index 0000000..cdfb3fd
--- /dev/null
+++ b/test/Transforms/LoopVectorize/no_idiv_reduction.ll
@@ -0,0 +1,24 @@
+; RUN: opt -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 -S < %s | FileCheck %s
+@a = common global [128 x i32] zeroinitializer, align 16
+
+;; Must not vectorize division reduction. Division is lossy.
+define i32 @g() {
+entry:
+  br label %for.body
+
+for.body:
+  ; CHECK: @g
+  ; CHECK-NOT: sdiv <2 x i32>
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %r.05 = phi i32 [ 80, %entry ], [ %div, %for.body ]
+  %arrayidx = getelementptr inbounds [128 x i32]* @a, i64 0, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %div = sdiv i32 %r.05, %0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 1024
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret i32 %div
+}
diff --git a/test/Transforms/LoopVectorize/no_outside_user.ll b/test/Transforms/LoopVectorize/no_outside_user.ll
new file mode 100644
index 0000000..6f0357c
--- /dev/null
+++ b/test/Transforms/LoopVectorize/no_outside_user.ll
@@ -0,0 +1,41 @@
+; RUN: opt -S -loop-vectorize -force-vector-unroll=1 -force-vector-width=2 < %s | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
+
+@f = common global i32 0, align 4
+@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
+@c = common global i32 0, align 4
+@a = common global i32 0, align 4
+@b = common global i32 0, align 4
+@e = common global i32 0, align 4
+
+; We used to vectorize this loop. But it has a value that is used outside of the
+; loop and is not a recognized reduction variable "tmp17".
+
+; CHECK-NOT: <2 x i32>
+
+define i32 @main()  {
+bb:
+  %b.promoted = load i32* @b, align 4
+  br label %.lr.ph.i
+
+.lr.ph.i:
+  %tmp8 = phi i32 [ %tmp18, %bb16 ], [ %b.promoted, %bb ]
+  %tmp2 = icmp sgt i32 %tmp8, 10
+  br i1 %tmp2, label %bb16, label %bb10
+
+bb10:
+  br label %bb16
+
+bb16:
+  %tmp17 = phi i32 [ 0, %bb10 ], [ 1, %.lr.ph.i ]
+  %tmp18 = add nsw i32 %tmp8, 1
+  %tmp19 = icmp slt i32 %tmp18, 4
+  br i1 %tmp19, label %.lr.ph.i, label %f1.exit.loopexit
+
+f1.exit.loopexit:
+  %.lcssa = phi i32 [ %tmp17, %bb16 ]
+  ret i32 %.lcssa
+}
+
+
diff --git a/test/Transforms/LoopVectorize/phi-hang.ll b/test/Transforms/LoopVectorize/phi-hang.ll
index b80d459..bbce239 100644
--- a/test/Transforms/LoopVectorize/phi-hang.ll
+++ b/test/Transforms/LoopVectorize/phi-hang.ll
@@ -27,3 +27,21 @@ bb5:                                              ; preds = %bb4, %bb1
 bb11:                                             ; preds = %bb5
   ret void
 }
+
+; PR15748
+define void @test2() {
+bb:
+  br label %bb1
+
+bb1:                                              ; preds = %bb1, %bb
+  %tmp = phi i32 [ 0, %bb ], [ %tmp5, %bb1 ]
+  %tmp2 = phi i32 [ 0, %bb ], [ 1, %bb1 ]
+  %tmp3 = phi i32 [ 0, %bb ], [ %tmp4, %bb1 ]
+  %tmp4 = or i32 %tmp2, %tmp3
+  %tmp5 = add nsw i32 %tmp, 1
+  %tmp6 = icmp eq i32 %tmp5, 0
+  br i1 %tmp6, label %bb7, label %bb1
+
+bb7:                                              ; preds = %bb1
+  ret void
+}
diff --git a/test/Transforms/LoopVectorize/reverse_induction.ll b/test/Transforms/LoopVectorize/reverse_induction.ll
new file mode 100644
index 0000000..f43f02b
--- /dev/null
+++ b/test/Transforms/LoopVectorize/reverse_induction.ll
@@ -0,0 +1,79 @@
+; RUN: opt < %s -loop-vectorize -force-vector-unroll=2 -force-vector-width=4 -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; Make sure consecutive vector generates correct negative indices.
+; PR15882
+
+; CHECK: reverse_induction_i64
+; CHECK: add <4 x i64> %[[SPLAT:.*]], <i64 0, i64 -1, i64 -2, i64 -3>
+; CHECK: add <4 x i64> %[[SPLAT]], <i64 -4, i64 -5, i64 -6, i64 -7>
+
+define i32 @reverse_induction_i64(i64 %startval, i32 * %ptr) {
+entry:
+  br label %for.body
+
+for.body:
+  %add.i7 = phi i64 [ %startval, %entry ], [ %add.i, %for.body ]
+  %i.06 = phi i32 [ 0, %entry ], [ %inc4, %for.body ]
+  %redux5 = phi i32 [ 0, %entry ], [ %inc.redux, %for.body ]
+  %add.i = add i64 %add.i7, -1
+  %kind_.i = getelementptr inbounds i32* %ptr, i64 %add.i
+  %tmp.i1 = load i32* %kind_.i, align 4
+  %inc.redux = add i32 %tmp.i1, %redux5
+  %inc4 = add i32 %i.06, 1
+  %exitcond = icmp ne i32 %inc4, 1024
+  br i1 %exitcond, label %for.body, label %loopend
+
+loopend:
+  ret i32 %inc.redux
+}
+
+; CHECK: reverse_induction_i128
+; CHECK: add <4 x i128> %[[SPLAT:.*]], <i128 0, i128 -1, i128 -2, i128 -3>
+; CHECK: add <4 x i128> %[[SPLAT]], <i128 -4, i128 -5, i128 -6, i128 -7>
+define i32 @reverse_induction_i128(i128 %startval, i32 * %ptr) {
+entry:
+  br label %for.body
+
+for.body:
+  %add.i7 = phi i128 [ %startval, %entry ], [ %add.i, %for.body ]
+  %i.06 = phi i32 [ 0, %entry ], [ %inc4, %for.body ]
+  %redux5 = phi i32 [ 0, %entry ], [ %inc.redux, %for.body ]
+  %add.i = add i128 %add.i7, -1
+  %kind_.i = getelementptr inbounds i32* %ptr, i128 %add.i
+  %tmp.i1 = load i32* %kind_.i, align 4
+  %inc.redux = add i32 %tmp.i1, %redux5
+  %inc4 = add i32 %i.06, 1
+  %exitcond = icmp ne i32 %inc4, 1024
+  br i1 %exitcond, label %for.body, label %loopend
+
+loopend:
+  ret i32 %inc.redux
+}
+
+; CHECK: reverse_induction_i16
+; CHECK: add <4 x i16> %[[SPLAT:.*]], <i16 0, i16 -1, i16 -2, i16 -3>
+; CHECK: add <4 x i16> %[[SPLAT]], <i16 -4, i16 -5, i16 -6, i16 -7>
+
+define i32 @reverse_induction_i16(i16 %startval, i32 * %ptr) {
+entry:
+  br label %for.body
+
+for.body:
+  %add.i7 = phi i16 [ %startval, %entry ], [ %add.i, %for.body ]
+  %i.06 = phi i32 [ 0, %entry ], [ %inc4, %for.body ]
+  %redux5 = phi i32 [ 0, %entry ], [ %inc.redux, %for.body ]
+  %add.i = add i16 %add.i7, -1
+  %kind_.i = getelementptr inbounds i32* %ptr, i16 %add.i
+  %tmp.i1 = load i32* %kind_.i, align 4
+  %inc.redux = add i32 %tmp.i1, %redux5
+  %inc4 = add i32 %i.06, 1
+  %exitcond = icmp ne i32 %inc4, 1024
+  br i1 %exitcond, label %for.body, label %loopend
+
+loopend:
+  ret i32 %inc.redux
+}
+
+
diff --git a/test/Transforms/LoopVectorize/runtime-check-readonly.ll b/test/Transforms/LoopVectorize/runtime-check-readonly.ll
new file mode 100644
index 0000000..4145d13
--- /dev/null
+++ b/test/Transforms/LoopVectorize/runtime-check-readonly.ll
@@ -0,0 +1,36 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+;CHECK: add_ints
+;CHECK: br
+;CHECK: getelementptr
+;CHECK-NEXT: getelementptr
+;CHECK-NEXT: icmp uge
+;CHECK-NEXT: icmp uge
+;CHECK-NEXT: icmp uge
+;CHECK-NEXT: icmp uge
+;CHECK-NEXT: and
+;CHECK: ret
+define void @add_ints(i32* nocapture %A, i32* nocapture %B, i32* nocapture %C) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %B, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32* %C, i64 %indvars.iv
+  %1 = load i32* %arrayidx2, align 4
+  %add = add nsw i32 %1, %0
+  %arrayidx4 = getelementptr inbounds i32* %A, i64 %indvars.iv
+  store i32 %add, i32* %arrayidx4, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 200
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
diff --git a/test/Transforms/LoopVectorize/runtime-check.ll b/test/Transforms/LoopVectorize/runtime-check.ll
index 86098a6..014c4fc 100644
--- a/test/Transforms/LoopVectorize/runtime-check.ll
+++ b/test/Transforms/LoopVectorize/runtime-check.ll
@@ -22,10 +22,10 @@ entry:
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds float* %b, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %0 = load float* %arrayidx, align 4
   %mul = fmul float %0, 3.000000e+00
   %arrayidx2 = getelementptr inbounds float* %a, i64 %indvars.iv
-  store float %mul, float* %arrayidx2, align 4, !tbaa !0
+  store float %mul, float* %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, %n
@@ -34,7 +34,3 @@ for.body:                                         ; preds = %entry, %for.body
 for.end:                                          ; preds = %for.body, %entry
   ret i32 undef
 }
-
-!0 = metadata !{metadata !"float", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/Transforms/LoopVectorize/runtime-limit.ll b/test/Transforms/LoopVectorize/runtime-limit.ll
new file mode 100644
index 0000000..d783974
--- /dev/null
+++ b/test/Transforms/LoopVectorize/runtime-limit.ll
@@ -0,0 +1,84 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+; We are vectorizing with 6 runtime checks.
+;CHECK: func1x6
+;CHECK: <4 x i32>
+;CHECK: ret
+define i32 @func1x6(i32* nocapture %out, i32* nocapture %A, i32* nocapture %B, i32* nocapture %C, i32* nocapture %D, i32* nocapture %E, i32* nocapture %F) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.016 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %A, i64 %i.016
+  %0 = load i32* %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i32* %B, i64 %i.016
+  %1 = load i32* %arrayidx1, align 4
+  %add = add nsw i32 %1, %0
+  %arrayidx2 = getelementptr inbounds i32* %C, i64 %i.016
+  %2 = load i32* %arrayidx2, align 4
+  %add3 = add nsw i32 %add, %2
+  %arrayidx4 = getelementptr inbounds i32* %E, i64 %i.016
+  %3 = load i32* %arrayidx4, align 4
+  %add5 = add nsw i32 %add3, %3
+  %arrayidx6 = getelementptr inbounds i32* %F, i64 %i.016
+  %4 = load i32* %arrayidx6, align 4
+  %add7 = add nsw i32 %add5, %4
+  %arrayidx8 = getelementptr inbounds i32* %out, i64 %i.016
+  store i32 %add7, i32* %arrayidx8, align 4
+  %inc = add i64 %i.016, 1
+  %exitcond = icmp eq i64 %inc, 256
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret i32 undef
+}
+
+; We are not vectorizing with 12 runtime checks.
+;CHECK: func2x6
+;CHECK-NOT: <4 x i32>
+;CHECK: ret
+define i32 @func2x6(i32* nocapture %out, i32* nocapture %out2, i32* nocapture %A, i32* nocapture %B, i32* nocapture %C, i32* nocapture %D, i32* nocapture %E, i32* nocapture %F) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.037 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %A, i64 %i.037
+  %0 = load i32* %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i32* %B, i64 %i.037
+  %1 = load i32* %arrayidx1, align 4
+  %add = add nsw i32 %1, %0
+  %arrayidx2 = getelementptr inbounds i32* %C, i64 %i.037
+  %2 = load i32* %arrayidx2, align 4
+  %add3 = add nsw i32 %add, %2
+  %arrayidx4 = getelementptr inbounds i32* %E, i64 %i.037
+  %3 = load i32* %arrayidx4, align 4
+  %add5 = add nsw i32 %add3, %3
+  %arrayidx6 = getelementptr inbounds i32* %F, i64 %i.037
+  %4 = load i32* %arrayidx6, align 4
+  %add7 = add nsw i32 %add5, %4
+  %arrayidx8 = getelementptr inbounds i32* %out, i64 %i.037
+  store i32 %add7, i32* %arrayidx8, align 4
+  %5 = load i32* %arrayidx, align 4
+  %6 = load i32* %arrayidx1, align 4
+  %add11 = add nsw i32 %6, %5
+  %7 = load i32* %arrayidx2, align 4
+  %add13 = add nsw i32 %add11, %7
+  %8 = load i32* %arrayidx4, align 4
+  %add15 = add nsw i32 %add13, %8
+  %9 = load i32* %arrayidx6, align 4
+  %add17 = add nsw i32 %add15, %9
+  %arrayidx18 = getelementptr inbounds i32* %out2, i64 %i.037
+  store i32 %add17, i32* %arrayidx18, align 4
+  %inc = add i64 %i.037, 1
+  %exitcond = icmp eq i64 %inc, 256
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret i32 undef
+}
+
diff --git a/test/Transforms/LoopVectorize/start-non-zero.ll b/test/Transforms/LoopVectorize/start-non-zero.ll
index 998001c..e8a089a 100644
--- a/test/Transforms/LoopVectorize/start-non-zero.ll
+++ b/test/Transforms/LoopVectorize/start-non-zero.ll
@@ -18,9 +18,9 @@ for.body.lr.ph:                                   ; preds = %entry
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %indvars.iv = phi i64 [ %0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
   %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
-  %1 = load i32* %arrayidx, align 4, !tbaa !0
+  %1 = load i32* %arrayidx, align 4
   %mul = mul nuw i32 %1, 333
-  store i32 %mul, i32* %arrayidx, align 4, !tbaa !0
+  store i32 %mul, i32* %arrayidx, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %2 = trunc i64 %indvars.iv.next to i32
   %cmp = icmp slt i32 %2, %end
@@ -29,7 +29,3 @@ for.body:                                         ; preds = %for.body.lr.ph, %fo
 for.end:                                          ; preds = %for.body, %entry
   ret i32 4
 }
-
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/Transforms/LoopVectorize/struct_access.ll b/test/Transforms/LoopVectorize/struct_access.ll
index de65d0d..573480d 100644
--- a/test/Transforms/LoopVectorize/struct_access.ll
+++ b/test/Transforms/LoopVectorize/struct_access.ll
@@ -33,7 +33,7 @@ for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %sum.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
   %x = getelementptr inbounds %struct.coordinate* %A, i64 %indvars.iv, i32 0
-  %0 = load i32* %x, align 4, !tbaa !0
+  %0 = load i32* %x, align 4
   %add = add nsw i32 %0, %sum.05
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
@@ -44,7 +44,3 @@ for.end:                                          ; preds = %for.body, %entry
   %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
   ret i32 %sum.0.lcssa
 }
-
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/Transforms/LoopVectorize/value-ptr-bug.ll b/test/Transforms/LoopVectorize/value-ptr-bug.ll
new file mode 100644
index 0000000..f376656
--- /dev/null
+++ b/test/Transforms/LoopVectorize/value-ptr-bug.ll
@@ -0,0 +1,50 @@
+; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-unroll=1 -dce -instcombine < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; PR16073
+
+; Because we were caching value pointers across a function call that could RAUW
+; we would generate an undefined value store below:
+; SCEVExpander::expandCodeFor would change a value (the start value of an
+; induction) that we cached in the induction variable list.
+
+; CHECK: test_vh
+; CHECK-NOT: store <4 x i8> undef
+
+define void @test_vh(i32* %ptr265, i32* %ptr266, i32 %sub267) {
+entry:
+  br label %loop
+
+loop:
+  %inc = phi i32 [ %sub267, %entry ], [ %add, %loop]
+  %ext.inc = sext i32 %inc to i64
+  %add.ptr265 = getelementptr inbounds i32* %ptr265, i64 %ext.inc
+  %add.ptr266 = getelementptr inbounds i32* %ptr266, i64 %ext.inc
+  %add = add i32 %inc, 9
+  %cmp = icmp slt i32 %add, 140
+  br i1 %cmp, label %block1, label %loop
+
+block1:
+  %sub267.lcssa = phi i32 [ %add, %loop ]
+  %add.ptr266.lcssa = phi i32* [ %add.ptr266, %loop ]
+  %add.ptr265.lcssa = phi i32* [ %add.ptr265, %loop ]
+  %tmp29 = bitcast i32* %add.ptr265.lcssa to i8*
+  %tmp30 = bitcast i32* %add.ptr266.lcssa to i8*
+  br label %do.body272
+
+do.body272:
+  %row_width.5 = phi i32 [ %sub267.lcssa, %block1 ], [ %dec, %do.body272 ]
+  %sp.4 = phi i8* [ %tmp30, %block1 ], [ %incdec.ptr273, %do.body272 ]
+  %dp.addr.4 = phi i8* [ %tmp29, %block1 ], [ %incdec.ptr274, %do.body272 ]
+  %incdec.ptr273 = getelementptr inbounds i8* %sp.4, i64 1
+  %tmp31 = load i8* %sp.4, align 1
+  %incdec.ptr274 = getelementptr inbounds i8* %dp.addr.4, i64 1
+  store i8 %tmp31, i8* %dp.addr.4, align 1
+  %dec = add i32 %row_width.5, -1
+  %cmp276 = icmp eq i32 %dec, 0
+  br i1 %cmp276, label %loop.exit, label %do.body272
+
+loop.exit:
+  ret void
+}
diff --git a/test/Transforms/LoopVectorize/vectorize-once.ll b/test/Transforms/LoopVectorize/vectorize-once.ll
index ac16948..f289ded 100644
--- a/test/Transforms/LoopVectorize/vectorize-once.ll
+++ b/test/Transforms/LoopVectorize/vectorize-once.ll
@@ -29,7 +29,7 @@ entry:
 for.body.i:                                       ; preds = %entry, %for.body.i
   %__init.addr.05.i = phi i32 [ %add.i, %for.body.i ], [ 0, %entry ]
   %__first.addr.04.i = phi i32* [ %incdec.ptr.i, %for.body.i ], [ %A, %entry ]
-  %0 = load i32* %__first.addr.04.i, align 4, !tbaa !0
+  %0 = load i32* %__first.addr.04.i, align 4
   %add.i = add nsw i32 %0, %__init.addr.05.i
   %incdec.ptr.i = getelementptr inbounds i32* %__first.addr.04.i, i64 1
   %cmp.i = icmp eq i32* %incdec.ptr.i, %add.ptr
@@ -55,7 +55,7 @@ entry:
 for.body.i:                                       ; preds = %entry, %for.body.i
   %__init.addr.05.i = phi i32 [ %add.i, %for.body.i ], [ 0, %entry ]
   %__first.addr.04.i = phi i32* [ %incdec.ptr.i, %for.body.i ], [ %A, %entry ]
-  %0 = load i32* %__first.addr.04.i, align 4, !tbaa !0
+  %0 = load i32* %__first.addr.04.i, align 4
   %add.i = add nsw i32 %0, %__init.addr.05.i
   %incdec.ptr.i = getelementptr inbounds i32* %__first.addr.04.i, i64 1
   %cmp.i = icmp eq i32* %incdec.ptr.i, %add.ptr
@@ -68,8 +68,5 @@ _ZSt10accumulateIPiiET0_T_S2_S1_.exit:            ; preds = %for.body.i, %entry
 
 attributes #0 = { nounwind readonly ssp uwtable "fp-contract-model"="standard" "no-frame-pointer-elim" "no-frame-pointer-elim-non-leaf" "realign-stack" "relocation-model"="pic" "ssp-buffers-size"="8" }
 
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
 !3 = metadata !{}
 
diff --git a/test/Transforms/MergeFunc/crash.ll b/test/Transforms/MergeFunc/crash.ll
new file mode 100644
index 0000000..0897ba2
--- /dev/null
+++ b/test/Transforms/MergeFunc/crash.ll
@@ -0,0 +1,46 @@
+; RUN: opt -mergefunc -disable-output < %s
+; PR15185
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
+target triple = "i386-pc-linux-gnu"
+
+%.qux.2496 = type { i32, %.qux.2497 }
+%.qux.2497 = type { i8, i32 }
+%.qux.2585 = type { i32, i32, i8* }
+
+@g2 = external unnamed_addr constant [9 x i8], align 1
+@g3 = internal hidden unnamed_addr constant [1 x i8*] [i8* bitcast (i8* (%.qux.2585*)* @func35 to i8*)]
+
+define internal hidden i32 @func1(i32* %ptr, { i32, i32 }* nocapture %method) align 2 {
+  br label %1
+
+; <label>:1
+  br label %2
+
+; <label>:2
+  ret i32 undef
+}
+
+define internal hidden i32 @func10(%.qux.2496* nocapture %this) align 2 {
+  %1 = getelementptr inbounds %.qux.2496* %this, i32 0, i32 1, i32 1
+  %2 = load i32* %1, align 4
+  ret i32 %2
+}
+
+define internal hidden i8* @func29(i32* nocapture %this) align 2 {
+  ret i8* getelementptr inbounds ([9 x i8]* @g2, i32 0, i32 0)
+}
+
+define internal hidden i32* @func33(%.qux.2585* nocapture %this) align 2 {
+  ret i32* undef
+}
+
+define internal hidden i32* @func34(%.qux.2585* nocapture %this) align 2 {
+  %1 = getelementptr inbounds %.qux.2585* %this, i32 0
+  ret i32* undef
+}
+
+define internal hidden i8* @func35(%.qux.2585* nocapture %this) align 2 {
+  %1 = getelementptr inbounds %.qux.2585* %this, i32 0, i32 2
+  %2 = load i8** %1, align 4
+  ret i8* %2
+}
diff --git a/test/Transforms/MergeFunc/inttoptr.ll b/test/Transforms/MergeFunc/inttoptr.ll
new file mode 100644
index 0000000..93250fa
--- /dev/null
+++ b/test/Transforms/MergeFunc/inttoptr.ll
@@ -0,0 +1,55 @@
+; RUN: opt -mergefunc -S < %s | FileCheck %s
+; PR15185
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
+target triple = "i386-pc-linux-gnu"
+
+%.qux.2496 = type { i32, %.qux.2497 }
+%.qux.2497 = type { i8, i32 }
+%.qux.2585 = type { i32, i32, i8* }
+
+@g2 = external unnamed_addr constant [9 x i8], align 1
+@g3 = internal hidden unnamed_addr constant [1 x i8*] [i8* bitcast (i8* (%.qux.2585*)* @func35 to i8*)]
+
+define internal hidden i32 @func1(i32* %ptr, { i32, i32 }* nocapture %method) align 2 {
+bb:
+  br label %bb1
+
+bb1:                                              ; preds = %bb
+  br label %bb2
+
+bb2:                                              ; preds = %bb1
+  ret i32 undef
+}
+
+define internal hidden i32 @func10(%.qux.2496* nocapture %this) align 2 {
+bb:
+  %tmp = getelementptr inbounds %.qux.2496* %this, i32 0, i32 1, i32 1
+  %tmp1 = load i32* %tmp, align 4
+  ret i32 %tmp1
+}
+
+define internal hidden i8* @func29(i32* nocapture %this) align 2 {
+bb:
+  ret i8* getelementptr inbounds ([9 x i8]* @g2, i32 0, i32 0)
+}
+
+define internal hidden i32* @func33(%.qux.2585* nocapture %this) align 2 {
+bb:
+  ret i32* undef
+}
+
+define internal hidden i32* @func34(%.qux.2585* nocapture %this) align 2 {
+bb:
+  %tmp = getelementptr inbounds %.qux.2585* %this, i32 0
+  ret i32* undef
+}
+
+define internal hidden i8* @func35(%.qux.2585* nocapture %this) align 2 {
+bb:
+; CHECK: %[[V2:.+]] = bitcast %.qux.2585* %{{.*}} to %.qux.2496*
+; CHECK: %[[V3:.+]] = tail call i32 @func10(%.qux.2496* %[[V2]])
+; CHECK: %{{.*}} = inttoptr i32 %[[V3]] to i8*
+  %tmp = getelementptr inbounds %.qux.2585* %this, i32 0, i32 2
+  %tmp1 = load i8** %tmp, align 4
+  ret i8* %tmp1
+}
diff --git a/test/Transforms/MergeFunc/vector.ll b/test/Transforms/MergeFunc/vector.ll
index dba5fa3..56f74e6 100644
--- a/test/Transforms/MergeFunc/vector.ll
+++ b/test/Transforms/MergeFunc/vector.ll
@@ -22,7 +22,7 @@ target triple = "x86_64-unknown-linux-gnu"
 define linkonce_odr void @_ZNSt6vectorIlSaIlEED1Ev(%"class.std::vector"* nocapture %this) unnamed_addr align 2 {
 entry:
   %tmp2.i.i = bitcast %"class.std::vector"* %this to i64**
-  %tmp3.i.i = load i64** %tmp2.i.i, align 8, !tbaa !0
+  %tmp3.i.i = load i64** %tmp2.i.i, align 8
   %tobool.i.i.i = icmp eq i64* %tmp3.i.i, null
   br i1 %tobool.i.i.i, label %_ZNSt6vectorIlSaIlEED2Ev.exit, label %if.then.i.i.i
 
@@ -40,7 +40,7 @@ declare i32 @__cxa_atexit(void (i8*)*, i8*, i8*)
 define linkonce_odr void @_ZNSt6vectorIPvSaIS0_EED1Ev(%"class.std::vector"* nocapture %this) unnamed_addr align 2 {
 entry:
   %tmp2.i.i = bitcast %"class.std::vector"* %this to i8***
-  %tmp3.i.i = load i8*** %tmp2.i.i, align 8, !tbaa !0
+  %tmp3.i.i = load i8*** %tmp2.i.i, align 8
   %tobool.i.i.i = icmp eq i8** %tmp3.i.i, null
   br i1 %tobool.i.i.i, label %_ZNSt6vectorIPvSaIS0_EED2Ev.exit, label %if.then.i.i.i
 
@@ -70,8 +70,3 @@ declare void @_ZNSt6vectorIlSaIlEE13_M_insert_auxEN9__gnu_cxx17__normal_iterator
 declare void @_GLOBAL__I_a()
 
 declare %1 @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone
-
-!0 = metadata !{metadata !"any pointer", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
-!3 = metadata !{metadata !"long", metadata !1}
diff --git a/test/Transforms/ObjCARC/apelim.ll b/test/Transforms/ObjCARC/apelim.ll
index 4541b3f..14412c6 100644
--- a/test/Transforms/ObjCARC/apelim.ll
+++ b/test/Transforms/ObjCARC/apelim.ll
@@ -26,7 +26,7 @@ entry:
   ret void
 }
 
-; CHECK: define internal void @_GLOBAL__I_x()
+; CHECK: define internal void @_GLOBAL__I_x() {
 ; CHECK-NOT: @objc
 ; CHECK: }
 define internal void @_GLOBAL__I_x() {
@@ -37,7 +37,7 @@ entry:
   ret void
 }
 
-; CHECK: define internal void @_GLOBAL__I_y()
+; CHECK: define internal void @_GLOBAL__I_y() {
 ; CHECK: %0 = call i8* @objc_autoreleasePoolPush() [[NUW:#[0-9]+]]
 ; CHECK: call void @objc_autoreleasePoolPop(i8* %0) [[NUW]]
 ; CHECK: }
diff --git a/test/Transforms/ObjCARC/arc-annotations.ll b/test/Transforms/ObjCARC/arc-annotations.ll
index 4c56b4a..c0dea4b 100644
--- a/test/Transforms/ObjCARC/arc-annotations.ll
+++ b/test/Transforms/ObjCARC/arc-annotations.ll
@@ -30,25 +30,25 @@ declare i8* @returner()
 ; CHECK: define void @test0(
 ; CHECK: entry:
 ; CHECK:   call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_None)
-; CHECK:   %0 = tail call i8* @objc_retain(i8* %a) #0, !llvm.arc.annotation.bottomup !0, !llvm.arc.annotation.topdown !1
+; CHECK:   %0 = tail call i8* @objc_retain(i8* %a) #0, !llvm.arc.annotation.bottomup ![[ANN0:[0-9]+]], !llvm.arc.annotation.topdown ![[ANN1:[0-9]+]]
 ; CHECK:   call void @llvm.arc.annotation.bottomup.bbend(i8** @x, i8** @S_Use)
 ; CHECK:   call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_Retain)
 ; CHECK: t:
 ; CHECK:   call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_Retain)
 ; CHECK:   call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_Use)
-; CHECK:   store float 2.000000e+00, float* %b, !llvm.arc.annotation.bottomup !2
+; CHECK:   store float 2.000000e+00, float* %b, !llvm.arc.annotation.bottomup ![[ANN2:[0-9]+]]
 ; CHECK:   call void @llvm.arc.annotation.bottomup.bbend(i8** @x, i8** @S_Release)
 ; CHECK:   call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_Retain)
 ; CHECK: f:
 ; CHECK:   call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_Retain)
 ; CHECK:   call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_Use)
-; CHECK:   store i32 7, i32* %x, !llvm.arc.annotation.bottomup !2
+; CHECK:   store i32 7, i32* %x, !llvm.arc.annotation.bottomup ![[ANN2]]
 ; CHECK:   call void @llvm.arc.annotation.bottomup.bbend(i8** @x, i8** @S_Release)
 ; CHECK:   call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_Retain)
 ; CHECK: return:
 ; CHECK:   call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_Retain)
 ; CHECK:   call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_Release)
-; CHECK:   call void @objc_release(i8* %c) #0, !llvm.arc.annotation.bottomup !3, !llvm.arc.annotation.topdown !4
+; CHECK:   call void @objc_release(i8* %c) #0, !llvm.arc.annotation.bottomup ![[ANN3:[0-9]+]], !llvm.arc.annotation.topdown ![[ANN4:[0-9]+]]
 ; CHECK:   call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_None)
 ; CHECK: }
 define void @test0(i32* %x, i1 %p) nounwind {
@@ -73,235 +73,11 @@ return:
   ret void
 }
 
-; Like test0 but the release isn't always executed when the retain is,
-; so the optimization is not safe.
-
-; TODO: Make the objc_release's argument be %0.
-
-; CHECK: define void @test1(
-; CHECK: entry:
-; CHECK:   call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_None)
-; CHECK:   %0 = tail call i8* @objc_retain(i8* %a) #0, !llvm.arc.annotation.bottomup !5, !llvm.arc.annotation.topdown !6
-; CHECK:   call void @llvm.arc.annotation.bottomup.bbend(i8** @x, i8** @S_None)
-; CHECK:   call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_Retain)
-; CHECK: t:
-; CHECK:   call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_Retain)
-; CHECK:   call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_Use)
-; CHECK:   store float 2.000000e+00, float* %b, !llvm.arc.annotation.bottomup !7
-; CHECK:   call void @llvm.arc.annotation.bottomup.bbend(i8** @x, i8** @S_Release)
-; CHECK:   call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_Retain)
-; CHECK: f:
-; CHECK:   call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_Retain)
-; CHECK:   call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_None)
-; CHECK:   call void @callee(), !llvm.arc.annotation.topdown !8
-; CHECK:   call void @llvm.arc.annotation.bottomup.bbend(i8** @x, i8** @S_None)
-; CHECK:   call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_CanRelease)
-; CHECK: return:
-; CHECK:   call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_None)
-; CHECK:   call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_Release)
-; CHECK:   call void @objc_release(i8* %c) #0, !llvm.arc.annotation.bottomup !9
-; CHECK:   call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_None)
-; CHECK: alt_return:
-; CHECK:   call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_None)
-; CHECK:   call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_None)
-; CHECK: }
-define void @test1(i32* %x, i1 %p, i1 %q) nounwind {
-entry:
-  %a = bitcast i32* %x to i8*
-  %0 = call i8* @objc_retain(i8* %a) nounwind
-  br i1 %p, label %t, label %f
-
-t:
-  store i8 3, i8* %a
-  %b = bitcast i32* %x to float*
-  store float 2.0, float* %b
-  br label %return
-
-f:
-  store i32 7, i32* %x
-  call void @callee()
-  br i1 %q, label %return, label %alt_return
-
-return:
-  %c = bitcast i32* %x to i8*
-  call void @objc_release(i8* %c) nounwind
-  ret void
-
-alt_return:
-  ret void
-}
-
-; Don't do partial elimination into two different CFG diamonds.
-
-; CHECK: define void @test1b(
-; CHECK: entry:
-; CHECK:   call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_None)
-; CHECK:   %0 = tail call i8* @objc_retain(i8* %x) #0, !llvm.arc.annotation.bottomup !10, !llvm.arc.annotation.topdown !11
-; CHECK:   call void @llvm.arc.annotation.bottomup.bbend(i8** @x, i8** @S_None)
-; CHECK:   call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_Retain)
-; CHECK: if.then:
-; CHECK:   call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_Retain)
-; CHECK:   call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_CanRelease)
-; CHECK:   tail call void @callee(), !llvm.arc.annotation.bottomup !12, !llvm.arc.annotation.topdown !13
-; CHECK:   call void @llvm.arc.annotation.bottomup.bbend(i8** @x, i8** @S_Use)
-; CHECK:   call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_CanRelease)
-; CHECK: if.end:
-; CHECK:   call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_CanRelease)
-; CHECK:   call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_Use)
-; CHECK:   call void @llvm.arc.annotation.bottomup.bbend(i8** @x, i8** @S_Use)
-; CHECK:   call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_CanRelease)
-; CHECK: if.then3:
-; CHECK:   call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_CanRelease)
-; CHECK:   call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_Use)
-; CHECK:   tail call void @use_pointer(i8* %x), !llvm.arc.annotation.bottomup !14, !llvm.arc.annotation.topdown !15
-; CHECK:   call void @llvm.arc.annotation.bottomup.bbend(i8** @x, i8** @S_MovableRelease)
-; CHECK:   call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_Use)
-; CHECK: if.end5:
-; CHECK:   call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_None)
-; CHECK:   call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_MovableRelease)
-; CHECK:   tail call void @objc_release(i8* %x) #0, !clang.imprecise_release !16, !llvm.arc.annotation.bottomup !17
-; CHECK:   call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_None)
-; CHECK: }
-define void @test1b(i8* %x, i1 %p, i1 %q) {
-entry:
-  tail call i8* @objc_retain(i8* %x) nounwind
-  br i1 %p, label %if.then, label %if.end
-
-if.then:                                          ; preds = %entry
-  tail call void @callee()
-  br label %if.end
-
-if.end:                                           ; preds = %if.then, %entry
-  br i1 %q, label %if.then3, label %if.end5
-
-if.then3:                                         ; preds = %if.end
-  tail call void @use_pointer(i8* %x)
-  br label %if.end5
-
-if.end5:                                          ; preds = %if.then3, %if.end
-  tail call void @objc_release(i8* %x) nounwind, !clang.imprecise_release !0
-  ret void
-}
-
-; Like test0 but the pointer is passed to an intervening call,
-; so the optimization is not safe.
-
-; CHECK: define void @test2(
-; CHECK: entry:
-; CHECK:   call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_None)
-; CHECK:   %e = tail call i8* @objc_retain(i8* %a) #0, !llvm.arc.annotation.bottomup !18, !llvm.arc.annotation.topdown !19
-; CHECK:   call void @llvm.arc.annotation.bottomup.bbend(i8** @x, i8** @S_CanRelease)
-; CHECK:   call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_Retain)
-; CHECK: t:
-; CHECK:   call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_Retain)
-; CHECK:   call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_Use)
-; CHECK:   store float 2.000000e+00, float* %b, !llvm.arc.annotation.bottomup !20
-; CHECK:   call void @llvm.arc.annotation.bottomup.bbend(i8** @x, i8** @S_Release)
-; CHECK:   call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_Retain)
-; CHECK: f:
-; CHECK:   call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_Retain)
-; CHECK:   call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_CanRelease)
-; CHECK:   call void @use_pointer(i8* %e), !llvm.arc.annotation.bottomup !21, !llvm.arc.annotation.topdown !22
-; CHECK:   store float 3.000000e+00, float* %d, !llvm.arc.annotation.bottomup !20, !llvm.arc.annotation.topdown !23
-; CHECK:   call void @llvm.arc.annotation.bottomup.bbend(i8** @x, i8** @S_Release)
-; CHECK:   call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_Use)
-; CHECK: return:
-; CHECK:   call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_Use)
-; CHECK:   call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_Release)
-; CHECK:   call void @objc_release(i8* %c) #0, !llvm.arc.annotation.bottomup !24, !llvm.arc.annotation.topdown !25
-; CHECK:   call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_None)
-; CHECK: }
-define void @test2(i32* %x, i1 %p) nounwind {
-entry:
-  %a = bitcast i32* %x to i8*
-  %e = call i8* @objc_retain(i8* %a) nounwind
-  br i1 %p, label %t, label %f
-
-t:
-  store i8 3, i8* %a
-  %b = bitcast i32* %x to float*
-  store float 2.0, float* %b
-  br label %return
-
-f:
-  store i32 7, i32* %x
-  call void @use_pointer(i8* %e)
-  %d = bitcast i32* %x to float*
-  store float 3.0, float* %d
-  br label %return
-
-return:
-  %c = bitcast i32* %x to i8*
-  call void @objc_release(i8* %c) nounwind
-  ret void
-}
-
-; Like test0 but the release is in a loop,
-; so the optimization is not safe.
-
-; TODO: For now, assume this can't happen.
-
-; CHECK: define void @test3(
-; CHECK: entry:
-; CHECK:   call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_None)
-; CHECK:   tail call i8* @objc_retain(i8* %a) #0, !llvm.arc.annotation.bottomup !26, !llvm.arc.annotation.topdown !27
-; CHECK:   call void @llvm.arc.annotation.bottomup.bbend(i8** @x, i8** @S_Release)
-; CHECK:   call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_Retain)
-; CHECK: loop:
-; CHECK:   call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_Retain)
-; CHECK:   call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_Release)
-; CHECK:   call void @objc_release(i8* %c) #0, !llvm.arc.annotation.bottomup !28, !llvm.arc.annotation.topdown !29
-; CHECK:   call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_None)
-; CHECK: return:
-; CHECK:   call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_None)
-; CHECK:   call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_None)
-; CHECK: }
-define void @test3(i32* %x, i1* %q) nounwind {
-entry:
-  %a = bitcast i32* %x to i8*
-  %0 = call i8* @objc_retain(i8* %a) nounwind
-  br label %loop
-
-loop:
-  %c = bitcast i32* %x to i8*
-  call void @objc_release(i8* %c) nounwind
-  %j = load volatile i1* %q
-  br i1 %j, label %loop, label %return
-
-return:
-  ret void
-}
-
 !0 = metadata !{}
 
-; CHECK: !0 = metadata !{metadata !"(test0,%x)", metadata !"S_Use", metadata !"S_None"}
-; CHECK: !1 = metadata !{metadata !"(test0,%x)", metadata !"S_None", metadata !"S_Retain"}
-; CHECK: !2 = metadata !{metadata !"(test0,%x)", metadata !"S_Release", metadata !"S_Use"}
-; CHECK: !3 = metadata !{metadata !"(test0,%x)", metadata !"S_None", metadata !"S_Release"}
-; CHECK: !4 = metadata !{metadata !"(test0,%x)", metadata !"S_Retain", metadata !"S_None"}
-; CHECK: !5 = metadata !{metadata !"(test1,%x)", metadata !"S_None", metadata !"S_None"}
-; CHECK: !6 = metadata !{metadata !"(test1,%x)", metadata !"S_None", metadata !"S_Retain"}
-; CHECK: !7 = metadata !{metadata !"(test1,%x)", metadata !"S_Release", metadata !"S_Use"}
-; CHECK: !8 = metadata !{metadata !"(test1,%x)", metadata !"S_Retain", metadata !"S_CanRelease"}
-; CHECK: !9 = metadata !{metadata !"(test1,%x)", metadata !"S_None", metadata !"S_Release"}
-; CHECK: !10 = metadata !{metadata !"(test1b,%x)", metadata !"S_None", metadata !"S_None"}
-; CHECK: !11 = metadata !{metadata !"(test1b,%x)", metadata !"S_None", metadata !"S_Retain"}
-; CHECK: !12 = metadata !{metadata !"(test1b,%x)", metadata !"S_Use", metadata !"S_CanRelease"}
-; CHECK: !13 = metadata !{metadata !"(test1b,%x)", metadata !"S_Retain", metadata !"S_CanRelease"}
-; CHECK: !14 = metadata !{metadata !"(test1b,%x)", metadata !"S_MovableRelease", metadata !"S_Use"}
-; CHECK: !15 = metadata !{metadata !"(test1b,%x)", metadata !"S_CanRelease", metadata !"S_Use"}
-; CHECK: !16 = metadata !{}
-; CHECK: !17 = metadata !{metadata !"(test1b,%x)", metadata !"S_None", metadata !"S_MovableRelease"}
-; CHECK: !18 = metadata !{metadata !"(test2,%x)", metadata !"S_CanRelease", metadata !"S_None"}
-; CHECK: !19 = metadata !{metadata !"(test2,%x)", metadata !"S_None", metadata !"S_Retain"}
-; CHECK: !20 = metadata !{metadata !"(test2,%x)", metadata !"S_Release", metadata !"S_Use"}
-; CHECK: !21 = metadata !{metadata !"(test2,%x)", metadata !"S_Use", metadata !"S_CanRelease"}
-; CHECK: !22 = metadata !{metadata !"(test2,%x)", metadata !"S_Retain", metadata !"S_CanRelease"}
-; CHECK: !23 = metadata !{metadata !"(test2,%x)", metadata !"S_CanRelease", metadata !"S_Use"}
-; CHECK: !24 = metadata !{metadata !"(test2,%x)", metadata !"S_None", metadata !"S_Release"}
-; CHECK: !25 = metadata !{metadata !"(test2,%x)", metadata !"S_Use", metadata !"S_None"}
-; CHECK: !26 = metadata !{metadata !"(test3,%x)", metadata !"S_Release", metadata !"S_None"}
-; CHECK: !27 = metadata !{metadata !"(test3,%x)", metadata !"S_None", metadata !"S_Retain"}
-; CHECK: !28 = metadata !{metadata !"(test3,%x)", metadata !"S_None", metadata !"S_Release"}
-; CHECK: !29 = metadata !{metadata !"(test3,%x)", metadata !"S_Retain", metadata !"S_None"}
+; CHECK: ![[ANN0]] = metadata !{metadata !"(test0,%x)", metadata !"S_Use", metadata !"S_None"}
+; CHECK: ![[ANN1]] = metadata !{metadata !"(test0,%x)", metadata !"S_None", metadata !"S_Retain"}
+; CHECK: ![[ANN2]] = metadata !{metadata !"(test0,%x)", metadata !"S_Release", metadata !"S_Use"}
+; CHECK: ![[ANN3]] = metadata !{metadata !"(test0,%x)", metadata !"S_None", metadata !"S_Release"}
+; CHECK: ![[ANN4]] = metadata !{metadata !"(test0,%x)", metadata !"S_Retain", metadata !"S_None"}
 
diff --git a/test/Transforms/ObjCARC/basic.ll b/test/Transforms/ObjCARC/basic.ll
index 828a8a7..ca12792 100644
--- a/test/Transforms/ObjCARC/basic.ll
+++ b/test/Transforms/ObjCARC/basic.ll
@@ -20,6 +20,7 @@ declare void @callee()
 declare void @callee_fnptr(void ()*)
 declare void @invokee()
 declare i8* @returner()
+declare void @bar(i32 ()*)
 
 declare void @llvm.dbg.value(metadata, i64, metadata)
 
@@ -28,10 +29,11 @@ declare i8* @objc_msgSend(i8*, i8*, ...)
 ; Simple retain+release pair deletion, with some intervening control
 ; flow and harmless instructions.
 
-; CHECK: define void @test0(
-; CHECK-NOT: @objc_
+; CHECK: define void @test0_precise(i32* %x, i1 %p) [[NUW:#[0-9]+]] {
+; CHECK: @objc_retain
+; CHECK: @objc_release
 ; CHECK: }
-define void @test0(i32* %x, i1 %p) nounwind {
+define void @test0_precise(i32* %x, i1 %p) nounwind {
 entry:
   %a = bitcast i32* %x to i8*
   %0 = call i8* @objc_retain(i8* %a) nounwind
@@ -53,16 +55,41 @@ return:
   ret void
 }
 
+; CHECK: define void @test0_imprecise(i32* %x, i1 %p) [[NUW]] {
+; CHECK-NOT: @objc_
+; CHECK: }
+define void @test0_imprecise(i32* %x, i1 %p) nounwind {
+entry:
+  %a = bitcast i32* %x to i8*
+  %0 = call i8* @objc_retain(i8* %a) nounwind
+  br i1 %p, label %t, label %f
+
+t:
+  store i8 3, i8* %a
+  %b = bitcast i32* %x to float*
+  store float 2.0, float* %b
+  br label %return
+
+f:
+  store i32 7, i32* %x
+  br label %return
+
+return:
+  %c = bitcast i32* %x to i8*
+  call void @objc_release(i8* %c) nounwind, !clang.imprecise_release !0
+  ret void
+}
+
 ; Like test0 but the release isn't always executed when the retain is,
 ; so the optimization is not safe.
 
 ; TODO: Make the objc_release's argument be %0.
 
-; CHECK: define void @test1(
+; CHECK: define void @test1_precise(i32* %x, i1 %p, i1 %q) [[NUW]] {
 ; CHECK: @objc_retain(i8* %a)
 ; CHECK: @objc_release
 ; CHECK: }
-define void @test1(i32* %x, i1 %p, i1 %q) nounwind {
+define void @test1_precise(i32* %x, i1 %p, i1 %q) nounwind {
 entry:
   %a = bitcast i32* %x to i8*
   %0 = call i8* @objc_retain(i8* %a) nounwind
@@ -88,9 +115,69 @@ alt_return:
   ret void
 }
 
+; CHECK: define void @test1_imprecise(i32* %x, i1 %p, i1 %q) [[NUW]] {
+; CHECK: @objc_retain(i8* %a)
+; CHECK: @objc_release
+; CHECK: }
+define void @test1_imprecise(i32* %x, i1 %p, i1 %q) nounwind {
+entry:
+  %a = bitcast i32* %x to i8*
+  %0 = call i8* @objc_retain(i8* %a) nounwind
+  br i1 %p, label %t, label %f
+
+t:
+  store i8 3, i8* %a
+  %b = bitcast i32* %x to float*
+  store float 2.0, float* %b
+  br label %return
+
+f:
+  store i32 7, i32* %x
+  call void @callee()
+  br i1 %q, label %return, label %alt_return
+
+return:
+  %c = bitcast i32* %x to i8*
+  call void @objc_release(i8* %c) nounwind, !clang.imprecise_release !0
+  ret void
+
+alt_return:
+  ret void
+}
+
+
 ; Don't do partial elimination into two different CFG diamonds.
 
-; CHECK: define void @test1b(
+; CHECK: define void @test1b_precise(i8* %x, i1 %p, i1 %q) {
+; CHECK: entry:
+; CHECK:   tail call i8* @objc_retain(i8* %x) [[NUW]]
+; CHECK-NOT: @objc_
+; CHECK: if.end5:
+; CHECK:   tail call void @objc_release(i8* %x) [[NUW]]
+; CHECK-NOT: @objc_
+; CHECK: }
+define void @test1b_precise(i8* %x, i1 %p, i1 %q) {
+entry:
+  tail call i8* @objc_retain(i8* %x) nounwind
+  br i1 %p, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  tail call void @callee()
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  br i1 %q, label %if.then3, label %if.end5
+
+if.then3:                                         ; preds = %if.end
+  tail call void @use_pointer(i8* %x)
+  br label %if.end5
+
+if.end5:                                          ; preds = %if.then3, %if.end
+  tail call void @objc_release(i8* %x) nounwind
+  ret void
+}
+
+; CHECK: define void @test1b_imprecise(
 ; CHECK: entry:
 ; CHECK:   tail call i8* @objc_retain(i8* %x) [[NUW:#[0-9]+]]
 ; CHECK-NOT: @objc_
@@ -98,7 +185,7 @@ alt_return:
 ; CHECK:   tail call void @objc_release(i8* %x) [[NUW]], !clang.imprecise_release !0
 ; CHECK-NOT: @objc_
 ; CHECK: }
-define void @test1b(i8* %x, i1 %p, i1 %q) {
+define void @test1b_imprecise(i8* %x, i1 %p, i1 %q) {
 entry:
   tail call i8* @objc_retain(i8* %x) nounwind
   br i1 %p, label %if.then, label %if.end
@@ -119,14 +206,15 @@ if.end5:                                          ; preds = %if.then3, %if.end
   ret void
 }
 
+
 ; Like test0 but the pointer is passed to an intervening call,
 ; so the optimization is not safe.
 
-; CHECK: define void @test2(
+; CHECK: define void @test2_precise(
 ; CHECK: @objc_retain(i8* %a)
 ; CHECK: @objc_release
 ; CHECK: }
-define void @test2(i32* %x, i1 %p) nounwind {
+define void @test2_precise(i32* %x, i1 %p) nounwind {
 entry:
   %a = bitcast i32* %x to i8*
   %0 = call i8* @objc_retain(i8* %a) nounwind
@@ -151,16 +239,45 @@ return:
   ret void
 }
 
+; CHECK: define void @test2_imprecise(
+; CHECK: @objc_retain(i8* %a)
+; CHECK: @objc_release
+; CHECK: }
+define void @test2_imprecise(i32* %x, i1 %p) nounwind {
+entry:
+  %a = bitcast i32* %x to i8*
+  %0 = call i8* @objc_retain(i8* %a) nounwind
+  br i1 %p, label %t, label %f
+
+t:
+  store i8 3, i8* %a
+  %b = bitcast i32* %x to float*
+  store float 2.0, float* %b
+  br label %return
+
+f:
+  store i32 7, i32* %x
+  call void @use_pointer(i8* %0)
+  %d = bitcast i32* %x to float*
+  store float 3.0, float* %d
+  br label %return
+
+return:
+  %c = bitcast i32* %x to i8*
+  call void @objc_release(i8* %c) nounwind, !clang.imprecise_release !0
+  ret void
+}
+
 ; Like test0 but the release is in a loop,
 ; so the optimization is not safe.
 
 ; TODO: For now, assume this can't happen.
 
-; CHECK: define void @test3(
+; CHECK: define void @test3_precise(
 ; TODO: @objc_retain(i8* %a)
 ; TODO: @objc_release
 ; CHECK: }
-define void @test3(i32* %x, i1* %q) nounwind {
+define void @test3_precise(i32* %x, i1* %q) nounwind {
 entry:
   %a = bitcast i32* %x to i8*
   %0 = call i8* @objc_retain(i8* %a) nounwind
@@ -176,16 +293,37 @@ return:
   ret void
 }
 
+; CHECK: define void @test3_imprecise(
+; TODO: @objc_retain(i8* %a)
+; TODO: @objc_release
+; CHECK: }
+define void @test3_imprecise(i32* %x, i1* %q) nounwind {
+entry:
+  %a = bitcast i32* %x to i8*
+  %0 = call i8* @objc_retain(i8* %a) nounwind
+  br label %loop
+
+loop:
+  %c = bitcast i32* %x to i8*
+  call void @objc_release(i8* %c) nounwind, !clang.imprecise_release !0
+  %j = load volatile i1* %q
+  br i1 %j, label %loop, label %return
+
+return:
+  ret void
+}
+
+
 ; TODO: For now, assume this can't happen.
 
 ; Like test0 but the retain is in a loop,
 ; so the optimization is not safe.
 
-; CHECK: define void @test4(
+; CHECK: define void @test4_precise(
 ; TODO: @objc_retain(i8* %a)
 ; TODO: @objc_release
 ; CHECK: }
-define void @test4(i32* %x, i1* %q) nounwind {
+define void @test4_precise(i32* %x, i1* %q) nounwind {
 entry:
   br label %loop
 
@@ -201,14 +339,35 @@ return:
   ret void
 }
 
+; CHECK: define void @test4_imprecise(
+; TODO: @objc_retain(i8* %a)
+; TODO: @objc_release
+; CHECK: }
+define void @test4_imprecise(i32* %x, i1* %q) nounwind {
+entry:
+  br label %loop
+
+loop:
+  %a = bitcast i32* %x to i8*
+  %0 = call i8* @objc_retain(i8* %a) nounwind
+  %j = load volatile i1* %q
+  br i1 %j, label %loop, label %return
+
+return:
+  %c = bitcast i32* %x to i8*
+  call void @objc_release(i8* %c) nounwind, !clang.imprecise_release !0
+  ret void
+}
+
+
 ; Like test0 but the pointer is conditionally passed to an intervening call,
 ; so the optimization is not safe.
 
-; CHECK: define void @test5(
+; CHECK: define void @test5a(
 ; CHECK: @objc_retain(i8*
 ; CHECK: @objc_release
 ; CHECK: }
-define void @test5(i32* %x, i1 %q, i8* %y) nounwind {
+define void @test5a(i32* %x, i1 %q, i8* %y) nounwind {
 entry:
   %a = bitcast i32* %x to i8*
   %0 = call i8* @objc_retain(i8* %a) nounwind
@@ -220,13 +379,98 @@ entry:
   ret void
 }
 
+; CHECK: define void @test5b(
+; CHECK: @objc_retain(i8*
+; CHECK: @objc_release
+; CHECK: }
+define void @test5b(i32* %x, i1 %q, i8* %y) nounwind {
+entry:
+  %a = bitcast i32* %x to i8*
+  %0 = call i8* @objc_retain(i8* %a) nounwind
+  %s = select i1 %q, i8* %y, i8* %0
+  call void @use_pointer(i8* %s)
+  store i32 7, i32* %x
+  %c = bitcast i32* %x to i8*
+  call void @objc_release(i8* %c) nounwind, !clang.imprecise_release !0
+  ret void
+}
+
+
 ; retain+release pair deletion, where the release happens on two different
 ; flow paths.
 
-; CHECK: define void @test6(
+; CHECK: define void @test6a(
+; CHECK: entry:
+; CHECK:   tail call i8* @objc_retain(
+; CHECK: t:
+; CHECK:   call void @objc_release(
+; CHECK: f:
+; CHECK:   call void @objc_release(
+; CHECK: return:
+; CHECK: }
+define void @test6a(i32* %x, i1 %p) nounwind {
+entry:
+  %a = bitcast i32* %x to i8*
+  %0 = call i8* @objc_retain(i8* %a) nounwind
+  br i1 %p, label %t, label %f
+
+t:
+  store i8 3, i8* %a
+  %b = bitcast i32* %x to float*
+  store float 2.0, float* %b
+  %ct = bitcast i32* %x to i8*
+  call void @objc_release(i8* %ct) nounwind
+  br label %return
+
+f:
+  store i32 7, i32* %x
+  call void @callee()
+  %cf = bitcast i32* %x to i8*
+  call void @objc_release(i8* %cf) nounwind
+  br label %return
+
+return:
+  ret void
+}
+
+; CHECK: define void @test6b(
 ; CHECK-NOT: @objc_
 ; CHECK: }
-define void @test6(i32* %x, i1 %p) nounwind {
+define void @test6b(i32* %x, i1 %p) nounwind {
+entry:
+  %a = bitcast i32* %x to i8*
+  %0 = call i8* @objc_retain(i8* %a) nounwind
+  br i1 %p, label %t, label %f
+
+t:
+  store i8 3, i8* %a
+  %b = bitcast i32* %x to float*
+  store float 2.0, float* %b
+  %ct = bitcast i32* %x to i8*
+  call void @objc_release(i8* %ct) nounwind, !clang.imprecise_release !0
+  br label %return
+
+f:
+  store i32 7, i32* %x
+  call void @callee()
+  %cf = bitcast i32* %x to i8*
+  call void @objc_release(i8* %cf) nounwind, !clang.imprecise_release !0
+  br label %return
+
+return:
+  ret void
+}
+
+; CHECK: define void @test6c(
+; CHECK: entry:
+; CHECK:   tail call i8* @objc_retain(
+; CHECK: t:
+; CHECK:   call void @objc_release(
+; CHECK: f:
+; CHECK:   call void @objc_release(
+; CHECK: return:
+; CHECK: }
+define void @test6c(i32* %x, i1 %p) nounwind {
 entry:
   %a = bitcast i32* %x to i8*
   %0 = call i8* @objc_retain(i8* %a) nounwind
@@ -244,6 +488,40 @@ f:
   store i32 7, i32* %x
   call void @callee()
   %cf = bitcast i32* %x to i8*
+  call void @objc_release(i8* %cf) nounwind, !clang.imprecise_release !0
+  br label %return
+
+return:
+  ret void
+}
+
+; CHECK: define void @test6d(
+; CHECK: entry:
+; CHECK:   tail call i8* @objc_retain(
+; CHECK: t:
+; CHECK:   call void @objc_release(
+; CHECK: f:
+; CHECK:   call void @objc_release(
+; CHECK: return:
+; CHECK: }
+define void @test6d(i32* %x, i1 %p) nounwind {
+entry:
+  %a = bitcast i32* %x to i8*
+  %0 = call i8* @objc_retain(i8* %a) nounwind
+  br i1 %p, label %t, label %f
+
+t:
+  store i8 3, i8* %a
+  %b = bitcast i32* %x to float*
+  store float 2.0, float* %b
+  %ct = bitcast i32* %x to i8*
+  call void @objc_release(i8* %ct) nounwind, !clang.imprecise_release !0
+  br label %return
+
+f:
+  store i32 7, i32* %x
+  call void @callee()
+  %cf = bitcast i32* %x to i8*
   call void @objc_release(i8* %cf) nounwind
   br label %return
 
@@ -251,11 +529,19 @@ return:
   ret void
 }
 
+
 ; retain+release pair deletion, where the retain happens on two different
 ; flow paths.
 
-; CHECK: define void @test7(
-; CHECK-NOT: @objc_
+; CHECK:     define void @test7(
+; CHECK:     entry:
+; CHECK-NOT:   objc_
+; CHECK:     t:
+; CHECK:       call i8* @objc_retain
+; CHECK:     f:
+; CHECK:       call i8* @objc_retain
+; CHECK:     return:
+; CHECK:       call void @objc_release
 ; CHECK: }
 define void @test7(i32* %x, i1 %p) nounwind {
 entry:
@@ -281,17 +567,44 @@ return:
   ret void
 }
 
+; CHECK: define void @test7b(
+; CHECK-NOT: @objc_
+; CHECK: }
+define void @test7b(i32* %x, i1 %p) nounwind {
+entry:
+  %a = bitcast i32* %x to i8*
+  br i1 %p, label %t, label %f
+
+t:
+  %0 = call i8* @objc_retain(i8* %a) nounwind
+  store i8 3, i8* %a
+  %b = bitcast i32* %x to float*
+  store float 2.0, float* %b
+  br label %return
+
+f:
+  %1 = call i8* @objc_retain(i8* %a) nounwind
+  store i32 7, i32* %x
+  call void @callee()
+  br label %return
+
+return:
+  %c = bitcast i32* %x to i8*
+  call void @objc_release(i8* %c) nounwind, !clang.imprecise_release !0
+  ret void
+}
+
 ; Like test7, but there's a retain/retainBlock mismatch. Don't delete!
 
-; CHECK: define void @test7b
+; CHECK: define void @test7c
 ; CHECK: t:
-; CHECK: call i8* @objc_retainBlock
+; CHECK:   call i8* @objc_retainBlock
 ; CHECK: f:
-; CHECK: call i8* @objc_retain
+; CHECK:   call i8* @objc_retain
 ; CHECK: return:
-; CHECK: call void @objc_release
+; CHECK:   call void @objc_release
 ; CHECK: }
-define void @test7b(i32* %x, i1 %p) nounwind {
+define void @test7c(i32* %x, i1 %p) nounwind {
 entry:
   %a = bitcast i32* %x to i8*
   br i1 %p, label %t, label %f
@@ -318,10 +631,106 @@ return:
 ; retain+release pair deletion, where the retain and release both happen on
 ; different flow paths. Wild!
 
-; CHECK: define void @test8(
+; CHECK: define void @test8a(
+; CHECK: entry:
+; CHECK: t:
+; CHECK:   @objc_retain
+; CHECK: f:
+; CHECK:   @objc_retain
+; CHECK: mid:
+; CHECK: u:
+; CHECK:   @objc_release
+; CHECK: g:
+; CHECK:   @objc_release
+; CHECK: return:
+; CHECK: }
+define void @test8a(i32* %x, i1 %p, i1 %q) nounwind {
+entry:
+  %a = bitcast i32* %x to i8*
+  br i1 %p, label %t, label %f
+
+t:
+  %0 = call i8* @objc_retain(i8* %a) nounwind
+  store i8 3, i8* %a
+  %b = bitcast i32* %x to float*
+  store float 2.0, float* %b
+  br label %mid
+
+f:
+  %1 = call i8* @objc_retain(i8* %a) nounwind
+  store i32 7, i32* %x
+  br label %mid
+
+mid:
+  br i1 %q, label %u, label %g
+
+u:
+  call void @callee()
+  %cu = bitcast i32* %x to i8*
+  call void @objc_release(i8* %cu) nounwind
+  br label %return
+
+g:
+  %cg = bitcast i32* %x to i8*
+  call void @objc_release(i8* %cg) nounwind
+  br label %return
+
+return:
+  ret void
+}
+
+; CHECK: define void @test8b(
 ; CHECK-NOT: @objc_
 ; CHECK: }
-define void @test8(i32* %x, i1 %p, i1 %q) nounwind {
+define void @test8b(i32* %x, i1 %p, i1 %q) nounwind {
+entry:
+  %a = bitcast i32* %x to i8*
+  br i1 %p, label %t, label %f
+
+t:
+  %0 = call i8* @objc_retain(i8* %a) nounwind
+  store i8 3, i8* %a
+  %b = bitcast i32* %x to float*
+  store float 2.0, float* %b
+  br label %mid
+
+f:
+  %1 = call i8* @objc_retain(i8* %a) nounwind
+  store i32 7, i32* %x
+  br label %mid
+
+mid:
+  br i1 %q, label %u, label %g
+
+u:
+  call void @callee()
+  %cu = bitcast i32* %x to i8*
+  call void @objc_release(i8* %cu) nounwind, !clang.imprecise_release !0
+  br label %return
+
+g:
+  %cg = bitcast i32* %x to i8*
+  call void @objc_release(i8* %cg) nounwind, !clang.imprecise_release !0
+  br label %return
+
+return:
+  ret void
+}
+
+; CHECK: define void @test8c(
+; CHECK: entry:
+; CHECK: t:
+; CHECK:   @objc_retain
+; CHECK: f:
+; CHECK:   @objc_retain
+; CHECK: mid:
+; CHECK: u:
+; CHECK:   @objc_release
+; CHECK: g:
+; CHECK:   @objc_release
+; CHECK: return:
+; CHECK: }
+define void @test8c(i32* %x, i1 %p, i1 %q) nounwind {
 entry:
   %a = bitcast i32* %x to i8*
   br i1 %p, label %t, label %f
@@ -349,6 +758,54 @@ u:
 
 g:
   %cg = bitcast i32* %x to i8*
+  call void @objc_release(i8* %cg) nounwind, !clang.imprecise_release !0
+  br label %return
+
+return:
+  ret void
+}
+
+; CHECK: define void @test8d(
+; CHECK: entry:
+; CHECK: t:
+; CHECK:   @objc_retain
+; CHECK: f:
+; CHECK:   @objc_retain
+; CHECK: mid:
+; CHECK: u:
+; CHECK:   @objc_release
+; CHECK: g:
+; CHECK:   @objc_release
+; CHECK: return:
+; CHECK: }
+define void @test8d(i32* %x, i1 %p, i1 %q) nounwind {
+entry:
+  %a = bitcast i32* %x to i8*
+  br i1 %p, label %t, label %f
+
+t:
+  %0 = call i8* @objc_retain(i8* %a) nounwind
+  store i8 3, i8* %a
+  %b = bitcast i32* %x to float*
+  store float 2.0, float* %b
+  br label %mid
+
+f:
+  %1 = call i8* @objc_retain(i8* %a) nounwind
+  store i32 7, i32* %x
+  br label %mid
+
+mid:
+  br i1 %q, label %u, label %g
+
+u:
+  call void @callee()
+  %cu = bitcast i32* %x to i8*
+  call void @objc_release(i8* %cu) nounwind, !clang.imprecise_release !0
+  br label %return
+
+g:
+  %cg = bitcast i32* %x to i8*
   call void @objc_release(i8* %cg) nounwind
   br label %return
 
@@ -486,6 +943,7 @@ entry:
 ; CHECK-NEXT: @use_pointer
 ; CHECK-NEXT: @use_pointer
 ; CHECK-NEXT: ret void
+; CHECK-NEXT: }
 define void @test13b(i8* %x, i64 %n) {
 entry:
   call i8* @objc_retain(i8* %x) nounwind
@@ -527,6 +985,7 @@ entry:
 ; CHECK-NEXT: @use_pointer
 ; CHECK-NEXT: @use_pointer
 ; CHECK-NEXT: ret void
+; CHECK-NEXT: }
 define void @test13d(i8* %x, i64 %n) {
 entry:
   call i8* @objc_retain(i8* %x) nounwind
@@ -583,7 +1042,9 @@ entry:
 
 ; CHECK: define void @test15b
 ; CHECK-NEXT: entry:
+; CHECK-NEXT: @objc_retain
 ; CHECK-NEXT: @objc_autorelease
+; CHECK-NEXT: @objc_release
 ; CHECK-NEXT: ret void
 ; CHECK-NEXT: }
 define void @test15b(i8* %x, i64 %n) {
@@ -594,13 +1055,60 @@ entry:
   ret void
 }
 
+; CHECK: define void @test15c
+; CHECK-NEXT: entry:
+; CHECK-NEXT: @objc_autorelease
+; CHECK-NEXT: ret void
+; CHECK-NEXT: }
+define void @test15c(i8* %x, i64 %n) {
+entry:
+  call i8* @objc_retain(i8* %x) nounwind
+  call i8* @objc_autorelease(i8* %x) nounwind
+  call void @objc_release(i8* %x) nounwind, !clang.imprecise_release !0
+  ret void
+}
+
 ; Retain+release pairs in diamonds, all dominated by a retain.
 
-; CHECK: define void @test16(
+; CHECK: define void @test16a(
+; CHECK: @objc_retain(i8* %x)
+; CHECK-NOT: @objc
+; CHECK: }
+define void @test16a(i1 %a, i1 %b, i8* %x) {
+entry:
+  call i8* @objc_retain(i8* %x) nounwind
+  br i1 %a, label %red, label %orange
+
+red:
+  call i8* @objc_retain(i8* %x) nounwind
+  br label %yellow
+
+orange:
+  call i8* @objc_retain(i8* %x) nounwind
+  br label %yellow
+
+yellow:
+  call void @use_pointer(i8* %x)
+  call void @use_pointer(i8* %x)
+  br i1 %b, label %green, label %blue
+
+green:
+  call void @objc_release(i8* %x) nounwind
+  br label %purple
+
+blue:
+  call void @objc_release(i8* %x) nounwind
+  br label %purple
+
+purple:
+  ret void
+}
+
+; CHECK: define void @test16b(
 ; CHECK: @objc_retain(i8* %x)
 ; CHECK-NOT: @objc
 ; CHECK: }
-define void @test16(i1 %a, i1 %b, i8* %x) {
+define void @test16b(i1 %a, i1 %b, i8* %x) {
 entry:
   call i8* @objc_retain(i8* %x) nounwind
   br i1 %a, label %red, label %orange
@@ -619,17 +1127,86 @@ yellow:
   br i1 %b, label %green, label %blue
 
 green:
+  call void @objc_release(i8* %x) nounwind, !clang.imprecise_release !0
+  br label %purple
+
+blue:
   call void @objc_release(i8* %x) nounwind
   br label %purple
 
+purple:
+  ret void
+}
+
+; CHECK: define void @test16c(
+; CHECK: @objc_retain(i8* %x)
+; CHECK-NOT: @objc
+; CHECK: }
+define void @test16c(i1 %a, i1 %b, i8* %x) {
+entry:
+  call i8* @objc_retain(i8* %x) nounwind
+  br i1 %a, label %red, label %orange
+
+red:
+  call i8* @objc_retain(i8* %x) nounwind
+  br label %yellow
+
+orange:
+  call i8* @objc_retain(i8* %x) nounwind
+  br label %yellow
+
+yellow:
+  call void @use_pointer(i8* %x)
+  call void @use_pointer(i8* %x)
+  br i1 %b, label %green, label %blue
+
+green:
+  call void @objc_release(i8* %x) nounwind, !clang.imprecise_release !0
+  br label %purple
+
 blue:
+  call void @objc_release(i8* %x) nounwind, !clang.imprecise_release !0
+  br label %purple
+
+purple:
+  ret void
+}
+
+; CHECK: define void @test16d(
+; CHECK: @objc_retain(i8* %x)
+; CHECK-NOT: @objc
+; CHECK: }
+define void @test16d(i1 %a, i1 %b, i8* %x) {
+entry:
+  call i8* @objc_retain(i8* %x) nounwind
+  br i1 %a, label %red, label %orange
+
+red:
+  call i8* @objc_retain(i8* %x) nounwind
+  br label %yellow
+
+orange:
+  call i8* @objc_retain(i8* %x) nounwind
+  br label %yellow
+
+yellow:
+  call void @use_pointer(i8* %x)
+  call void @use_pointer(i8* %x)
+  br i1 %b, label %green, label %blue
+
+green:
   call void @objc_release(i8* %x) nounwind
   br label %purple
 
+blue:
+  call void @objc_release(i8* %x) nounwind, !clang.imprecise_release !0
+  br label %purple
+
 purple:
   ret void
 }
 
+
 ; Retain+release pairs in diamonds, all post-dominated by a release.
 
 ; CHECK: define void @test17(
@@ -720,6 +1297,7 @@ entry:
 ; CHECK: define void @test20(
 ; CHECK: %tmp1 = tail call i8* @objc_retain(i8* %tmp) [[NUW]]
 ; CHECK-NEXT: invoke
+; CHECK: }
 define void @test20(double* %self) {
 if.then12:
   %tmp = bitcast double* %self to i8*
@@ -747,6 +1325,7 @@ if.end:                                           ; preds = %invoke.cont23
 ; CHECK: define i8* @test21(
 ; CHECK: call i8* @returner()
 ; CHECK-NEXT: ret i8* %call
+; CHECK-NEXT: }
 define i8* @test21() {
 entry:
   %call = call i8* @returner()
@@ -799,7 +1378,7 @@ entry:
 
 ; Don't optimize objc_retainBlock, but do strength reduce it.
 
-; CHECK: define void @test23b
+; CHECK: define void @test23b(i8* %p) {
 ; CHECK: @objc_retain
 ; CHECK: @objc_release
 ; CHECK: }
@@ -1163,12 +1742,16 @@ done:
   ret void
 }
 
-; Delete retain,release if there's just a possible dec.
+; Delete retain,release if there's just a possible dec and we have imprecise
+; releases.
 
-; CHECK: define void @test34(
-; CHECK-NOT: @objc_
+; CHECK: define void @test34a(
+; CHECK:   call i8* @objc_retain
+; CHECK: true:
+; CHECK: done:
+; CHECK: call void @objc_release
 ; CHECK: }
-define void @test34(i8* %p, i1 %x, i8* %y) {
+define void @test34a(i8* %p, i1 %x, i8* %y) {
 entry:
   %f0 = call i8* @objc_retain(i8* %p)
   br i1 %x, label %true, label %done
@@ -1184,12 +1767,38 @@ done:
   ret void
 }
 
-; Delete retain,release if there's just a use.
-
-; CHECK: define void @test35(
+; CHECK: define void @test34b(
 ; CHECK-NOT: @objc_
 ; CHECK: }
-define void @test35(i8* %p, i1 %x, i8* %y) {
+define void @test34b(i8* %p, i1 %x, i8* %y) {
+entry:
+  %f0 = call i8* @objc_retain(i8* %p)
+  br i1 %x, label %true, label %done
+
+true:
+  call void @callee()
+  br label %done
+
+done:
+  %g = bitcast i8* %p to i8*
+  %h = getelementptr i8* %g, i64 0
+  call void @objc_release(i8* %g), !clang.imprecise_release !0
+  ret void
+}
+
+
+; Delete retain,release if there's just a use and we do not have a precise
+; release.
+
+; Precise.
+; CHECK: define void @test35a(
+; CHECK: entry:
+; CHECK:   call i8* @objc_retain
+; CHECK: true:
+; CHECK: done:
+; CHECK:   call void @objc_release
+; CHECK: }
+define void @test35a(i8* %p, i1 %x, i8* %y) {
 entry:
   %f0 = call i8* @objc_retain(i8* %p)
   br i1 %x, label %true, label %done
@@ -1205,16 +1814,36 @@ done:
   ret void
 }
 
-; Delete a retain,release if there's no actual use.
-
-; CHECK: define void @test36(
+; Imprecise.
+; CHECK: define void @test35b(
 ; CHECK-NOT: @objc_
+; CHECK: }
+define void @test35b(i8* %p, i1 %x, i8* %y) {
+entry:
+  %f0 = call i8* @objc_retain(i8* %p)
+  br i1 %x, label %true, label %done
+
+true:
+  %v = icmp eq i8* %p, %y
+  br label %done
+
+done:
+  %g = bitcast i8* %p to i8*
+  %h = getelementptr i8* %g, i64 0
+  call void @objc_release(i8* %g), !clang.imprecise_release !0
+  ret void
+}
+
+; Don't delete a retain,release with no actual use if the release is precise.
+
+; CHECK: define void @test36a(
+; CHECK: @objc_retain
 ; CHECK: call void @callee()
 ; CHECK-NOT: @objc_
 ; CHECK: call void @callee()
-; CHECK-NOT: @objc_
+; CHECK: @objc_release
 ; CHECK: }
-define void @test36(i8* %p) {
+define void @test36a(i8* %p) {
 entry:
   call i8* @objc_retain(i8* %p)
   call void @callee()
@@ -1225,10 +1854,10 @@ entry:
 
 ; Like test36, but with metadata.
 
-; CHECK: define void @test37(
+; CHECK: define void @test36b(
 ; CHECK-NOT: @objc_
 ; CHECK: }
-define void @test37(i8* %p) {
+define void @test36b(i8* %p) {
 entry:
   call i8* @objc_retain(i8* %p)
   call void @callee()
@@ -1439,6 +2068,7 @@ define void @test44(i8** %pp) {
 ; CHECK: call void @objc_release(i8* %q)
 ; CHECK: call void @use_pointer(i8* %p)
 ; CHECK: call void @objc_release(i8* %p)
+; CHECK: }
 define void @test45(i8** %pp, i8** %qq) {
   %p = load i8** %pp
   %q = load i8** %qq
@@ -1455,6 +2085,7 @@ define void @test45(i8** %pp, i8** %qq) {
 ; CHECK: tail call i8* @objc_retain(i8* %p) [[NUW]]
 ; CHECK: true:
 ; CHECK: call i8* @objc_autorelease(i8* %p) [[NUW]]
+; CHECK: }
 define void @test46(i8* %p, i1 %a) {
 entry:
   call i8* @objc_retain(i8* %p)
@@ -1474,6 +2105,7 @@ false:
 ; CHECK: define i8* @test47(
 ; CHECK-NOT: call
 ; CHECK: ret i8* %p
+; CHECK: }
 define i8* @test47(i8* %p) nounwind {
   %x = call i8* @objc_retainedObject(i8* %p)
   ret i8* %x
@@ -1484,6 +2116,7 @@ define i8* @test47(i8* %p) nounwind {
 ; CHECK: define i8* @test48(
 ; CHECK-NOT: call
 ; CHECK: ret i8* %p
+; CHECK: }
 define i8* @test48(i8* %p) nounwind {
   %x = call i8* @objc_unretainedObject(i8* %p)
   ret i8* %x
@@ -1494,32 +2127,51 @@ define i8* @test48(i8* %p) nounwind {
 ; CHECK: define i8* @test49(
 ; CHECK-NOT: call
 ; CHECK: ret i8* %p
+; CHECK: }
 define i8* @test49(i8* %p) nounwind {
   %x = call i8* @objc_unretainedPointer(i8* %p)
   ret i8* %x
 }
 
-; Do delete retain+release with intervening stores of the
-; address value.
+; Do delete retain+release with intervening stores of the address value if we
+; have imprecise release attached to objc_release.
 
-; CHECK: define void @test50(
+; CHECK:      define void @test50a(
+; CHECK-NEXT:   call i8* @objc_retain
+; CHECK-NEXT:   call void @callee
+; CHECK-NEXT:   store
+; CHECK-NEXT:   call void @objc_release
+; CHECK-NEXT:   ret void
+; CHECK-NEXT: }
+define void @test50a(i8* %p, i8** %pp) {
+  call i8* @objc_retain(i8* %p)
+  call void @callee()
+  store i8* %p, i8** %pp
+  call void @objc_release(i8* %p)
+  ret void
+}
+
+; CHECK: define void @test50b(
 ; CHECK-NOT: @objc_
 ; CHECK: }
-define void @test50(i8* %p, i8** %pp) {
+define void @test50b(i8* %p, i8** %pp) {
   call i8* @objc_retain(i8* %p)
   call void @callee()
   store i8* %p, i8** %pp
-  call void @objc_release(i8* %p)
+  call void @objc_release(i8* %p), !clang.imprecise_release !0
   ret void
 }
 
+
 ; Don't delete retain+release with intervening stores through the
 ; address value.
 
-; CHECK: define void @test51(
+; CHECK: define void @test51a(
 ; CHECK: call i8* @objc_retain(i8* %p)
 ; CHECK: call void @objc_release(i8* %p)
-define void @test51(i8* %p) {
+; CHECK: ret void
+; CHECK: }
+define void @test51a(i8* %p) {
   call i8* @objc_retain(i8* %p)
   call void @callee()
   store i8 0, i8* %p
@@ -1527,15 +2179,30 @@ define void @test51(i8* %p) {
   ret void
 }
 
+; CHECK: define void @test51b(
+; CHECK: call i8* @objc_retain(i8* %p)
+; CHECK: call void @objc_release(i8* %p)
+; CHECK: ret void
+; CHECK: }
+define void @test51b(i8* %p) {
+  call i8* @objc_retain(i8* %p)
+  call void @callee()
+  store i8 0, i8* %p
+  call void @objc_release(i8* %p), !clang.imprecise_release !0
+  ret void
+}
+
 ; Don't delete retain+release with intervening use of a pointer of
 ; unknown provenance.
 
-; CHECK: define void @test52(
+; CHECK: define void @test52a(
 ; CHECK: call i8* @objc_retain
 ; CHECK: call void @callee()
 ; CHECK: call void @use_pointer(i8* %z)
 ; CHECK: call void @objc_release
-define void @test52(i8** %zz, i8** %pp) {
+; CHECK: ret void
+; CHECK: }
+define void @test52a(i8** %zz, i8** %pp) {
   %p = load i8** %pp
   %1 = call i8* @objc_retain(i8* %p)
   call void @callee()
@@ -1545,6 +2212,23 @@ define void @test52(i8** %zz, i8** %pp) {
   ret void
 }
 
+; CHECK: define void @test52b(
+; CHECK: call i8* @objc_retain
+; CHECK: call void @callee()
+; CHECK: call void @use_pointer(i8* %z)
+; CHECK: call void @objc_release
+; CHECK: ret void
+; CHECK: }
+define void @test52b(i8** %zz, i8** %pp) {
+  %p = load i8** %pp
+  %1 = call i8* @objc_retain(i8* %p)
+  call void @callee()
+  %z = load i8** %zz
+  call void @use_pointer(i8* %z)
+  call void @objc_release(i8* %p), !clang.imprecise_release !0
+  ret void
+}
+
 ; Like test52, but the pointer has function type, so it's assumed to
 ; be not reference counted.
 ; Oops. That's wrong. Clang sometimes uses function types gratuitously.
@@ -1569,6 +2253,7 @@ define void @test53(void ()** %zz, i8** %pp) {
 ; CHECK: call i8* @returner()
 ; CHECK-NEXT: call void @objc_release(i8* %t) [[NUW]], !clang.imprecise_release !0
 ; CHECK-NEXT: ret void
+; CHECK: }
 define void @test54() {
   %t = call i8* @returner()
   call i8* @objc_autorelease(i8* %t)
@@ -1697,19 +2382,78 @@ entry:
 @constptr = external constant i8*
 @something = external global i8*
 
-; CHECK: define void @test60(
-; CHECK-NOT: @objc_
+; We have a precise lifetime retain/release here. We can not remove them since
+; @something is not constant.
+
+; CHECK: define void @test60a(
+; CHECK: call i8* @objc_retain
+; CHECK: call void @objc_release
+; CHECK: }
+define void @test60a() {
+  %t = load i8** @constptr
+  %s = load i8** @something
+  call i8* @objc_retain(i8* %s)
+  call void @callee()
+  call void @use_pointer(i8* %t)
+  call void @objc_release(i8* %s)
+  ret void
+}
+
+; CHECK: define void @test60b(
+; CHECK: call i8* @objc_retain
+; CHECK-NOT: call i8* @objc_retain
+; CHECK-NOT: call void @objc_release
 ; CHECK: }
-define void @test60() {
+define void @test60b() {
   %t = load i8** @constptr
   %s = load i8** @something
   call i8* @objc_retain(i8* %s)
+  call i8* @objc_retain(i8* %s)
   call void @callee()
   call void @use_pointer(i8* %t)
   call void @objc_release(i8* %s)
   ret void
 }
 
+; CHECK: define void @test60c(
+; CHECK-NOT: @objc_
+; CHECK: }
+define void @test60c() {
+  %t = load i8** @constptr
+  %s = load i8** @something
+  call i8* @objc_retain(i8* %s)
+  call void @callee()
+  call void @use_pointer(i8* %t)
+  call void @objc_release(i8* %s), !clang.imprecise_release !0
+  ret void
+}
+
+; CHECK: define void @test60d(
+; CHECK-NOT: @objc_
+; CHECK: }
+define void @test60d() {
+  %t = load i8** @constptr
+  %s = load i8** @something
+  call i8* @objc_retain(i8* %t)
+  call void @callee()
+  call void @use_pointer(i8* %s)
+  call void @objc_release(i8* %t)
+  ret void
+}
+
+; CHECK: define void @test60e(
+; CHECK-NOT: @objc_
+; CHECK: }
+define void @test60e() {
+  %t = load i8** @constptr
+  %s = load i8** @something
+  call i8* @objc_retain(i8* %t)
+  call void @callee()
+  call void @use_pointer(i8* %s)
+  call void @objc_release(i8* %t), !clang.imprecise_release !0
+  ret void
+}
+
 ; Constant pointers to objects don't need to be considered related to other
 ; pointers.
 
@@ -1876,11 +2620,13 @@ return:                                           ; preds = %if.then, %entry
 ; An objc_retain can serve as a may-use for a different pointer.
 ; rdar://11931823
 
-; CHECK: define void @test66(
-; CHECK:   %tmp7 = tail call i8* @objc_retain(i8* %cond) [[NUW]]
+; CHECK: define void @test66a(
+; CHECK:   tail call i8* @objc_retain(i8* %cond) [[NUW]]
+; CHECK:   tail call void @objc_release(i8* %call) [[NUW]]
+; CHECK:   tail call i8* @objc_retain(i8* %tmp8) [[NUW]]
 ; CHECK:   tail call void @objc_release(i8* %cond) [[NUW]]
 ; CHECK: }
-define void @test66(i8* %tmp5, i8* %bar, i1 %tobool, i1 %tobool1, i8* %call) {
+define void @test66a(i8* %tmp5, i8* %bar, i1 %tobool, i1 %tobool1, i8* %call) {
 entry:
   br i1 %tobool, label %cond.true, label %cond.end
 
@@ -1897,7 +2643,74 @@ cond.end:                                         ; preds = %cond.true, %entry
   ret void
 }
 
-declare void @bar(i32 ()*)
+; CHECK: define void @test66b(
+; CHECK:   tail call i8* @objc_retain(i8* %cond) [[NUW]]
+; CHECK:   tail call void @objc_release(i8* %call) [[NUW]]
+; CHECK:   tail call i8* @objc_retain(i8* %tmp8) [[NUW]]
+; CHECK:   tail call void @objc_release(i8* %cond) [[NUW]]
+; CHECK: }
+define void @test66b(i8* %tmp5, i8* %bar, i1 %tobool, i1 %tobool1, i8* %call) {
+entry:
+  br i1 %tobool, label %cond.true, label %cond.end
+
+cond.true:
+  br label %cond.end
+
+cond.end:                                         ; preds = %cond.true, %entry
+  %cond = phi i8* [ %tmp5, %cond.true ], [ %call, %entry ]
+  %tmp7 = tail call i8* @objc_retain(i8* %cond) nounwind
+  tail call void @objc_release(i8* %call) nounwind, !clang.imprecise_release !0
+  %tmp8 = select i1 %tobool1, i8* %cond, i8* %bar
+  %tmp9 = tail call i8* @objc_retain(i8* %tmp8) nounwind
+  tail call void @objc_release(i8* %cond) nounwind
+  ret void
+}
+
+; CHECK: define void @test66c(
+; CHECK:   tail call i8* @objc_retain(i8* %cond) [[NUW]]
+; CHECK:   tail call void @objc_release(i8* %call) [[NUW]]
+; CHECK:   tail call i8* @objc_retain(i8* %tmp8) [[NUW]]
+; CHECK:   tail call void @objc_release(i8* %cond) [[NUW]]
+; CHECK: }
+define void @test66c(i8* %tmp5, i8* %bar, i1 %tobool, i1 %tobool1, i8* %call) {
+entry:
+  br i1 %tobool, label %cond.true, label %cond.end
+
+cond.true:
+  br label %cond.end
+
+cond.end:                                         ; preds = %cond.true, %entry
+  %cond = phi i8* [ %tmp5, %cond.true ], [ %call, %entry ]
+  %tmp7 = tail call i8* @objc_retain(i8* %cond) nounwind
+  tail call void @objc_release(i8* %call) nounwind
+  %tmp8 = select i1 %tobool1, i8* %cond, i8* %bar
+  %tmp9 = tail call i8* @objc_retain(i8* %tmp8) nounwind
+  tail call void @objc_release(i8* %cond) nounwind, !clang.imprecise_release !0
+  ret void
+}
+
+; CHECK: define void @test66d(
+; CHECK:   tail call i8* @objc_retain(i8* %cond) [[NUW]]
+; CHECK:   tail call void @objc_release(i8* %call) [[NUW]]
+; CHECK:   tail call i8* @objc_retain(i8* %tmp8) [[NUW]]
+; CHECK:   tail call void @objc_release(i8* %cond) [[NUW]]
+; CHECK: }
+define void @test66d(i8* %tmp5, i8* %bar, i1 %tobool, i1 %tobool1, i8* %call) {
+entry:
+  br i1 %tobool, label %cond.true, label %cond.end
+
+cond.true:
+  br label %cond.end
+
+cond.end:                                         ; preds = %cond.true, %entry
+  %cond = phi i8* [ %tmp5, %cond.true ], [ %call, %entry ]
+  %tmp7 = tail call i8* @objc_retain(i8* %cond) nounwind
+  tail call void @objc_release(i8* %call) nounwind, !clang.imprecise_release !0
+  %tmp8 = select i1 %tobool1, i8* %cond, i8* %bar
+  %tmp9 = tail call i8* @objc_retain(i8* %tmp8) nounwind
+  tail call void @objc_release(i8* %cond) nounwind, !clang.imprecise_release !0
+  ret void
+}
 
 ; A few real-world testcases.
 
@@ -1907,7 +2720,7 @@ declare i32 @printf(i8* nocapture, ...) nounwind
 declare i32 @puts(i8* nocapture) nounwind
 @str = internal constant [16 x i8] c"-[ Top0 _getX ]\00"
 
-; CHECK: @"\01-[A z]"
+; CHECK: define { <2 x float>, <2 x float> } @"\01-[A z]"({}* %self, i8* nocapture %_cmd) [[NUW]] {
 ; CHECK-NOT: @objc_
 ; CHECK: }
 
@@ -1953,7 +2766,7 @@ invoke.cont:
   ret {<2 x float>, <2 x float>} %tmp35
 }
 
-; CHECK: @"\01-[Top0 _getX]"
+; CHECK: @"\01-[Top0 _getX]"({}* %self, i8* nocapture %_cmd) [[NUW]] {
 ; CHECK-NOT: @objc_
 ; CHECK: }
 
@@ -1972,12 +2785,13 @@ invoke.cont:
 
 ; A simple loop. Eliminate the retain and release inside of it!
 
-; CHECK: define void @loop
+; CHECK: define void @loop(i8* %x, i64 %n) {
 ; CHECK: for.body:
 ; CHECK-NOT: @objc_
 ; CHECK: @objc_msgSend
 ; CHECK-NOT: @objc_
 ; CHECK: for.end:
+; CHECK: }
 define void @loop(i8* %x, i64 %n) {
 entry:
   %0 = tail call i8* @objc_retain(i8* %x) nounwind
@@ -2001,7 +2815,7 @@ for.end:                                          ; preds = %for.body, %entry
 
 ; ObjCARCOpt can delete the retain,release on self.
 
-; CHECK: define void @TextEditTest
+; CHECK: define void @TextEditTest(%2* %self, %3* %pboard) {
 ; CHECK-NOT: call i8* @objc_retain(i8* %tmp7)
 ; CHECK: }
 
diff --git a/test/Transforms/ObjCARC/cfg-hazards.ll b/test/Transforms/ObjCARC/cfg-hazards.ll
index 899298b..0156d5b 100644
--- a/test/Transforms/ObjCARC/cfg-hazards.ll
+++ b/test/Transforms/ObjCARC/cfg-hazards.ll
@@ -8,6 +8,7 @@ declare void @use_pointer(i8*)
 declare i8* @objc_retain(i8*)
 declare void @objc_release(i8*)
 declare void @callee()
+declare void @block_callee(void ()*)
 
 ; CHECK: define void @test0(
 ; CHECK:   call i8* @objc_retain(
@@ -394,6 +395,41 @@ exit:
   ret void
 }
 
+; Do not improperly pair retains in a for loop with releases outside of a for
+; loop when the proper pairing is disguised by a separate provenance represented
+; by an alloca.
+; rdar://12969722
+
+; CHECK: define void @test13(i8* %a) [[NUW]] {
+; CHECK: entry:
+; CHECK:   tail call i8* @objc_retain(i8* %a) [[NUW]]
+; CHECK: loop:
+; CHECK:   tail call i8* @objc_retain(i8* %a) [[NUW]]
+; CHECK:   call void @block_callee
+; CHECK:   call void @objc_release(i8* %reloaded_a) [[NUW]]
+; CHECK: exit:
+; CHECK:   call void @objc_release(i8* %a) [[NUW]]
+; CHECK: }
+define void @test13(i8* %a) nounwind {
+entry:
+  %block = alloca i8*
+  %a1 = tail call i8* @objc_retain(i8* %a) nounwind
+  br label %loop
+
+loop:
+  %a2 = tail call i8* @objc_retain(i8* %a) nounwind
+  store i8* %a, i8** %block, align 8
+  %casted_block = bitcast i8** %block to void ()*
+  call void @block_callee(void ()* %casted_block)
+  %reloaded_a = load i8** %block, align 8
+  call void @objc_release(i8* %reloaded_a) nounwind, !clang.imprecise_release !0
+  br i1 undef, label %loop, label %exit
+  
+exit:
+  call void @objc_release(i8* %a) nounwind, !clang.imprecise_release !0
+  ret void
+}
+
 ; CHECK: attributes [[NUW]] = { nounwind }
 
 !0 = metadata !{}
diff --git a/test/Transforms/ObjCARC/contract-marker.ll b/test/Transforms/ObjCARC/contract-marker.ll
index 01fd1e7..55a1b28 100644
--- a/test/Transforms/ObjCARC/contract-marker.ll
+++ b/test/Transforms/ObjCARC/contract-marker.ll
@@ -1,9 +1,11 @@
 ; RUN: opt -S -objc-arc-contract < %s | FileCheck %s
 
+; CHECK: define void @foo() {
 ; CHECK:      %call = tail call i32* @qux()
 ; CHECK-NEXT: %tcall = bitcast i32* %call to i8*
 ; CHECK-NEXT: call void asm sideeffect "mov\09r7, r7\09\09@ marker for objc_retainAutoreleaseReturnValue", ""()
 ; CHECK-NEXT: %0 = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %tcall) [[NUW:#[0-9]+]]
+; CHECK: }
 
 define void @foo() {
 entry:
diff --git a/test/Transforms/ObjCARC/contract-storestrong.ll b/test/Transforms/ObjCARC/contract-storestrong.ll
index 6999237..023604e 100644
--- a/test/Transforms/ObjCARC/contract-storestrong.ll
+++ b/test/Transforms/ObjCARC/contract-storestrong.ll
@@ -12,6 +12,7 @@ declare void @use_pointer(i8*)
 ; CHECK: entry:
 ; CHECK-NEXT: tail call void @objc_storeStrong(i8** @x, i8* %p) [[NUW:#[0-9]+]]
 ; CHECK-NEXT: ret void
+; CHECK-NEXT: }
 define void @test0(i8* %p) {
 entry:
   %0 = tail call i8* @objc_retain(i8* %p) nounwind
@@ -107,6 +108,7 @@ entry:
 ; CHECK: define i1 @test5(i8* %newValue, i8* %foo) {
 ; CHECK: %t = icmp eq i8* %x1, %foo
 ; CHECK: tail call void @objc_storeStrong(i8** @x, i8* %newValue) [[NUW]]
+; CHECK: }
 define i1 @test5(i8* %newValue, i8* %foo) {
 entry:
   %x0 = tail call i8* @objc_retain(i8* %newValue) nounwind
@@ -122,6 +124,7 @@ entry:
 ; CHECK: define i1 @test6(i8* %newValue, i8* %foo) {
 ; CHECK: %t = icmp eq i8* %x1, %foo
 ; CHECK: tail call void @objc_storeStrong(i8** @x, i8* %newValue) [[NUW]]
+; CHECK: }
 define i1 @test6(i8* %newValue, i8* %foo) {
 entry:
   %x0 = tail call i8* @objc_retain(i8* %newValue) nounwind
diff --git a/test/Transforms/ObjCARC/contract-testcases.ll b/test/Transforms/ObjCARC/contract-testcases.ll
index 85b03be..fc023f8 100644
--- a/test/Transforms/ObjCARC/contract-testcases.ll
+++ b/test/Transforms/ObjCARC/contract-testcases.ll
@@ -50,6 +50,7 @@ bb6:                                              ; preds = %bb5, %bb4, %bb4, %b
 ; CHECK: br i1 undef, label %bb7, label %bb7
 ; CHECK: bb7:
 ; CHECK: %tmp8 = phi %0* [ %0, %bb ], [ %0, %bb ]
+; CHECK: }
 define void @test1() {
 bb:
   %tmp = tail call %0* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to %0* ()*)()
@@ -70,6 +71,7 @@ bb7:                                              ; preds = %bb6, %bb6, %bb5
 ; CHECK: invoke.cont:                                      ; preds = %entry
 ; CHECK-NEXT: call void asm sideeffect "mov\09r7, r7\09\09@ marker for objc_retainAutoreleaseReturnValue", ""()
 ; CHECK-NEXT: %tmp = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %call) [[NUW:#[0-9]+]]
+; CHECK: }
 define void @_Z6doTestP8NSString() {
 entry:
   %call = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* ()*)()
diff --git a/test/Transforms/ObjCARC/contract.ll b/test/Transforms/ObjCARC/contract.ll
index 0b60683..3544f88 100644
--- a/test/Transforms/ObjCARC/contract.ll
+++ b/test/Transforms/ObjCARC/contract.ll
@@ -10,6 +10,7 @@ declare i8* @objc_retainAutoreleasedReturnValue(i8*)
 
 declare void @use_pointer(i8*)
 declare i8* @returner()
+declare void @callee()
 
 ; CHECK: define void @test0
 ; CHECK: call void @use_pointer(i8* %0)
@@ -137,6 +138,7 @@ define i8* @test6() {
 ; CHECK: call void @use_pointer(i8* %1)
 ; CHECK: tail call i8* @objc_autoreleaseReturnValue(i8* %1)
 ; CHECK: ret i8* %2
+; CHECK-NEXT: }
 define i8* @test7(i8* %p) {
   %1 = tail call i8* @objc_retain(i8* %p)
   call void @use_pointer(i8* %p)
@@ -171,6 +173,60 @@ define void @test9(i8* %a, i8* %b) {
   ret void
 }
 
+
+; Turn objc_retain into objc_retainAutoreleasedReturnValue if its operand
+; is a return value.
+
+; CHECK: define void @test10()
+; CHECK: tail call i8* @objc_retainAutoreleasedReturnValue(i8* %p)
+define void @test10() {
+  %p = call i8* @returner()
+  tail call i8* @objc_retain(i8* %p) nounwind
+  ret void
+}
+
+; Convert objc_retain to objc_retainAutoreleasedReturnValue if its
+; argument is a return value.
+
+; CHECK: define void @test11(
+; CHECK-NEXT: %y = call i8* @returner()
+; CHECK-NEXT: tail call i8* @objc_retainAutoreleasedReturnValue(i8* %y) [[NUW]]
+; CHECK-NEXT: ret void
+define void @test11() {
+  %y = call i8* @returner()
+  tail call i8* @objc_retain(i8* %y) nounwind
+  ret void
+}
+
+; Don't convert objc_retain to objc_retainAutoreleasedReturnValue if its
+; argument is not a return value.
+
+; CHECK: define void @test12(
+; CHECK-NEXT: tail call i8* @objc_retain(i8* %y) [[NUW]]
+; CHECK-NEXT: ret void
+; CHECK-NEXT: }
+define void @test12(i8* %y) {
+  tail call i8* @objc_retain(i8* %y) nounwind
+  ret void
+}
+
+; Don't convert objc_retain to objc_retainAutoreleasedReturnValue if it
+; isn't next to the call providing its return value.
+
+; CHECK: define void @test13(
+; CHECK-NEXT: %y = call i8* @returner()
+; CHECK-NEXT: call void @callee()
+; CHECK-NEXT: tail call i8* @objc_retain(i8* %y) [[NUW]]
+; CHECK-NEXT: ret void
+; CHECK-NEXT: }
+define void @test13() {
+  %y = call i8* @returner()
+  call void @callee()
+  tail call i8* @objc_retain(i8* %y) nounwind
+  ret void
+}
+
+
 declare void @clang.arc.use(...) nounwind
 
 ; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ObjCARC/expand.ll b/test/Transforms/ObjCARC/expand.ll
index 5388673..fe47ee5 100644
--- a/test/Transforms/ObjCARC/expand.ll
+++ b/test/Transforms/ObjCARC/expand.ll
@@ -4,25 +4,91 @@ target datalayout = "e-p:64:64:64"
 
 declare i8* @objc_retain(i8*)
 declare i8* @objc_autorelease(i8*)
+declare i8* @objc_retainAutoreleasedReturnValue(i8*)
+declare i8* @objc_autoreleaseReturnValue(i8*)
+declare i8* @objc_retainAutorelease(i8*)
+declare i8* @objc_retainAutoreleaseReturnValue(i8*)
+declare i8* @objc_retainBlock(i8*)
 
 declare void @use_pointer(i8*)
 
-; CHECK: define void @test0
+; CHECK: define void @test_retain(i8* %x) [[NUW:#[0-9]+]] {
+; CHECK: call i8* @objc_retain(i8* %x)
 ; CHECK: call void @use_pointer(i8* %x)
 ; CHECK: }
-define void @test0(i8* %x) nounwind {
+define void @test_retain(i8* %x) nounwind {
 entry:
   %0 = call i8* @objc_retain(i8* %x) nounwind
   call void @use_pointer(i8* %0)
   ret void
 }
 
-; CHECK: define void @test1
+; CHECK: define void @test_retainAutoreleasedReturnValue(i8* %x) [[NUW]] {
+; CHECK: call i8* @objc_retainAutoreleasedReturnValue(i8* %x)
 ; CHECK: call void @use_pointer(i8* %x)
 ; CHECK: }
-define void @test1(i8* %x) nounwind {
+define void @test_retainAutoreleasedReturnValue(i8* %x) nounwind {
+entry:
+  %0 = call i8* @objc_retainAutoreleasedReturnValue(i8* %x) nounwind
+  call void @use_pointer(i8* %0)
+  ret void
+}
+
+; CHECK: define void @test_retainAutorelease(i8* %x) [[NUW]] {
+; CHECK: call i8* @objc_retainAutorelease(i8* %x)
+; CHECK: call void @use_pointer(i8* %x)
+; CHECK: }
+define void @test_retainAutorelease(i8* %x) nounwind {
+entry:
+  %0 = call i8* @objc_retainAutorelease(i8* %x) nounwind
+  call void @use_pointer(i8* %0)
+  ret void
+}
+
+; CHECK: define void @test_retainAutoreleaseReturnValue(i8* %x) [[NUW]] {
+; CHECK: call i8* @objc_retainAutoreleaseReturnValue(i8* %x)
+; CHECK: call void @use_pointer(i8* %x)
+; CHECK: }
+define void @test_retainAutoreleaseReturnValue(i8* %x) nounwind {
+entry:
+  %0 = call i8* @objc_retainAutoreleaseReturnValue(i8* %x) nounwind
+  call void @use_pointer(i8* %0)
+  ret void
+}
+
+; CHECK: define void @test_autorelease(i8* %x) [[NUW]] {
+; CHECK: call i8* @objc_autorelease(i8* %x)
+; CHECK: call void @use_pointer(i8* %x)
+; CHECK: }
+define void @test_autorelease(i8* %x) nounwind {
 entry:
   %0 = call i8* @objc_autorelease(i8* %x) nounwind
-  call void @use_pointer(i8* %x)
+  call void @use_pointer(i8* %0)
+  ret void
+}
+
+; CHECK: define void @test_autoreleaseReturnValue(i8* %x) [[NUW]] {
+; CHECK: call i8* @objc_autoreleaseReturnValue(i8* %x)
+; CHECK: call void @use_pointer(i8* %x)
+; CHECK: }
+define void @test_autoreleaseReturnValue(i8* %x) nounwind {
+entry:
+  %0 = call i8* @objc_autoreleaseReturnValue(i8* %x) nounwind
+  call void @use_pointer(i8* %0)
+  ret void
+}
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; RetainBlock is not strictly forwarding. Do not touch it. ;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+; CHECK: define void @test_retainBlock(i8* %x) [[NUW]] {
+; CHECK: call i8* @objc_retainBlock(i8* %x)
+; CHECK: call void @use_pointer(i8* %0)
+; CHECK: }
+define void @test_retainBlock(i8* %x) nounwind {
+entry:
+  %0 = call i8* @objc_retainBlock(i8* %x) nounwind
+  call void @use_pointer(i8* %0)
   ret void
 }
diff --git a/test/Transforms/ObjCARC/gvn.ll b/test/Transforms/ObjCARC/gvn.ll
index 3648866..a828b54 100644
--- a/test/Transforms/ObjCARC/gvn.ll
+++ b/test/Transforms/ObjCARC/gvn.ll
@@ -7,11 +7,12 @@ declare i8* @objc_retain(i8*)
 ; GVN should be able to eliminate this redundant load, with ARC-specific
 ; alias analysis.
 
-; CHECK: @foo
+; CHECK: define i8* @foo(i32 %n)
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: %s = load i8** @x
 ; CHECK-NOT: load
 ; CHECK: ret i8* %s
+; CHECK-NEXT: }
 define i8* @foo(i32 %n) nounwind {
 entry:
   %s = load i8** @x
diff --git a/test/Transforms/ObjCARC/clang-arc-used-intrinsic-removed-if-isolated.ll b/test/Transforms/ObjCARC/intrinsic-use-isolated.ll
index 4215b5c..4215b5c 100644
--- a/test/Transforms/ObjCARC/clang-arc-used-intrinsic-removed-if-isolated.ll
+++ b/test/Transforms/ObjCARC/intrinsic-use-isolated.ll
diff --git a/test/Transforms/ObjCARC/intrinsic-use.ll b/test/Transforms/ObjCARC/intrinsic-use.ll
index 9c7b81a..60370c1 100644
--- a/test/Transforms/ObjCARC/intrinsic-use.ll
+++ b/test/Transforms/ObjCARC/intrinsic-use.ll
@@ -34,8 +34,11 @@ declare void @test0_helper(i8*, i8**)
 ; CHECK-NEXT:   @objc_release(i8* [[VAL1]])
 ; CHECK-NEXT:   @objc_autorelease(i8* %x)
 ; CHECK-NEXT:   store i8* %x, i8** %out
+; CHECK-NEXT:   @objc_retain(i8* %x)
 ; CHECK-NEXT:   @objc_release(i8* [[VAL2]])
+; CHECK-NEXT:   @objc_release(i8* %x)
 ; CHECK-NEXT:   ret void
+; CHECK-NEXT: }
 define void @test0(i8** %out, i8* %x, i8* %y) {
 entry:
   %temp0 = alloca i8*, align 8
@@ -61,3 +64,53 @@ entry:
   call void @objc_release(i8* %x) nounwind
   ret void
 }
+
+; CHECK:      define void @test0a(
+; CHECK:        @objc_retain(i8* %x)
+; CHECK-NEXT:   store i8* %y, i8** %temp0
+; CHECK-NEXT:   @objc_retain(i8* %y)
+; CHECK-NEXT:   call void @test0_helper
+; CHECK-NEXT:   [[VAL1:%.*]] = load i8** %temp0
+; CHECK-NEXT:   call void (...)* @clang.arc.use(i8* %y)
+; CHECK-NEXT:   @objc_retain(i8* [[VAL1]])
+; CHECK-NEXT:   @objc_release(i8* %y)
+; CHECK-NEXT:   store i8* [[VAL1]], i8** %temp1
+; CHECK-NEXT:   call void @test0_helper
+; CHECK-NEXT:   [[VAL2:%.*]] = load i8** %temp1
+; CHECK-NEXT:   call void (...)* @clang.arc.use(i8* [[VAL1]])
+; CHECK-NEXT:   @objc_retain(i8* [[VAL2]])
+; CHECK-NEXT:   @objc_release(i8* [[VAL1]])
+; CHECK-NEXT:   @objc_autorelease(i8* %x)
+; CHECK-NEXT:   @objc_release(i8* [[VAL2]])
+; CHECK-NEXT:   store i8* %x, i8** %out
+; CHECK-NEXT:   ret void
+; CHECK-NEXT: }
+define void @test0a(i8** %out, i8* %x, i8* %y) {
+entry:
+  %temp0 = alloca i8*, align 8
+  %temp1 = alloca i8*, align 8
+  %0 = call i8* @objc_retain(i8* %x) nounwind
+  %1 = call i8* @objc_retain(i8* %y) nounwind
+  store i8* %y, i8** %temp0
+  call void @test0_helper(i8* %x, i8** %temp0)
+  %val1 = load i8** %temp0
+  %2 = call i8* @objc_retain(i8* %val1) nounwind
+  call void (...)* @clang.arc.use(i8* %y) nounwind
+  call void @objc_release(i8* %y) nounwind, !clang.imprecise_release !0
+  store i8* %val1, i8** %temp1
+  call void @test0_helper(i8* %x, i8** %temp1)
+  %val2 = load i8** %temp1
+  %3 = call i8* @objc_retain(i8* %val2) nounwind
+  call void (...)* @clang.arc.use(i8* %val1) nounwind
+  call void @objc_release(i8* %val1) nounwind, !clang.imprecise_release !0
+  %4 = call i8* @objc_retain(i8* %x) nounwind
+  %5 = call i8* @objc_autorelease(i8* %x) nounwind
+  store i8* %x, i8** %out
+  call void @objc_release(i8* %val2) nounwind, !clang.imprecise_release !0
+  call void @objc_release(i8* %x) nounwind, !clang.imprecise_release !0
+  ret void
+}
+
+
+!0 = metadata !{}
+
diff --git a/test/Transforms/ObjCARC/invoke.ll b/test/Transforms/ObjCARC/invoke.ll
index f528b4a..9510f2e 100644
--- a/test/Transforms/ObjCARC/invoke.ll
+++ b/test/Transforms/ObjCARC/invoke.ll
@@ -17,6 +17,7 @@ declare i8* @returner()
 ; CHECK: lpad:
 ; CHECK:   call void @objc_release(i8* %zipFile) [[NUW]], !clang.imprecise_release !0
 ; CHECK:   ret void
+; CHECK-NEXT: }
 define void @test0(i8* %zipFile) {
 entry:
   call i8* @objc_retain(i8* %zipFile) nounwind
@@ -48,6 +49,7 @@ lpad:                                             ; preds = %entry
 ; CHECK:   br label %done
 ; CHECK: done:
 ; CHECK-NEXT: ret void
+; CHECK-NEXT: }
 define void @test1(i8* %zipFile) {
 entry:
   call i8* @objc_retain(i8* %zipFile) nounwind
@@ -110,6 +112,7 @@ finally.rethrow:                                  ; preds = %invoke.cont, %entry
 ; CHECK: if.end:
 ; CHECK-NEXT: call void @objc_release(i8* %p) [[NUW]]
 ; CHECK-NEXT: ret void
+; CHECK-NEXT: }
 define void @test3(i8* %p, i1 %b) {
 entry:
   %0 = call i8* @objc_retain(i8* %p)
@@ -145,6 +148,7 @@ if.end:
 ; CHECK: if.end:
 ; CHECK-NEXT: call void @objc_release(i8* %p) [[NUW]]
 ; CHECK-NEXT: ret void
+; CHECK-NEXT: }
 define void @test4(i8* %p, i1 %b) {
 entry:
   %0 = call i8* @objc_retain(i8* %p)
diff --git a/test/Transforms/ObjCARC/move-and-merge-autorelease.ll b/test/Transforms/ObjCARC/move-and-merge-autorelease.ll
index 8462c70..e5d2f07 100644
--- a/test/Transforms/ObjCARC/move-and-merge-autorelease.ll
+++ b/test/Transforms/ObjCARC/move-and-merge-autorelease.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -objc-arc < %s | FileCheck %s
+; RUN: opt -S -objc-arc -objc-arc-contract < %s | FileCheck %s
 
 ; The optimizer should be able to move the autorelease past two phi nodes
 ; and fold it with the release in bb65.
diff --git a/test/Transforms/ObjCARC/retain-block-escape-analysis.ll b/test/Transforms/ObjCARC/retain-block-escape-analysis.ll
index 2c1ddce..8df05ad 100644
--- a/test/Transforms/ObjCARC/retain-block-escape-analysis.ll
+++ b/test/Transforms/ObjCARC/retain-block-escape-analysis.ll
@@ -23,6 +23,23 @@ define void @bitcasttest(i8* %storage, void (...)* %block)  {
 ; CHECK: define void @bitcasttest
 entry:
   %t1 = bitcast void (...)* %block to i8*
+; CHECK: tail call i8* @objc_retain
+  %t2 = tail call i8* @objc_retain(i8* %t1)
+; CHECK: tail call i8* @objc_retainBlock
+  %t3 = tail call i8* @objc_retainBlock(i8* %t1), !clang.arc.copy_on_escape !0
+  %t4 = bitcast i8* %storage to void (...)**
+  %t5 = bitcast i8* %t3 to void (...)*
+  store void (...)* %t5, void (...)** %t4, align 8
+; CHECK: call void @objc_release
+  call void @objc_release(i8* %t1)
+  ret void
+; CHECK: }
+}
+
+define void @bitcasttest_a(i8* %storage, void (...)* %block)  {
+; CHECK: define void @bitcasttest_a
+entry:
+  %t1 = bitcast void (...)* %block to i8*
 ; CHECK-NOT: tail call i8* @objc_retain
   %t2 = tail call i8* @objc_retain(i8* %t1)
 ; CHECK: tail call i8* @objc_retainBlock
@@ -31,14 +48,34 @@ entry:
   %t5 = bitcast i8* %t3 to void (...)*
   store void (...)* %t5, void (...)** %t4, align 8
 ; CHECK-NOT: call void @objc_release
-  call void @objc_release(i8* %t1)
+  call void @objc_release(i8* %t1), !clang.imprecise_release !0
   ret void
+; CHECK: }
 }
 
 define void @geptest(void (...)** %storage_array, void (...)* %block)  {
 ; CHECK: define void @geptest
 entry:
   %t1 = bitcast void (...)* %block to i8*
+; CHECK: tail call i8* @objc_retain
+  %t2 = tail call i8* @objc_retain(i8* %t1)
+; CHECK: tail call i8* @objc_retainBlock
+  %t3 = tail call i8* @objc_retainBlock(i8* %t1), !clang.arc.copy_on_escape !0
+  %t4 = bitcast i8* %t3 to void (...)*
+  
+  %storage = getelementptr inbounds void (...)** %storage_array, i64 0
+  
+  store void (...)* %t4, void (...)** %storage, align 8
+; CHECK: call void @objc_release
+  call void @objc_release(i8* %t1)
+  ret void
+; CHECK: }
+}
+
+define void @geptest_a(void (...)** %storage_array, void (...)* %block)  {
+; CHECK: define void @geptest_a
+entry:
+  %t1 = bitcast void (...)* %block to i8*
 ; CHECK-NOT: tail call i8* @objc_retain
   %t2 = tail call i8* @objc_retain(i8* %t1)
 ; CHECK: tail call i8* @objc_retainBlock
@@ -49,8 +86,9 @@ entry:
   
   store void (...)* %t4, void (...)** %storage, align 8
 ; CHECK-NOT: call void @objc_release
-  call void @objc_release(i8* %t1)
+  call void @objc_release(i8* %t1), !clang.imprecise_release !0
   ret void
+; CHECK: }
 }
 
 define void @selecttest(void (...)** %store1, void (...)** %store2,
@@ -58,6 +96,24 @@ define void @selecttest(void (...)** %store1, void (...)** %store2,
 ; CHECK: define void @selecttest
 entry:
   %t1 = bitcast void (...)* %block to i8*
+; CHECK: tail call i8* @objc_retain
+  %t2 = tail call i8* @objc_retain(i8* %t1)
+; CHECK: tail call i8* @objc_retainBlock
+  %t3 = tail call i8* @objc_retainBlock(i8* %t1), !clang.arc.copy_on_escape !0
+  %t4 = bitcast i8* %t3 to void (...)*
+  %store = select i1 undef, void (...)** %store1, void (...)** %store2
+  store void (...)* %t4, void (...)** %store, align 8
+; CHECK: call void @objc_release
+  call void @objc_release(i8* %t1)
+  ret void
+; CHECK: }
+}
+
+define void @selecttest_a(void (...)** %store1, void (...)** %store2,
+                          void (...)* %block) {
+; CHECK: define void @selecttest_a
+entry:
+  %t1 = bitcast void (...)* %block to i8*
 ; CHECK-NOT: tail call i8* @objc_retain
   %t2 = tail call i8* @objc_retain(i8* %t1)
 ; CHECK: tail call i8* @objc_retainBlock
@@ -66,8 +122,9 @@ entry:
   %store = select i1 undef, void (...)** %store1, void (...)** %store2
   store void (...)* %t4, void (...)** %store, align 8
 ; CHECK-NOT: call void @objc_release
-  call void @objc_release(i8* %t1)
+  call void @objc_release(i8* %t1), !clang.imprecise_release !0
   ret void
+; CHECK: }
 }
 
 define void @phinodetest(void (...)** %storage1,
@@ -76,6 +133,36 @@ define void @phinodetest(void (...)** %storage1,
 ; CHECK: define void @phinodetest
 entry:
   %t1 = bitcast void (...)* %block to i8*
+; CHECK: tail call i8* @objc_retain
+  %t2 = tail call i8* @objc_retain(i8* %t1)
+; CHECK: tail call i8* @objc_retainBlock
+  %t3 = tail call i8* @objc_retainBlock(i8* %t1), !clang.arc.copy_on_escape !0
+  %t4 = bitcast i8* %t3 to void (...)*
+  br i1 undef, label %store1_set, label %store2_set
+; CHECK: store1_set:
+
+store1_set:
+  br label %end
+
+store2_set:
+  br label %end
+
+end:
+; CHECK: end:
+  %storage = phi void (...)** [ %storage1, %store1_set ], [ %storage2, %store2_set]
+  store void (...)* %t4, void (...)** %storage, align 8
+; CHECK: call void @objc_release
+  call void @objc_release(i8* %t1)
+  ret void
+; CHECK: }
+}
+
+define void @phinodetest_a(void (...)** %storage1,
+                           void (...)** %storage2,
+                           void (...)* %block) {
+; CHECK: define void @phinodetest_a
+entry:
+  %t1 = bitcast void (...)* %block to i8*
 ; CHECK-NOT: tail call i8* @objc_retain
   %t2 = tail call i8* @objc_retain(i8* %t1)
 ; CHECK: tail call i8* @objc_retainBlock
@@ -93,10 +180,11 @@ end:
   %storage = phi void (...)** [ %storage1, %store1_set ], [ %storage2, %store2_set]
   store void (...)* %t4, void (...)** %storage, align 8
 ; CHECK-NOT: call void @objc_release
-  call void @objc_release(i8* %t1)
+  call void @objc_release(i8* %t1), !clang.imprecise_release !0
   ret void
 }
 
+
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; This test makes sure that we do not hang clang when visiting a use ;
 ; cycle caused by phi nodes during objc-arc analysis. *NOTE* This    ;
diff --git a/test/Transforms/ObjCARC/rv.ll b/test/Transforms/ObjCARC/rv.ll
index 589c60f..e857c9f 100644
--- a/test/Transforms/ObjCARC/rv.ll
+++ b/test/Transforms/ObjCARC/rv.ll
@@ -136,17 +136,6 @@ define i8* @test7b() {
   ret i8* %p
 }
 
-; Turn objc_retain into objc_retainAutoreleasedReturnValue if its operand
-; is a return value.
-
-; CHECK: define void @test8()
-; CHECK: tail call i8* @objc_retainAutoreleasedReturnValue(i8* %p)
-define void @test8() {
-  %p = call i8* @returner()
-  call i8* @objc_retain(i8* %p)
-  ret void
-}
-
 ; Don't apply the RV optimization to autorelease if there's no retain.
 
 ; CHECK: define i8* @test9(i8* %p)
@@ -235,45 +224,6 @@ define void @test15() {
   ret void
 }
 
-; Convert objc_retain to objc_retainAutoreleasedReturnValue if its
-; argument is a return value.
-
-; CHECK: define void @test16(
-; CHECK-NEXT: %y = call i8* @returner()
-; CHECK-NEXT: tail call i8* @objc_retainAutoreleasedReturnValue(i8* %y) [[NUW]]
-; CHECK-NEXT: ret void
-define void @test16() {
-  %y = call i8* @returner()
-  call i8* @objc_retain(i8* %y)
-  ret void
-}
-
-; Don't convert objc_retain to objc_retainAutoreleasedReturnValue if its
-; argument is not a return value.
-
-; CHECK: define void @test17(
-; CHECK-NEXT: tail call i8* @objc_retain(i8* %y) [[NUW]]
-; CHECK-NEXT: ret void
-define void @test17(i8* %y) {
-  call i8* @objc_retain(i8* %y)
-  ret void
-}
-
-; Don't Convert objc_retain to objc_retainAutoreleasedReturnValue if it
-; isn't next to the call providing its return value.
-
-; CHECK: define void @test18(
-; CHECK-NEXT: %y = call i8* @returner()
-; CHECK-NEXT: call void @callee()
-; CHECK-NEXT: tail call i8* @objc_retain(i8* %y) [[NUW]]
-; CHECK-NEXT: ret void
-define void @test18() {
-  %y = call i8* @returner()
-  call void @callee()
-  call i8* @objc_retain(i8* %y)
-  ret void
-}
-
 ; Delete autoreleaseRV+retainRV pairs.
 
 ; CHECK: define i8* @test19(i8* %p) {
diff --git a/test/Transforms/ObjCARC/tail-call-invariant-enforcement.ll b/test/Transforms/ObjCARC/tail-call-invariant-enforcement.ll
index 26cd677..1ec61c8 100644
--- a/test/Transforms/ObjCARC/tail-call-invariant-enforcement.ll
+++ b/test/Transforms/ObjCARC/tail-call-invariant-enforcement.ll
@@ -1,74 +1,89 @@
 ; RUN: opt -objc-arc -S < %s | FileCheck %s
 
-declare i8* @objc_release(i8* %x)
+declare void @objc_release(i8* %x)
 declare i8* @objc_retain(i8* %x)
 declare i8* @objc_autorelease(i8* %x)
 declare i8* @objc_autoreleaseReturnValue(i8* %x)
 declare i8* @objc_retainAutoreleasedReturnValue(i8* %x)
+declare i8* @tmp(i8*)
 
 ; Never tail call objc_autorelease.
-define i8* @test0(i8* %x) {
+
+; CHECK: define i8* @test0(i8* %x) [[NUW:#[0-9]+]] {
+; CHECK: %tmp0 = call i8* @objc_autorelease(i8* %x) [[NUW]]
+; CHECK: %tmp1 = call i8* @objc_autorelease(i8* %x) [[NUW]]
+; CHECK: }
+define i8* @test0(i8* %x) nounwind {
 entry:
-  ; CHECK: %tmp0 = call i8* @objc_autorelease(i8* %x)
   %tmp0 = call i8* @objc_autorelease(i8* %x)
-  ; CHECK: %tmp1 = call i8* @objc_autorelease(i8* %x)
   %tmp1 = tail call i8* @objc_autorelease(i8* %x)
 
   ret i8* %x
 }
 
 ; Always tail call autoreleaseReturnValue.
-define i8* @test1(i8* %x) {
+
+; CHECK: define i8* @test1(i8* %x) [[NUW]] {
+; CHECK: %tmp0 = tail call i8* @objc_autoreleaseReturnValue(i8* %x) [[NUW]]
+; CHECK: %tmp1 = tail call i8* @objc_autoreleaseReturnValue(i8* %x) [[NUW]]
+; CHECK: }
+define i8* @test1(i8* %x) nounwind {
 entry:
-  ; CHECK: %tmp0 = tail call i8* @objc_autoreleaseReturnValue(i8* %x)
   %tmp0 = call i8* @objc_autoreleaseReturnValue(i8* %x)
-  ; CHECK: %tmp1 = tail call i8* @objc_autoreleaseReturnValue(i8* %x)
   %tmp1 = tail call i8* @objc_autoreleaseReturnValue(i8* %x)
   ret i8* %x
 }
 
 ; Always tail call objc_retain.
-define i8* @test2(i8* %x) {
+
+; CHECK: define i8* @test2(i8* %x) [[NUW]] {
+; CHECK: %tmp0 = tail call i8* @objc_retain(i8* %x) [[NUW]]
+; CHECK: %tmp1 = tail call i8* @objc_retain(i8* %x) [[NUW]]
+; CHECK: }
+define i8* @test2(i8* %x) nounwind {
 entry:
-  ; CHECK: %tmp0 = tail call i8* @objc_retain(i8* %x)
   %tmp0 = call i8* @objc_retain(i8* %x)
-  ; CHECK: %tmp1 = tail call i8* @objc_retain(i8* %x)
   %tmp1 = tail call i8* @objc_retain(i8* %x)
   ret i8* %x
 }
 
-define i8* @tmp(i8* %x) {
-  ret i8* %x
-}
-
 ; Always tail call objc_retainAutoreleasedReturnValue.
-define i8* @test3(i8* %x) {
+; CHECK: define i8* @test3(i8* %x) [[NUW]] {
+; CHECK: %tmp0 = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %y) [[NUW]]
+; CHECK: %tmp1 = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %z) [[NUW]]
+; CHECK: }
+define i8* @test3(i8* %x) nounwind {
 entry:
   %y = call i8* @tmp(i8* %x)
-  ; CHECK: %tmp0 = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %y)
   %tmp0 = call i8* @objc_retainAutoreleasedReturnValue(i8* %y)
   %z = call i8* @tmp(i8* %x)
-  ; CHECK: %tmp1 = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %z)
   %tmp1 = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %z)
   ret i8* %x
 }
 
 ; By itself, we should never change whether or not objc_release is tail called.
-define i8* @test4(i8* %x) {
+
+; CHECK: define void @test4(i8* %x) [[NUW]] {
+; CHECK: call void @objc_release(i8* %x) [[NUW]]
+; CHECK: tail call void @objc_release(i8* %x) [[NUW]]
+; CHECK: }
+define void @test4(i8* %x) nounwind {
 entry:
-  ; CHECK: %tmp0 = call i8* @objc_release(i8* %x)
-  %tmp0 = call i8* @objc_release(i8* %x)
-  ; CHECK: %tmp1 = tail call i8* @objc_release(i8* %x)
-  %tmp1 = tail call i8* @objc_release(i8* %x)
-  ret i8* %x
+  call void @objc_release(i8* %x)
+  tail call void @objc_release(i8* %x)
+  ret void
 }
 
 ; If we convert a tail called @objc_autoreleaseReturnValue to an
 ; @objc_autorelease, ensure that the tail call is removed.
-define i8* @test5(i8* %x) {
+; CHECK: define i8* @test5(i8* %x) [[NUW]] {
+; CHECK: %tmp0 = call i8* @objc_autorelease(i8* %x) [[NUW]]
+; CHECK: }
+define i8* @test5(i8* %x) nounwind {
 entry:
-  ; CHECK: %tmp0 = call i8* @objc_autorelease(i8* %x)
   %tmp0 = tail call i8* @objc_autoreleaseReturnValue(i8* %x)
   ret i8* %tmp0
 }
 
+; CHECK: attributes [[NUW]] = { nounwind }
+
diff --git a/test/Transforms/Reassociate/pr12245.ll b/test/Transforms/Reassociate/pr12245.ll
index 84098bd..e9b5355 100644
--- a/test/Transforms/Reassociate/pr12245.ll
+++ b/test/Transforms/Reassociate/pr12245.ll
@@ -6,36 +6,36 @@
 
 define i32 @fn2() nounwind uwtable ssp {
 entry:
-  %0 = load i32* @a, align 4, !tbaa !0
+  %0 = load i32* @a, align 4
   %dec = add nsw i32 %0, -1
-  store i32 %dec, i32* @a, align 4, !tbaa !0
-  %1 = load i32* @d, align 4, !tbaa !0
+  store i32 %dec, i32* @a, align 4
+  %1 = load i32* @d, align 4
   %sub = sub nsw i32 %dec, %1
-  store i32 %sub, i32* @d, align 4, !tbaa !0
-  %2 = load i32* @a, align 4, !tbaa !0
+  store i32 %sub, i32* @d, align 4
+  %2 = load i32* @a, align 4
   %dec1 = add nsw i32 %2, -1
-  store i32 %dec1, i32* @a, align 4, !tbaa !0
-  %3 = load i32* @d, align 4, !tbaa !0
+  store i32 %dec1, i32* @a, align 4
+  %3 = load i32* @d, align 4
   %sub2 = sub nsw i32 %dec1, %3
-  store i32 %sub2, i32* @d, align 4, !tbaa !0
-  %4 = load i32* @a, align 4, !tbaa !0
+  store i32 %sub2, i32* @d, align 4
+  %4 = load i32* @a, align 4
   %dec3 = add nsw i32 %4, -1
-  store i32 %dec3, i32* @a, align 4, !tbaa !0
-  %5 = load i32* @d, align 4, !tbaa !0
+  store i32 %dec3, i32* @a, align 4
+  %5 = load i32* @d, align 4
   %sub4 = sub nsw i32 %dec3, %5
-  store i32 %sub4, i32* @d, align 4, !tbaa !0
-  %6 = load i32* @a, align 4, !tbaa !0
+  store i32 %sub4, i32* @d, align 4
+  %6 = load i32* @a, align 4
   %dec5 = add nsw i32 %6, -1
-  store i32 %dec5, i32* @a, align 4, !tbaa !0
-  %7 = load i32* @d, align 4, !tbaa !0
+  store i32 %dec5, i32* @a, align 4
+  %7 = load i32* @d, align 4
   %sub6 = sub nsw i32 %dec5, %7
-  store i32 %sub6, i32* @d, align 4, !tbaa !0
-  %8 = load i32* @a, align 4, !tbaa !0
+  store i32 %sub6, i32* @d, align 4
+  %8 = load i32* @a, align 4
   %dec7 = add nsw i32 %8, -1
-  store i32 %dec7, i32* @a, align 4, !tbaa !0
-  %9 = load i32* @d, align 4, !tbaa !0
+  store i32 %dec7, i32* @a, align 4
+  %9 = load i32* @d, align 4
   %sub8 = sub nsw i32 %dec7, %9
-  store i32 %sub8, i32* @d, align 4, !tbaa !0
+  store i32 %sub8, i32* @d, align 4
   ret i32 0
 }
 
@@ -44,7 +44,3 @@ entry:
   %call = call i32 @fn2()
   ret i32 %call
 }
-
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/Transforms/Reassociate/xor_reassoc.ll b/test/Transforms/Reassociate/xor_reassoc.ll
index d371a9b..b9353c7 100644
--- a/test/Transforms/Reassociate/xor_reassoc.ll
+++ b/test/Transforms/Reassociate/xor_reassoc.ll
@@ -164,3 +164,30 @@ define void @xor_bug1() {
   %3 = and i64 undef, %2
   ret void
 }
+
+; The bug was that when the compiler optimizes "(x | c1)" ^ "(x & c2)", it may
+; swap the two xor-subexpressions if they are not in canonical order; however,
+; when the optimizer swapped the two sub-expressions, it forgot to swap the
+; cached values of c1 and c2 accordingly, which caused the problem.
+;
+define i32 @xor_bug2(i32, i32, i32, i32) {
+  %5 = mul i32 %0, 123
+  %6 = add i32 %2, 24
+  %7 = add i32 %1, 8
+  %8 = and i32 %1, 3456789
+  %9 = or i32 %8,  4567890
+  %10 = and i32 %1, 543210987
+  %11 = or i32 %1, 891034567
+  %12 = and i32 %2, 255
+  %13 = xor i32 %9, %10
+  %14 = xor i32 %11, %13
+  %15 = xor i32 %5, %14
+  %16 = and i32 %3, 255
+  %17 = xor i32 %16, 42
+  %18 = add i32 %6, %7
+  %19 = add i32 %18, %12
+  %20 = add i32 %19, %15
+  ret i32 %20
+;CHECK: @xor_bug2
+;CHECK: xor i32 %5, 891034567
+}
diff --git a/test/Transforms/SLPVectorizer/X86/barriercall.ll b/test/Transforms/SLPVectorizer/X86/barriercall.ll
new file mode 100644
index 0000000..04eb8f9
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/barriercall.ll
@@ -0,0 +1,32 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+;CHECK: @foo
+;CHECK: store <4 x i32>
+;CHECK: ret
+define i32 @foo(i32* nocapture %A, i32 %n) {
+entry:
+  %call = tail call i32 (...)* @bar() #2
+  %mul = mul nsw i32 %n, 5
+  %add = add nsw i32 %mul, 9
+  store i32 %add, i32* %A, align 4
+  %mul1 = mul nsw i32 %n, 9
+  %add2 = add nsw i32 %mul1, 9
+  %arrayidx3 = getelementptr inbounds i32* %A, i64 1
+  store i32 %add2, i32* %arrayidx3, align 4
+  %mul4 = shl i32 %n, 3
+  %add5 = add nsw i32 %mul4, 9
+  %arrayidx6 = getelementptr inbounds i32* %A, i64 2
+  store i32 %add5, i32* %arrayidx6, align 4
+  %mul7 = mul nsw i32 %n, 10
+  %add8 = add nsw i32 %mul7, 9
+  %arrayidx9 = getelementptr inbounds i32* %A, i64 3
+  store i32 %add8, i32* %arrayidx9, align 4
+  ret i32 undef
+}
+
+  ; The stores that follow the call to @bar in @foo above can still be vectorized.
+
+declare i32 @bar(...)
diff --git a/test/Transforms/SLPVectorizer/X86/cast.ll b/test/Transforms/SLPVectorizer/X86/cast.ll
new file mode 100644
index 0000000..344dbbc
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/cast.ll
@@ -0,0 +1,38 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+; int foo(int * restrict A, char * restrict B) {
+;     A[0] = B[0];
+;     A[1] = B[1];
+;     A[2] = B[2];
+;     A[3] = B[3];
+; }
+;CHECK: @foo
+;CHECK: load <4 x i8>
+;CHECK: sext
+;CHECK: store <4 x i32>
+define i32 @foo(i32* noalias nocapture %A, i8* noalias nocapture %B) {
+entry:
+  %0 = load i8* %B, align 1
+  %conv = sext i8 %0 to i32
+  store i32 %conv, i32* %A, align 4
+  %arrayidx2 = getelementptr inbounds i8* %B, i64 1
+  %1 = load i8* %arrayidx2, align 1
+  %conv3 = sext i8 %1 to i32
+  %arrayidx4 = getelementptr inbounds i32* %A, i64 1
+  store i32 %conv3, i32* %arrayidx4, align 4
+  %arrayidx5 = getelementptr inbounds i8* %B, i64 2
+  %2 = load i8* %arrayidx5, align 1
+  %conv6 = sext i8 %2 to i32
+  %arrayidx7 = getelementptr inbounds i32* %A, i64 2
+  store i32 %conv6, i32* %arrayidx7, align 4
+  %arrayidx8 = getelementptr inbounds i8* %B, i64 3
+  %3 = load i8* %arrayidx8, align 1
+  %conv9 = sext i8 %3 to i32
+  %arrayidx10 = getelementptr inbounds i32* %A, i64 3
+  store i32 %conv9, i32* %arrayidx10, align 4
+  ret i32 undef
+}
+
diff --git a/test/Transforms/SLPVectorizer/X86/compare-reduce.ll b/test/Transforms/SLPVectorizer/X86/compare-reduce.ll
new file mode 100644
index 0000000..05f8e61
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/compare-reduce.ll
@@ -0,0 +1,53 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.7.0"
+
+@.str = private unnamed_addr constant [6 x i8] c"bingo\00", align 1
+
+;CHECK: @reduce_compare
+;CHECK: load <2 x double>
+;CHECK: fmul <2 x double>
+;CHECK: fmul <2 x double>
+;CHECK: fadd <2 x double>
+;CHECK: extractelement
+;CHECK: extractelement
+;CHECK: ret
+define void @reduce_compare(double* nocapture %A, i32 %n) {
+entry:
+  %conv = sitofp i32 %n to double
+  br label %for.body
+
+for.body:                                         ; preds = %for.inc, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
+  %0 = shl nsw i64 %indvars.iv, 1
+  %arrayidx = getelementptr inbounds double* %A, i64 %0
+  %1 = load double* %arrayidx, align 8
+  %mul1 = fmul double %conv, %1
+  %mul2 = fmul double %mul1, 7.000000e+00
+  %add = fadd double %mul2, 5.000000e+00
+  %2 = or i64 %0, 1
+  %arrayidx6 = getelementptr inbounds double* %A, i64 %2
+  %3 = load double* %arrayidx6, align 8
+  %mul8 = fmul double %conv, %3
+  %mul9 = fmul double %mul8, 4.000000e+00
+  %add10 = fadd double %mul9, 9.000000e+00
+  %cmp11 = fcmp ogt double %add, %add10
+  br i1 %cmp11, label %if.then, label %for.inc
+
+if.then:                                          ; preds = %for.body
+  %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([6 x i8]* @.str, i64 0, i64 0))
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body, %if.then
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 100
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.inc
+  ret void
+}
+
+declare i32 @printf(i8* nocapture, ...)
+
diff --git a/test/Transforms/SLPVectorizer/X86/diamond.ll b/test/Transforms/SLPVectorizer/X86/diamond.ll
new file mode 100644
index 0000000..8e85cb6
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/diamond.ll
@@ -0,0 +1,78 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+; int foo(int * restrict B,  int * restrict A, int n, int m) {
+;   B[0] = n * A[0] + m * A[0];
+;   B[1] = n * A[1] + m * A[1];
+;   B[2] = n * A[2] + m * A[2];
+;   B[3] = n * A[3] + m * A[3];
+;   return 0;
+; }
+
+; CHECK: @foo
+; CHECK: load <4 x i32>
+; CHECK: mul <4 x i32>
+; CHECK: store <4 x i32>
+; CHECK: ret
+define i32 @foo(i32* noalias nocapture %B, i32* noalias nocapture %A, i32 %n, i32 %m) #0 {
+entry:
+  %0 = load i32* %A, align 4
+  %mul238 = add i32 %m, %n
+  %add = mul i32 %0, %mul238
+  store i32 %add, i32* %B, align 4
+  %arrayidx4 = getelementptr inbounds i32* %A, i64 1
+  %1 = load i32* %arrayidx4, align 4
+  %add8 = mul i32 %1, %mul238
+  %arrayidx9 = getelementptr inbounds i32* %B, i64 1
+  store i32 %add8, i32* %arrayidx9, align 4
+  %arrayidx10 = getelementptr inbounds i32* %A, i64 2
+  %2 = load i32* %arrayidx10, align 4
+  %add14 = mul i32 %2, %mul238
+  %arrayidx15 = getelementptr inbounds i32* %B, i64 2
+  store i32 %add14, i32* %arrayidx15, align 4
+  %arrayidx16 = getelementptr inbounds i32* %A, i64 3
+  %3 = load i32* %arrayidx16, align 4
+  %add20 = mul i32 %3, %mul238
+  %arrayidx21 = getelementptr inbounds i32* %B, i64 3
+  store i32 %add20, i32* %arrayidx21, align 4
+  ret i32 0
+}
+
+
+; int foo_fail(int * restrict B,  int * restrict A, int n, int m) {
+;   B[0] = n * A[0] + m * A[0];
+;   B[1] = n * A[1] + m * A[1];
+;   B[2] = n * A[2] + m * A[2];
+;   B[3] = n * A[3] + m * A[3];
+;   return A[0];
+; }
+
+; CHECK: @foo_fail
+; CHECK-NOT: load <4 x i32>
+; CHECK: ret
+define i32 @foo_fail(i32* noalias nocapture %B, i32* noalias nocapture %A, i32 %n, i32 %m) {
+entry:
+  %0 = load i32* %A, align 4
+  %mul238 = add i32 %m, %n
+  %add = mul i32 %0, %mul238
+  store i32 %add, i32* %B, align 4
+  %arrayidx4 = getelementptr inbounds i32* %A, i64 1
+  %1 = load i32* %arrayidx4, align 4
+  %add8 = mul i32 %1, %mul238
+  %arrayidx9 = getelementptr inbounds i32* %B, i64 1
+  store i32 %add8, i32* %arrayidx9, align 4
+  %arrayidx10 = getelementptr inbounds i32* %A, i64 2
+  %2 = load i32* %arrayidx10, align 4
+  %add14 = mul i32 %2, %mul238
+  %arrayidx15 = getelementptr inbounds i32* %B, i64 2
+  store i32 %add14, i32* %arrayidx15, align 4
+  %arrayidx16 = getelementptr inbounds i32* %A, i64 3
+  %3 = load i32* %arrayidx16, align 4
+  %add20 = mul i32 %3, %mul238
+  %arrayidx21 = getelementptr inbounds i32* %B, i64 3
+  store i32 %add20, i32* %arrayidx21, align 4
+  ret i32 %0  ;<--------- This value has multiple users and can't be vectorized.
+}
+
diff --git a/test/Transforms/SLPVectorizer/X86/flag.ll b/test/Transforms/SLPVectorizer/X86/flag.ll
new file mode 100644
index 0000000..3ca5407
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/flag.ll
@@ -0,0 +1,51 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -slp-threshold=1000 -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+; Check that the command line flag works.
+;CHECK:rollable
+;CHECK-NOT:load <4 x i32>
+;CHECK: ret
+
+define i32 @rollable(i32* noalias nocapture %in, i32* noalias nocapture %out, i64 %n) {
+  %1 = icmp eq i64 %n, 0
+  br i1 %1, label %._crit_edge, label %.lr.ph
+
+.lr.ph:                                           ; preds = %0, %.lr.ph
+  %i.019 = phi i64 [ %26, %.lr.ph ], [ 0, %0 ]
+  %2 = shl i64 %i.019, 2
+  %3 = getelementptr inbounds i32* %in, i64 %2
+  %4 = load i32* %3, align 4
+  %5 = or i64 %2, 1
+  %6 = getelementptr inbounds i32* %in, i64 %5
+  %7 = load i32* %6, align 4
+  %8 = or i64 %2, 2
+  %9 = getelementptr inbounds i32* %in, i64 %8
+  %10 = load i32* %9, align 4
+  %11 = or i64 %2, 3
+  %12 = getelementptr inbounds i32* %in, i64 %11
+  %13 = load i32* %12, align 4
+  %14 = mul i32 %4, 7
+  %15 = add i32 %14, 7
+  %16 = mul i32 %7, 7
+  %17 = add i32 %16, 14
+  %18 = mul i32 %10, 7
+  %19 = add i32 %18, 21
+  %20 = mul i32 %13, 7
+  %21 = add i32 %20, 28
+  %22 = getelementptr inbounds i32* %out, i64 %2
+  store i32 %15, i32* %22, align 4
+  %23 = getelementptr inbounds i32* %out, i64 %5
+  store i32 %17, i32* %23, align 4
+  %24 = getelementptr inbounds i32* %out, i64 %8
+  store i32 %19, i32* %24, align 4
+  %25 = getelementptr inbounds i32* %out, i64 %11
+  store i32 %21, i32* %25, align 4
+  %26 = add i64 %i.019, 1
+  %exitcond = icmp eq i64 %26, %n
+  br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge:                                      ; preds = %.lr.ph, %0
+  ret i32 undef
+}
diff --git a/test/Transforms/SLPVectorizer/X86/hoist.ll b/test/Transforms/SLPVectorizer/X86/hoist.ll
new file mode 100644
index 0000000..5074cea
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/hoist.ll
@@ -0,0 +1,59 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=i386-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
+target triple = "i386-apple-macosx10.9.0"
+
+;int foo(int *A, int n, int k) {
+;  for (int i=0; i < 10000; i+=4) {
+;    A[i]   += n;
+;    A[i+1] += k;
+;    A[i+2] += n;
+;    A[i+3] += k;
+;  }
+;}
+
+; preheader:
+;CHECK: entry
+;CHECK-NEXT: insertelement
+;CHECK-NEXT: insertelement
+;CHECK-NEXT: insertelement
+;CHECK-NEXT: insertelement
+; loop body:
+;CHECK: phi
+;CHECK: load <4 x i32>
+;CHECK: add <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: ret
+define i32 @foo(i32* nocapture %A, i32 %n, i32 %k) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.024 = phi i32 [ 0, %entry ], [ %add10, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %A, i32 %i.024
+  %0 = load i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %n
+  store i32 %add, i32* %arrayidx, align 4
+  %add121 = or i32 %i.024, 1
+  %arrayidx2 = getelementptr inbounds i32* %A, i32 %add121
+  %1 = load i32* %arrayidx2, align 4
+  %add3 = add nsw i32 %1, %k
+  store i32 %add3, i32* %arrayidx2, align 4
+  %add422 = or i32 %i.024, 2
+  %arrayidx5 = getelementptr inbounds i32* %A, i32 %add422
+  %2 = load i32* %arrayidx5, align 4
+  %add6 = add nsw i32 %2, %n
+  store i32 %add6, i32* %arrayidx5, align 4
+  %add723 = or i32 %i.024, 3
+  %arrayidx8 = getelementptr inbounds i32* %A, i32 %add723
+  %3 = load i32* %arrayidx8, align 4
+  %add9 = add nsw i32 %3, %k
+  store i32 %add9, i32* %arrayidx8, align 4
+  %add10 = add nsw i32 %i.024, 4
+  %cmp = icmp slt i32 %add10, 10000
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  ret i32 undef
+}
+
diff --git a/test/Transforms/SLPVectorizer/X86/lit.local.cfg b/test/Transforms/SLPVectorizer/X86/lit.local.cfg
new file mode 100644
index 0000000..a8ad0f1
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'X86' in targets:
+    config.unsupported = True
+
diff --git a/test/Transforms/SLPVectorizer/X86/loopinvariant.ll b/test/Transforms/SLPVectorizer/X86/loopinvariant.ll
new file mode 100644
index 0000000..4a37fce
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/loopinvariant.ll
@@ -0,0 +1,69 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+;CHECK: @foo
+;CHECK: load <4 x i32>
+;CHECK: add <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: load <4 x i32>
+;CHECK: add <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: ret
+define i32 @foo(i32* nocapture %A, i32 %n) #0 {
+entry:
+  %cmp62 = icmp sgt i32 %n, 0
+  br i1 %cmp62, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %add1 = add nsw i32 %0, %n
+  store i32 %add1, i32* %arrayidx, align 4
+  %1 = or i64 %indvars.iv, 1
+  %arrayidx4 = getelementptr inbounds i32* %A, i64 %1
+  %2 = load i32* %arrayidx4, align 4
+  %add5 = add nsw i32 %2, %n
+  store i32 %add5, i32* %arrayidx4, align 4
+  %3 = or i64 %indvars.iv, 2
+  %arrayidx8 = getelementptr inbounds i32* %A, i64 %3
+  %4 = load i32* %arrayidx8, align 4
+  %add9 = add nsw i32 %4, %n
+  store i32 %add9, i32* %arrayidx8, align 4
+  %5 = or i64 %indvars.iv, 3
+  %arrayidx12 = getelementptr inbounds i32* %A, i64 %5
+  %6 = load i32* %arrayidx12, align 4
+  %add13 = add nsw i32 %6, %n
+  store i32 %add13, i32* %arrayidx12, align 4
+  %7 = or i64 %indvars.iv, 4
+  %arrayidx16 = getelementptr inbounds i32* %A, i64 %7
+  %8 = load i32* %arrayidx16, align 4
+  %add17 = add nsw i32 %8, %n
+  store i32 %add17, i32* %arrayidx16, align 4
+  %9 = or i64 %indvars.iv, 5
+  %arrayidx20 = getelementptr inbounds i32* %A, i64 %9
+  %10 = load i32* %arrayidx20, align 4
+  %add21 = add nsw i32 %10, %n
+  store i32 %add21, i32* %arrayidx20, align 4
+  %11 = or i64 %indvars.iv, 6
+  %arrayidx24 = getelementptr inbounds i32* %A, i64 %11
+  %12 = load i32* %arrayidx24, align 4
+  %add25 = add nsw i32 %12, %n
+  store i32 %add25, i32* %arrayidx24, align 4
+  %13 = or i64 %indvars.iv, 7
+  %arrayidx28 = getelementptr inbounds i32* %A, i64 %13
+  %14 = load i32* %arrayidx28, align 4
+  %add29 = add nsw i32 %14, %n
+  store i32 %add29, i32* %arrayidx28, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 8
+  %15 = trunc i64 %indvars.iv.next to i32
+  %cmp = icmp slt i32 %15, %n
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret i32 undef
+}
+
+attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/Transforms/SLPVectorizer/X86/multi_user.ll b/test/Transforms/SLPVectorizer/X86/multi_user.ll
new file mode 100644
index 0000000..aaa6063
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/multi_user.ll
@@ -0,0 +1,47 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.7.0"
+
+;int foo (int *A, int n) {
+;  A[0] += n * 5 + 7;
+;  A[1] += n * 5 + 8;
+;  A[2] += n * 5 + 9;
+;  A[3] += n * 5 + 10;
+;  A[4] += n * 5 + 11;
+;}
+
+;CHECK: @foo
+;CHECK: insertelement <4 x i32>
+;CHECK: load <4 x i32>
+;CHECK: add <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: ret
+define i32 @foo(i32* nocapture %A, i32 %n) {
+  %1 = mul nsw i32 %n, 5
+  %2 = add nsw i32 %1, 7
+  %3 = load i32* %A, align 4
+  %4 = add nsw i32 %2, %3
+  store i32 %4, i32* %A, align 4
+  %5 = add nsw i32 %1, 8
+  %6 = getelementptr inbounds i32* %A, i64 1
+  %7 = load i32* %6, align 4
+  %8 = add nsw i32 %5, %7
+  store i32 %8, i32* %6, align 4
+  %9 = add nsw i32 %1, 9
+  %10 = getelementptr inbounds i32* %A, i64 2
+  %11 = load i32* %10, align 4
+  %12 = add nsw i32 %9, %11
+  store i32 %12, i32* %10, align 4
+  %13 = add nsw i32 %1, 10
+  %14 = getelementptr inbounds i32* %A, i64 3
+  %15 = load i32* %14, align 4
+  %16 = add nsw i32 %13, %15
+  store i32 %16, i32* %14, align 4
+  %17 = add nsw i32 %1, 11
+  %18 = getelementptr inbounds i32* %A, i64 4
+  %19 = load i32* %18, align 4
+  %20 = add nsw i32 %17, %19
+  store i32 %20, i32* %18, align 4
+  ret i32 undef
+}
diff --git a/test/Transforms/SLPVectorizer/X86/reduction.ll b/test/Transforms/SLPVectorizer/X86/reduction.ll
new file mode 100644
index 0000000..70b7c3a
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/reduction.ll
@@ -0,0 +1,47 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=i386-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
+target triple = "i386-apple-macosx10.8.0"
+
+; int reduce(double *A, int n, int m) {
+;   double sum = 0, v1 = 2, v0 = 3;
+;   for (int i=0; i < n; ++i)
+;     sum += 7*A[i*2] + 7*A[i*2+1];
+;   return sum;
+; }
+
+;CHECK: reduce
+;CHECK: load <2 x double>
+;CHECK: fmul <2 x double>
+;CHECK: ret
+define i32 @reduce(double* nocapture %A, i32 %n, i32 %m) {
+entry:
+  %cmp13 = icmp sgt i32 %n, 0
+  br i1 %cmp13, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.015 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  %sum.014 = phi double [ %add6, %for.body ], [ 0.000000e+00, %entry ]
+  %mul = shl nsw i32 %i.015, 1
+  %arrayidx = getelementptr inbounds double* %A, i32 %mul
+  %0 = load double* %arrayidx, align 4
+  %mul1 = fmul double %0, 7.000000e+00
+  %add12 = or i32 %mul, 1
+  %arrayidx3 = getelementptr inbounds double* %A, i32 %add12
+  %1 = load double* %arrayidx3, align 4
+  %mul4 = fmul double %1, 7.000000e+00
+  %add5 = fadd double %mul1, %mul4
+  %add6 = fadd double %sum.014, %add5
+  %inc = add nsw i32 %i.015, 1
+  %exitcond = icmp eq i32 %inc, %n
+  br i1 %exitcond, label %for.cond.for.end_crit_edge, label %for.body
+
+for.cond.for.end_crit_edge:                       ; preds = %for.body
+  %phitmp = fptosi double %add6 to i32
+  br label %for.end
+
+for.end:                                          ; preds = %for.cond.for.end_crit_edge, %entry
+  %sum.0.lcssa = phi i32 [ %phitmp, %for.cond.for.end_crit_edge ], [ 0, %entry ]
+  ret i32 %sum.0.lcssa
+}
+
diff --git a/test/Transforms/SLPVectorizer/X86/reduction2.ll b/test/Transforms/SLPVectorizer/X86/reduction2.ll
new file mode 100644
index 0000000..7aa7d7e
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/reduction2.ll
@@ -0,0 +1,32 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=i386-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
+target triple = "i386-apple-macosx10.8.0"
+; Sums D[2*i]*D[2*i] + D[2*i+1]*D[2*i+1] for i in [0, 100); the two adjacent loads should become one <2 x double> load.
+;CHECK: @foo
+;CHECK: load <2 x double>
+;CHECK: ret
+define double @foo(double* nocapture %D) {
+  br label %1
+
+; <label>:1                                       ; preds = %1, %0
+  %i.02 = phi i32 [ 0, %0 ], [ %10, %1 ]
+  %sum.01 = phi double [ 0.000000e+00, %0 ], [ %9, %1 ]
+  %2 = shl nsw i32 %i.02, 1
+  %3 = getelementptr inbounds double* %D, i32 %2
+  %4 = load double* %3, align 4
+  %A4 = fmul double %4, %4
+  %5 = or i32 %2, 1
+  %6 = getelementptr inbounds double* %D, i32 %5
+  %7 = load double* %6, align 4
+  %A7 = fmul double %7, %7
+  %8 = fadd double %A4, %A7
+  %9 = fadd double %sum.01, %8
+  %10 = add nsw i32 %i.02, 1
+  %exitcond = icmp eq i32 %10, 100
+  br i1 %exitcond, label %11, label %1
+
+; <label>:11                                      ; preds = %1
+  ret double %9
+}
+
diff --git a/test/Transforms/SLPVectorizer/X86/saxpy.ll b/test/Transforms/SLPVectorizer/X86/saxpy.ll
new file mode 100644
index 0000000..b520913
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/saxpy.ll
@@ -0,0 +1,45 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+; SLP vectorization example from http://cs.stanford.edu/people/eschkufz/research/asplos291-schkufza.pdf
+;CHECK: SAXPY
+;CHECK: mul <4 x i32>
+;CHECK: ret
+
+define void @SAXPY(i32* noalias nocapture %x, i32* noalias nocapture %y, i32 %a, i64 %i) {
+  %1 = getelementptr inbounds i32* %x, i64 %i
+  %2 = load i32* %1, align 4
+  %3 = mul nsw i32 %2, %a
+  %4 = getelementptr inbounds i32* %y, i64 %i
+  %5 = load i32* %4, align 4
+  %6 = add nsw i32 %3, %5
+  store i32 %6, i32* %1, align 4
+  %7 = add i64 %i, 1
+  %8 = getelementptr inbounds i32* %x, i64 %7
+  %9 = load i32* %8, align 4
+  %10 = mul nsw i32 %9, %a
+  %11 = getelementptr inbounds i32* %y, i64 %7
+  %12 = load i32* %11, align 4
+  %13 = add nsw i32 %10, %12
+  store i32 %13, i32* %8, align 4
+  %14 = add i64 %i, 2
+  %15 = getelementptr inbounds i32* %x, i64 %14
+  %16 = load i32* %15, align 4
+  %17 = mul nsw i32 %16, %a
+  %18 = getelementptr inbounds i32* %y, i64 %14
+  %19 = load i32* %18, align 4
+  %20 = add nsw i32 %17, %19
+  store i32 %20, i32* %15, align 4
+  %21 = add i64 %i, 3
+  %22 = getelementptr inbounds i32* %x, i64 %21
+  %23 = load i32* %22, align 4
+  %24 = mul nsw i32 %23, %a
+  %25 = getelementptr inbounds i32* %y, i64 %21
+  %26 = load i32* %25, align 4
+  %27 = add nsw i32 %24, %26
+  store i32 %27, i32* %22, align 4
+  ret void
+}
+
diff --git a/test/Transforms/SLPVectorizer/X86/simple-loop.ll b/test/Transforms/SLPVectorizer/X86/simple-loop.ll
new file mode 100644
index 0000000..0111b94
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/simple-loop.ll
@@ -0,0 +1,100 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+;CHECK:rollable
+define i32 @rollable(i32* noalias nocapture %in, i32* noalias nocapture %out, i64 %n) {
+  %1 = icmp eq i64 %n, 0
+  br i1 %1, label %._crit_edge, label %.lr.ph
+
+.lr.ph:                                           ; preds = %0, %.lr.ph
+  %i.019 = phi i64 [ %26, %.lr.ph ], [ 0, %0 ]
+  %2 = shl i64 %i.019, 2
+  %3 = getelementptr inbounds i32* %in, i64 %2
+;CHECK:load <4 x i32>
+  %4 = load i32* %3, align 4
+  %5 = or i64 %2, 1
+  %6 = getelementptr inbounds i32* %in, i64 %5
+  %7 = load i32* %6, align 4
+  %8 = or i64 %2, 2
+  %9 = getelementptr inbounds i32* %in, i64 %8
+  %10 = load i32* %9, align 4
+  %11 = or i64 %2, 3
+  %12 = getelementptr inbounds i32* %in, i64 %11
+  %13 = load i32* %12, align 4
+;CHECK:mul <4 x i32>
+  %14 = mul i32 %4, 7
+;CHECK:add <4 x i32>
+  %15 = add i32 %14, 7
+  %16 = mul i32 %7, 7
+  %17 = add i32 %16, 14
+  %18 = mul i32 %10, 7
+  %19 = add i32 %18, 21
+  %20 = mul i32 %13, 7
+  %21 = add i32 %20, 28
+  %22 = getelementptr inbounds i32* %out, i64 %2
+;CHECK:store <4 x i32>
+  store i32 %15, i32* %22, align 4
+  %23 = getelementptr inbounds i32* %out, i64 %5
+  store i32 %17, i32* %23, align 4
+  %24 = getelementptr inbounds i32* %out, i64 %8
+  store i32 %19, i32* %24, align 4
+  %25 = getelementptr inbounds i32* %out, i64 %11
+  store i32 %21, i32* %25, align 4
+  %26 = add i64 %i.019, 1
+  %exitcond = icmp eq i64 %26, %n
+  br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge:                                      ; preds = %.lr.ph, %0
+;CHECK: ret
+  ret i32 undef
+}
+
+;CHECK:unrollable
+;CHECK-NOT: <4 x i32>
+;CHECK: ret
+define i32 @unrollable(i32* %in, i32* %out, i64 %n) nounwind ssp uwtable {
+  %1 = icmp eq i64 %n, 0
+  br i1 %1, label %._crit_edge, label %.lr.ph
+
+.lr.ph:                                           ; preds = %0, %.lr.ph
+  %i.019 = phi i64 [ %26, %.lr.ph ], [ 0, %0 ]
+  %2 = shl i64 %i.019, 2
+  %3 = getelementptr inbounds i32* %in, i64 %2
+  %4 = load i32* %3, align 4
+  %5 = or i64 %2, 1
+  %6 = getelementptr inbounds i32* %in, i64 %5
+  %7 = load i32* %6, align 4
+  %8 = or i64 %2, 2
+  %9 = getelementptr inbounds i32* %in, i64 %8
+  %10 = load i32* %9, align 4
+  %11 = or i64 %2, 3
+  %12 = getelementptr inbounds i32* %in, i64 %11
+  %13 = load i32* %12, align 4
+  %14 = mul i32 %4, 7
+  %15 = add i32 %14, 7
+  %16 = mul i32 %7, 7
+  %17 = add i32 %16, 14
+  %18 = mul i32 %10, 7
+  %19 = add i32 %18, 21
+  %20 = mul i32 %13, 7
+  %21 = add i32 %20, 28
+  %22 = getelementptr inbounds i32* %out, i64 %2
+  store i32 %15, i32* %22, align 4
+  %23 = getelementptr inbounds i32* %out, i64 %5
+  store i32 %17, i32* %23, align 4
+  %barrier = call i32 @goo(i32 0)                      ; <---------------- memory barrier.
+  %24 = getelementptr inbounds i32* %out, i64 %8
+  store i32 %19, i32* %24, align 4
+  %25 = getelementptr inbounds i32* %out, i64 %11
+  store i32 %21, i32* %25, align 4
+  %26 = add i64 %i.019, 1
+  %exitcond = icmp eq i64 %26, %n
+  br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge:                                      ; preds = %.lr.ph, %0
+  ret i32 undef
+}
+
+declare i32 @goo(i32)
diff --git a/test/Transforms/SLPVectorizer/X86/simplebb.ll b/test/Transforms/SLPVectorizer/X86/simplebb.ll
new file mode 100644
index 0000000..cd0b99e
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/simplebb.ll
@@ -0,0 +1,25 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+; Simple 3-pair chain with loads and stores: c[i] = a[i] * b[i] for i = 0, 1.
+; CHECK: test1
+; CHECK: store <2 x double>
+; CHECK: ret
+define void @test1(double* %a, double* %b, double* %c) {
+entry:
+  %i0 = load double* %a, align 8
+  %i1 = load double* %b, align 8
+  %mul = fmul double %i0, %i1
+  %arrayidx3 = getelementptr inbounds double* %a, i64 1
+  %i3 = load double* %arrayidx3, align 8
+  %arrayidx4 = getelementptr inbounds double* %b, i64 1
+  %i4 = load double* %arrayidx4, align 8
+  %mul5 = fmul double %i3, %i4
+  store double %mul, double* %c, align 8
+  %arrayidx5 = getelementptr inbounds double* %c, i64 1
+  store double %mul5, double* %arrayidx5, align 8
+  ret void
+}
+
diff --git a/test/Transforms/SLPVectorizer/X86/vector.ll b/test/Transforms/SLPVectorizer/X86/vector.ll
new file mode 100644
index 0000000..02a1897
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/vector.ll
@@ -0,0 +1,14 @@
+; RUN: opt < %s -slp-vectorizer -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+; Make sure that we neither crash on nor change code whose operands are already vectors.
+;CHECK: test
+;CHECK: icmp
+;CHECK: ret
+define void @test(<4 x i32> %in, <4 x i32> %in2) {
+  %k = icmp eq <4 x i32> %in, %in2
+  ret void
+}
+
diff --git a/test/Transforms/SLPVectorizer/lit.local.cfg b/test/Transforms/SLPVectorizer/lit.local.cfg
new file mode 100644
index 0000000..19eebc0
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/SROA/basictest.ll b/test/Transforms/SROA/basictest.ll
index 30dd217..8340322 100644
--- a/test/Transforms/SROA/basictest.ll
+++ b/test/Transforms/SROA/basictest.ll
@@ -1243,3 +1243,77 @@ entry:
   %v = load i32* %a
   ret i32 %v
 }
+
+define void @PR15674(i8* %data, i8* %src, i32 %size) {
+; Arrange (via control flow) to have unmerged stores of a particular width to
+; an alloca where we incrementally store from the end of the array toward the
+; beginning of the array. Ensure that the final integer store, despite being
+; convertible to the integer type that we end up promoting this alloca toward,
+; doesn't get widened to a full alloca store.
+; CHECK: @PR15674
+
+entry:
+  %tmp = alloca [4 x i8], align 1
+; CHECK: alloca i32
+
+  switch i32 %size, label %end [
+    i32 4, label %bb4
+    i32 3, label %bb3
+    i32 2, label %bb2
+    i32 1, label %bb1
+  ]
+
+bb4:
+  %src.gep3 = getelementptr inbounds i8* %src, i32 3
+  %src.3 = load i8* %src.gep3
+  %tmp.gep3 = getelementptr inbounds [4 x i8]* %tmp, i32 0, i32 3
+  store i8 %src.3, i8* %tmp.gep3
+; CHECK: store i8
+
+  br label %bb3
+
+bb3:
+  %src.gep2 = getelementptr inbounds i8* %src, i32 2
+  %src.2 = load i8* %src.gep2
+  %tmp.gep2 = getelementptr inbounds [4 x i8]* %tmp, i32 0, i32 2
+  store i8 %src.2, i8* %tmp.gep2
+; CHECK: store i8
+
+  br label %bb2
+
+bb2:
+  %src.gep1 = getelementptr inbounds i8* %src, i32 1
+  %src.1 = load i8* %src.gep1
+  %tmp.gep1 = getelementptr inbounds [4 x i8]* %tmp, i32 0, i32 1
+  store i8 %src.1, i8* %tmp.gep1
+; CHECK: store i8
+
+  br label %bb1
+
+bb1:
+  %src.gep0 = getelementptr inbounds i8* %src, i32 0
+  %src.0 = load i8* %src.gep0
+  %tmp.gep0 = getelementptr inbounds [4 x i8]* %tmp, i32 0, i32 0
+  store i8 %src.0, i8* %tmp.gep0
+; CHECK: store i8
+
+  br label %end
+
+end:
+  %tmp.raw = bitcast [4 x i8]* %tmp to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %data, i8* %tmp.raw, i32 %size, i32 1, i1 false)
+  ret void
+; CHECK: ret void
+}
+
+define void @PR15805(i1 %a, i1 %b) {
+; CHECK: @PR15805
+; CHECK: select i1 undef, i64* %c, i64* %c
+; CHECK: ret void
+
+  %c = alloca i64, align 8
+  %p.0.c = select i1 undef, i64* %c, i64* %c
+  %cond.in = select i1 undef, i64* %p.0.c, i64* %c
+  %cond = load i64* %cond.in, align 8
+  ret void
+}
diff --git a/test/Transforms/SROA/vector-promotion.ll b/test/Transforms/SROA/vector-promotion.ll
index 02f6d04..3336515 100644
--- a/test/Transforms/SROA/vector-promotion.ll
+++ b/test/Transforms/SROA/vector-promotion.ll
@@ -224,26 +224,26 @@ entry:
   %a.cast0 = bitcast i32* %a.gep0 to <2 x i32>*
   store <2 x i32> <i32 0, i32 0>, <2 x i32>* %a.cast0
 ; CHECK-NOT: store
-; CHECK:      %[[insert1:.*]] = shufflevector <4 x i32> <i32 0, i32 0, i32 undef, i32 undef>, <4 x i32> undef, <4 x i32> <i32 0, i32 1, {{.*}}>
+; CHECK:     select <4 x i1> <i1 true, i1 true, i1 false, i1 false> 
 
   %a.gep1 = getelementptr <4 x i32>* %a, i32 0, i32 1
   %a.cast1 = bitcast i32* %a.gep1 to <2 x i32>*
   store <2 x i32> <i32 1, i32 1>, <2 x i32>* %a.cast1
-; CHECK-NEXT: %[[insert2:.*]] = shufflevector <4 x i32> <i32 undef, i32 1, i32 1, i32 undef>, <4 x i32> %[[insert1]], <4 x i32> <i32 4, i32 1, i32 2, {{.*}}>
+; CHECK-NEXT: select <4 x i1> <i1 false, i1 true, i1 true, i1 false>
 
   %a.gep2 = getelementptr <4 x i32>* %a, i32 0, i32 2
   %a.cast2 = bitcast i32* %a.gep2 to <2 x i32>*
   store <2 x i32> <i32 2, i32 2>, <2 x i32>* %a.cast2
-; CHECK-NEXT: %[[insert3:.*]] = shufflevector <4 x i32> <i32 undef, i32 undef, i32 2, i32 2>, <4 x i32> %[[insert2]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+; CHECK-NEXT: select <4 x i1> <i1 false, i1 false, i1 true, i1 true>
 
   %a.gep3 = getelementptr <4 x i32>* %a, i32 0, i32 3
   store i32 3, i32* %a.gep3
-; CHECK-NEXT: %[[insert4:.*]] = insertelement <4 x i32> %[[insert3]], i32 3, i32 3
+; CHECK-NEXT: insertelement <4 x i32>
 
   %ret = load <4 x i32>* %a
 
   ret <4 x i32> %ret
-; CHECK-NEXT: ret <4 x i32> %[[insert4]]
+; CHECK-NEXT: ret <4 x i32> 
 }
 
 define <4 x i32> @test_subvec_load() {
@@ -291,27 +291,27 @@ entry:
   %a.cast0 = bitcast float* %a.gep0 to i8*
   call void @llvm.memset.p0i8.i32(i8* %a.cast0, i8 0, i32 8, i32 0, i1 false)
 ; CHECK-NOT: store
-; CHECK:      %[[insert1:.*]] = shufflevector <4 x float> <float 0.000000e+00, float 0.000000e+00, float undef, float undef>, <4 x float> undef, <4 x i32> <i32 0, i32 1, {{.*}}>
+; CHECK: select <4 x i1> <i1 true, i1 true, i1 false, i1 false>
 
   %a.gep1 = getelementptr <4 x float>* %a, i32 0, i32 1
   %a.cast1 = bitcast float* %a.gep1 to i8*
   call void @llvm.memset.p0i8.i32(i8* %a.cast1, i8 1, i32 8, i32 0, i1 false)
-; CHECK-NEXT: %[[insert2:.*]] = shufflevector <4 x float> <float undef, float 0x3820202020000000, float 0x3820202020000000, float undef>, <4 x float> %[[insert1]], <4 x i32> <i32 4, i32 1, i32 2, {{.*}}>
+; CHECK-NEXT: select <4 x i1> <i1 false, i1 true, i1 true, i1 false>
 
   %a.gep2 = getelementptr <4 x float>* %a, i32 0, i32 2
   %a.cast2 = bitcast float* %a.gep2 to i8*
   call void @llvm.memset.p0i8.i32(i8* %a.cast2, i8 3, i32 8, i32 0, i1 false)
-; CHECK-NEXT: %[[insert3:.*]] = shufflevector <4 x float> <float undef, float undef, float 0x3860606060000000, float 0x3860606060000000>, <4 x float> %[[insert2]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+; CHECK-NEXT: select <4 x i1> <i1 false, i1 false, i1 true, i1 true>
 
   %a.gep3 = getelementptr <4 x float>* %a, i32 0, i32 3
   %a.cast3 = bitcast float* %a.gep3 to i8*
   call void @llvm.memset.p0i8.i32(i8* %a.cast3, i8 7, i32 4, i32 0, i1 false)
-; CHECK-NEXT: %[[insert4:.*]] = insertelement <4 x float> %[[insert3]], float 0x38E0E0E0E0000000, i32 3
+; CHECK-NEXT: insertelement <4 x float> 
 
   %ret = load <4 x float>* %a
 
   ret <4 x float> %ret
-; CHECK-NEXT: ret <4 x float> %[[insert4]]
+; CHECK-NEXT: ret <4 x float> 
 }
 
 define <4 x float> @test_subvec_memcpy(i8* %x, i8* %y, i8* %z, i8* %f, i8* %out) {
@@ -326,7 +326,7 @@ entry:
 ; CHECK:      %[[xptr:.*]] = bitcast i8* %x to <2 x float>*
 ; CHECK-NEXT: %[[x:.*]] = load <2 x float>* %[[xptr]]
 ; CHECK-NEXT: %[[expand_x:.*]] = shufflevector <2 x float> %[[x]], <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
-; CHECK-NEXT: %[[insert_x:.*]] = shufflevector <4 x float> %[[expand_x]], <4 x float> undef, <4 x i32> <i32 0, i32 1, {{.*}}>
+; CHECK-NEXT: select <4 x i1> <i1 true, i1 true, i1 false, i1 false>  
 
   %a.gep1 = getelementptr <4 x float>* %a, i32 0, i32 1
   %a.cast1 = bitcast float* %a.gep1 to i8*
@@ -334,7 +334,7 @@ entry:
 ; CHECK-NEXT: %[[yptr:.*]] = bitcast i8* %y to <2 x float>*
 ; CHECK-NEXT: %[[y:.*]] = load <2 x float>* %[[yptr]]
 ; CHECK-NEXT: %[[expand_y:.*]] = shufflevector <2 x float> %[[y]], <2 x float> undef, <4 x i32> <i32 undef, i32 0, i32 1, i32 undef>
-; CHECK-NEXT: %[[insert_y:.*]] = shufflevector <4 x float> %[[expand_y]], <4 x float> %[[insert_x]], <4 x i32> <i32 4, i32 1, i32 2, {{.*}}>
+; CHECK-NEXT: select <4 x i1> <i1 false, i1 true, i1 true, i1 false>
 
   %a.gep2 = getelementptr <4 x float>* %a, i32 0, i32 2
   %a.cast2 = bitcast float* %a.gep2 to i8*
@@ -342,14 +342,14 @@ entry:
 ; CHECK-NEXT: %[[zptr:.*]] = bitcast i8* %z to <2 x float>*
 ; CHECK-NEXT: %[[z:.*]] = load <2 x float>* %[[zptr]]
 ; CHECK-NEXT: %[[expand_z:.*]] = shufflevector <2 x float> %[[z]], <2 x float> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
-; CHECK-NEXT: %[[insert_z:.*]] = shufflevector <4 x float> %[[expand_z]], <4 x float> %[[insert_y]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+; CHECK-NEXT: select <4 x i1> <i1 false, i1 false, i1 true, i1 true>
 
   %a.gep3 = getelementptr <4 x float>* %a, i32 0, i32 3
   %a.cast3 = bitcast float* %a.gep3 to i8*
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.cast3, i8* %f, i32 4, i32 0, i1 false)
 ; CHECK-NEXT: %[[fptr:.*]] = bitcast i8* %f to float*
 ; CHECK-NEXT: %[[f:.*]] = load float* %[[fptr]]
-; CHECK-NEXT: %[[insert_f:.*]] = insertelement <4 x float> %[[insert_z]], float %[[f]], i32 3
+; CHECK-NEXT: %[[insert_f:.*]] = insertelement <4 x float> 
 
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %out, i8* %a.cast2, i32 8, i32 0, i1 false)
 ; CHECK-NEXT: %[[outptr:.*]] = bitcast i8* %out to <2 x float>*
diff --git a/test/Transforms/ScalarRepl/dynamic-vector-gep.ll b/test/Transforms/ScalarRepl/dynamic-vector-gep.ll
deleted file mode 100644
index 565cd76..0000000
--- a/test/Transforms/ScalarRepl/dynamic-vector-gep.ll
+++ /dev/null
@@ -1,167 +0,0 @@
-; RUN: opt < %s -scalarrepl -S | FileCheck %s
-
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "x86_64-apple-darwin10.0.0"
-
-; CHECK: @test1
-; CHECK: %[[alloc:[\.a-z0-9]*]] = alloca <4 x float>
-; CHECK: store <4 x float> zeroinitializer, <4 x float>* %[[alloc]]
-; CHECK: memset
-; CHECK: extractelement <4 x float> zeroinitializer, i32 %idx2
-
-; Split the array but don't replace the memset with an insert
-; element as its not a constant offset.
-; The load, however, can be replaced with an extract element.
-define float @test1(i32 %idx1, i32 %idx2) {
-entry:
-  %0 = alloca [4 x <4 x float>]
-  store [4 x <4 x float>] zeroinitializer, [4 x <4 x float>]* %0
-  %ptr1 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx1
-  %cast = bitcast float* %ptr1 to i8*
-  call void @llvm.memset.p0i8.i32(i8* %cast, i8 0, i32 4, i32 4, i1 false)
-  %ptr2 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 1, i32 %idx2
-  %ret = load float* %ptr2
-  ret float %ret
-}
-
-; CHECK: @test2
-; CHECK: %[[ins:[\.a-z0-9]*]] = insertelement <4 x float> zeroinitializer, float 1.000000e+00, i32 %idx1
-; CHECK: extractelement <4 x float> %[[ins]], i32 %idx2
-
-; Do SROA on the array when it has dynamic vector reads and writes.
-define float @test2(i32 %idx1, i32 %idx2) {
-entry:
-  %0 = alloca [4 x <4 x float>]
-  store [4 x <4 x float>] zeroinitializer, [4 x <4 x float>]* %0
-  %ptr1 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx1
-  store float 1.0, float* %ptr1
-  %ptr2 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx2
-  %ret = load float* %ptr2
-  ret float %ret
-}
-
-; CHECK: test3
-; CHECK: %0 = alloca [4 x <4 x float>]
-; CHECK-NOT: alloca
-
-; Don't do SROA on a dynamically indexed vector when it spans
-; more than one array element of the alloca array it is within.
-define float @test3(i32 %idx1, i32 %idx2) {
-entry:
-  %0 = alloca [4 x <4 x float>]
-  store [4 x <4 x float>] zeroinitializer, [4 x <4 x float>]* %0
-  %bigvec = bitcast [4 x <4 x float>]* %0 to <16 x float>*
-  %ptr1 = getelementptr <16 x float>* %bigvec, i32 0, i32 %idx1
-  store float 1.0, float* %ptr1
-  %ptr2 = getelementptr <16 x float>* %bigvec, i32 0, i32 %idx2
-  %ret = load float* %ptr2
-  ret float %ret
-}
-
-; CHECK: test4
-; CHECK: insertelement <16 x float> zeroinitializer, float 1.000000e+00, i32 %idx1
-; CHECK: extractelement <16 x float> %0, i32 %idx2
-
-; Don't do SROA on a dynamically indexed vector when it spans
-; more than one array element of the alloca array it is within.
-; However, unlike test3, the store is on the vector type
-; so SROA will convert the large alloca into the large vector
-; type and do all accesses with insert/extract element
-define float @test4(i32 %idx1, i32 %idx2) {
-entry:
-  %0 = alloca [4 x <4 x float>]
-  %bigvec = bitcast [4 x <4 x float>]* %0 to <16 x float>*
-  store <16 x float> zeroinitializer, <16 x float>* %bigvec
-  %ptr1 = getelementptr <16 x float>* %bigvec, i32 0, i32 %idx1
-  store float 1.0, float* %ptr1
-  %ptr2 = getelementptr <16 x float>* %bigvec, i32 0, i32 %idx2
-  %ret = load float* %ptr2
-  ret float %ret
-}
-
-; CHECK: @test5
-; CHECK: %0 = alloca [4 x <4 x float>]
-; CHECK-NOT: alloca
-
-; Don't do SROA as the is a second dynamically indexed array
-; which may span multiple elements of the alloca.
-define float @test5(i32 %idx1, i32 %idx2) {
-entry:
-  %0 = alloca [4 x <4 x float>]
-  store [4 x <4 x float>] zeroinitializer, [4 x <4 x float>]* %0
-  %ptr1 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx1
-  %ptr2 = bitcast float* %ptr1 to [1 x <2 x float>]*
-  %ptr3 = getelementptr [1 x <2 x float>]* %ptr2, i32 0, i32 0, i32 %idx1
-  store float 1.0, float* %ptr1
-  %ptr4 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx2
-  %ret = load float* %ptr4
-  ret float %ret
-}
-
-; CHECK: test6
-; CHECK: insertelement <4 x float> zeroinitializer, float 1.000000e+00, i32 %idx1
-; CHECK: extractelement <4 x float> zeroinitializer, i32 %idx2
-
-%vector.pair = type { %vector.anon, %vector.anon }
-%vector.anon = type { %vector }
-%vector = type { <4 x float> }
-
-; Dynamic GEPs on vectors were crashing when the vector was inside a struct
-; as the new GEP for the new alloca might not include all the indices from
-; the original GEP, just the indices it needs to get to the correct offset of
-; some type, not necessarily the dynamic vector.
-; This test makes sure we don't have this crash.
-define float @test6(i32 %idx1, i32 %idx2) {
-entry:
-  %0 = alloca %vector.pair
-  store %vector.pair zeroinitializer, %vector.pair* %0
-  %ptr1 = getelementptr %vector.pair* %0, i32 0, i32 0, i32 0, i32 0, i32 %idx1
-  store float 1.0, float* %ptr1
-  %ptr2 = getelementptr %vector.pair* %0, i32 0, i32 1, i32 0, i32 0, i32 %idx2
-  %ret = load float* %ptr2
-  ret float %ret
-}
-
-; CHECK: test7
-; CHECK: insertelement <4 x float> zeroinitializer, float 1.000000e+00, i32 %idx1
-; CHECK: extractelement <4 x float> zeroinitializer, i32 %idx2
-
-%array.pair = type { [2 x %array.anon], %array.anon }
-%array.anon = type { [2 x %vector] }
-
-; This is the same as test6 and tests the same crash, but on arrays.
-define float @test7(i32 %idx1, i32 %idx2) {
-entry:
-  %0 = alloca %array.pair
-  store %array.pair zeroinitializer, %array.pair* %0
-  %ptr1 = getelementptr %array.pair* %0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 %idx1
-  store float 1.0, float* %ptr1
-  %ptr2 = getelementptr %array.pair* %0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 %idx2
-  %ret = load float* %ptr2
-  ret float %ret
-}
-
-; CHECK: test8
-; CHECK: %[[offset1:[\.a-z0-9]*]] = add i32 %idx1, 1
-; CHECK: %[[ins:[\.a-z0-9]*]] = insertelement <4 x float> zeroinitializer, float 1.000000e+00, i32 %[[offset1]]
-; CHECK: %[[offset2:[\.a-z0-9]*]] = add i32 %idx2, 2
-; CHECK: extractelement <4 x float> %[[ins]], i32 %[[offset2]]
-
-; Do SROA on the vector when it has dynamic vector reads and writes
-; from a non-zero offset.
-define float @test8(i32 %idx1, i32 %idx2) {
-entry:
-  %0 = alloca <4 x float>
-  store <4 x float> zeroinitializer, <4 x float>* %0
-  %ptr1 = getelementptr <4 x float>* %0, i32 0, i32 1
-  %ptr2 = bitcast float* %ptr1 to <3 x float>*
-  %ptr3 = getelementptr <3 x float>* %ptr2, i32 0, i32 %idx1
-  store float 1.0, float* %ptr3
-  %ptr4 = getelementptr <4 x float>* %0, i32 0, i32 2
-  %ptr5 = bitcast float* %ptr4 to <2 x float>*
-  %ptr6 = getelementptr <2 x float>* %ptr5, i32 0, i32 %idx2
-  %ret = load float* %ptr6
-  ret float %ret
-}
-
-declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i32, i1)
diff --git a/test/Transforms/SimplifyCFG/2003-08-17-BranchFold.ll b/test/Transforms/SimplifyCFG/2003-08-17-BranchFold.ll
index fc89b16..f6b068f 100644
--- a/test/Transforms/SimplifyCFG/2003-08-17-BranchFold.ll
+++ b/test/Transforms/SimplifyCFG/2003-08-17-BranchFold.ll
@@ -1,11 +1,11 @@
 ; This test checks to make sure that 'br X, Dest, Dest' is folded into 
 ; 'br Dest'
 
-; RUN: opt < %s -simplifycfg -S | \
-; RUN:   not grep "br i1 %c2"
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
 
 declare void @noop()
 
+; CHECK-NOT: br i1 %c2
 define i32 @test(i1 %c1, i1 %c2) {
 	call void @noop( )
 	br i1 %c1, label %A, label %Y
diff --git a/test/Transforms/SimplifyCFG/2003-08-17-BranchFoldOrdering.ll b/test/Transforms/SimplifyCFG/2003-08-17-BranchFoldOrdering.ll
index c1b032f..7804908 100644
--- a/test/Transforms/SimplifyCFG/2003-08-17-BranchFoldOrdering.ll
+++ b/test/Transforms/SimplifyCFG/2003-08-17-BranchFoldOrdering.ll
@@ -3,8 +3,9 @@
 ; due to the fact that the SimplifyCFG function does not use 
 ; the ConstantFoldTerminator function.
 
-; RUN: opt < %s -simplifycfg -S | \
-; RUN:   not grep "br i1 %c2"
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
+
+; CHECK-NOT: br i1 %c2
 
 declare void @noop()
 
diff --git a/test/Transforms/SimplifyCFG/2003-08-17-FoldSwitch-dbg.ll b/test/Transforms/SimplifyCFG/2003-08-17-FoldSwitch-dbg.ll
index af59ba0..fbfb100 100644
--- a/test/Transforms/SimplifyCFG/2003-08-17-FoldSwitch-dbg.ll
+++ b/test/Transforms/SimplifyCFG/2003-08-17-FoldSwitch-dbg.ll
@@ -1,7 +1,6 @@
-; RUN: opt < %s -simplifycfg -S | \
-; RUN:   not grep switch
-
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
 
+; CHECK-NOT: switch
         %llvm.dbg.anchor.type = type { i32, i32 }
         %llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8*, i1, i1, i8* }
 
diff --git a/test/Transforms/SimplifyCFG/2003-08-17-FoldSwitch.ll b/test/Transforms/SimplifyCFG/2003-08-17-FoldSwitch.ll
index 93f851c..8066596 100644
--- a/test/Transforms/SimplifyCFG/2003-08-17-FoldSwitch.ll
+++ b/test/Transforms/SimplifyCFG/2003-08-17-FoldSwitch.ll
@@ -1,5 +1,6 @@
-; RUN: opt < %s -simplifycfg -S | \
-; RUN:   not grep switch
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
+
+; CHECK-NOT: switch
 
 ; Test normal folding
 define i32 @test1() {
diff --git a/test/Transforms/SimplifyCFG/2005-12-03-IncorrectPHIFold.ll b/test/Transforms/SimplifyCFG/2005-12-03-IncorrectPHIFold.ll
index 760aa13..907261b 100644
--- a/test/Transforms/SimplifyCFG/2005-12-03-IncorrectPHIFold.ll
+++ b/test/Transforms/SimplifyCFG/2005-12-03-IncorrectPHIFold.ll
@@ -1,9 +1,7 @@
 ; Make sure this doesn't turn into an infinite loop
 
-; RUN: opt < %s -simplifycfg -constprop -simplifycfg |\
-; RUN:   llvm-dis | grep bb86
-; END.
-	
+; RUN: opt < %s -simplifycfg -constprop -simplifycfg | llvm-dis | FileCheck %s
+
 %struct.anon = type { i32, i32, i32, i32, [1024 x i8] }
 @_zero_ = external global %struct.anon*		; <%struct.anon**> [#uses=2]
 @_one_ = external global %struct.anon*		; <%struct.anon**> [#uses=4]
@@ -112,6 +110,7 @@ cond_true83:		; preds = %bb80
 	%tmp71 = call i32 @_do_compare( %struct.anon* null, %struct.anon* null, i32 0, i32 1 )		; <i32> [#uses=1]
 	%tmp76 = icmp eq i32 %tmp71, 0		; <i1> [#uses=1]
 	br i1 %tmp76, label %bb80.outer, label %bb80
+; CHECK: bb86
 bb86:		; preds = %bb80
 	call void @free_num( %struct.anon** %num )
 	%tmp88 = load %struct.anon** %guess		; <%struct.anon*> [#uses=1]
diff --git a/test/Transforms/SimplifyCFG/2006-10-19-UncondDiv.ll b/test/Transforms/SimplifyCFG/2006-10-19-UncondDiv.ll
index 009d1c8..8f21b9b 100644
--- a/test/Transforms/SimplifyCFG/2006-10-19-UncondDiv.ll
+++ b/test/Transforms/SimplifyCFG/2006-10-19-UncondDiv.ll
@@ -1,6 +1,7 @@
 ; PR957
-; RUN: opt < %s -simplifycfg -S | \
-; RUN:   not grep select
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
+
+; CHECK-NOT: select
 
 @G = extern_weak global i32
 
diff --git a/test/Transforms/SimplifyCFG/2007-11-22-InvokeNoUnwind.ll b/test/Transforms/SimplifyCFG/2007-11-22-InvokeNoUnwind.ll
index a20c46e..a90e072 100644
--- a/test/Transforms/SimplifyCFG/2007-11-22-InvokeNoUnwind.ll
+++ b/test/Transforms/SimplifyCFG/2007-11-22-InvokeNoUnwind.ll
@@ -1,4 +1,6 @@
-; RUN: opt < %s -simplifycfg -S | not grep invoke
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
+
+; CHECK-NOT: invoke
 
 declare i32 @func(i8*) nounwind
 
diff --git a/test/Transforms/SimplifyCFG/2008-01-02-hoist-fp-add.ll b/test/Transforms/SimplifyCFG/2008-01-02-hoist-fp-add.ll
index 14baeea..cf29b71 100644
--- a/test/Transforms/SimplifyCFG/2008-01-02-hoist-fp-add.ll
+++ b/test/Transforms/SimplifyCFG/2008-01-02-hoist-fp-add.ll
@@ -1,5 +1,5 @@
 ; The phi should not be eliminated in this case, because the fp op could trap.
-; RUN: opt < %s -simplifycfg -S | grep "= phi double"
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i686-apple-darwin8"
@@ -19,6 +19,7 @@ cond_true:		; preds = %entry
 	br label %cond_next
 
 cond_next:		; preds = %cond_true, %entry
+; CHECK: = phi double
 	%F.0 = phi double [ %tmp, %entry ], [ %tmp7, %cond_true ]		; <double> [#uses=1]
 	store double %F.0, double* @G, align 8
 	ret void
diff --git a/test/Transforms/SimplifyCFG/speculate-store.ll b/test/Transforms/SimplifyCFG/speculate-store.ll
new file mode 100644
index 0000000..8d7fe79
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/speculate-store.ll
@@ -0,0 +1,108 @@
+; RUN: opt -simplifycfg -S < %s | FileCheck %s
+
+define void @ifconvertstore(i32 %m, i32* %A, i32* %B, i32 %C, i32 %D) {
+entry:
+  %arrayidx = getelementptr inbounds i32* %B, i64 0
+  %0 = load i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %C
+  %arrayidx2 = getelementptr inbounds i32* %A, i64 0
+
+; First store to the location.
+  store i32 %add, i32* %arrayidx2, align 4
+  %arrayidx4 = getelementptr inbounds i32* %B, i64 1
+  %1 = load i32* %arrayidx4, align 4
+  %add5 = add nsw i32 %1, %D
+  %cmp6 = icmp sgt i32 %add5, %C
+  br i1 %cmp6, label %if.then, label %ret.end
+
+; Make sure we speculate stores like the following one. It is cheap compared to
+; a mispredicted branch.
+; CHECK: @ifconvertstore
+; CHECK: %add5.add = select i1 %cmp6, i32 %add5, i32 %add
+; CHECK: store i32 %add5.add, i32* %arrayidx2, align 4
+if.then:
+  store i32 %add5, i32* %arrayidx2, align 4
+  br label %ret.end
+
+ret.end:
+  ret void
+}
+
+define void @noifconvertstore1(i32 %m, i32* %A, i32* %B, i32 %C, i32 %D) {
+entry:
+  %arrayidx = getelementptr inbounds i32* %B, i64 0
+  %0 = load i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %C
+  %arrayidx2 = getelementptr inbounds i32* %A, i64 0
+
+; Store to a different location.
+  store i32 %add, i32* %arrayidx, align 4
+  %arrayidx4 = getelementptr inbounds i32* %B, i64 1
+  %1 = load i32* %arrayidx4, align 4
+  %add5 = add nsw i32 %1, %D
+  %cmp6 = icmp sgt i32 %add5, %C
+  br i1 %cmp6, label %if.then, label %ret.end
+
+; CHECK: @noifconvertstore1
+; CHECK-NOT: select
+if.then:
+  store i32 %add5, i32* %arrayidx2, align 4
+  br label %ret.end
+
+ret.end:
+  ret void
+}
+
+declare void @unknown_fun()
+
+define void @noifconvertstore2(i32 %m, i32* %A, i32* %B, i32 %C, i32 %D) {
+entry:
+  %arrayidx = getelementptr inbounds i32* %B, i64 0
+  %0 = load i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %C
+  %arrayidx2 = getelementptr inbounds i32* %A, i64 0
+
+; First store to the location.
+  store i32 %add, i32* %arrayidx2, align 4
+  call void @unknown_fun()
+  %arrayidx4 = getelementptr inbounds i32* %B, i64 1
+  %1 = load i32* %arrayidx4, align 4
+  %add5 = add nsw i32 %1, %D
+  %cmp6 = icmp sgt i32 %add5, %C
+  br i1 %cmp6, label %if.then, label %ret.end
+
+; CHECK: @noifconvertstore2
+; CHECK-NOT: select
+if.then:
+  store i32 %add5, i32* %arrayidx2, align 4
+  br label %ret.end
+
+ret.end:
+  ret void
+}
+
+define void @noifconvertstore_volatile(i32 %m, i32* %A, i32* %B, i32 %C, i32 %D) {
+entry:
+  %arrayidx = getelementptr inbounds i32* %B, i64 0
+  %0 = load i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %C
+  %arrayidx2 = getelementptr inbounds i32* %A, i64 0
+
+; First store to the location.
+  store i32 %add, i32* %arrayidx2, align 4
+  %arrayidx4 = getelementptr inbounds i32* %B, i64 1
+  %1 = load i32* %arrayidx4, align 4
+  %add5 = add nsw i32 %1, %D
+  %cmp6 = icmp sgt i32 %add5, %C
+  br i1 %cmp6, label %if.then, label %ret.end
+
+; Make sure we don't speculate volatile stores.
+; CHECK: @noifconvertstore_volatile
+; CHECK-NOT: select
+if.then:
+  store volatile i32 %add5, i32* %arrayidx2, align 4
+  br label %ret.end
+
+ret.end:
+  ret void
+}
diff --git a/test/Transforms/SimplifyCFG/switch-to-icmp.ll b/test/Transforms/SimplifyCFG/switch-to-icmp.ll
index 414f847..e9a6db4 100644
--- a/test/Transforms/SimplifyCFG/switch-to-icmp.ll
+++ b/test/Transforms/SimplifyCFG/switch-to-icmp.ll
@@ -37,3 +37,21 @@ lor.end:
 ; CHECK: @test2
 ; CHECK: %switch = icmp ult i32 %x, 2
 }
+
+define i32 @test3(i1 %flag) {
+entry:
+ switch i1 %flag, label %bad [
+   i1 true, label %good
+   i1 false, label %good
+ ]
+
+good:
+ ret i32 0
+
+bad:
+ ret i32 1
+
+; CHECK: @test3
+; CHECK: entry:
+; CHECK-NEXT: ret i32 0
+}
diff --git a/test/Verifier/2002-04-13-RetTypes.ll b/test/Verifier/2002-04-13-RetTypes.ll
index af46839..9385ebe 100644
--- a/test/Verifier/2002-04-13-RetTypes.ll
+++ b/test/Verifier/2002-04-13-RetTypes.ll
@@ -1,7 +1,8 @@
-; RUN: not llvm-as < %s 2>&1 | grep "value doesn't match function result type 'i32'"
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s
 
 ; Verify the operand type of the ret instructions in a function match the
-; delcared return type of the function they live in.
+; declared return type of the function they live in.
+; CHECK: value doesn't match function result type 'i32'
 ;
 
 define i32 @testfunc() {
diff --git a/test/Verifier/2002-11-05-GetelementptrPointers.ll b/test/Verifier/2002-11-05-GetelementptrPointers.ll
index 108ae5f..66b233e 100644
--- a/test/Verifier/2002-11-05-GetelementptrPointers.ll
+++ b/test/Verifier/2002-11-05-GetelementptrPointers.ll
@@ -1,4 +1,5 @@
-; RUN: not llvm-as < %s 2>&1 | grep "invalid getelementptr indices"
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s
+; CHECK: invalid getelementptr indices
 
 ; This testcase is invalid because we are indexing into a pointer that is 
 ; contained WITHIN a structure.
diff --git a/test/Verifier/2006-07-11-StoreStruct.ll b/test/Verifier/2006-07-11-StoreStruct.ll
index 65b229d..70aea87 100644
--- a/test/Verifier/2006-07-11-StoreStruct.ll
+++ b/test/Verifier/2006-07-11-StoreStruct.ll
@@ -1,4 +1,6 @@
-; RUN: llvm-as < %s 2>&1 | not grep "Instruction operands must be first-class"
+; RUN: llvm-as < %s 2>&1 | FileCheck %s 
+
+; CHECK-NOT: Instruction operands must be first-class
 
 ; This previously was for PR826, but structs are now first-class so
 ; the following is now valid.
diff --git a/test/Verifier/2006-10-15-AddrLabel.ll b/test/Verifier/2006-10-15-AddrLabel.ll
index c8fedb5..decbf5b 100644
--- a/test/Verifier/2006-10-15-AddrLabel.ll
+++ b/test/Verifier/2006-10-15-AddrLabel.ll
@@ -1,5 +1,6 @@
 ; RUN: not llvm-as < %s > /dev/null 2> %t
-; RUN: grep "basic block pointers are invalid" %t
+; RUN: FileCheck %s --input-file=%t
+; CHECK: basic block pointers are invalid
 
 define i32 @main() {
          %foo  = call i8* %llvm.stacksave()
diff --git a/test/Verifier/2006-12-12-IntrinsicDefine.ll b/test/Verifier/2006-12-12-IntrinsicDefine.ll
index 6e7468c..8cc3d24 100644
--- a/test/Verifier/2006-12-12-IntrinsicDefine.ll
+++ b/test/Verifier/2006-12-12-IntrinsicDefine.ll
@@ -1,4 +1,5 @@
-; RUN: not llvm-as < %s 2>&1 | grep "llvm intrinsics cannot be defined"
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s
+; CHECK: llvm intrinsics cannot be defined
 ; PR1047
 
 define void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1) {
diff --git a/test/Verifier/2008-03-01-AllocaSized.ll b/test/Verifier/2008-03-01-AllocaSized.ll
index 51258be..fc12a96 100644
--- a/test/Verifier/2008-03-01-AllocaSized.ll
+++ b/test/Verifier/2008-03-01-AllocaSized.ll
@@ -1,4 +1,5 @@
-; RUN: not llvm-as %s -o /dev/null 2>&1 | grep "Cannot allocate unsized type"
+; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
+; CHECK: Cannot allocate unsized type
 ; PR2113
 
 define void @test() {
diff --git a/test/Verifier/2008-08-22-MemCpyAlignment.ll b/test/Verifier/2008-08-22-MemCpyAlignment.ll
index c6d5afd..3f7cb52 100644
--- a/test/Verifier/2008-08-22-MemCpyAlignment.ll
+++ b/test/Verifier/2008-08-22-MemCpyAlignment.ll
@@ -1,4 +1,5 @@
-; RUN: not llvm-as %s -o /dev/null 2>&1 | grep "alignment argument of memory intrinsics must be a constant int"
+; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
+; CHECK: alignment argument of memory intrinsics must be a constant int
 ; PR2318
 
 define void @x(i8* %a, i8* %src, i64 %len, i32 %align) nounwind  {
diff --git a/test/Verifier/2008-11-15-RetVoid.ll b/test/Verifier/2008-11-15-RetVoid.ll
index 42503fa..62f6da1 100644
--- a/test/Verifier/2008-11-15-RetVoid.ll
+++ b/test/Verifier/2008-11-15-RetVoid.ll
@@ -1,4 +1,5 @@
-; RUN: not llvm-as < %s 2>&1 | grep "value doesn't match function result type 'void'"
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s
+; CHECK: value doesn't match function result type 'void'
 
 define void @foo() {
   ret i32 0
diff --git a/test/Verifier/2010-08-07-PointerIntrinsic.ll b/test/Verifier/2010-08-07-PointerIntrinsic.ll
index 3136c61..a668d04 100644
--- a/test/Verifier/2010-08-07-PointerIntrinsic.ll
+++ b/test/Verifier/2010-08-07-PointerIntrinsic.ll
@@ -1,5 +1,6 @@
 ; RUN: not llvm-as < %s 2> %t
-; RUN: grep "Broken module" %t
+; RUN: FileCheck %s --input-file=%t
+; CHECK: Broken module
 ; PR7316
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32"
diff --git a/test/Verifier/AmbiguousPhi.ll b/test/Verifier/AmbiguousPhi.ll
index f31bc10..cb05a72 100644
--- a/test/Verifier/AmbiguousPhi.ll
+++ b/test/Verifier/AmbiguousPhi.ll
@@ -1,6 +1,5 @@
-; RUN: not llvm-as < %s 2>&1 | grep "multiple entries for the same basic block"
-
-
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s
+; CHECK: multiple entries for the same basic block
 
 define i32 @test(i32 %i, i32 %j, i1 %c) {
 	br i1 %c, label %A, label %A
diff --git a/test/Verifier/PhiGrouping.ll b/test/Verifier/PhiGrouping.ll
index 7b42fd2..291f084 100644
--- a/test/Verifier/PhiGrouping.ll
+++ b/test/Verifier/PhiGrouping.ll
@@ -1,6 +1,5 @@
-; RUN: not llvm-as < %s 2>&1 | grep "PHI nodes not grouped at top"
-
-
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s
+; CHECK: PHI nodes not grouped at top
 
 define i32 @test(i32 %i, i32 %j, i1 %c) {
 	br i1 %c, label %A, label %B
diff --git a/test/Verifier/SelfReferential.ll b/test/Verifier/SelfReferential.ll
index c24c0eb..7f0166a 100644
--- a/test/Verifier/SelfReferential.ll
+++ b/test/Verifier/SelfReferential.ll
@@ -1,4 +1,5 @@
-; RUN: not llvm-as %s -o /dev/null 2>&1 | grep "Only PHI nodes may reference their own value"
+; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
+; CHECK: Only PHI nodes may reference their own value
 
 ; Test that self referential instructions are not allowed
 
diff --git a/test/Verifier/aliasing-chain.ll b/test/Verifier/aliasing-chain.ll
index a52e796..ae0b77f 100644
--- a/test/Verifier/aliasing-chain.ll
+++ b/test/Verifier/aliasing-chain.ll
@@ -1,5 +1,5 @@
-; RUN:  not llvm-as %s -o /dev/null 2>&1 | grep "Aliasing chain should end with function or global variable"
-
+; RUN:  not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
+; CHECK: Aliasing chain should end with function or global variable
 ; Test that alising chain does not create a cycle
 
 @b1 = alias i32* @c1
diff --git a/test/Verifier/llvm.compiler_used-invalid-type.ll b/test/Verifier/llvm.compiler_used-invalid-type.ll
new file mode 100644
index 0000000..0913027
--- /dev/null
+++ b/test/Verifier/llvm.compiler_used-invalid-type.ll
@@ -0,0 +1,6 @@
+; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s
+
+@llvm.compiler_used = appending global [1 x i32] [i32 0], section "llvm.metadata"
+
+; CHECK:       wrong type for intrinsic global variable
+; CHECK-NEXT: [1 x i32]* @llvm.compiler_used
diff --git a/test/Verifier/llvm.used-invalid-init.ll b/test/Verifier/llvm.used-invalid-init.ll
new file mode 100644
index 0000000..b0887c9
--- /dev/null
+++ b/test/Verifier/llvm.used-invalid-init.ll
@@ -0,0 +1,6 @@
+; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s
+
+@llvm.used = appending global [1 x i8*] zeroinitializer, section "llvm.metadata"
+
+; CHECK: wrong initalizer for intrinsic global variable
+; CHECK-NEXT: [1 x i8*] zeroinitializer
diff --git a/test/Verifier/llvm.used-invalid-init2.ll b/test/Verifier/llvm.used-invalid-init2.ll
new file mode 100644
index 0000000..ee8a970
--- /dev/null
+++ b/test/Verifier/llvm.used-invalid-init2.ll
@@ -0,0 +1,7 @@
+; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s
+
+@a = global i8 42
+@llvm.used = appending global [2 x i8*] [i8* @a, i8* null], section "llvm.metadata"
+
+; CHECK: invalid llvm.used member
+; CHECK-NEXT: i8* null
diff --git a/test/Verifier/llvm.used-invalid-type.ll b/test/Verifier/llvm.used-invalid-type.ll
new file mode 100644
index 0000000..2de5c86
--- /dev/null
+++ b/test/Verifier/llvm.used-invalid-type.ll
@@ -0,0 +1,6 @@
+; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s
+
+@llvm.used = appending global [1 x i32] [i32 0], section "llvm.metadata"
+
+; CHECK:       wrong type for intrinsic global variable
+; CHECK-NEXT: [1 x i32]* @llvm.used
diff --git a/test/Verifier/llvm.used-invalid-type2.ll b/test/Verifier/llvm.used-invalid-type2.ll
new file mode 100644
index 0000000..bff3f2d
--- /dev/null
+++ b/test/Verifier/llvm.used-invalid-type2.ll
@@ -0,0 +1,5 @@
+; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s
+@llvm.used = appending global i32 0, section "llvm.metadata"
+
+; CHECK: Only global arrays can have appending linkage!
+; CHECK-NEXT: i32* @llvm.used
diff --git a/test/Verifier/llvm.used-ptr-type.ll b/test/Verifier/llvm.used-ptr-type.ll
new file mode 100644
index 0000000..adfb169
--- /dev/null
+++ b/test/Verifier/llvm.used-ptr-type.ll
@@ -0,0 +1,4 @@
+; RUN: llvm-as < %s -o /dev/null
+
+@a = global i32 42
+@llvm.used = appending global [1 x i32*] [i32* @a], section "llvm.metadata"
diff --git a/test/lit.cfg b/test/lit.cfg
index 0ecd8fe..8272e97 100644
--- a/test/lit.cfg
+++ b/test/lit.cfg
@@ -22,9 +22,18 @@ if sys.platform in ['win32']:
                                      config.environment['PATH']))
         config.environment['PATH'] = path
 
+# Choose between lit's internal shell pipeline runner and a real shell.  If
+# LIT_USE_INTERNAL_SHELL is in the environment, we use that as an override.
+use_lit_shell = os.environ.get("LIT_USE_INTERNAL_SHELL")
+if use_lit_shell:
+    # 0 is external, "" is default, and everything else is internal.
+    execute_external = (use_lit_shell == "0")
+else:
+    # Otherwise we default to internal on Windows and external elsewhere, as
+    # bash on Windows is usually very slow.
+    execute_external = (not sys.platform in ['win32'])
+
 # testFormat: The test format to use to interpret tests.
-execute_external = (not sys.platform in ['win32']
-                    or lit.getBashPath() not in [None, ""])
 config.test_format = lit.formats.ShTest(execute_external)
 
 # To ignore test output on stderr so it doesn't trigger failures uncomment this:
@@ -47,17 +56,8 @@ llvm_obj_root = getattr(config, 'llvm_obj_root', None)
 if llvm_obj_root is not None:
     config.test_exec_root = os.path.join(llvm_obj_root, 'test')
 
-# Tweak the PATH to include the scripts dir, the tools dir, and the llvm-gcc bin
-# dir (if available).
+# Tweak the PATH to include the tools dir.
 if llvm_obj_root is not None:
-    llvm_src_root = getattr(config, 'llvm_src_root', None)
-    if not llvm_src_root:
-        lit.fatal('No LLVM source root set!')
-    path = os.path.pathsep.join((os.path.join(llvm_src_root, 'test',
-                                              'Scripts'),
-                                 config.environment['PATH']))
-    config.environment['PATH'] = path
-
     llvm_tools_dir = getattr(config, 'llvm_tools_dir', None)
     if not llvm_tools_dir:
         lit.fatal('No LLVM tools dir set!')
@@ -160,7 +160,9 @@ config.substitutions.append( ('%lli_mcjit', lli_mcjit) )
 # but simply want use the currently considered most reliable jit for platform
 # FIXME: ppc32 is not ready for mcjit.
 if 'arm' in config.target_triple \
-   or 'powerpc64' in config.target_triple:
+   or 'aarch64' in config.target_triple \
+   or 'powerpc64' in config.target_triple \
+   or 's390x' in config.target_triple:
     defaultIsMCJIT = 'true'
 else:
     defaultIsMCJIT = 'false'
@@ -240,7 +242,7 @@ for pattern in [r"\bbugpoint\b(?!-)",   r"(?<!/|-)\bclang\b(?!-)",
 ### Features
 
 # Shell execution
-if sys.platform not in ['win32'] or lit.getBashPath() != '':
+if execute_external:
     config.available_features.add('shell')
 
 # Loadable module
@@ -264,6 +266,13 @@ if (config.llvm_use_sanitizer == "Memory" or
         config.llvm_use_sanitizer == "MemoryWithOrigins"):
     config.available_features.add("msan")
 
+# Direct object generation
+if not 'hexagon' in config.target_triple:
+    config.available_features.add("object-emission")
+
+if config.have_zlib == "1":
+    config.available_features.add("zlib")
+
 # llc knows whether he is compiled with -DNDEBUG.
 import subprocess
 try:
diff --git a/test/lit.site.cfg.in b/test/lit.site.cfg.in
index 8024b24..1ae99eb 100644
--- a/test/lit.site.cfg.in
+++ b/test/lit.site.cfg.in
@@ -1,6 +1,6 @@
 ## Autogenerated by LLVM/Clang configuration.
 # Do not edit!
-config.host_triple = "@LLVM_HOSTTRIPLE@"
+config.host_triple = "@LLVM_HOST_TRIPLE@"
 config.target_triple = "@TARGET_TRIPLE@"
 config.llvm_src_root = "@LLVM_SOURCE_DIR@"
 config.llvm_obj_root = "@LLVM_BINARY_DIR@"
@@ -19,6 +19,7 @@ config.host_os = "@HOST_OS@"
 config.host_arch = "@HOST_ARCH@"
 config.llvm_use_intel_jitevents = "@LLVM_USE_INTEL_JITEVENTS@"
 config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@"
+config.have_zlib = "@HAVE_LIBZ@"
 
 # Support substitution of the tools_dir with user parameters. This is
 # used when we can't determine the tool dir at configuration time.
diff --git a/test/tools/llvm-readobj/Inputs/relocs.obj.coff-i386 b/test/tools/llvm-readobj/Inputs/relocs.obj.coff-i386
new file mode 100644
index 0000000..15e43ef
--- /dev/null
+++ b/test/tools/llvm-readobj/Inputs/relocs.obj.coff-i386
diff --git a/test/tools/llvm-readobj/Inputs/relocs.obj.coff-x86_64 b/test/tools/llvm-readobj/Inputs/relocs.obj.coff-x86_64
new file mode 100644
index 0000000..cd63173
--- /dev/null
+++ b/test/tools/llvm-readobj/Inputs/relocs.obj.coff-x86_64
diff --git a/test/tools/llvm-readobj/Inputs/relocs.obj.elf-aarch64 b/test/tools/llvm-readobj/Inputs/relocs.obj.elf-aarch64
new file mode 100644
index 0000000..d39e60c
--- /dev/null
+++ b/test/tools/llvm-readobj/Inputs/relocs.obj.elf-aarch64
diff --git a/test/tools/llvm-readobj/Inputs/relocs.obj.elf-arm b/test/tools/llvm-readobj/Inputs/relocs.obj.elf-arm
new file mode 100644
index 0000000..908507d
--- /dev/null
+++ b/test/tools/llvm-readobj/Inputs/relocs.obj.elf-arm
diff --git a/test/tools/llvm-readobj/Inputs/relocs.obj.elf-i386 b/test/tools/llvm-readobj/Inputs/relocs.obj.elf-i386
new file mode 100644
index 0000000..7860df6
--- /dev/null
+++ b/test/tools/llvm-readobj/Inputs/relocs.obj.elf-i386
diff --git a/test/tools/llvm-readobj/Inputs/relocs.obj.elf-mips b/test/tools/llvm-readobj/Inputs/relocs.obj.elf-mips
new file mode 100644
index 0000000..e387942
--- /dev/null
+++ b/test/tools/llvm-readobj/Inputs/relocs.obj.elf-mips
diff --git a/test/tools/llvm-readobj/Inputs/relocs.obj.elf-mips64el b/test/tools/llvm-readobj/Inputs/relocs.obj.elf-mips64el
new file mode 100644
index 0000000..a977964
--- /dev/null
+++ b/test/tools/llvm-readobj/Inputs/relocs.obj.elf-mips64el
diff --git a/test/tools/llvm-readobj/Inputs/relocs.obj.elf-ppc64 b/test/tools/llvm-readobj/Inputs/relocs.obj.elf-ppc64
new file mode 100644
index 0000000..c46e4c0
--- /dev/null
+++ b/test/tools/llvm-readobj/Inputs/relocs.obj.elf-ppc64
diff --git a/test/tools/llvm-readobj/Inputs/relocs.obj.elf-x86_64 b/test/tools/llvm-readobj/Inputs/relocs.obj.elf-x86_64
new file mode 100644
index 0000000..3ca9d8c
--- /dev/null
+++ b/test/tools/llvm-readobj/Inputs/relocs.obj.elf-x86_64
diff --git a/test/tools/llvm-readobj/Inputs/relocs.obj.macho-arm b/test/tools/llvm-readobj/Inputs/relocs.obj.macho-arm
new file mode 100644
index 0000000..992ae17
--- /dev/null
+++ b/test/tools/llvm-readobj/Inputs/relocs.obj.macho-arm
diff --git a/test/tools/llvm-readobj/Inputs/relocs.obj.macho-i386 b/test/tools/llvm-readobj/Inputs/relocs.obj.macho-i386
new file mode 100644
index 0000000..5305fe8
--- /dev/null
+++ b/test/tools/llvm-readobj/Inputs/relocs.obj.macho-i386
diff --git a/test/tools/llvm-readobj/Inputs/relocs.obj.macho-x86_64 b/test/tools/llvm-readobj/Inputs/relocs.obj.macho-x86_64
new file mode 100644
index 0000000..42b80dd
--- /dev/null
+++ b/test/tools/llvm-readobj/Inputs/relocs.obj.macho-x86_64
diff --git a/test/tools/llvm-readobj/Inputs/relocs.py b/test/tools/llvm-readobj/Inputs/relocs.py
new file mode 100644
index 0000000..232d080
--- /dev/null
+++ b/test/tools/llvm-readobj/Inputs/relocs.py
@@ -0,0 +1,1086 @@
+#!/usr/bin/env python
+
+# Generates ELF, COFF and MachO object files for different architectures
+# containing all relocations:
+#
+# ELF:   i386, x86_64, ppc64, aarch64, arm, mips, mips64el
+# COFF:  i386, x86_64
+# MachO: i386, x86_64, arm
+# (see end of file for triples)
+#
+# To simplify generation, object files are generated with just the proper
+# number of relocations through repeated instructions. Afterwards, the
+# relocations in the object file are patched to their proper value.
+
+import operator
+import shutil
+import StringIO
+import struct
+import subprocess
+import sys
+
+class EnumType(type):  # Metaclass providing read-only name <-> value lookup.
+  def __init__(self, name, bases = (), attributes = {}):
+    super(EnumType, self).__init__(name, bases, attributes)
+
+    type.__setattr__(self, '_map', {})      # value -> first symbol stored
+    type.__setattr__(self, '_nameMap', {})  # symbol -> value
+
+    for symbol in attributes:
+      if symbol.startswith('__') or symbol.endswith('__'):
+        continue
+
+      value = attributes[symbol]
+
+      # MyEnum.symbol == value (type.__setattr__ bypasses the override below)
+      type.__setattr__(self, symbol, value)
+      self._nameMap[symbol] = value
+
+      # The first symbol with the given value is authoritative.
+      if not (value in self._map):
+        # MyEnum[value] == symbol
+        self._map[value] = symbol
+
+  # Not supported (Enums are immutable); always raises NotImplementedError.
+  def __setattr__(self, name, value):
+    raise NotImplementedError, self.__setattr__
+
+  # Not supported (Enums are immutable); always raises NotImplementedError.
+  def __delattr__(self, name):
+    raise NotImplementedError, self.__delattr__
+
+  # Gets the enum symbol for the specified value; raises KeyError if unknown.
+  def __getitem__(self, value):
+    symbol = self._map.get(value)
+    if symbol is None:
+      raise KeyError, value
+    return symbol
+
+  # Gets the enum symbol for the specified value or None.
+  def lookup(self, value):
+    symbol = self._map.get(value)
+    return symbol
+
+  # Not supported (Enums are immutable); always raises NotImplementedError.
+  def __setitem__(self, value, symbol):
+    raise NotImplementedError, self.__setitem__
+
+  # Not supported (Enums are immutable); always raises NotImplementedError.
+  def __delitem__(self, value):
+    raise NotImplementedError, self.__delitem__
+
+  def entries(self):  # Returns a list of (name, value) pairs.
+    # sort by (value, name)
+    def makeKey(item):
+      return (item[1], item[0])
+    e = []
+    for pair in sorted(self._nameMap.iteritems(), key=makeKey):
+      e.append(pair)
+    return e
+
+  def __iter__(self):
+    for e in self.entries():
+      yield e
+
+Enum = EnumType('Enum', (), {})
+
+class BinaryReader:
+  def __init__(self, path):
+    self.file = open(path, "r+b", 0)
+    self.isLSB = None
+    self.is64Bit = None
+    self.isN64 = False
+
+  def tell(self):
+    return self.file.tell()
+
+  def seek(self, pos):
+    self.file.seek(pos)
+
+  def read(self, N):
+    data = self.file.read(N)
+    if len(data) != N:
+      raise ValueError, "Out of data!"
+    return data
+
+  def int8(self):
+    return ord(self.read(1))
+
+  def uint8(self):
+    return ord(self.read(1))
+
+  def int16(self):
+    return struct.unpack('><'[self.isLSB] + 'h', self.read(2))[0]
+
+  def uint16(self):
+    return struct.unpack('><'[self.isLSB] + 'H', self.read(2))[0]
+
+  def int32(self):
+    return struct.unpack('><'[self.isLSB] + 'i', self.read(4))[0]
+
+  def uint32(self):
+    return struct.unpack('><'[self.isLSB] + 'I', self.read(4))[0]
+
+  def int64(self):
+    return struct.unpack('><'[self.isLSB] + 'q', self.read(8))[0]
+
+  def uint64(self):
+    return struct.unpack('><'[self.isLSB] + 'Q', self.read(8))[0]
+
+  def writeUInt8(self, value):
+    self.file.write(struct.pack('><'[self.isLSB] + 'B', value))
+
+  def writeUInt16(self, value):
+    self.file.write(struct.pack('><'[self.isLSB] + 'H', value))
+
+  def writeUInt32(self, value):
+    self.file.write(struct.pack('><'[self.isLSB] + 'I', value))
+
+  def writeUInt64(self, value):
+    self.file.write(struct.pack('><'[self.isLSB] + 'Q', value))
+
+  def word(self):
+    if self.is64Bit:
+      return self.uint64()
+    else:
+      return self.uint32()
+
+  def writeWord(self, value):
+    if self.is64Bit:
+      self.writeUInt64(value)
+    else:
+      self.writeUInt32(value)
+
+class StringTable:
+  def __init__(self, strings):
+    self.string_table = strings
+
+  def __getitem__(self, index):
+    end = self.string_table.index('\x00', index)
+    return self.string_table[index:end]
+
+class ElfSection:  # One ELF section header, read from the reader's current position.
+  def __init__(self, f):
+    self.sh_name = f.uint32()
+    self.sh_type = f.uint32()
+    self.sh_flags = f.word()
+    self.sh_addr = f.word()
+    self.sh_offset = f.word()
+    self.sh_size = f.word()
+    self.sh_link = f.uint32()
+    self.sh_info = f.uint32()
+    self.sh_addralign = f.word()
+    self.sh_entsize = f.word()
+
+  def patch(self, f, relocs):  # Only relocation sections are rewritten.
+    if self.sh_type == 4 or self.sh_type == 9: # SHT_RELA / SHT_REL
+      self.patchRelocs(f, relocs)
+
+  def patchRelocs(self, f, relocs):  # relocs: sequence of (name, type value) pairs.
+    entries = self.sh_size // self.sh_entsize
+
+    for index in range(entries):
+      f.seek(self.sh_offset + index * self.sh_entsize)
+      r_offset = f.word()
+
+      if index < len(relocs):
+        ri = index
+      else:
+        ri = 0  # More entries than relocs: fall back to the first reloc.
+
+      if f.isN64:
+        r_sym =   f.uint32()  # Read only to advance past the symbol fields.
+        r_ssym =  f.uint8()
+        f.seek(f.tell())  # presumably resets the stream between read and write
+        f.writeUInt8(relocs[ri][1])
+        f.writeUInt8(relocs[ri][1])
+        f.writeUInt8(relocs[ri][1])
+      else:
+        pos = f.tell()
+        r_info = f.word()
+
+        r_type = relocs[ri][1]
+        if f.is64Bit:
+          r_info = (r_info & 0xFFFFFFFF00000000) | (r_type & 0xFFFFFFFF)
+        else:  # ELF32 r_info: symbol index in the upper 24 bits, type in the low 8.
+          r_info = (r_info & 0xFFFFFF00) | (r_type & 0xFF)
+
+        print("    %s" % relocs[ri][0])
+        f.seek(pos)
+        f.writeWord(r_info)
+
+
+class CoffSection:
+  def __init__(self, f):
+    self.raw_name                = f.read(8)
+    self.virtual_size            = f.uint32()
+    self.virtual_address         = f.uint32()
+    self.raw_data_size           = f.uint32()
+    self.pointer_to_raw_data     = f.uint32()
+    self.pointer_to_relocations  = f.uint32()
+    self.pointer_to_line_numbers = f.uint32()
+    self.relocation_count        = f.uint16()
+    self.line_number_count       = f.uint16()
+    self.characteristics         = f.uint32()
+
+
+def compileAsm(filename, triple, src):
+  cmd = ["llvm-mc", "-triple=" + triple, "-filetype=obj", "-o", filename]
+  print("  Running: " + " ".join(cmd))
+  p = subprocess.Popen(cmd, stdin=subprocess.PIPE)
+  p.communicate(input=src)
+  p.wait()
+
+def compileIR(filename, triple, src):
+  cmd = ["llc", "-mtriple=" + triple, "-filetype=obj", "-o", filename]
+  print("  Running: " + " ".join(cmd))
+  p = subprocess.Popen(cmd, stdin=subprocess.PIPE)
+  p.communicate(input=src)
+  p.wait()
+
+
+def craftElf(filename, triple, relocs, dummyReloc):
+  print("Crafting " + filename + " for " + triple)
+  if type(dummyReloc) is tuple:
+    preSrc, dummyReloc, relocsPerDummy = dummyReloc
+    src = preSrc + "\n"
+    for i in range((len(relocs) + relocsPerDummy - 1) / relocsPerDummy):
+      src += dummyReloc.format(i) + "\n"
+    compileIR(filename, triple, src)
+  else:
+    src = (dummyReloc + "\n") * len(relocs)
+    compileAsm(filename, triple, src)
+
+  print("  Patching relocations...")
+  patchElf(filename, relocs)
+
+def patchElf(path, relocs):
+  f = BinaryReader(path)
+
+  magic = f.read(4)
+  assert magic == '\x7FELF'
+
+  fileclass = f.uint8()
+  if fileclass == 1:
+    f.is64Bit = False
+  elif fileclass == 2:
+    f.is64Bit = True
+  else:
+    raise ValueError, "Unknown file class %x" % fileclass
+
+  byteordering = f.uint8()
+  if byteordering == 1:
+      f.isLSB = True
+  elif byteordering == 2:
+      f.isLSB = False
+  else:
+      raise ValueError, "Unknown byte ordering %x" % byteordering
+
+  f.seek(18)
+  e_machine = f.uint16()
+  if e_machine == 0x0008 and f.is64Bit: # EM_MIPS && 64 bit
+      f.isN64 = True
+
+  e_version = f.uint32()
+  e_entry = f.word()
+  e_phoff = f.word()
+  e_shoff = f.word()
+  e_flags = f.uint32()
+  e_ehsize = f.uint16()
+  e_phentsize = f.uint16()
+  e_phnum = f.uint16()
+  e_shentsize = f.uint16()
+  e_shnum = f.uint16()
+  e_shstrndx = f.uint16()
+
+  sections = []
+  for index in range(e_shnum):
+    f.seek(e_shoff + index * e_shentsize)
+    s = ElfSection(f)
+    sections.append(s)
+
+  f.seek(sections[e_shstrndx].sh_offset)
+  shstrtab = StringTable(f.read(sections[e_shstrndx].sh_size))
+
+  strtab = None
+  for section in sections:
+    if shstrtab[section.sh_name] == ".strtab":
+      f.seek(section.sh_offset)
+      strtab = StringTable(f.read(section.sh_size))
+      break
+
+  for index in range(e_shnum):
+    sections[index].patch(f, relocs)
+
+
+def craftCoff(filename, triple, relocs, dummyReloc):
+  print("Crafting " + filename + " for " + triple)
+  src = (dummyReloc + "\n") * len(relocs)
+  compileAsm(filename, triple, src)
+
+  print("  Patching relocations...")
+  patchCoff(filename, relocs)
+
+def patchCoff(path, relocs):
+  f = BinaryReader(path)
+  f.isLSB = True
+
+  machine_type            = f.uint16()
+  section_count           = f.uint16()
+  f.seek(20)
+  sections = [CoffSection(f) for idx in range(section_count)]
+
+  section = sections[0]
+  f.seek(section.pointer_to_relocations)
+  for i in range(section.relocation_count):
+    virtual_addr = f.uint32()
+    symtab_idx   = f.uint32()
+    print("    %s" % relocs[i][0])
+    f.writeUInt16(relocs[i][1])
+
+
+def craftMacho(filename, triple, relocs, dummyReloc):
+  print("Crafting " + filename + " for " + triple)
+
+  if type(dummyReloc) is tuple:
+    srcType, preSrc, dummyReloc, relocsPerDummy = dummyReloc
+    src = preSrc + "\n"
+    for i in range((len(relocs) + relocsPerDummy - 1) / relocsPerDummy):
+      src += dummyReloc.format(i) + "\n"
+    if srcType == "asm":
+      compileAsm(filename, triple, src)
+    elif srcType == "ir":
+      compileIR(filename, triple, src)
+  else:
+    src = (dummyReloc + "\n") * len(relocs)
+    compileAsm(filename, triple, src)
+
+  print("  Patching relocations...")
+  patchMacho(filename, relocs)
+
+def patchMacho(filename, relocs):  # Parses the Mach-O header, then patches each load command.
+  f = BinaryReader(filename)
+
+  magic = f.read(4)  # The magic selects byte order and 32/64-bit layout.
+  if magic == '\xFE\xED\xFA\xCE':
+    f.isLSB, f.is64Bit = False, False
+  elif magic == '\xCE\xFA\xED\xFE':
+    f.isLSB, f.is64Bit = True, False
+  elif magic == '\xFE\xED\xFA\xCF':
+    f.isLSB, f.is64Bit = False, True
+  elif magic == '\xCF\xFA\xED\xFE':
+    f.isLSB, f.is64Bit = True, True
+  else:
+    raise ValueError,"Not a Mach-O object file: %r (bad magic)" % filename
+
+  cputype = f.uint32()
+  cpusubtype = f.uint32()
+  filetype = f.uint32()
+  numLoadCommands = f.uint32()
+  loadCommandsSize = f.uint32()
+  flag = f.uint32()
+  if f.is64Bit:
+    reserved = f.uint32()
+
+  start = f.tell()
+
+  for i in range(numLoadCommands):
+    patchMachoLoadCommand(f, relocs)
+
+  if f.tell() - start != loadCommandsSize:  # Bytes consumed must match the header's size field.
+    raise ValueError,"%s: warning: invalid load commands size: %r" % (
+      sys.argv[0], loadCommandsSize)
+
+def patchMachoLoadCommand(f, relocs):
+  # Handle one load command: segment commands get their sections patched,
+  # every other command is skipped.  The bytes consumed must equal cmdSize.
+  begin = f.tell()
+  cmd = f.uint32()
+  cmdSize = f.uint32()
+  if cmd in (1, 25):  # LC_SEGMENT (0x1) and LC_SEGMENT_64 (0x19)
+    patchMachoSegmentLoadCommand(f, relocs)
+  else:
+    f.read(cmdSize - 8)  # skip the payload; the two words above were 8 bytes
+
+  consumed = f.tell() - begin
+  if consumed != cmdSize:
+    raise ValueError("%s: warning: invalid load command size: %r" % (
+      sys.argv[0], cmdSize))
+
+def patchMachoSegmentLoadCommand(f, relocs):  # read one segment command's fields, then patch each of its sections
+  segment_name = f.read(16)
+  vm_addr = f.word()  # f.word(): presumably 32 or 64 bits depending on f.is64Bit -- TODO confirm
+  vm_size = f.word()
+  file_offset = f.word()
+  file_size = f.word()
+  maxprot = f.uint32()
+  initprot = f.uint32()
+  numSections = f.uint32()
+  flags = f.uint32()
+  for i in range(numSections):  # the section headers are read right after the fields above
+    patchMachoSection(f, relocs)
+
+def patchMachoSection(f, relocs):
+  # Read one section header, then rewrite bits 28-31 of the second word of
+  # each of its relocation entries with the low 4 bits of relocs[..][1].
+  section_name = f.read(16)
+  segment_name = f.read(16)
+  address = f.word()
+  size = f.word()
+  offset = f.uint32()
+  alignment = f.uint32()
+  relocOffset = f.uint32()
+  numReloc = f.uint32()
+  flags = f.uint32()
+  reserved1 = f.uint32()
+  reserved2 = f.uint32()
+  if f.is64Bit:
+    reserved3 = f.uint32()
+  prev_pos = f.tell()  # position just past this section header; restored before returning
+  f.seek(relocOffset)
+  for i in range(numReloc):
+    ri = i < len(relocs) and i or 0  # falls back to relocs[0] when the file has more relocations than entries
+    print("    %s" % relocs[ri][0])
+    word1 = f.uint32()  # first word of the entry; value unused
+    pos = f.tell()
+    value = f.uint32()
+    f.seek(pos)  # rewind so the write below overwrites the word just read
+    value = (value & 0x0FFFFFFF) | ((relocs[ri][1] & 0xF) << 28)  # replace only the top nibble
+    f.writeUInt32(value)  # NOTE(review): big-endian objects may keep the type in other bits -- confirm
+  f.seek(prev_pos)
+
+
+class Relocs_Elf_X86_64(Enum):  # ELF x86-64 relocation types (name = numeric code); entries() feeds craftElf
+  R_X86_64_NONE       = 0
+  R_X86_64_64         = 1
+  R_X86_64_PC32       = 2
+  R_X86_64_GOT32      = 3
+  R_X86_64_PLT32      = 4
+  R_X86_64_COPY       = 5
+  R_X86_64_GLOB_DAT   = 6
+  R_X86_64_JUMP_SLOT  = 7
+  R_X86_64_RELATIVE   = 8
+  R_X86_64_GOTPCREL   = 9
+  R_X86_64_32         = 10
+  R_X86_64_32S        = 11
+  R_X86_64_16         = 12
+  R_X86_64_PC16       = 13
+  R_X86_64_8          = 14
+  R_X86_64_PC8        = 15
+  R_X86_64_DTPMOD64   = 16
+  R_X86_64_DTPOFF64   = 17
+  R_X86_64_TPOFF64    = 18
+  R_X86_64_TLSGD      = 19
+  R_X86_64_TLSLD      = 20
+  R_X86_64_DTPOFF32   = 21
+  R_X86_64_GOTTPOFF   = 22
+  R_X86_64_TPOFF32    = 23
+  R_X86_64_PC64       = 24
+  R_X86_64_GOTOFF64   = 25
+  R_X86_64_GOTPC32    = 26
+  R_X86_64_GOT64      = 27
+  R_X86_64_GOTPCREL64 = 28
+  R_X86_64_GOTPC64    = 29
+  R_X86_64_GOTPLT64   = 30
+  R_X86_64_PLTOFF64   = 31
+  R_X86_64_SIZE32     = 32
+  R_X86_64_SIZE64     = 33
+  R_X86_64_GOTPC32_TLSDESC = 34
+  R_X86_64_TLSDESC_CALL    = 35
+  R_X86_64_TLSDESC    = 36
+  R_X86_64_IRELATIVE  = 37
+
+class Relocs_Elf_i386(Enum):  # ELF i386 relocation types (name = numeric code); the numbering has gaps
+  R_386_NONE          = 0
+  R_386_32            = 1
+  R_386_PC32          = 2
+  R_386_GOT32         = 3
+  R_386_PLT32         = 4
+  R_386_COPY          = 5
+  R_386_GLOB_DAT      = 6
+  R_386_JUMP_SLOT     = 7
+  R_386_RELATIVE      = 8
+  R_386_GOTOFF        = 9
+  R_386_GOTPC         = 10
+  R_386_32PLT         = 11
+  R_386_TLS_TPOFF     = 14
+  R_386_TLS_IE        = 15
+  R_386_TLS_GOTIE     = 16
+  R_386_TLS_LE        = 17
+  R_386_TLS_GD        = 18
+  R_386_TLS_LDM       = 19
+  R_386_16            = 20
+  R_386_PC16          = 21
+  R_386_8             = 22
+  R_386_PC8           = 23
+  R_386_TLS_GD_32     = 24
+  R_386_TLS_GD_PUSH   = 25
+  R_386_TLS_GD_CALL   = 26
+  R_386_TLS_GD_POP    = 27
+  R_386_TLS_LDM_32    = 28
+  R_386_TLS_LDM_PUSH  = 29
+  R_386_TLS_LDM_CALL  = 30
+  R_386_TLS_LDM_POP   = 31
+  R_386_TLS_LDO_32    = 32
+  R_386_TLS_IE_32     = 33
+  R_386_TLS_LE_32     = 34
+  R_386_TLS_DTPMOD32  = 35
+  R_386_TLS_DTPOFF32  = 36
+  R_386_TLS_TPOFF32   = 37
+  R_386_TLS_GOTDESC   = 39
+  R_386_TLS_DESC_CALL = 40
+  R_386_TLS_DESC      = 41
+  R_386_IRELATIVE     = 42
+  R_386_NUM           = 43
+
+class Relocs_Elf_MBlaze(Enum):  # ELF MicroBlaze relocation types; its craftElf call at the bottom of the script is commented out
+  R_MICROBLAZE_NONE           = 0
+  R_MICROBLAZE_32             = 1
+  R_MICROBLAZE_32_PCREL       = 2
+  R_MICROBLAZE_64_PCREL       = 3
+  R_MICROBLAZE_32_PCREL_LO    = 4
+  R_MICROBLAZE_64             = 5
+  R_MICROBLAZE_32_LO          = 6
+  R_MICROBLAZE_SRO32          = 7
+  R_MICROBLAZE_SRW32          = 8
+  R_MICROBLAZE_64_NONE        = 9
+  R_MICROBLAZE_32_SYM_OP_SYM  = 10
+  R_MICROBLAZE_GNU_VTINHERIT  = 11
+  R_MICROBLAZE_GNU_VTENTRY    = 12
+  R_MICROBLAZE_GOTPC_64       = 13
+  R_MICROBLAZE_GOT_64         = 14
+  R_MICROBLAZE_PLT_64         = 15
+  R_MICROBLAZE_REL            = 16
+  R_MICROBLAZE_JUMP_SLOT      = 17
+  R_MICROBLAZE_GLOB_DAT       = 18
+  R_MICROBLAZE_GOTOFF_64      = 19
+  R_MICROBLAZE_GOTOFF_32      = 20
+  R_MICROBLAZE_COPY           = 21
+
+class Relocs_Elf_PPC32(Enum):  # ELF 32-bit PowerPC relocation types; its craftElf call at the bottom of the script is commented out
+  R_PPC_NONE                  = 0
+  R_PPC_ADDR32                = 1
+  R_PPC_ADDR24                = 2
+  R_PPC_ADDR16                = 3
+  R_PPC_ADDR16_LO             = 4
+  R_PPC_ADDR16_HI             = 5
+  R_PPC_ADDR16_HA             = 6
+  R_PPC_ADDR14                = 7
+  R_PPC_ADDR14_BRTAKEN        = 8
+  R_PPC_ADDR14_BRNTAKEN       = 9
+  R_PPC_REL24                 = 10
+  R_PPC_REL14                 = 11
+  R_PPC_REL14_BRTAKEN         = 12
+  R_PPC_REL14_BRNTAKEN        = 13
+  R_PPC_REL32                 = 26
+  R_PPC_TPREL16_LO            = 70
+  R_PPC_TPREL16_HA            = 72
+
+class Relocs_Elf_PPC64(Enum):  # ELF 64-bit PowerPC relocation types (name = numeric code); entries() feeds craftElf
+  R_PPC64_NONE                = 0
+  R_PPC64_ADDR32              = 1
+  R_PPC64_ADDR16_LO           = 4
+  R_PPC64_ADDR16_HI           = 5
+  R_PPC64_ADDR14              = 7
+  R_PPC64_REL24               = 10
+  R_PPC64_REL32               = 26
+  R_PPC64_ADDR64              = 38
+  R_PPC64_ADDR16_HIGHER       = 39
+  R_PPC64_ADDR16_HIGHEST      = 41
+  R_PPC64_REL64               = 44
+  R_PPC64_TOC16               = 47
+  R_PPC64_TOC16_LO            = 48
+  R_PPC64_TOC16_HA            = 50
+  R_PPC64_TOC                 = 51
+  R_PPC64_ADDR16_DS           = 56
+  R_PPC64_ADDR16_LO_DS        = 57
+  R_PPC64_TOC16_DS            = 63
+  R_PPC64_TOC16_LO_DS         = 64
+  R_PPC64_TLS                 = 67
+  R_PPC64_TPREL16_LO          = 70
+  R_PPC64_TPREL16_HA          = 72
+  R_PPC64_DTPREL16_LO         = 75
+  R_PPC64_DTPREL16_HA         = 77
+  R_PPC64_GOT_TLSGD16_LO      = 80
+  R_PPC64_GOT_TLSGD16_HA      = 82
+  R_PPC64_GOT_TLSLD16_LO      = 84
+  R_PPC64_GOT_TLSLD16_HA      = 86
+  R_PPC64_GOT_TPREL16_LO_DS   = 88
+  R_PPC64_GOT_TPREL16_HA      = 90
+  R_PPC64_TLSGD               = 107
+  R_PPC64_TLSLD               = 108
+
+class Relocs_Elf_AArch64(Enum):  # ELF AArch64 relocation types (name = numeric code, written in hex); entries() feeds craftElf
+  R_AARCH64_NONE                        = 0x100
+  R_AARCH64_ABS64                       = 0x101
+  R_AARCH64_ABS32                       = 0x102
+  R_AARCH64_ABS16                       = 0x103
+  R_AARCH64_PREL64                      = 0x104
+  R_AARCH64_PREL32                      = 0x105
+  R_AARCH64_PREL16                      = 0x106
+  R_AARCH64_MOVW_UABS_G0                = 0x107
+  R_AARCH64_MOVW_UABS_G0_NC             = 0x108
+  R_AARCH64_MOVW_UABS_G1                = 0x109
+  R_AARCH64_MOVW_UABS_G1_NC             = 0x10a
+  R_AARCH64_MOVW_UABS_G2                = 0x10b
+  R_AARCH64_MOVW_UABS_G2_NC             = 0x10c
+  R_AARCH64_MOVW_UABS_G3                = 0x10d
+  R_AARCH64_MOVW_SABS_G0                = 0x10e
+  R_AARCH64_MOVW_SABS_G1                = 0x10f
+  R_AARCH64_MOVW_SABS_G2                = 0x110
+  R_AARCH64_LD_PREL_LO19                = 0x111
+  R_AARCH64_ADR_PREL_LO21               = 0x112
+  R_AARCH64_ADR_PREL_PG_HI21            = 0x113
+  R_AARCH64_ADD_ABS_LO12_NC             = 0x115
+  R_AARCH64_LDST8_ABS_LO12_NC           = 0x116
+  R_AARCH64_TSTBR14                     = 0x117
+  R_AARCH64_CONDBR19                    = 0x118
+  R_AARCH64_JUMP26                      = 0x11a
+  R_AARCH64_CALL26                      = 0x11b
+  R_AARCH64_LDST16_ABS_LO12_NC          = 0x11c
+  R_AARCH64_LDST32_ABS_LO12_NC          = 0x11d
+  R_AARCH64_LDST64_ABS_LO12_NC          = 0x11e
+  R_AARCH64_LDST128_ABS_LO12_NC         = 0x12b
+  R_AARCH64_ADR_GOT_PAGE                = 0x137
+  R_AARCH64_LD64_GOT_LO12_NC            = 0x138
+  R_AARCH64_TLSLD_MOVW_DTPREL_G2        = 0x20b
+  R_AARCH64_TLSLD_MOVW_DTPREL_G1        = 0x20c
+  R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC     = 0x20d
+  R_AARCH64_TLSLD_MOVW_DTPREL_G0        = 0x20e
+  R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC     = 0x20f
+  R_AARCH64_TLSLD_ADD_DTPREL_HI12       = 0x210
+  R_AARCH64_TLSLD_ADD_DTPREL_LO12       = 0x211
+  R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC    = 0x212
+  R_AARCH64_TLSLD_LDST8_DTPREL_LO12     = 0x213
+  R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC  = 0x214
+  R_AARCH64_TLSLD_LDST16_DTPREL_LO12    = 0x215
+  R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC = 0x216
+  R_AARCH64_TLSLD_LDST32_DTPREL_LO12    = 0x217
+  R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC = 0x218
+  R_AARCH64_TLSLD_LDST64_DTPREL_LO12    = 0x219
+  R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC = 0x21a
+  R_AARCH64_TLSIE_MOVW_GOTTPREL_G1      = 0x21b
+  R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC   = 0x21c
+  R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21   = 0x21d
+  R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC = 0x21e
+  R_AARCH64_TLSIE_LD_GOTTPREL_PREL19    = 0x21f
+  R_AARCH64_TLSLE_MOVW_TPREL_G2         = 0x220
+  R_AARCH64_TLSLE_MOVW_TPREL_G1         = 0x221
+  R_AARCH64_TLSLE_MOVW_TPREL_G1_NC      = 0x222
+  R_AARCH64_TLSLE_MOVW_TPREL_G0         = 0x223
+  R_AARCH64_TLSLE_MOVW_TPREL_G0_NC      = 0x224
+  R_AARCH64_TLSLE_ADD_TPREL_HI12        = 0x225
+  R_AARCH64_TLSLE_ADD_TPREL_LO12        = 0x226
+  R_AARCH64_TLSLE_ADD_TPREL_LO12_NC     = 0x227
+  R_AARCH64_TLSLE_LDST8_TPREL_LO12      = 0x228
+  R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC   = 0x229
+  R_AARCH64_TLSLE_LDST16_TPREL_LO12     = 0x22a
+  R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC  = 0x22b
+  R_AARCH64_TLSLE_LDST32_TPREL_LO12     = 0x22c
+  R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC  = 0x22d
+  R_AARCH64_TLSLE_LDST64_TPREL_LO12     = 0x22e
+  R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC  = 0x22f
+  R_AARCH64_TLSDESC_ADR_PAGE            = 0x232
+  R_AARCH64_TLSDESC_LD64_LO12_NC        = 0x233
+  R_AARCH64_TLSDESC_ADD_LO12_NC         = 0x234
+  R_AARCH64_TLSDESC_CALL                = 0x239
+
+class Relocs_Elf_ARM(Enum):  # ELF ARM relocation types (name = numeric code, written in hex); entries() feeds craftElf
+  R_ARM_NONE                  = 0x00
+  R_ARM_PC24                  = 0x01
+  R_ARM_ABS32                 = 0x02
+  R_ARM_REL32                 = 0x03
+  R_ARM_LDR_PC_G0             = 0x04
+  R_ARM_ABS16                 = 0x05
+  R_ARM_ABS12                 = 0x06
+  R_ARM_THM_ABS5              = 0x07
+  R_ARM_ABS8                  = 0x08
+  R_ARM_SBREL32               = 0x09
+  R_ARM_THM_CALL              = 0x0a
+  R_ARM_THM_PC8               = 0x0b
+  R_ARM_BREL_ADJ              = 0x0c
+  R_ARM_TLS_DESC              = 0x0d
+  R_ARM_THM_SWI8              = 0x0e
+  R_ARM_XPC25                 = 0x0f
+  R_ARM_THM_XPC22             = 0x10
+  R_ARM_TLS_DTPMOD32          = 0x11
+  R_ARM_TLS_DTPOFF32          = 0x12
+  R_ARM_TLS_TPOFF32           = 0x13
+  R_ARM_COPY                  = 0x14
+  R_ARM_GLOB_DAT              = 0x15
+  R_ARM_JUMP_SLOT             = 0x16
+  R_ARM_RELATIVE              = 0x17
+  R_ARM_GOTOFF32              = 0x18
+  R_ARM_BASE_PREL             = 0x19
+  R_ARM_GOT_BREL              = 0x1a
+  R_ARM_PLT32                 = 0x1b
+  R_ARM_CALL                  = 0x1c
+  R_ARM_JUMP24                = 0x1d
+  R_ARM_THM_JUMP24            = 0x1e
+  R_ARM_BASE_ABS              = 0x1f
+  R_ARM_ALU_PCREL_7_0         = 0x20
+  R_ARM_ALU_PCREL_15_8        = 0x21
+  R_ARM_ALU_PCREL_23_15       = 0x22
+  R_ARM_LDR_SBREL_11_0_NC     = 0x23
+  R_ARM_ALU_SBREL_19_12_NC    = 0x24
+  R_ARM_ALU_SBREL_27_20_CK    = 0x25
+  R_ARM_TARGET1               = 0x26
+  R_ARM_SBREL31               = 0x27
+  R_ARM_V4BX                  = 0x28
+  R_ARM_TARGET2               = 0x29
+  R_ARM_PREL31                = 0x2a
+  R_ARM_MOVW_ABS_NC           = 0x2b
+  R_ARM_MOVT_ABS              = 0x2c
+  R_ARM_MOVW_PREL_NC          = 0x2d
+  R_ARM_MOVT_PREL             = 0x2e
+  R_ARM_THM_MOVW_ABS_NC       = 0x2f
+  R_ARM_THM_MOVT_ABS          = 0x30
+  R_ARM_THM_MOVW_PREL_NC      = 0x31
+  R_ARM_THM_MOVT_PREL         = 0x32
+  R_ARM_THM_JUMP19            = 0x33
+  R_ARM_THM_JUMP6             = 0x34
+  R_ARM_THM_ALU_PREL_11_0     = 0x35
+  R_ARM_THM_PC12              = 0x36
+  R_ARM_ABS32_NOI             = 0x37
+  R_ARM_REL32_NOI             = 0x38
+  R_ARM_ALU_PC_G0_NC          = 0x39
+  R_ARM_ALU_PC_G0             = 0x3a
+  R_ARM_ALU_PC_G1_NC          = 0x3b
+  R_ARM_ALU_PC_G1             = 0x3c
+  R_ARM_ALU_PC_G2             = 0x3d
+  R_ARM_LDR_PC_G1             = 0x3e
+  R_ARM_LDR_PC_G2             = 0x3f
+  R_ARM_LDRS_PC_G0            = 0x40
+  R_ARM_LDRS_PC_G1            = 0x41
+  R_ARM_LDRS_PC_G2            = 0x42
+  R_ARM_LDC_PC_G0             = 0x43
+  R_ARM_LDC_PC_G1             = 0x44
+  R_ARM_LDC_PC_G2             = 0x45
+  R_ARM_ALU_SB_G0_NC          = 0x46
+  R_ARM_ALU_SB_G0             = 0x47
+  R_ARM_ALU_SB_G1_NC          = 0x48
+  R_ARM_ALU_SB_G1             = 0x49
+  R_ARM_ALU_SB_G2             = 0x4a
+  R_ARM_LDR_SB_G0             = 0x4b
+  R_ARM_LDR_SB_G1             = 0x4c
+  R_ARM_LDR_SB_G2             = 0x4d
+  R_ARM_LDRS_SB_G0            = 0x4e
+  R_ARM_LDRS_SB_G1            = 0x4f
+  R_ARM_LDRS_SB_G2            = 0x50
+  R_ARM_LDC_SB_G0             = 0x51
+  R_ARM_LDC_SB_G1             = 0x52
+  R_ARM_LDC_SB_G2             = 0x53
+  R_ARM_MOVW_BREL_NC          = 0x54
+  R_ARM_MOVT_BREL             = 0x55
+  R_ARM_MOVW_BREL             = 0x56
+  R_ARM_THM_MOVW_BREL_NC      = 0x57
+  R_ARM_THM_MOVT_BREL         = 0x58
+  R_ARM_THM_MOVW_BREL         = 0x59
+  R_ARM_TLS_GOTDESC           = 0x5a
+  R_ARM_TLS_CALL              = 0x5b
+  R_ARM_TLS_DESCSEQ           = 0x5c
+  R_ARM_THM_TLS_CALL          = 0x5d
+  R_ARM_PLT32_ABS             = 0x5e
+  R_ARM_GOT_ABS               = 0x5f
+  R_ARM_GOT_PREL              = 0x60
+  R_ARM_GOT_BREL12            = 0x61
+  R_ARM_GOTOFF12              = 0x62
+  R_ARM_GOTRELAX              = 0x63
+  R_ARM_GNU_VTENTRY           = 0x64
+  R_ARM_GNU_VTINHERIT         = 0x65
+  R_ARM_THM_JUMP11            = 0x66
+  R_ARM_THM_JUMP8             = 0x67
+  R_ARM_TLS_GD32              = 0x68
+  R_ARM_TLS_LDM32             = 0x69
+  R_ARM_TLS_LDO32             = 0x6a
+  R_ARM_TLS_IE32              = 0x6b
+  R_ARM_TLS_LE32              = 0x6c
+  R_ARM_TLS_LDO12             = 0x6d
+  R_ARM_TLS_LE12              = 0x6e
+  R_ARM_TLS_IE12GP            = 0x6f
+  R_ARM_PRIVATE_0             = 0x70
+  R_ARM_PRIVATE_1             = 0x71
+  R_ARM_PRIVATE_2             = 0x72
+  R_ARM_PRIVATE_3             = 0x73
+  R_ARM_PRIVATE_4             = 0x74
+  R_ARM_PRIVATE_5             = 0x75
+  R_ARM_PRIVATE_6             = 0x76
+  R_ARM_PRIVATE_7             = 0x77
+  R_ARM_PRIVATE_8             = 0x78
+  R_ARM_PRIVATE_9             = 0x79
+  R_ARM_PRIVATE_10            = 0x7a
+  R_ARM_PRIVATE_11            = 0x7b
+  R_ARM_PRIVATE_12            = 0x7c
+  R_ARM_PRIVATE_13            = 0x7d
+  R_ARM_PRIVATE_14            = 0x7e
+  R_ARM_PRIVATE_15            = 0x7f
+  R_ARM_ME_TOO                = 0x80
+  R_ARM_THM_TLS_DESCSEQ16     = 0x81
+  R_ARM_THM_TLS_DESCSEQ32     = 0x82
+
+class Relocs_Elf_Mips(Enum):  # ELF MIPS relocation types; entries() feeds both the mips and the mips64el craftElf calls
+  R_MIPS_NONE              =  0
+  R_MIPS_16                =  1
+  R_MIPS_32                =  2
+  R_MIPS_REL32             =  3
+  R_MIPS_26                =  4
+  R_MIPS_HI16              =  5
+  R_MIPS_LO16              =  6
+  R_MIPS_GPREL16           =  7
+  R_MIPS_LITERAL           =  8
+  R_MIPS_GOT16             =  9
+  R_MIPS_PC16              = 10
+  R_MIPS_CALL16            = 11
+  R_MIPS_GPREL32           = 12
+  R_MIPS_SHIFT5            = 16
+  R_MIPS_SHIFT6            = 17
+  R_MIPS_64                = 18
+  R_MIPS_GOT_DISP          = 19
+  R_MIPS_GOT_PAGE          = 20
+  R_MIPS_GOT_OFST          = 21
+  R_MIPS_GOT_HI16          = 22
+  R_MIPS_GOT_LO16          = 23
+  R_MIPS_SUB               = 24
+  R_MIPS_INSERT_A          = 25
+  R_MIPS_INSERT_B          = 26
+  R_MIPS_DELETE            = 27
+  R_MIPS_HIGHER            = 28
+  R_MIPS_HIGHEST           = 29
+  R_MIPS_CALL_HI16         = 30
+  R_MIPS_CALL_LO16         = 31
+  R_MIPS_SCN_DISP          = 32
+  R_MIPS_REL16             = 33
+  R_MIPS_ADD_IMMEDIATE     = 34
+  R_MIPS_PJUMP             = 35
+  R_MIPS_RELGOT            = 36
+  R_MIPS_JALR              = 37
+  R_MIPS_TLS_DTPMOD32      = 38
+  R_MIPS_TLS_DTPREL32      = 39
+  R_MIPS_TLS_DTPMOD64      = 40
+  R_MIPS_TLS_DTPREL64      = 41
+  R_MIPS_TLS_GD            = 42
+  R_MIPS_TLS_LDM           = 43
+  R_MIPS_TLS_DTPREL_HI16   = 44
+  R_MIPS_TLS_DTPREL_LO16   = 45
+  R_MIPS_TLS_GOTTPREL      = 46
+  R_MIPS_TLS_TPREL32       = 47
+  R_MIPS_TLS_TPREL64       = 48
+  R_MIPS_TLS_TPREL_HI16    = 49
+  R_MIPS_TLS_TPREL_LO16    = 50
+  R_MIPS_GLOB_DAT          = 51
+  R_MIPS_COPY              = 126
+  R_MIPS_JUMP_SLOT         = 127
+  R_MIPS_NUM               = 218
+
+class Relocs_Elf_Hexagon(Enum):  # ELF Hexagon relocation types; its craftElf call at the bottom of the script is commented out
+  R_HEX_NONE              =  0
+  R_HEX_B22_PCREL         =  1
+  R_HEX_B15_PCREL         =  2
+  R_HEX_B7_PCREL          =  3
+  R_HEX_LO16              =  4
+  R_HEX_HI16              =  5
+  R_HEX_32                =  6
+  R_HEX_16                =  7
+  R_HEX_8                 =  8
+  R_HEX_GPREL16_0         =  9
+  R_HEX_GPREL16_1         =  10
+  R_HEX_GPREL16_2         =  11
+  R_HEX_GPREL16_3         =  12
+  R_HEX_HL16              =  13
+  R_HEX_B13_PCREL         =  14
+  R_HEX_B9_PCREL          =  15
+  R_HEX_B32_PCREL_X       =  16
+  R_HEX_32_6_X            =  17
+  R_HEX_B22_PCREL_X       =  18
+  R_HEX_B15_PCREL_X       =  19
+  R_HEX_B13_PCREL_X       =  20
+  R_HEX_B9_PCREL_X        =  21
+  R_HEX_B7_PCREL_X        =  22
+  R_HEX_16_X              =  23
+  R_HEX_12_X              =  24
+  R_HEX_11_X              =  25
+  R_HEX_10_X              =  26
+  R_HEX_9_X               =  27
+  R_HEX_8_X               =  28
+  R_HEX_7_X               =  29
+  R_HEX_6_X               =  30
+  R_HEX_32_PCREL          =  31
+  R_HEX_COPY              =  32
+  R_HEX_GLOB_DAT          =  33
+  R_HEX_JMP_SLOT          =  34
+  R_HEX_RELATIVE          =  35
+  R_HEX_PLT_B22_PCREL     =  36
+  R_HEX_GOTREL_LO16       =  37
+  R_HEX_GOTREL_HI16       =  38
+  R_HEX_GOTREL_32         =  39
+  R_HEX_GOT_LO16          =  40
+  R_HEX_GOT_HI16          =  41
+  R_HEX_GOT_32            =  42
+  R_HEX_GOT_16            =  43
+  R_HEX_DTPMOD_32         =  44
+  R_HEX_DTPREL_LO16       =  45
+  R_HEX_DTPREL_HI16       =  46
+  R_HEX_DTPREL_32         =  47
+  R_HEX_DTPREL_16         =  48
+  R_HEX_GD_PLT_B22_PCREL  =  49
+  R_HEX_GD_GOT_LO16       =  50
+  R_HEX_GD_GOT_HI16       =  51
+  R_HEX_GD_GOT_32         =  52
+  R_HEX_GD_GOT_16         =  53
+  R_HEX_IE_LO16           =  54
+  R_HEX_IE_HI16           =  55
+  R_HEX_IE_32             =  56
+  R_HEX_IE_GOT_LO16       =  57
+  R_HEX_IE_GOT_HI16       =  58
+  R_HEX_IE_GOT_32         =  59
+  R_HEX_IE_GOT_16         =  60
+  R_HEX_TPREL_LO16        =  61
+  R_HEX_TPREL_HI16        =  62
+  R_HEX_TPREL_32          =  63
+  R_HEX_TPREL_16          =  64
+  R_HEX_6_PCREL_X         =  65
+  R_HEX_GOTREL_32_6_X     =  66
+  R_HEX_GOTREL_16_X       =  67
+  R_HEX_GOTREL_11_X       =  68
+  R_HEX_GOT_32_6_X        =  69
+  R_HEX_GOT_16_X          =  70
+  R_HEX_GOT_11_X          =  71
+  R_HEX_DTPREL_32_6_X     =  72
+  R_HEX_DTPREL_16_X       =  73
+  R_HEX_DTPREL_11_X       =  74
+  R_HEX_GD_GOT_32_6_X     =  75
+  R_HEX_GD_GOT_16_X       =  76
+  R_HEX_GD_GOT_11_X       =  77
+  R_HEX_IE_32_6_X         =  78
+  R_HEX_IE_16_X           =  79
+  R_HEX_IE_GOT_32_6_X     =  80
+  R_HEX_IE_GOT_16_X       =  81
+  R_HEX_IE_GOT_11_X       =  82
+  R_HEX_TPREL_32_6_X      =  83
+  R_HEX_TPREL_16_X        =  84
+  R_HEX_TPREL_11_X        =  85
+
+
+class Relocs_Coff_i386(Enum):  # COFF i386 relocation types; entries() feeds craftCoff (patchCoff writes each value as a uint16)
+  IMAGE_REL_I386_ABSOLUTE = 0x0000
+  IMAGE_REL_I386_DIR16    = 0x0001
+  IMAGE_REL_I386_REL16    = 0x0002
+  IMAGE_REL_I386_DIR32    = 0x0006
+  IMAGE_REL_I386_DIR32NB  = 0x0007
+  IMAGE_REL_I386_SEG12    = 0x0009
+  IMAGE_REL_I386_SECTION  = 0x000A
+  IMAGE_REL_I386_SECREL   = 0x000B
+  IMAGE_REL_I386_TOKEN    = 0x000C
+  IMAGE_REL_I386_SECREL7  = 0x000D
+  IMAGE_REL_I386_REL32    = 0x0014
+
+class Relocs_Coff_X86_64(Enum):  # COFF x86-64 relocation types; entries() feeds craftCoff (patchCoff writes each value as a uint16)
+  IMAGE_REL_AMD64_ABSOLUTE  = 0x0000
+  IMAGE_REL_AMD64_ADDR64    = 0x0001
+  IMAGE_REL_AMD64_ADDR32    = 0x0002
+  IMAGE_REL_AMD64_ADDR32NB  = 0x0003
+  IMAGE_REL_AMD64_REL32     = 0x0004
+  IMAGE_REL_AMD64_REL32_1   = 0x0005
+  IMAGE_REL_AMD64_REL32_2   = 0x0006
+  IMAGE_REL_AMD64_REL32_3   = 0x0007
+  IMAGE_REL_AMD64_REL32_4   = 0x0008
+  IMAGE_REL_AMD64_REL32_5   = 0x0009
+  IMAGE_REL_AMD64_SECTION   = 0x000A
+  IMAGE_REL_AMD64_SECREL    = 0x000B
+  IMAGE_REL_AMD64_SECREL7   = 0x000C
+  IMAGE_REL_AMD64_TOKEN     = 0x000D
+  IMAGE_REL_AMD64_SREL32    = 0x000E
+  IMAGE_REL_AMD64_PAIR      = 0x000F
+  IMAGE_REL_AMD64_SSPAN32   = 0x0010
+
+class Relocs_Coff_ARM(Enum):  # COFF ARM relocation types; its craftCoff call at the bottom of the script is commented out
+  IMAGE_REL_ARM_ABSOLUTE  = 0x0000
+  IMAGE_REL_ARM_ADDR32    = 0x0001
+  IMAGE_REL_ARM_ADDR32NB  = 0x0002
+  IMAGE_REL_ARM_BRANCH24  = 0x0003
+  IMAGE_REL_ARM_BRANCH11  = 0x0004
+  IMAGE_REL_ARM_TOKEN     = 0x0005
+  IMAGE_REL_ARM_BLX24     = 0x0008
+  IMAGE_REL_ARM_BLX11     = 0x0009
+  IMAGE_REL_ARM_SECTION   = 0x000E
+  IMAGE_REL_ARM_SECREL    = 0x000F
+  IMAGE_REL_ARM_MOV32A    = 0x0010
+  IMAGE_REL_ARM_MOV32T    = 0x0011
+  IMAGE_REL_ARM_BRANCH20T = 0x0012
+  IMAGE_REL_ARM_BRANCH24T = 0x0014
+  IMAGE_REL_ARM_BLX23T    = 0x0015
+
+
+class Relocs_Macho_i386(Enum):  # Mach-O i386 relocation types; patchMachoSection keeps only the low 4 bits of each value
+  RIT_Vanilla                     = 0
+  RIT_Pair                        = 1
+  RIT_Difference                  = 2
+  RIT_Generic_PreboundLazyPointer = 3
+  RIT_Generic_LocalDifference     = 4
+  RIT_Generic_TLV                 = 5
+
+class Relocs_Macho_X86_64(Enum):  # Mach-O x86-64 relocation types; patchMachoSection keeps only the low 4 bits of each value
+  RIT_X86_64_Unsigned   = 0
+  RIT_X86_64_Signed     = 1
+  RIT_X86_64_Branch     = 2
+  RIT_X86_64_GOTLoad    = 3
+  RIT_X86_64_GOT        = 4
+  RIT_X86_64_Subtractor = 5
+  RIT_X86_64_Signed1    = 6
+  RIT_X86_64_Signed2    = 7
+  RIT_X86_64_Signed4    = 8
+  RIT_X86_64_TLV        = 9
+
+class Relocs_Macho_ARM(Enum):  # Mach-O ARM relocation types; patchMachoSection keeps only the low 4 bits of each value
+  RIT_Vanilla                     = 0
+  RIT_Pair                        = 1
+  RIT_Difference                  = 2
+  RIT_ARM_LocalDifference         = 3
+  RIT_ARM_PreboundLazyPointer     = 4
+  RIT_ARM_Branch24Bit             = 5
+  RIT_ARM_ThumbBranch22Bit        = 6
+  RIT_ARM_ThumbBranch32Bit        = 7
+  RIT_ARM_Half                    = 8
+  RIT_ARM_HalfDifference          = 9
+
+class Relocs_Macho_PPC(Enum):  # Mach-O PowerPC relocation types; its craftMacho call at the bottom of the script is commented out
+  PPC_RELOC_VANILLA        = 0
+  PPC_RELOC_PAIR           = 1
+  PPC_RELOC_BR14           = 2
+  PPC_RELOC_BR24           = 3
+  PPC_RELOC_HI16           = 4
+  PPC_RELOC_LO16           = 5
+  PPC_RELOC_HA16           = 6
+  PPC_RELOC_LO14           = 7
+  PPC_RELOC_SECTDIFF       = 8
+  PPC_RELOC_PB_LA_PTR      = 9
+  PPC_RELOC_HI16_SECTDIFF  = 10
+  PPC_RELOC_LO16_SECTDIFF  = 11
+  PPC_RELOC_HA16_SECTDIFF  = 12
+  PPC_RELOC_JBSR           = 13
+  PPC_RELOC_LO14_SECTDIFF  = 14
+  PPC_RELOC_LOCAL_SECTDIFF = 15
+
+
+craftElf("relocs.obj.elf-x86_64",   "x86_64-pc-linux-gnu",         Relocs_Elf_X86_64.entries(), "leaq sym@GOTTPOFF(%rip), %rax")
+craftElf("relocs.obj.elf-i386",     "i386-pc-linux-gnu",           Relocs_Elf_i386.entries(),   "mov sym@GOTOFF(%ebx), %eax")
+#craftElf("relocs-elf-ppc32",   "powerpc-unknown-linux-gnu",   Relocs_Elf_PPC32.entries(), ...)
+craftElf("relocs.obj.elf-ppc64",   "powerpc64-unknown-linux-gnu", Relocs_Elf_PPC64.entries(),
+         ("@t = thread_local global i32 0, align 4", "define i32* @f{0}() nounwind {{ ret i32* @t }}", 2))  # presumably (preSrc, template, relocsPerDummy) -- confirm against craftElf
+craftElf("relocs.obj.elf-aarch64",  "aarch64",                     Relocs_Elf_AArch64.entries(), "movz x0, #:abs_g0:sym")
+craftElf("relocs.obj.elf-arm",      "arm-unknown-unknown",         Relocs_Elf_ARM.entries(), "b sym")
+craftElf("relocs.obj.elf-mips",     "mips-unknown-linux",          Relocs_Elf_Mips.entries(), "lui $2, %hi(sym)")
+craftElf("relocs.obj.elf-mips64el", "mips64el-unknown-linux",        Relocs_Elf_Mips.entries(), "lui $2, %hi(sym)")
+#craftElf("relocs.obj.elf-mblaze",   "mblaze-unknown-unknown",      Relocs_Elf_MBlaze.entries(), ...)
+#craftElf("relocs.obj.elf-hexagon",  "hexagon-unknown-unknown",     Relocs_Elf_Hexagon.entries(), ...)
+
+craftCoff("relocs.obj.coff-i386",   "i386-pc-win32",   Relocs_Coff_i386.entries(),   "mov foo@imgrel(%ebx, %ecx, 4), %eax")
+craftCoff("relocs.obj.coff-x86_64", "x86_64-pc-win32", Relocs_Coff_X86_64.entries(), "mov foo@imgrel(%ebx, %ecx, 4), %eax")
+#craftCoff("relocs.obj.coff-arm",    "arm-pc-win32",    Relocs_Coff_ARM.entries(), "...")
+
+craftMacho("relocs.obj.macho-i386",   "i386-apple-darwin9", Relocs_Macho_i386.entries(),
+          ("asm", ".subsections_via_symbols; .text; a: ; b:", "call a", 1))  # (srcType, preSrc, template, relocsPerDummy)
+craftMacho("relocs.obj.macho-x86_64", "x86_64-apple-darwin9", Relocs_Macho_X86_64.entries(),
+          ("asm", ".subsections_via_symbols; .text; a: ; b:", "call a", 1))  # (srcType, preSrc, template, relocsPerDummy)
+craftMacho("relocs.obj.macho-arm",    "armv7-apple-darwin10", Relocs_Macho_ARM.entries(), "bl sym")  # plain string: assembled once per relocation
+#craftMacho("relocs.obj.macho-ppc",   "powerpc-apple-darwin10", Relocs_Macho_PPC.entries(), ...)
diff --git a/test/tools/llvm-readobj/Inputs/trivial.obj.macho-arm b/test/tools/llvm-readobj/Inputs/trivial.obj.macho-arm
new file mode 100644
index 0000000..117df9e
--- /dev/null
+++ b/test/tools/llvm-readobj/Inputs/trivial.obj.macho-arm
diff --git a/test/tools/llvm-readobj/Inputs/trivial.obj.macho-ppc b/test/tools/llvm-readobj/Inputs/trivial.obj.macho-ppc
new file mode 100644
index 0000000..dd2e956
--- /dev/null
+++ b/test/tools/llvm-readobj/Inputs/trivial.obj.macho-ppc
diff --git a/test/tools/llvm-readobj/Inputs/trivial.obj.macho-ppc64 b/test/tools/llvm-readobj/Inputs/trivial.obj.macho-ppc64
new file mode 100644
index 0000000..20ec8ef
--- /dev/null
+++ b/test/tools/llvm-readobj/Inputs/trivial.obj.macho-ppc64
diff --git a/test/tools/llvm-readobj/program-headers.test b/test/tools/llvm-readobj/program-headers.test
new file mode 100644
index 0000000..2a574bb
--- /dev/null
+++ b/test/tools/llvm-readobj/program-headers.test
@@ -0,0 +1,74 @@
+RUN: llvm-readobj -program-headers %p/../../Object/Inputs/program-headers.elf-i386 \
+RUN:     | FileCheck %s -check-prefix ELF-I386
+RUN: llvm-readobj -program-headers %p/../../Object/Inputs/program-headers.elf-x86-64 \
+RUN:     | FileCheck %s -check-prefix ELF-X86-64
+
+ELF-I386:      ProgramHeaders [
+ELF-I386-NEXT:   ProgramHeader {
+ELF-I386-NEXT:     Type: PT_LOAD (0x1)
+ELF-I386-NEXT:     Offset: 0x0
+ELF-I386-NEXT:     VirtualAddress: 0x8048000
+ELF-I386-NEXT:     PhysicalAddress: 0x8048000
+ELF-I386-NEXT:     FileSize: 308
+ELF-I386-NEXT:     MemSize: 308
+ELF-I386-NEXT:     Flags [ (0x5)
+ELF-I386-NEXT:       PF_R (0x4)
+ELF-I386-NEXT:       PF_X (0x1)
+ELF-I386-NEXT:     ]
+ELF-I386-NEXT:     Alignment: 4096
+ELF-I386-NEXT:   }
+ELF-I386-NEXT:   ProgramHeader {
+ELF-I386-NEXT:     Type: PT_GNU_STACK (0x6474E551)
+ELF-I386-NEXT:     Offset: 0x0
+ELF-I386-NEXT:     VirtualAddress: 0x0
+ELF-I386-NEXT:     PhysicalAddress: 0x0
+ELF-I386-NEXT:     FileSize: 0
+ELF-I386-NEXT:     MemSize: 0
+ELF-I386-NEXT:     Flags [ (0x6)
+ELF-I386-NEXT:       PF_R (0x4)
+ELF-I386-NEXT:       PF_W (0x2)
+ELF-I386-NEXT:     ]
+ELF-I386-NEXT:     Alignment: 4
+ELF-I386-NEXT:   }
+ELF-I386-NEXT: ]
+
+ELF-X86-64:      ProgramHeaders [
+ELF-X86-64-NEXT:   ProgramHeader {
+ELF-X86-64-NEXT:     Type: PT_LOAD (0x1)
+ELF-X86-64-NEXT:     Offset: 0x0
+ELF-X86-64-NEXT:     VirtualAddress: 0x400000
+ELF-X86-64-NEXT:     PhysicalAddress: 0x400000
+ELF-X86-64-NEXT:     FileSize: 312
+ELF-X86-64-NEXT:     MemSize: 312
+ELF-X86-64-NEXT:     Flags [ (0x5)
+ELF-X86-64-NEXT:       PF_R (0x4)
+ELF-X86-64-NEXT:       PF_X (0x1)
+ELF-X86-64-NEXT:     ]
+ELF-X86-64-NEXT:     Alignment: 2097152
+ELF-X86-64-NEXT:   }
+ELF-X86-64-NEXT:   ProgramHeader {
+ELF-X86-64-NEXT:     Type: PT_GNU_EH_FRAME (0x6474E550)
+ELF-X86-64-NEXT:     Offset: 0xF4
+ELF-X86-64-NEXT:     VirtualAddress: 0x4000F4
+ELF-X86-64-NEXT:     PhysicalAddress: 0x4000F4
+ELF-X86-64-NEXT:     FileSize: 20
+ELF-X86-64-NEXT:     MemSize: 20
+ELF-X86-64-NEXT:     Flags [ (0x4)
+ELF-X86-64-NEXT:       PF_R (0x4)
+ELF-X86-64-NEXT:     ]
+ELF-X86-64-NEXT:     Alignment: 4
+ELF-X86-64-NEXT:   }
+ELF-X86-64-NEXT:   ProgramHeader {
+ELF-X86-64-NEXT:     Type: PT_GNU_STACK (0x6474E551)
+ELF-X86-64-NEXT:     Offset: 0x0
+ELF-X86-64-NEXT:     VirtualAddress: 0x0
+ELF-X86-64-NEXT:     PhysicalAddress: 0x0
+ELF-X86-64-NEXT:     FileSize: 0
+ELF-X86-64-NEXT:     MemSize: 0
+ELF-X86-64-NEXT:     Flags [ (0x6)
+ELF-X86-64-NEXT:       PF_R (0x4)
+ELF-X86-64-NEXT:       PF_W (0x2)
+ELF-X86-64-NEXT:     ]
+ELF-X86-64-NEXT:     Alignment: 8
+ELF-X86-64-NEXT:   }
+ELF-X86-64-NEXT: ]
diff --git a/test/tools/llvm-readobj/reloc-types.test b/test/tools/llvm-readobj/reloc-types.test
new file mode 100644
index 0000000..08603bc
--- /dev/null
+++ b/test/tools/llvm-readobj/reloc-types.test
@@ -0,0 +1,663 @@
+// Test that libObject and subsequently llvm-readobj show proper relocation type
+// names and values.
+
+// Todo: add RUN lines for ELF-PPC, ELF-MBLAZE, ELF-HEXAGON, COFF-ARM, MACHO-PPC
+
+RUN: llvm-readobj -r -expand-relocs %p/Inputs/relocs.obj.elf-i386     | FileCheck %s -check-prefix ELF-32
+RUN: llvm-readobj -r -expand-relocs %p/Inputs/relocs.obj.elf-x86_64   | FileCheck %s -check-prefix ELF-64
+RUN: llvm-readobj -r -expand-relocs %p/Inputs/relocs.obj.elf-aarch64  | FileCheck %s -check-prefix ELF-AARCH64
+RUN: llvm-readobj -r -expand-relocs %p/Inputs/relocs.obj.elf-arm      | FileCheck %s -check-prefix ELF-ARM
+RUN: llvm-readobj -r -expand-relocs %p/Inputs/relocs.obj.elf-mips     | FileCheck %s -check-prefix ELF-MIPS
+RUN: llvm-readobj -r -expand-relocs %p/Inputs/relocs.obj.elf-mips64el | FileCheck %s -check-prefix ELF-MIPS64EL
+RUN: llvm-readobj -r -expand-relocs %p/Inputs/relocs.obj.elf-ppc64    | FileCheck %s -check-prefix ELF-PPC64
+RUN: llvm-readobj -r -expand-relocs %p/Inputs/relocs.obj.coff-i386    | FileCheck %s -check-prefix COFF-32
+RUN: llvm-readobj -r -expand-relocs %p/Inputs/relocs.obj.coff-x86_64  | FileCheck %s -check-prefix COFF-64
+RUN: llvm-readobj -r -expand-relocs %p/Inputs/relocs.obj.macho-arm    | FileCheck %s -check-prefix MACHO-ARM
+RUN: llvm-readobj -r -expand-relocs %p/Inputs/relocs.obj.macho-i386   | FileCheck %s -check-prefix MACHO-32
+RUN: llvm-readobj -r -expand-relocs %p/Inputs/relocs.obj.macho-x86_64 | FileCheck %s -check-prefix MACHO-64
+
+
+ELF-32: Type: R_386_NONE (0)
+ELF-32: Type: R_386_32 (1)
+ELF-32: Type: R_386_PC32 (2)
+ELF-32: Type: R_386_GOT32 (3)
+ELF-32: Type: R_386_PLT32 (4)
+ELF-32: Type: R_386_COPY (5)
+ELF-32: Type: R_386_GLOB_DAT (6)
+ELF-32: Type: R_386_JUMP_SLOT (7)
+ELF-32: Type: R_386_RELATIVE (8)
+ELF-32: Type: R_386_GOTOFF (9)
+ELF-32: Type: R_386_GOTPC (10)
+ELF-32: Type: R_386_32PLT (11)
+ELF-32: Type: R_386_TLS_TPOFF (14)
+ELF-32: Type: R_386_TLS_IE (15)
+ELF-32: Type: R_386_TLS_GOTIE (16)
+ELF-32: Type: R_386_TLS_LE (17)
+ELF-32: Type: R_386_TLS_GD (18)
+ELF-32: Type: R_386_TLS_LDM (19)
+ELF-32: Type: R_386_16 (20)
+ELF-32: Type: R_386_PC16 (21)
+ELF-32: Type: R_386_8 (22)
+ELF-32: Type: R_386_PC8 (23)
+ELF-32: Type: R_386_TLS_GD_32 (24)
+ELF-32: Type: R_386_TLS_GD_PUSH (25)
+ELF-32: Type: R_386_TLS_GD_CALL (26)
+ELF-32: Type: R_386_TLS_GD_POP (27)
+ELF-32: Type: R_386_TLS_LDM_32 (28)
+ELF-32: Type: R_386_TLS_LDM_PUSH (29)
+ELF-32: Type: R_386_TLS_LDM_CALL (30)
+ELF-32: Type: R_386_TLS_LDM_POP (31)
+ELF-32: Type: R_386_TLS_LDO_32 (32)
+ELF-32: Type: R_386_TLS_IE_32 (33)
+ELF-32: Type: R_386_TLS_LE_32 (34)
+ELF-32: Type: R_386_TLS_DTPMOD32 (35)
+ELF-32: Type: R_386_TLS_DTPOFF32 (36)
+ELF-32: Type: R_386_TLS_TPOFF32 (37)
+ELF-32: Type: R_386_TLS_GOTDESC (39)
+ELF-32: Type: R_386_TLS_DESC_CALL (40)
+ELF-32: Type: R_386_TLS_DESC (41)
+ELF-32: Type: R_386_IRELATIVE (42)
+ELF-32: Type: R_386_NUM (43)
+
+ELF-64: Type: R_X86_64_NONE (0)
+ELF-64: Type: R_X86_64_64 (1)
+ELF-64: Type: R_X86_64_PC32 (2)
+ELF-64: Type: R_X86_64_GOT32 (3)
+ELF-64: Type: R_X86_64_PLT32 (4)
+ELF-64: Type: R_X86_64_COPY (5)
+ELF-64: Type: R_X86_64_GLOB_DAT (6)
+ELF-64: Type: R_X86_64_JUMP_SLOT (7)
+ELF-64: Type: R_X86_64_RELATIVE (8)
+ELF-64: Type: R_X86_64_GOTPCREL (9)
+ELF-64: Type: R_X86_64_32 (10)
+ELF-64: Type: R_X86_64_32S (11)
+ELF-64: Type: R_X86_64_16 (12)
+ELF-64: Type: R_X86_64_PC16 (13)
+ELF-64: Type: R_X86_64_8 (14)
+ELF-64: Type: R_X86_64_PC8 (15)
+ELF-64: Type: R_X86_64_DTPMOD64 (16)
+ELF-64: Type: R_X86_64_DTPOFF64 (17)
+ELF-64: Type: R_X86_64_TPOFF64 (18)
+ELF-64: Type: R_X86_64_TLSGD (19)
+ELF-64: Type: R_X86_64_TLSLD (20)
+ELF-64: Type: R_X86_64_DTPOFF32 (21)
+ELF-64: Type: R_X86_64_GOTTPOFF (22)
+ELF-64: Type: R_X86_64_TPOFF32 (23)
+ELF-64: Type: R_X86_64_PC64 (24)
+ELF-64: Type: R_X86_64_GOTOFF64 (25)
+ELF-64: Type: R_X86_64_GOTPC32 (26)
+ELF-64: Type: R_X86_64_GOT64 (27)
+ELF-64: Type: R_X86_64_GOTPCREL64 (28)
+ELF-64: Type: R_X86_64_GOTPC64 (29)
+ELF-64: Type: R_X86_64_GOTPLT64 (30)
+ELF-64: Type: R_X86_64_PLTOFF64 (31)
+ELF-64: Type: R_X86_64_SIZE32 (32)
+ELF-64: Type: R_X86_64_SIZE64 (33)
+ELF-64: Type: R_X86_64_GOTPC32_TLSDESC (34)
+ELF-64: Type: R_X86_64_TLSDESC_CALL (35)
+ELF-64: Type: R_X86_64_TLSDESC (36)
+ELF-64: Type: R_X86_64_IRELATIVE (37)
+
+ELF-PPC: Type: R_PPC_NONE (0)
+ELF-PPC: Type: R_PPC_ADDR32 (1)
+ELF-PPC: Type: R_PPC_ADDR24 (2)
+ELF-PPC: Type: R_PPC_ADDR16 (3)
+ELF-PPC: Type: R_PPC_ADDR16_LO (4)
+ELF-PPC: Type: R_PPC_ADDR16_HI (5)
+ELF-PPC: Type: R_PPC_ADDR16_HA (6)
+ELF-PPC: Type: R_PPC_ADDR14 (7)
+ELF-PPC: Type: R_PPC_ADDR14_BRTAKEN (8)
+ELF-PPC: Type: R_PPC_ADDR14_BRNTAKEN (9)
+ELF-PPC: Type: R_PPC_REL24 (10)
+ELF-PPC: Type: R_PPC_REL14 (11)
+ELF-PPC: Type: R_PPC_REL14_BRTAKEN (12)
+ELF-PPC: Type: R_PPC_REL14_BRNTAKEN (13)
+ELF-PPC: Type: R_PPC_REL32 (26)
+ELF-PPC: Type: R_PPC_TPREL16_LO (70)
+ELF-PPC: Type: R_PPC_TPREL16_HA (72)
+
+ELF-PPC64: Type: R_PPC64_NONE (0)
+ELF-PPC64: Type: R_PPC64_ADDR32 (1)
+ELF-PPC64: Type: R_PPC64_ADDR16_LO (4)
+ELF-PPC64: Type: R_PPC64_ADDR16_HI (5)
+ELF-PPC64: Type: R_PPC64_ADDR14 (7)
+ELF-PPC64: Type: R_PPC64_REL24 (10)
+ELF-PPC64: Type: R_PPC64_REL32 (26)
+ELF-PPC64: Type: R_PPC64_ADDR64 (38)
+ELF-PPC64: Type: R_PPC64_ADDR16_HIGHER (39)
+ELF-PPC64: Type: R_PPC64_ADDR16_HIGHEST (41)
+ELF-PPC64: Type: R_PPC64_REL64 (44)
+ELF-PPC64: Type: R_PPC64_TOC16 (47)
+ELF-PPC64: Type: R_PPC64_TOC16_LO (48)
+ELF-PPC64: Type: R_PPC64_TOC16_HA (50)
+ELF-PPC64: Type: R_PPC64_TOC (51)
+ELF-PPC64: Type: R_PPC64_ADDR16_DS (56)
+ELF-PPC64: Type: R_PPC64_ADDR16_LO_DS (57)
+ELF-PPC64: Type: R_PPC64_TOC16_DS (63)
+ELF-PPC64: Type: R_PPC64_TOC16_LO_DS (64)
+ELF-PPC64: Type: R_PPC64_TLS (67)
+ELF-PPC64: Type: R_PPC64_TPREL16_LO (70)
+ELF-PPC64: Type: R_PPC64_TPREL16_HA (72)
+ELF-PPC64: Type: R_PPC64_DTPREL16_LO (75)
+ELF-PPC64: Type: R_PPC64_DTPREL16_HA (77)
+ELF-PPC64: Type: R_PPC64_GOT_TLSGD16_LO (80)
+ELF-PPC64: Type: R_PPC64_GOT_TLSGD16_HA (82)
+ELF-PPC64: Type: R_PPC64_GOT_TLSLD16_LO (84)
+ELF-PPC64: Type: R_PPC64_GOT_TLSLD16_HA (86)
+ELF-PPC64: Type: R_PPC64_GOT_TPREL16_LO_DS (88)
+ELF-PPC64: Type: R_PPC64_GOT_TPREL16_HA (90)
+ELF-PPC64: Type: R_PPC64_TLSGD (107)
+ELF-PPC64: Type: R_PPC64_TLSLD (108)
+
+ELF-AARCH64: Type: R_AARCH64_NONE (256)
+ELF-AARCH64: Type: R_AARCH64_ABS64 (257)
+ELF-AARCH64: Type: R_AARCH64_ABS32 (258)
+ELF-AARCH64: Type: R_AARCH64_ABS16 (259)
+ELF-AARCH64: Type: R_AARCH64_PREL64 (260)
+ELF-AARCH64: Type: R_AARCH64_PREL32 (261)
+ELF-AARCH64: Type: R_AARCH64_PREL16 (262)
+ELF-AARCH64: Type: R_AARCH64_MOVW_UABS_G0 (263)
+ELF-AARCH64: Type: R_AARCH64_MOVW_UABS_G0_NC (264)
+ELF-AARCH64: Type: R_AARCH64_MOVW_UABS_G1 (265)
+ELF-AARCH64: Type: R_AARCH64_MOVW_UABS_G1_NC (266)
+ELF-AARCH64: Type: R_AARCH64_MOVW_UABS_G2 (267)
+ELF-AARCH64: Type: R_AARCH64_MOVW_UABS_G2_NC (268)
+ELF-AARCH64: Type: R_AARCH64_MOVW_UABS_G3 (269)
+ELF-AARCH64: Type: R_AARCH64_MOVW_SABS_G0 (270)
+ELF-AARCH64: Type: R_AARCH64_MOVW_SABS_G1 (271)
+ELF-AARCH64: Type: R_AARCH64_MOVW_SABS_G2 (272)
+ELF-AARCH64: Type: R_AARCH64_LD_PREL_LO19 (273)
+ELF-AARCH64: Type: R_AARCH64_ADR_PREL_LO21 (274)
+ELF-AARCH64: Type: R_AARCH64_ADR_PREL_PG_HI21 (275)
+ELF-AARCH64: Type: R_AARCH64_ADD_ABS_LO12_NC (277)
+ELF-AARCH64: Type: R_AARCH64_LDST8_ABS_LO12_NC (278)
+ELF-AARCH64: Type: R_AARCH64_TSTBR14 (279)
+ELF-AARCH64: Type: R_AARCH64_CONDBR19 (280)
+ELF-AARCH64: Type: R_AARCH64_JUMP26 (282)
+ELF-AARCH64: Type: R_AARCH64_CALL26 (283)
+ELF-AARCH64: Type: R_AARCH64_LDST16_ABS_LO12_NC (284)
+ELF-AARCH64: Type: R_AARCH64_LDST32_ABS_LO12_NC (285)
+ELF-AARCH64: Type: R_AARCH64_LDST64_ABS_LO12_NC (286)
+ELF-AARCH64: Type: R_AARCH64_LDST128_ABS_LO12_NC (299)
+ELF-AARCH64: Type: R_AARCH64_ADR_GOT_PAGE (311)
+ELF-AARCH64: Type: R_AARCH64_LD64_GOT_LO12_NC (312)
+ELF-AARCH64: Type: R_AARCH64_TLSLD_MOVW_DTPREL_G2 (523)
+ELF-AARCH64: Type: R_AARCH64_TLSLD_MOVW_DTPREL_G1 (524)
+ELF-AARCH64: Type: R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC (525)
+ELF-AARCH64: Type: R_AARCH64_TLSLD_MOVW_DTPREL_G0 (526)
+ELF-AARCH64: Type: R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC (527)
+ELF-AARCH64: Type: R_AARCH64_TLSLD_ADD_DTPREL_HI12 (528)
+ELF-AARCH64: Type: R_AARCH64_TLSLD_ADD_DTPREL_LO12 (529)
+ELF-AARCH64: Type: R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC (530)
+ELF-AARCH64: Type: R_AARCH64_TLSLD_LDST8_DTPREL_LO12 (531)
+ELF-AARCH64: Type: R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC (532)
+ELF-AARCH64: Type: R_AARCH64_TLSLD_LDST16_DTPREL_LO12 (533)
+ELF-AARCH64: Type: R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC (534)
+ELF-AARCH64: Type: R_AARCH64_TLSLD_LDST32_DTPREL_LO12 (535)
+ELF-AARCH64: Type: R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC (536)
+ELF-AARCH64: Type: R_AARCH64_TLSLD_LDST64_DTPREL_LO12 (537)
+ELF-AARCH64: Type: R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC (538)
+ELF-AARCH64: Type: R_AARCH64_TLSIE_MOVW_GOTTPREL_G1 (539)
+ELF-AARCH64: Type: R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC (540)
+ELF-AARCH64: Type: R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 (541)
+ELF-AARCH64: Type: R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC (542)
+ELF-AARCH64: Type: R_AARCH64_TLSIE_LD_GOTTPREL_PREL19 (543)
+ELF-AARCH64: Type: R_AARCH64_TLSLE_MOVW_TPREL_G2 (544)
+ELF-AARCH64: Type: R_AARCH64_TLSLE_MOVW_TPREL_G1 (545)
+ELF-AARCH64: Type: R_AARCH64_TLSLE_MOVW_TPREL_G1_NC (546)
+ELF-AARCH64: Type: R_AARCH64_TLSLE_MOVW_TPREL_G0 (547)
+ELF-AARCH64: Type: R_AARCH64_TLSLE_MOVW_TPREL_G0_NC (548)
+ELF-AARCH64: Type: R_AARCH64_TLSLE_ADD_TPREL_HI12 (549)
+ELF-AARCH64: Type: R_AARCH64_TLSLE_ADD_TPREL_LO12 (550)
+ELF-AARCH64: Type: R_AARCH64_TLSLE_ADD_TPREL_LO12_NC (551)
+ELF-AARCH64: Type: R_AARCH64_TLSLE_LDST8_TPREL_LO12 (552)
+ELF-AARCH64: Type: R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC (553)
+ELF-AARCH64: Type: R_AARCH64_TLSLE_LDST16_TPREL_LO12 (554)
+ELF-AARCH64: Type: R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC (555)
+ELF-AARCH64: Type: R_AARCH64_TLSLE_LDST32_TPREL_LO12 (556)
+ELF-AARCH64: Type: R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC (557)
+ELF-AARCH64: Type: R_AARCH64_TLSLE_LDST64_TPREL_LO12 (558)
+ELF-AARCH64: Type: R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC (559)
+ELF-AARCH64: Type: R_AARCH64_TLSDESC_ADR_PAGE (562)
+ELF-AARCH64: Type: R_AARCH64_TLSDESC_LD64_LO12_NC (563)
+ELF-AARCH64: Type: R_AARCH64_TLSDESC_ADD_LO12_NC (564)
+ELF-AARCH64: Type: R_AARCH64_TLSDESC_CALL (569)
+
+ELF-ARM: Type: R_ARM_NONE (0)
+ELF-ARM: Type: R_ARM_PC24 (1)
+ELF-ARM: Type: R_ARM_ABS32 (2)
+ELF-ARM: Type: R_ARM_REL32 (3)
+ELF-ARM: Type: R_ARM_LDR_PC_G0 (4)
+ELF-ARM: Type: R_ARM_ABS16 (5)
+ELF-ARM: Type: R_ARM_ABS12 (6)
+ELF-ARM: Type: R_ARM_THM_ABS5 (7)
+ELF-ARM: Type: R_ARM_ABS8 (8)
+ELF-ARM: Type: R_ARM_SBREL32 (9)
+ELF-ARM: Type: R_ARM_THM_CALL (10)
+ELF-ARM: Type: R_ARM_THM_PC8 (11)
+ELF-ARM: Type: R_ARM_BREL_ADJ (12)
+ELF-ARM: Type: R_ARM_TLS_DESC (13)
+ELF-ARM: Type: R_ARM_THM_SWI8 (14)
+ELF-ARM: Type: R_ARM_XPC25 (15)
+ELF-ARM: Type: R_ARM_THM_XPC22 (16)
+ELF-ARM: Type: R_ARM_TLS_DTPMOD32 (17)
+ELF-ARM: Type: R_ARM_TLS_DTPOFF32 (18)
+ELF-ARM: Type: R_ARM_TLS_TPOFF32 (19)
+ELF-ARM: Type: R_ARM_COPY (20)
+ELF-ARM: Type: R_ARM_GLOB_DAT (21)
+ELF-ARM: Type: R_ARM_JUMP_SLOT (22)
+ELF-ARM: Type: R_ARM_RELATIVE (23)
+ELF-ARM: Type: R_ARM_GOTOFF32 (24)
+ELF-ARM: Type: R_ARM_BASE_PREL (25)
+ELF-ARM: Type: R_ARM_GOT_BREL (26)
+ELF-ARM: Type: R_ARM_PLT32 (27)
+ELF-ARM: Type: R_ARM_CALL (28)
+ELF-ARM: Type: R_ARM_JUMP24 (29)
+ELF-ARM: Type: R_ARM_THM_JUMP24 (30)
+ELF-ARM: Type: R_ARM_BASE_ABS (31)
+ELF-ARM: Type: R_ARM_ALU_PCREL_7_0 (32)
+ELF-ARM: Type: R_ARM_ALU_PCREL_15_8 (33)
+ELF-ARM: Type: R_ARM_ALU_PCREL_23_15 (34)
+ELF-ARM: Type: R_ARM_LDR_SBREL_11_0_NC (35)
+ELF-ARM: Type: R_ARM_ALU_SBREL_19_12_NC (36)
+ELF-ARM: Type: R_ARM_ALU_SBREL_27_20_CK (37)
+ELF-ARM: Type: R_ARM_TARGET1 (38)
+ELF-ARM: Type: R_ARM_SBREL31 (39)
+ELF-ARM: Type: R_ARM_V4BX (40)
+ELF-ARM: Type: R_ARM_TARGET2 (41)
+ELF-ARM: Type: R_ARM_PREL31 (42)
+ELF-ARM: Type: R_ARM_MOVW_ABS_NC (43)
+ELF-ARM: Type: R_ARM_MOVT_ABS (44)
+ELF-ARM: Type: R_ARM_MOVW_PREL_NC (45)
+ELF-ARM: Type: R_ARM_MOVT_PREL (46)
+ELF-ARM: Type: R_ARM_THM_MOVW_ABS_NC (47)
+ELF-ARM: Type: R_ARM_THM_MOVT_ABS (48)
+ELF-ARM: Type: R_ARM_THM_MOVW_PREL_NC (49)
+ELF-ARM: Type: R_ARM_THM_MOVT_PREL (50)
+ELF-ARM: Type: R_ARM_THM_JUMP19 (51)
+ELF-ARM: Type: R_ARM_THM_JUMP6 (52)
+ELF-ARM: Type: R_ARM_THM_ALU_PREL_11_0 (53)
+ELF-ARM: Type: R_ARM_THM_PC12 (54)
+ELF-ARM: Type: R_ARM_ABS32_NOI (55)
+ELF-ARM: Type: R_ARM_REL32_NOI (56)
+ELF-ARM: Type: R_ARM_ALU_PC_G0_NC (57)
+ELF-ARM: Type: R_ARM_ALU_PC_G0 (58)
+ELF-ARM: Type: R_ARM_ALU_PC_G1_NC (59)
+ELF-ARM: Type: R_ARM_ALU_PC_G1 (60)
+ELF-ARM: Type: R_ARM_ALU_PC_G2 (61)
+ELF-ARM: Type: R_ARM_LDR_PC_G1 (62)
+ELF-ARM: Type: R_ARM_LDR_PC_G2 (63)
+ELF-ARM: Type: R_ARM_LDRS_PC_G0 (64)
+ELF-ARM: Type: R_ARM_LDRS_PC_G1 (65)
+ELF-ARM: Type: R_ARM_LDRS_PC_G2 (66)
+ELF-ARM: Type: R_ARM_LDC_PC_G0 (67)
+ELF-ARM: Type: R_ARM_LDC_PC_G1 (68)
+ELF-ARM: Type: R_ARM_LDC_PC_G2 (69)
+ELF-ARM: Type: R_ARM_ALU_SB_G0_NC (70)
+ELF-ARM: Type: R_ARM_ALU_SB_G0 (71)
+ELF-ARM: Type: R_ARM_ALU_SB_G1_NC (72)
+ELF-ARM: Type: R_ARM_ALU_SB_G1 (73)
+ELF-ARM: Type: R_ARM_ALU_SB_G2 (74)
+ELF-ARM: Type: R_ARM_LDR_SB_G0 (75)
+ELF-ARM: Type: R_ARM_LDR_SB_G1 (76)
+ELF-ARM: Type: R_ARM_LDR_SB_G2 (77)
+ELF-ARM: Type: R_ARM_LDRS_SB_G0 (78)
+ELF-ARM: Type: R_ARM_LDRS_SB_G1 (79)
+ELF-ARM: Type: R_ARM_LDRS_SB_G2 (80)
+ELF-ARM: Type: R_ARM_LDC_SB_G0 (81)
+ELF-ARM: Type: R_ARM_LDC_SB_G1 (82)
+ELF-ARM: Type: R_ARM_LDC_SB_G2 (83)
+ELF-ARM: Type: R_ARM_MOVW_BREL_NC (84)
+ELF-ARM: Type: R_ARM_MOVT_BREL (85)
+ELF-ARM: Type: R_ARM_MOVW_BREL (86)
+ELF-ARM: Type: R_ARM_THM_MOVW_BREL_NC (87)
+ELF-ARM: Type: R_ARM_THM_MOVT_BREL (88)
+ELF-ARM: Type: R_ARM_THM_MOVW_BREL (89)
+ELF-ARM: Type: R_ARM_TLS_GOTDESC (90)
+ELF-ARM: Type: R_ARM_TLS_CALL (91)
+ELF-ARM: Type: R_ARM_TLS_DESCSEQ (92)
+ELF-ARM: Type: R_ARM_THM_TLS_CALL (93)
+ELF-ARM: Type: R_ARM_PLT32_ABS (94)
+ELF-ARM: Type: R_ARM_GOT_ABS (95)
+ELF-ARM: Type: R_ARM_GOT_PREL (96)
+ELF-ARM: Type: R_ARM_GOT_BREL12 (97)
+ELF-ARM: Type: R_ARM_GOTOFF12 (98)
+ELF-ARM: Type: R_ARM_GOTRELAX (99)
+ELF-ARM: Type: R_ARM_GNU_VTENTRY (100)
+ELF-ARM: Type: R_ARM_GNU_VTINHERIT (101)
+ELF-ARM: Type: R_ARM_THM_JUMP11 (102)
+ELF-ARM: Type: R_ARM_THM_JUMP8 (103)
+ELF-ARM: Type: R_ARM_TLS_GD32 (104)
+ELF-ARM: Type: R_ARM_TLS_LDM32 (105)
+ELF-ARM: Type: R_ARM_TLS_LDO32 (106)
+ELF-ARM: Type: R_ARM_TLS_IE32 (107)
+ELF-ARM: Type: R_ARM_TLS_LE32 (108)
+ELF-ARM: Type: R_ARM_TLS_LDO12 (109)
+ELF-ARM: Type: R_ARM_TLS_LE12 (110)
+ELF-ARM: Type: R_ARM_TLS_IE12GP (111)
+ELF-ARM: Type: R_ARM_PRIVATE_0 (112)
+ELF-ARM: Type: R_ARM_PRIVATE_1 (113)
+ELF-ARM: Type: R_ARM_PRIVATE_2 (114)
+ELF-ARM: Type: R_ARM_PRIVATE_3 (115)
+ELF-ARM: Type: R_ARM_PRIVATE_4 (116)
+ELF-ARM: Type: R_ARM_PRIVATE_5 (117)
+ELF-ARM: Type: R_ARM_PRIVATE_6 (118)
+ELF-ARM: Type: R_ARM_PRIVATE_7 (119)
+ELF-ARM: Type: R_ARM_PRIVATE_8 (120)
+ELF-ARM: Type: R_ARM_PRIVATE_9 (121)
+ELF-ARM: Type: R_ARM_PRIVATE_10 (122)
+ELF-ARM: Type: R_ARM_PRIVATE_11 (123)
+ELF-ARM: Type: R_ARM_PRIVATE_12 (124)
+ELF-ARM: Type: R_ARM_PRIVATE_13 (125)
+ELF-ARM: Type: R_ARM_PRIVATE_14 (126)
+ELF-ARM: Type: R_ARM_PRIVATE_15 (127)
+ELF-ARM: Type: R_ARM_ME_TOO (128)
+ELF-ARM: Type: R_ARM_THM_TLS_DESCSEQ16 (129)
+ELF-ARM: Type: R_ARM_THM_TLS_DESCSEQ32 (130)
+
+ELF-MIPS: Type: R_MIPS_NONE (0)
+ELF-MIPS: Type: R_MIPS_16 (1)
+ELF-MIPS: Type: R_MIPS_32 (2)
+ELF-MIPS: Type: R_MIPS_REL32 (3)
+ELF-MIPS: Type: R_MIPS_26 (4)
+ELF-MIPS: Type: R_MIPS_HI16 (5)
+ELF-MIPS: Type: R_MIPS_LO16 (6)
+ELF-MIPS: Type: R_MIPS_GPREL16 (7)
+ELF-MIPS: Type: R_MIPS_LITERAL (8)
+ELF-MIPS: Type: R_MIPS_GOT16 (9)
+ELF-MIPS: Type: R_MIPS_PC16 (10)
+ELF-MIPS: Type: R_MIPS_CALL16 (11)
+ELF-MIPS: Type: R_MIPS_GPREL32 (12)
+ELF-MIPS: Type: R_MIPS_SHIFT5 (16)
+ELF-MIPS: Type: R_MIPS_SHIFT6 (17)
+ELF-MIPS: Type: R_MIPS_64 (18)
+ELF-MIPS: Type: R_MIPS_GOT_DISP (19)
+ELF-MIPS: Type: R_MIPS_GOT_PAGE (20)
+ELF-MIPS: Type: R_MIPS_GOT_OFST (21)
+ELF-MIPS: Type: R_MIPS_GOT_HI16 (22)
+ELF-MIPS: Type: R_MIPS_GOT_LO16 (23)
+ELF-MIPS: Type: R_MIPS_SUB (24)
+ELF-MIPS: Type: R_MIPS_INSERT_A (25)
+ELF-MIPS: Type: R_MIPS_INSERT_B (26)
+ELF-MIPS: Type: R_MIPS_DELETE (27)
+ELF-MIPS: Type: R_MIPS_HIGHER (28)
+ELF-MIPS: Type: R_MIPS_HIGHEST (29)
+ELF-MIPS: Type: R_MIPS_CALL_HI16 (30)
+ELF-MIPS: Type: R_MIPS_CALL_LO16 (31)
+ELF-MIPS: Type: R_MIPS_SCN_DISP (32)
+ELF-MIPS: Type: R_MIPS_REL16 (33)
+ELF-MIPS: Type: R_MIPS_ADD_IMMEDIATE (34)
+ELF-MIPS: Type: R_MIPS_PJUMP (35)
+ELF-MIPS: Type: R_MIPS_RELGOT (36)
+ELF-MIPS: Type: R_MIPS_JALR (37)
+ELF-MIPS: Type: R_MIPS_TLS_DTPMOD32 (38)
+ELF-MIPS: Type: R_MIPS_TLS_DTPREL32 (39)
+ELF-MIPS: Type: R_MIPS_TLS_DTPMOD64 (40)
+ELF-MIPS: Type: R_MIPS_TLS_DTPREL64 (41)
+ELF-MIPS: Type: R_MIPS_TLS_GD (42)
+ELF-MIPS: Type: R_MIPS_TLS_LDM (43)
+ELF-MIPS: Type: R_MIPS_TLS_DTPREL_HI16 (44)
+ELF-MIPS: Type: R_MIPS_TLS_DTPREL_LO16 (45)
+ELF-MIPS: Type: R_MIPS_TLS_GOTTPREL (46)
+ELF-MIPS: Type: R_MIPS_TLS_TPREL32 (47)
+ELF-MIPS: Type: R_MIPS_TLS_TPREL64 (48)
+ELF-MIPS: Type: R_MIPS_TLS_TPREL_HI16 (49)
+ELF-MIPS: Type: R_MIPS_TLS_TPREL_LO16 (50)
+ELF-MIPS: Type: R_MIPS_GLOB_DAT (51)
+ELF-MIPS: Type: R_MIPS_COPY (126)
+ELF-MIPS: Type: R_MIPS_JUMP_SLOT (127)
+ELF-MIPS: Type: R_MIPS_NUM (218)
+ELF-MIPS64EL: Type: R_MIPS_NONE/R_MIPS_NONE/R_MIPS_NONE (0)
+ELF-MIPS64EL: Type: R_MIPS_16/R_MIPS_16/R_MIPS_16 (65793)
+ELF-MIPS64EL: Type: R_MIPS_32/R_MIPS_32/R_MIPS_32 (131586)
+ELF-MIPS64EL: Type: R_MIPS_REL32/R_MIPS_REL32/R_MIPS_REL32 (197379)
+ELF-MIPS64EL: Type: R_MIPS_26/R_MIPS_26/R_MIPS_26 (263172)
+ELF-MIPS64EL: Type: R_MIPS_HI16/R_MIPS_HI16/R_MIPS_HI16 (328965)
+ELF-MIPS64EL: Type: R_MIPS_LO16/R_MIPS_LO16/R_MIPS_LO16 (394758)
+ELF-MIPS64EL: Type: R_MIPS_GPREL16/R_MIPS_GPREL16/R_MIPS_GPREL16 (460551)
+ELF-MIPS64EL: Type: R_MIPS_LITERAL/R_MIPS_LITERAL/R_MIPS_LITERAL (526344)
+ELF-MIPS64EL: Type: R_MIPS_GOT16/R_MIPS_GOT16/R_MIPS_GOT16 (592137)
+ELF-MIPS64EL: Type: R_MIPS_PC16/R_MIPS_PC16/R_MIPS_PC16 (657930)
+ELF-MIPS64EL: Type: R_MIPS_CALL16/R_MIPS_CALL16/R_MIPS_CALL16 (723723)
+ELF-MIPS64EL: Type: R_MIPS_GPREL32/R_MIPS_GPREL32/R_MIPS_GPREL32 (789516)
+ELF-MIPS64EL: Type: R_MIPS_SHIFT5/R_MIPS_SHIFT5/R_MIPS_SHIFT5 (1052688)
+ELF-MIPS64EL: Type: R_MIPS_SHIFT6/R_MIPS_SHIFT6/R_MIPS_SHIFT6 (1118481)
+ELF-MIPS64EL: Type: R_MIPS_64/R_MIPS_64/R_MIPS_64 (1184274)
+ELF-MIPS64EL: Type: R_MIPS_GOT_DISP/R_MIPS_GOT_DISP/R_MIPS_GOT_DISP (1250067)
+ELF-MIPS64EL: Type: R_MIPS_GOT_PAGE/R_MIPS_GOT_PAGE/R_MIPS_GOT_PAGE (1315860)
+ELF-MIPS64EL: Type: R_MIPS_GOT_OFST/R_MIPS_GOT_OFST/R_MIPS_GOT_OFST (1381653)
+ELF-MIPS64EL: Type: R_MIPS_GOT_HI16/R_MIPS_GOT_HI16/R_MIPS_GOT_HI16 (1447446)
+ELF-MIPS64EL: Type: R_MIPS_GOT_LO16/R_MIPS_GOT_LO16/R_MIPS_GOT_LO16 (1513239)
+ELF-MIPS64EL: Type: R_MIPS_SUB/R_MIPS_SUB/R_MIPS_SUB (1579032)
+ELF-MIPS64EL: Type: R_MIPS_INSERT_A/R_MIPS_INSERT_A/R_MIPS_INSERT_A (1644825)
+ELF-MIPS64EL: Type: R_MIPS_INSERT_B/R_MIPS_INSERT_B/R_MIPS_INSERT_B (1710618)
+ELF-MIPS64EL: Type: R_MIPS_DELETE/R_MIPS_DELETE/R_MIPS_DELETE (1776411)
+ELF-MIPS64EL: Type: R_MIPS_HIGHER/R_MIPS_HIGHER/R_MIPS_HIGHER (1842204)
+ELF-MIPS64EL: Type: R_MIPS_HIGHEST/R_MIPS_HIGHEST/R_MIPS_HIGHEST (1907997)
+ELF-MIPS64EL: Type: R_MIPS_CALL_HI16/R_MIPS_CALL_HI16/R_MIPS_CALL_HI16 (1973790)
+ELF-MIPS64EL: Type: R_MIPS_CALL_LO16/R_MIPS_CALL_LO16/R_MIPS_CALL_LO16 (2039583)
+ELF-MIPS64EL: Type: R_MIPS_SCN_DISP/R_MIPS_SCN_DISP/R_MIPS_SCN_DISP (2105376)
+ELF-MIPS64EL: Type: R_MIPS_REL16/R_MIPS_REL16/R_MIPS_REL16 (2171169)
+ELF-MIPS64EL: Type: R_MIPS_ADD_IMMEDIATE/R_MIPS_ADD_IMMEDIATE/R_MIPS_ADD_IMMEDIATE (2236962)
+ELF-MIPS64EL: Type: R_MIPS_PJUMP/R_MIPS_PJUMP/R_MIPS_PJUMP (2302755)
+ELF-MIPS64EL: Type: R_MIPS_RELGOT/R_MIPS_RELGOT/R_MIPS_RELGOT (2368548)
+ELF-MIPS64EL: Type: R_MIPS_JALR/R_MIPS_JALR/R_MIPS_JALR (2434341)
+ELF-MIPS64EL: Type: R_MIPS_TLS_DTPMOD32/R_MIPS_TLS_DTPMOD32/R_MIPS_TLS_DTPMOD32 (2500134)
+ELF-MIPS64EL: Type: R_MIPS_TLS_DTPREL32/R_MIPS_TLS_DTPREL32/R_MIPS_TLS_DTPREL32 (2565927)
+ELF-MIPS64EL: Type: R_MIPS_TLS_DTPMOD64/R_MIPS_TLS_DTPMOD64/R_MIPS_TLS_DTPMOD64 (2631720)
+ELF-MIPS64EL: Type: R_MIPS_TLS_DTPREL64/R_MIPS_TLS_DTPREL64/R_MIPS_TLS_DTPREL64 (2697513)
+ELF-MIPS64EL: Type: R_MIPS_TLS_GD/R_MIPS_TLS_GD/R_MIPS_TLS_GD (2763306)
+ELF-MIPS64EL: Type: R_MIPS_TLS_LDM/R_MIPS_TLS_LDM/R_MIPS_TLS_LDM (2829099)
+ELF-MIPS64EL: Type: R_MIPS_TLS_DTPREL_HI16/R_MIPS_TLS_DTPREL_HI16/R_MIPS_TLS_DTPREL_HI16 (2894892)
+ELF-MIPS64EL: Type: R_MIPS_TLS_DTPREL_LO16/R_MIPS_TLS_DTPREL_LO16/R_MIPS_TLS_DTPREL_LO16 (2960685)
+ELF-MIPS64EL: Type: R_MIPS_TLS_GOTTPREL/R_MIPS_TLS_GOTTPREL/R_MIPS_TLS_GOTTPREL (3026478)
+ELF-MIPS64EL: Type: R_MIPS_TLS_TPREL32/R_MIPS_TLS_TPREL32/R_MIPS_TLS_TPREL32 (3092271)
+ELF-MIPS64EL: Type: R_MIPS_TLS_TPREL64/R_MIPS_TLS_TPREL64/R_MIPS_TLS_TPREL64 (3158064)
+ELF-MIPS64EL: Type: R_MIPS_TLS_TPREL_HI16/R_MIPS_TLS_TPREL_HI16/R_MIPS_TLS_TPREL_HI16 (3223857)
+ELF-MIPS64EL: Type: R_MIPS_TLS_TPREL_LO16/R_MIPS_TLS_TPREL_LO16/R_MIPS_TLS_TPREL_LO16 (3289650)
+ELF-MIPS64EL: Type: R_MIPS_GLOB_DAT/R_MIPS_GLOB_DAT/R_MIPS_GLOB_DAT (3355443)
+ELF-MIPS64EL: Type: R_MIPS_COPY/R_MIPS_COPY/R_MIPS_COPY (8289918)
+ELF-MIPS64EL: Type: R_MIPS_JUMP_SLOT/R_MIPS_JUMP_SLOT/R_MIPS_JUMP_SLOT (8355711)
+ELF-MIPS64EL: Type: R_MIPS_NUM/R_MIPS_NUM/R_MIPS_NUM (14342874)
+
+ELF-MBLAZE: Type: R_MICROBLAZE_NONE (0)
+ELF-MBLAZE: Type: R_MICROBLAZE_32 (1)
+ELF-MBLAZE: Type: R_MICROBLAZE_32_PCREL (2)
+ELF-MBLAZE: Type: R_MICROBLAZE_64_PCREL (3)
+ELF-MBLAZE: Type: R_MICROBLAZE_32_PCREL_LO (4)
+ELF-MBLAZE: Type: R_MICROBLAZE_64 (5)
+ELF-MBLAZE: Type: R_MICROBLAZE_32_LO (6)
+ELF-MBLAZE: Type: R_MICROBLAZE_SRO32 (7)
+ELF-MBLAZE: Type: R_MICROBLAZE_SRW32 (8)
+ELF-MBLAZE: Type: R_MICROBLAZE_64_NONE (9)
+ELF-MBLAZE: Type: R_MICROBLAZE_32_SYM_OP_SYM (10)
+ELF-MBLAZE: Type: R_MICROBLAZE_GNU_VTINHERIT (11)
+ELF-MBLAZE: Type: R_MICROBLAZE_GNU_VTENTRY (12)
+ELF-MBLAZE: Type: R_MICROBLAZE_GOTPC_64 (13)
+ELF-MBLAZE: Type: R_MICROBLAZE_GOT_64 (14)
+ELF-MBLAZE: Type: R_MICROBLAZE_PLT_64 (15)
+ELF-MBLAZE: Type: R_MICROBLAZE_REL (16)
+ELF-MBLAZE: Type: R_MICROBLAZE_JUMP_SLOT (17)
+ELF-MBLAZE: Type: R_MICROBLAZE_GLOB_DAT (18)
+ELF-MBLAZE: Type: R_MICROBLAZE_GOTOFF_64 (19)
+ELF-MBLAZE: Type: R_MICROBLAZE_GOTOFF_32 (20)
+ELF-MBLAZE: Type: R_MICROBLAZE_COPY (21)
+
+ELF-HEXAGON: Type: R_HEX_NONE (0)
+ELF-HEXAGON: Type: R_HEX_B22_PCREL (1)
+ELF-HEXAGON: Type: R_HEX_B15_PCREL (2)
+ELF-HEXAGON: Type: R_HEX_B7_PCREL (3)
+ELF-HEXAGON: Type: R_HEX_LO16 (4)
+ELF-HEXAGON: Type: R_HEX_HI16 (5)
+ELF-HEXAGON: Type: R_HEX_32 (6)
+ELF-HEXAGON: Type: R_HEX_16 (7)
+ELF-HEXAGON: Type: R_HEX_8 (8)
+ELF-HEXAGON: Type: R_HEX_GPREL16_0 (9)
+ELF-HEXAGON: Type: R_HEX_GPREL16_1 (10)
+ELF-HEXAGON: Type: R_HEX_GPREL16_2 (11)
+ELF-HEXAGON: Type: R_HEX_GPREL16_3 (12)
+ELF-HEXAGON: Type: R_HEX_HL16 (13)
+ELF-HEXAGON: Type: R_HEX_B13_PCREL (14)
+ELF-HEXAGON: Type: R_HEX_B9_PCREL (15)
+ELF-HEXAGON: Type: R_HEX_B32_PCREL_X (16)
+ELF-HEXAGON: Type: R_HEX_32_6_X (17)
+ELF-HEXAGON: Type: R_HEX_B22_PCREL_X (18)
+ELF-HEXAGON: Type: R_HEX_B15_PCREL_X (19)
+ELF-HEXAGON: Type: R_HEX_B13_PCREL_X (20)
+ELF-HEXAGON: Type: R_HEX_B9_PCREL_X (21)
+ELF-HEXAGON: Type: R_HEX_B7_PCREL_X (22)
+ELF-HEXAGON: Type: R_HEX_16_X (23)
+ELF-HEXAGON: Type: R_HEX_12_X (24)
+ELF-HEXAGON: Type: R_HEX_11_X (25)
+ELF-HEXAGON: Type: R_HEX_10_X (26)
+ELF-HEXAGON: Type: R_HEX_9_X (27)
+ELF-HEXAGON: Type: R_HEX_8_X (28)
+ELF-HEXAGON: Type: R_HEX_7_X (29)
+ELF-HEXAGON: Type: R_HEX_6_X (30)
+ELF-HEXAGON: Type: R_HEX_32_PCREL (31)
+ELF-HEXAGON: Type: R_HEX_COPY (32)
+ELF-HEXAGON: Type: R_HEX_GLOB_DAT (33)
+ELF-HEXAGON: Type: R_HEX_JMP_SLOT (34)
+ELF-HEXAGON: Type: R_HEX_RELATIVE (35)
+ELF-HEXAGON: Type: R_HEX_PLT_B22_PCREL (36)
+ELF-HEXAGON: Type: R_HEX_GOTREL_LO16 (37)
+ELF-HEXAGON: Type: R_HEX_GOTREL_HI16 (38)
+ELF-HEXAGON: Type: R_HEX_GOTREL_32 (39)
+ELF-HEXAGON: Type: R_HEX_GOT_LO16 (40)
+ELF-HEXAGON: Type: R_HEX_GOT_HI16 (41)
+ELF-HEXAGON: Type: R_HEX_GOT_32 (42)
+ELF-HEXAGON: Type: R_HEX_GOT_16 (43)
+ELF-HEXAGON: Type: R_HEX_DTPMOD_32 (44)
+ELF-HEXAGON: Type: R_HEX_DTPREL_LO16 (45)
+ELF-HEXAGON: Type: R_HEX_DTPREL_HI16 (46)
+ELF-HEXAGON: Type: R_HEX_DTPREL_32 (47)
+ELF-HEXAGON: Type: R_HEX_DTPREL_16 (48)
+ELF-HEXAGON: Type: R_HEX_GD_PLT_B22_PCREL (49)
+ELF-HEXAGON: Type: R_HEX_GD_GOT_LO16 (50)
+ELF-HEXAGON: Type: R_HEX_GD_GOT_HI16 (51)
+ELF-HEXAGON: Type: R_HEX_GD_GOT_32 (52)
+ELF-HEXAGON: Type: R_HEX_GD_GOT_16 (53)
+ELF-HEXAGON: Type: R_HEX_IE_LO16 (54)
+ELF-HEXAGON: Type: R_HEX_IE_HI16 (55)
+ELF-HEXAGON: Type: R_HEX_IE_32 (56)
+ELF-HEXAGON: Type: R_HEX_IE_GOT_LO16 (57)
+ELF-HEXAGON: Type: R_HEX_IE_GOT_HI16 (58)
+ELF-HEXAGON: Type: R_HEX_IE_GOT_32 (59)
+ELF-HEXAGON: Type: R_HEX_IE_GOT_16 (60)
+ELF-HEXAGON: Type: R_HEX_TPREL_LO16 (61)
+ELF-HEXAGON: Type: R_HEX_TPREL_HI16 (62)
+ELF-HEXAGON: Type: R_HEX_TPREL_32 (63)
+ELF-HEXAGON: Type: R_HEX_TPREL_16 (64)
+ELF-HEXAGON: Type: R_HEX_6_PCREL_X (65)
+ELF-HEXAGON: Type: R_HEX_GOTREL_32_6_X (66)
+ELF-HEXAGON: Type: R_HEX_GOTREL_16_X (67)
+ELF-HEXAGON: Type: R_HEX_GOTREL_11_X (68)
+ELF-HEXAGON: Type: R_HEX_GOT_32_6_X (69)
+ELF-HEXAGON: Type: R_HEX_GOT_16_X (70)
+ELF-HEXAGON: Type: R_HEX_GOT_11_X (71)
+ELF-HEXAGON: Type: R_HEX_DTPREL_32_6_X (72)
+ELF-HEXAGON: Type: R_HEX_DTPREL_16_X (73)
+ELF-HEXAGON: Type: R_HEX_DTPREL_11_X (74)
+ELF-HEXAGON: Type: R_HEX_GD_GOT_32_6_X (75)
+ELF-HEXAGON: Type: R_HEX_GD_GOT_16_X (76)
+ELF-HEXAGON: Type: R_HEX_GD_GOT_11_X (77)
+ELF-HEXAGON: Type: R_HEX_IE_32_6_X (78)
+ELF-HEXAGON: Type: R_HEX_IE_16_X (79)
+ELF-HEXAGON: Type: R_HEX_IE_GOT_32_6_X (80)
+ELF-HEXAGON: Type: R_HEX_IE_GOT_16_X (81)
+ELF-HEXAGON: Type: R_HEX_IE_GOT_11_X (82)
+ELF-HEXAGON: Type: R_HEX_TPREL_32_6_X (83)
+ELF-HEXAGON: Type: R_HEX_TPREL_16_X (84)
+ELF-HEXAGON: Type: R_HEX_TPREL_11_X (85)
+
+COFF-32: Type: IMAGE_REL_I386_ABSOLUTE (0)
+COFF-32: Type: IMAGE_REL_I386_DIR16 (1)
+COFF-32: Type: IMAGE_REL_I386_REL16 (2)
+COFF-32: Type: IMAGE_REL_I386_DIR32 (6)
+COFF-32: Type: IMAGE_REL_I386_DIR32NB (7)
+COFF-32: Type: IMAGE_REL_I386_SEG12 (9)
+COFF-32: Type: IMAGE_REL_I386_SECTION (10)
+COFF-32: Type: IMAGE_REL_I386_SECREL (11)
+COFF-32: Type: IMAGE_REL_I386_TOKEN (12)
+COFF-32: Type: IMAGE_REL_I386_SECREL7 (13)
+COFF-32: Type: IMAGE_REL_I386_REL32 (20)
+
+COFF-64: Type: IMAGE_REL_AMD64_ABSOLUTE (0)
+COFF-64: Type: IMAGE_REL_AMD64_ADDR64 (1)
+COFF-64: Type: IMAGE_REL_AMD64_ADDR32 (2)
+COFF-64: Type: IMAGE_REL_AMD64_ADDR32NB (3)
+COFF-64: Type: IMAGE_REL_AMD64_REL32 (4)
+COFF-64: Type: IMAGE_REL_AMD64_REL32_1 (5)
+COFF-64: Type: IMAGE_REL_AMD64_REL32_2 (6)
+COFF-64: Type: IMAGE_REL_AMD64_REL32_3 (7)
+COFF-64: Type: IMAGE_REL_AMD64_REL32_4 (8)
+COFF-64: Type: IMAGE_REL_AMD64_REL32_5 (9)
+COFF-64: Type: IMAGE_REL_AMD64_SECTION (10)
+COFF-64: Type: IMAGE_REL_AMD64_SECREL (11)
+COFF-64: Type: IMAGE_REL_AMD64_SECREL7 (12)
+COFF-64: Type: IMAGE_REL_AMD64_TOKEN (13)
+COFF-64: Type: IMAGE_REL_AMD64_SREL32 (14)
+COFF-64: Type: IMAGE_REL_AMD64_PAIR (15)
+COFF-64: Type: IMAGE_REL_AMD64_SSPAN32 (16)
+
+COFF-ARM: Type: IMAGE_REL_ARM_ABSOLUTE (0x0000)
+COFF-ARM: Type: IMAGE_REL_ARM_ADDR32 (0x0001)
+COFF-ARM: Type: IMAGE_REL_ARM_ADDR32NB (0x0002)
+COFF-ARM: Type: IMAGE_REL_ARM_BRANCH24 (0x0003)
+COFF-ARM: Type: IMAGE_REL_ARM_BRANCH11 (0x0004)
+COFF-ARM: Type: IMAGE_REL_ARM_TOKEN (0x0005)
+COFF-ARM: Type: IMAGE_REL_ARM_BLX24 (0x0008)
+COFF-ARM: Type: IMAGE_REL_ARM_BLX11 (0x0009)
+COFF-ARM: Type: IMAGE_REL_ARM_SECTION (0x000E)
+COFF-ARM: Type: IMAGE_REL_ARM_SECREL (0x000F)
+COFF-ARM: Type: IMAGE_REL_ARM_MOV32A (0x0010)
+COFF-ARM: Type: IMAGE_REL_ARM_MOV32T (0x0011)
+COFF-ARM: Type: IMAGE_REL_ARM_BRANCH20T (0x0012)
+COFF-ARM: Type: IMAGE_REL_ARM_BRANCH24T (0x0014)
+COFF-ARM: Type: IMAGE_REL_ARM_BLX23T (0x0015)
+
+MACHO-32: Type: GENERIC_RELOC_VANILLA (0)
+MACHO-32: Type: GENERIC_RELOC_PAIR (1)
+MACHO-32: Type: GENERIC_RELOC_SECTDIFF (2)
+MACHO-32: Type: GENERIC_RELOC_PB_LA_PTR (3)
+MACHO-32: Type: GENERIC_RELOC_LOCAL_SECTDIFF (4)
+MACHO-32: Type: GENERIC_RELOC_TLV (5)
+
+MACHO-64: Type: X86_64_RELOC_UNSIGNED (0)
+MACHO-64: Type: X86_64_RELOC_SIGNED (1)
+MACHO-64: Type: X86_64_RELOC_BRANCH (2)
+MACHO-64: Type: X86_64_RELOC_GOT_LOAD (3)
+MACHO-64: Type: X86_64_RELOC_GOT (4)
+MACHO-64: Type: X86_64_RELOC_SUBTRACTOR (5)
+MACHO-64: Type: X86_64_RELOC_SIGNED_1 (6)
+MACHO-64: Type: X86_64_RELOC_SIGNED_2 (7)
+MACHO-64: Type: X86_64_RELOC_SIGNED_4 (8)
+MACHO-64: Type: X86_64_RELOC_TLV (9)
+
+MACHO-ARM: Type: ARM_RELOC_VANILLA (0)
+MACHO-ARM: Type: ARM_RELOC_PAIR (1)
+MACHO-ARM: Type: ARM_RELOC_SECTDIFF (2)
+MACHO-ARM: Type: ARM_RELOC_LOCAL_SECTDIFF (3)
+MACHO-ARM: Type: ARM_RELOC_PB_LA_PTR (4)
+MACHO-ARM: Type: ARM_RELOC_BR24 (5)
+MACHO-ARM: Type: ARM_THUMB_RELOC_BR22 (6)
+MACHO-ARM: Type: ARM_THUMB_32BIT_BRANCH (7)
+MACHO-ARM: Type: ARM_RELOC_HALF (8)
+MACHO-ARM: Type: ARM_RELOC_HALF_SECTDIFF (9)
+
+MACHO-PPC: PPC_RELOC_VANILLA (0)
+MACHO-PPC: PPC_RELOC_PAIR (1)
+MACHO-PPC: PPC_RELOC_BR14 (2)
+MACHO-PPC: PPC_RELOC_BR24 (3)
+MACHO-PPC: PPC_RELOC_HI16 (4)
+MACHO-PPC: PPC_RELOC_LO16 (5)
+MACHO-PPC: PPC_RELOC_HA16 (6)
+MACHO-PPC: PPC_RELOC_LO14 (7)
+MACHO-PPC: PPC_RELOC_SECTDIFF (8)
+MACHO-PPC: PPC_RELOC_PB_LA_PTR (9)
+MACHO-PPC: PPC_RELOC_HI16_SECTDIFF (10)
+MACHO-PPC: PPC_RELOC_LO16_SECTDIFF (11)
+MACHO-PPC: PPC_RELOC_HA16_SECTDIFF (12)
+MACHO-PPC: PPC_RELOC_JBSR (13)
+MACHO-PPC: PPC_RELOC_LO14_SECTDIFF (14)
+MACHO-PPC: PPC_RELOC_LOCAL_SECTDIFF (15)
diff --git a/test/tools/llvm-readobj/relocations.test b/test/tools/llvm-readobj/relocations.test
index 0608565..dec7f86 100644
--- a/test/tools/llvm-readobj/relocations.test
+++ b/test/tools/llvm-readobj/relocations.test
@@ -3,7 +3,15 @@ RUN:   | FileCheck %s -check-prefix COFF
 RUN: llvm-readobj -r %p/Inputs/trivial.obj.elf-i386 \
 RUN:   | FileCheck %s -check-prefix ELF
 RUN: llvm-readobj -r %p/Inputs/trivial.obj.macho-i386 \
-RUN:   | FileCheck %s -check-prefix MACHO
+RUN:   | FileCheck %s -check-prefix MACHO-I386
+RUN: llvm-readobj -r %p/Inputs/trivial.obj.macho-x86-64 \
+RUN:   | FileCheck %s -check-prefix MACHO-X86-64
+RUN: llvm-readobj -r %p/Inputs/trivial.obj.macho-ppc \
+RUN:   | FileCheck %s -check-prefix MACHO-PPC
+RUN: llvm-readobj -r %p/Inputs/trivial.obj.macho-ppc64 \
+RUN:   | FileCheck %s -check-prefix MACHO-PPC64
+RUN: llvm-readobj -r -expand-relocs %p/Inputs/trivial.obj.macho-arm \
+RUN:   | FileCheck %s -check-prefix MACHO-ARM
 
 COFF:      Relocations [
 COFF-NEXT:   Section (1) .text {
@@ -22,11 +30,144 @@ ELF-NEXT:     0x1F R_386_PLT32 SomeOtherFunction 0x0
 ELF-NEXT:   }
 ELF-NEXT: ]
 
-MACHO:      Relocations [
-MACHO-NEXT:   Section __text {
-MACHO-NEXT:     0x18 GENERIC_RELOC_VANILLA _SomeOtherFunction 0x0
-MACHO-NEXT:     0x13 GENERIC_RELOC_VANILLA _puts 0x0
-MACHO-NEXT:     0xB GENERIC_RELOC_LOCAL_SECTDIFF _main 0x{{[0-9A-F]+}}
-MACHO-NEXT:     0x0 GENERIC_RELOC_PAIR _main 0x{{[0-9A-F]+}}
-MACHO-NEXT:   }
-MACHO-NEXT: ]
+MACHO-I386:      Relocations [
+MACHO-I386-NEXT:   Section __text {
+MACHO-I386-NEXT:     0x18 1 2 1 GENERIC_RELOC_VANILLA 0 _SomeOtherFunction
+MACHO-I386-NEXT:     0x13 1 2 1 GENERIC_RELOC_VANILLA 0 _puts
+MACHO-I386-NEXT:     0xB 0 2 n/a GENERIC_RELOC_LOCAL_SECTDIFF 1 -
+MACHO-I386-NEXT:     0x0 0 2 n/a GENERIC_RELOC_PAIR 1 -
+MACHO-I386-NEXT:   }
+MACHO-I386-NEXT: ]
+
+MACHO-X86-64: Relocations [
+MACHO-X86-64-NEXT:  Section __text {
+MACHO-X86-64-NEXT:    0xE 1 2 1 X86_64_RELOC_BRANCH 0 _SomeOtherFunction
+MACHO-X86-64-NEXT:    0x9 1 2 1 X86_64_RELOC_BRANCH 0 _puts
+MACHO-X86-64-NEXT:    0x4 1 2 1 X86_64_RELOC_SIGNED 0 L_.str
+MACHO-X86-64-NEXT:  }
+MACHO-X86-64-NEXT:]
+
+MACHO-PPC: Relocations [
+MACHO-PPC-NEXT:   Section __text {
+MACHO-PPC-NEXT:     0x24 0 2 n/a PPC_RELOC_LO16_SECTDIFF 1 -
+MACHO-PPC-NEXT:     0x0 0 2 n/a PPC_RELOC_PAIR 1 -
+MACHO-PPC-NEXT:     0x1C 0 2 n/a PPC_RELOC_HA16_SECTDIFF 1 -
+MACHO-PPC-NEXT:     0x58 0 2 n/a PPC_RELOC_PAIR 1 -
+MACHO-PPC-NEXT:     0x18 1 2 0 PPC_RELOC_BR24 0 -
+MACHO-PPC-NEXT:   }
+MACHO-PPC-NEXT:   Section __picsymbolstub1 {
+MACHO-PPC-NEXT:     0x14 0 2 n/a PPC_RELOC_LO16_SECTDIFF 1 -
+MACHO-PPC-NEXT:     0x0 0 2 n/a PPC_RELOC_PAIR 1 -
+MACHO-PPC-NEXT:     0xC 0 2 n/a PPC_RELOC_HA16_SECTDIFF 1 -
+MACHO-PPC-NEXT:     0x20 0 2 n/a PPC_RELOC_PAIR 1 -
+MACHO-PPC-NEXT:   }
+MACHO-PPC-NEXT:   Section __la_symbol_ptr {
+MACHO-PPC-NEXT:     0x0 0 2 1 PPC_RELOC_VANILLA 0 dyld_stub_binding_helper
+MACHO-PPC-NEXT:   }
+MACHO-PPC-NEXT: ]
+
+MACHO-PPC64: Relocations [
+MACHO-PPC64-NEXT:   Section __text {
+MACHO-PPC64-NEXT:     0x24 0 2 n/a 1 -
+MACHO-PPC64-NEXT:     0x0 0 2 n/a 1 -
+MACHO-PPC64-NEXT:     0x1C 0 2 n/a 1 -
+MACHO-PPC64-NEXT:     0x58 0 2 n/a 1 -
+MACHO-PPC64-NEXT:     0x18 1 2 0 0 -
+MACHO-PPC64-NEXT:   }
+MACHO-PPC64-NEXT:   Section __picsymbolstub1 {
+MACHO-PPC64-NEXT:     0x14 0 2 n/a 1 -
+MACHO-PPC64-NEXT:     0x0 0 2 n/a 1 -
+MACHO-PPC64-NEXT:     0xC 0 2 n/a 1 -
+MACHO-PPC64-NEXT:     0x24 0 2 n/a 1 -
+MACHO-PPC64-NEXT:   }
+MACHO-PPC64-NEXT:   Section __la_symbol_ptr {
+MACHO-PPC64-NEXT:     0x0 0 3 1 0 dyld_stub_binding_helper
+MACHO-PPC64-NEXT:   }
+MACHO-PPC64-NEXT: ]
+
+
+MACHO-ARM:       Relocations [
+MACHO-ARM-NEXT:    Section __text {
+MACHO-ARM-NEXT:      Relocation {
+MACHO-ARM-NEXT:        Offset: 0x38
+MACHO-ARM-NEXT:        PCRel: 0
+MACHO-ARM-NEXT:        Length: 2
+MACHO-ARM-NEXT:        Extern: N/A
+MACHO-ARM-NEXT:        Type: ARM_RELOC_SECTDIFF (2)
+MACHO-ARM-NEXT:        Symbol: -
+MACHO-ARM-NEXT:        Scattered: 1
+MACHO-ARM-NEXT:      }
+MACHO-ARM-NEXT:      Relocation {
+MACHO-ARM-NEXT:        Offset: 0x0
+MACHO-ARM-NEXT:        PCRel: 0
+MACHO-ARM-NEXT:        Length: 2
+MACHO-ARM-NEXT:        Extern: N/A
+MACHO-ARM-NEXT:        Type: ARM_RELOC_PAIR (1)
+MACHO-ARM-NEXT:        Symbol: -
+MACHO-ARM-NEXT:        Scattered: 1
+MACHO-ARM-NEXT:      }
+MACHO-ARM-NEXT:      Relocation {
+MACHO-ARM-NEXT:        Offset: 0x20
+MACHO-ARM-NEXT:        PCRel: 1
+MACHO-ARM-NEXT:        Length: 2
+MACHO-ARM-NEXT:        Extern: 1
+MACHO-ARM-NEXT:        Type: ARM_RELOC_BR24 (5)
+MACHO-ARM-NEXT:        Symbol: _g
+MACHO-ARM-NEXT:        Scattered: 0
+MACHO-ARM-NEXT:      }
+MACHO-ARM-NEXT:      Relocation {
+MACHO-ARM-NEXT:        Offset: 0x1C
+MACHO-ARM-NEXT:        PCRel: 0
+MACHO-ARM-NEXT:        Length: 1
+MACHO-ARM-NEXT:        Extern: 1
+MACHO-ARM-NEXT:        Type: ARM_RELOC_HALF (8)
+MACHO-ARM-NEXT:        Symbol: _g
+MACHO-ARM-NEXT:        Scattered: 0
+MACHO-ARM-NEXT:      }
+MACHO-ARM-NEXT:      Relocation {
+MACHO-ARM-NEXT:        Offset: 0x0
+MACHO-ARM-NEXT:        PCRel: 0
+MACHO-ARM-NEXT:        Length: 1
+MACHO-ARM-NEXT:        Extern: 0
+MACHO-ARM-NEXT:        Type: ARM_RELOC_PAIR (1)
+MACHO-ARM-NEXT:        Symbol: -
+MACHO-ARM-NEXT:        Scattered: 0
+MACHO-ARM-NEXT:      }
+MACHO-ARM-NEXT:      Relocation {
+MACHO-ARM-NEXT:        Offset: 0x18
+MACHO-ARM-NEXT:        PCRel: 0
+MACHO-ARM-NEXT:        Length: 0
+MACHO-ARM-NEXT:        Extern: 1
+MACHO-ARM-NEXT:        Type: ARM_RELOC_HALF (8)
+MACHO-ARM-NEXT:        Symbol: _g
+MACHO-ARM-NEXT:        Scattered: 0
+MACHO-ARM-NEXT:      }
+MACHO-ARM-NEXT:      Relocation {
+MACHO-ARM-NEXT:        Offset: 0x0
+MACHO-ARM-NEXT:        PCRel: 0
+MACHO-ARM-NEXT:        Length: 0
+MACHO-ARM-NEXT:        Extern: 0
+MACHO-ARM-NEXT:        Type: ARM_RELOC_PAIR (1)
+MACHO-ARM-NEXT:        Symbol: -
+MACHO-ARM-NEXT:        Scattered: 0
+MACHO-ARM-NEXT:      }
+MACHO-ARM-NEXT:      Relocation {
+MACHO-ARM-NEXT:        Offset: 0xC
+MACHO-ARM-NEXT:        PCRel: 0
+MACHO-ARM-NEXT:        Length: 2
+MACHO-ARM-NEXT:        Extern: N/A
+MACHO-ARM-NEXT:        Type: ARM_RELOC_SECTDIFF (2)
+MACHO-ARM-NEXT:        Symbol: -
+MACHO-ARM-NEXT:        Scattered: 1
+MACHO-ARM-NEXT:      }
+MACHO-ARM-NEXT:      Relocation {
+MACHO-ARM-NEXT:        Offset: 0x0
+MACHO-ARM-NEXT:        PCRel: 0
+MACHO-ARM-NEXT:        Length: 2
+MACHO-ARM-NEXT:        Extern: N/A
+MACHO-ARM-NEXT:        Type: ARM_RELOC_PAIR (1)
+MACHO-ARM-NEXT:        Symbol: -
+MACHO-ARM-NEXT:        Scattered: 1
+MACHO-ARM-NEXT:      }
+MACHO-ARM-NEXT:    }
+MACHO-ARM-NEXT:  ]
diff --git a/test/tools/llvm-readobj/sections-ext.test b/test/tools/llvm-readobj/sections-ext.test
index 3254040..327f040 100644
--- a/test/tools/llvm-readobj/sections-ext.test
+++ b/test/tools/llvm-readobj/sections-ext.test
@@ -3,7 +3,15 @@ RUN:   | FileCheck %s -check-prefix COFF
 RUN: llvm-readobj -s -st -sr -sd %p/Inputs/trivial.obj.elf-i386 \
 RUN:   | FileCheck %s -check-prefix ELF
 RUN: llvm-readobj -s -st -sr -sd %p/Inputs/trivial.obj.macho-i386 \
-RUN:   | FileCheck %s -check-prefix MACHO
+RUN:   | FileCheck %s -check-prefix MACHO-I386
+RUN: llvm-readobj -s -st -sr -sd %p/Inputs/trivial.obj.macho-x86-64 \
+RUN:   | FileCheck %s -check-prefix MACHO-X86-64
+RUN: llvm-readobj -s -st -sr -sd %p/Inputs/trivial.obj.macho-ppc \
+RUN:   | FileCheck %s -check-prefix MACHO-PPC
+RUN: llvm-readobj -s -st -sr -sd %p/Inputs/trivial.obj.macho-ppc64 \
+RUN:   | FileCheck %s -check-prefix MACHO-PPC64
+RUN: llvm-readobj -expand-relocs -s -st -sr -sd %p/Inputs/trivial.obj.macho-arm \
+RUN:   | FileCheck %s -check-prefix MACHO-ARM
 
 COFF:      Sections [
 COFF-NEXT:   Section {
@@ -132,44 +140,702 @@ ELF-NEXT:       0020: FFFFFF31 C083C408 5BC3               |...1....[.|
 ELF-NEXT:     )
 ELF-NEXT:   }
 
-MACHO:      Sections [
-MACHO-NEXT:   Section {
-MACHO-NEXT:     Index: 0
-MACHO-NEXT:     Name: __text (5F 5F 74 65 78 74 00
-MACHO-NEXT:     Segment:
-MACHO-NEXT:     Address: 0x0
-MACHO-NEXT:     Size: 0x22
-MACHO-NEXT:     Offset: 324
-MACHO-NEXT:     Alignment: 4
-MACHO-NEXT:     RelocationOffset: 0x174
-MACHO-NEXT:     RelocationCount: 4
-MACHO-NEXT:     Type: 0x0
-MACHO-NEXT:     Attributes [ (0x800004)
-MACHO-NEXT:       PureInstructions (0x800000)
-MACHO-NEXT:       SomeInstructions (0x4)
-MACHO-NEXT:     ]
-MACHO-NEXT:     Reserved1: 0x0
-MACHO-NEXT:     Reserved2: 0x0
-MACHO-NEXT:     Relocations [
-MACHO-NEXT:       0x18 GENERIC_RELOC_VANILLA _SomeOtherFunction 0x0
-MACHO-NEXT:       0x13 GENERIC_RELOC_VANILLA _puts 0x0
-MACHO-NEXT:       0xB GENERIC_RELOC_LOCAL_SECTDIFF _main 0x{{[0-9A-F]+}}
-MACHO-NEXT:       0x0 GENERIC_RELOC_PAIR _main 0x{{[0-9A-F]+}}
-MACHO-NEXT:     ]
-MACHO-NEXT:     Symbols [
-MACHO-NEXT:       Symbol {
-MACHO-NEXT:         Name: _main (1)
-MACHO-NEXT:         Type: 0xF
-MACHO-NEXT:         Section: __text (0x1)
-MACHO-NEXT:         RefType: UndefinedNonLazy (0x0)
-MACHO-NEXT:         Flags [ (0x0)
-MACHO-NEXT:         ]
-MACHO-NEXT:         Value: 0x0
-MACHO-NEXT:       }
-MACHO-NEXT:     ]
-MACHO-NEXT:     SectionData (
-MACHO-NEXT:       0000: 83EC0CE8 00000000 588D801A 00000089  |........X.......|
-MACHO-NEXT:       0010: 0424E8E9 FFFFFFE8 E4FFFFFF 31C083C4  |.$..........1...|
-MACHO-NEXT:       0020: 0CC3                                 |..|
-MACHO-NEXT:     )
-MACHO-NEXT:   }
+MACHO-I386:      Sections [
+MACHO-I386-NEXT:   Section {
+MACHO-I386-NEXT:     Index: 0
+MACHO-I386-NEXT:     Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+MACHO-I386-NEXT:     Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+MACHO-I386-NEXT:     Address: 0x0
+MACHO-I386-NEXT:     Size: 0x22
+MACHO-I386-NEXT:     Offset: 324
+MACHO-I386-NEXT:     Alignment: 4
+MACHO-I386-NEXT:     RelocationOffset: 0x174
+MACHO-I386-NEXT:     RelocationCount: 4
+MACHO-I386-NEXT:     Type: 0x0
+MACHO-I386-NEXT:     Attributes [ (0x800004)
+MACHO-I386-NEXT:       PureInstructions (0x800000)
+MACHO-I386-NEXT:       SomeInstructions (0x4)
+MACHO-I386-NEXT:     ]
+MACHO-I386-NEXT:     Reserved1: 0x0
+MACHO-I386-NEXT:     Reserved2: 0x0
+MACHO-I386-NEXT:     Relocations [
+MACHO-I386-NEXT:       0x18 1 2 1 GENERIC_RELOC_VANILLA 0 _SomeOtherFunction
+MACHO-I386-NEXT:       0x13 1 2 1 GENERIC_RELOC_VANILLA 0 _puts
+MACHO-I386-NEXT:       0xB 0 2 n/a GENERIC_RELOC_LOCAL_SECTDIFF 1 -
+MACHO-I386-NEXT:       0x0 0 2 n/a GENERIC_RELOC_PAIR 1 -
+MACHO-I386-NEXT:     ]
+MACHO-I386-NEXT:     Symbols [
+MACHO-I386-NEXT:       Symbol {
+MACHO-I386-NEXT:         Name: _main (1)
+MACHO-I386-NEXT:         Type: 0xF
+MACHO-I386-NEXT:         Section: __text (0x1)
+MACHO-I386-NEXT:         RefType: UndefinedNonLazy (0x0)
+MACHO-I386-NEXT:         Flags [ (0x0)
+MACHO-I386-NEXT:         ]
+MACHO-I386-NEXT:         Value: 0x0
+MACHO-I386-NEXT:       }
+MACHO-I386-NEXT:     ]
+MACHO-I386-NEXT:     SectionData (
+MACHO-I386-NEXT:       0000: 83EC0CE8 00000000 588D801A 00000089  |........X.......|
+MACHO-I386-NEXT:       0010: 0424E8E9 FFFFFFE8 E4FFFFFF 31C083C4  |.$..........1...|
+MACHO-I386-NEXT:       0020: 0CC3                                 |..|
+MACHO-I386-NEXT:     )
+MACHO-I386-NEXT:   }
+
+
+MACHO-X86-64:     Sections [
+MACHO-X86-64-NEXT:  Section {
+MACHO-X86-64-NEXT:    Index: 0
+MACHO-X86-64-NEXT:    Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+MACHO-X86-64-NEXT:    Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+MACHO-X86-64-NEXT:    Address: 0x0
+MACHO-X86-64-NEXT:    Size: 0x16
+MACHO-X86-64-NEXT:    Offset: 368
+MACHO-X86-64-NEXT:    Alignment: 4
+MACHO-X86-64-NEXT:    RelocationOffset: 0x194
+MACHO-X86-64-NEXT:    RelocationCount: 3
+MACHO-X86-64-NEXT:    Type: 0x0
+MACHO-X86-64-NEXT:    Attributes [ (0x800004)
+MACHO-X86-64-NEXT:      PureInstructions (0x800000)
+MACHO-X86-64-NEXT:      SomeInstructions (0x4)
+MACHO-X86-64-NEXT:    ]
+MACHO-X86-64-NEXT:    Reserved1: 0x0
+MACHO-X86-64-NEXT:    Reserved2: 0x0
+MACHO-X86-64-NEXT:    Relocations [
+MACHO-X86-64-NEXT:      0xE 1 2 1 X86_64_RELOC_BRANCH 0 _SomeOtherFunction
+MACHO-X86-64-NEXT:      0x9 1 2 1 X86_64_RELOC_BRANCH 0 _puts
+MACHO-X86-64-NEXT:      0x4 1 2 1 X86_64_RELOC_SIGNED 0 L_.str
+MACHO-X86-64-NEXT:    ]
+MACHO-X86-64-NEXT:    Symbols [
+MACHO-X86-64-NEXT:      Symbol {
+MACHO-X86-64-NEXT:        Name: _main (1)
+MACHO-X86-64-NEXT:        Type: 0xF
+MACHO-X86-64-NEXT:        Section: __text (0x1)
+MACHO-X86-64-NEXT:        RefType: UndefinedNonLazy (0x0)
+MACHO-X86-64-NEXT:        Flags [ (0x0)
+MACHO-X86-64-NEXT:        ]
+MACHO-X86-64-NEXT:        Value: 0x0
+MACHO-X86-64-NEXT:      }
+MACHO-X86-64-NEXT:    ]
+MACHO-X86-64-NEXT:    SectionData (
+MACHO-X86-64-NEXT:      0000: 50488D3D 00000000 E8000000 00E80000  |PH.=............|
+MACHO-X86-64-NEXT:      0010: 000031C0 5AC3                        |..1.Z.|
+MACHO-X86-64-NEXT:    )
+MACHO-X86-64-NEXT:  }
+MACHO-X86-64-NEXT:  Section {
+MACHO-X86-64-NEXT:    Index: 1
+MACHO-X86-64-NEXT:    Name: __cstring (5F 5F 63 73 74 72 69 6E 67 00 00 00 00 00 00 00)
+MACHO-X86-64-NEXT:    Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+MACHO-X86-64-NEXT:    Address: 0x16
+MACHO-X86-64-NEXT:    Size: 0xD
+MACHO-X86-64-NEXT:    Offset: 390
+MACHO-X86-64-NEXT:    Alignment: 0
+MACHO-X86-64-NEXT:    RelocationOffset: 0x0
+MACHO-X86-64-NEXT:    RelocationCount: 0
+MACHO-X86-64-NEXT:    Type: ExtReloc (0x2)
+MACHO-X86-64-NEXT:    Attributes [ (0x0)
+MACHO-X86-64-NEXT:    ]
+MACHO-X86-64-NEXT:    Reserved1: 0x0
+MACHO-X86-64-NEXT:    Reserved2: 0x0
+MACHO-X86-64-NEXT:    Relocations [
+MACHO-X86-64-NEXT:    ]
+MACHO-X86-64-NEXT:    Symbols [
+MACHO-X86-64-NEXT:      Symbol {
+MACHO-X86-64-NEXT:        Name: L_.str (32)
+MACHO-X86-64-NEXT:        Type: Section (0xE)
+MACHO-X86-64-NEXT:        Section: __cstring (0x2)
+MACHO-X86-64-NEXT:        RefType: UndefinedNonLazy (0x0)
+MACHO-X86-64-NEXT:        Flags [ (0x0)
+MACHO-X86-64-NEXT:        ]
+MACHO-X86-64-NEXT:        Value: 0x16
+MACHO-X86-64-NEXT:      }
+MACHO-X86-64-NEXT:    ]
+MACHO-X86-64-NEXT:    SectionData (
+MACHO-X86-64-NEXT:      0000: 48656C6C 6F20576F 726C640A 00        |Hello World..|
+MACHO-X86-64-NEXT:    )
+MACHO-X86-64-NEXT:  }
+MACHO-X86-64-NEXT:]
+
+MACHO-PPC: Sections [
+MACHO-PPC-NEXT:   Section {
+MACHO-PPC-NEXT:     Index: 0
+MACHO-PPC-NEXT:     Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+MACHO-PPC-NEXT:     Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+MACHO-PPC-NEXT:     Address: 0x0
+MACHO-PPC-NEXT:     Size: 0x3C
+MACHO-PPC-NEXT:     Offset: 528
+MACHO-PPC-NEXT:     Alignment: 2
+MACHO-PPC-NEXT:     RelocationOffset: 0x27C
+MACHO-PPC-NEXT:     RelocationCount: 5
+MACHO-PPC-NEXT:     Type: 0x0
+MACHO-PPC-NEXT:     Attributes [ (0x800004)
+MACHO-PPC-NEXT:       PureInstructions (0x800000)
+MACHO-PPC-NEXT:       SomeInstructions (0x4)
+MACHO-PPC-NEXT:     ]
+MACHO-PPC-NEXT:     Reserved1: 0x0
+MACHO-PPC-NEXT:     Reserved2: 0x0
+MACHO-PPC-NEXT:     Relocations [
+MACHO-PPC-NEXT:       0x24 0 2 n/a PPC_RELOC_LO16_SECTDIFF 1 -
+MACHO-PPC-NEXT:       0x0 0 2 n/a PPC_RELOC_PAIR 1 -
+MACHO-PPC-NEXT:       0x1C 0 2 n/a PPC_RELOC_HA16_SECTDIFF 1 -
+MACHO-PPC-NEXT:       0x58 0 2 n/a PPC_RELOC_PAIR 1 -
+MACHO-PPC-NEXT:       0x18 1 2 0 PPC_RELOC_BR24 0 -
+MACHO-PPC-NEXT:     ]
+MACHO-PPC-NEXT:     Symbols [
+MACHO-PPC-NEXT:       Symbol {
+MACHO-PPC-NEXT:         Name: _f (4)
+MACHO-PPC-NEXT:         Type: 0xF
+MACHO-PPC-NEXT:         Section: __text (0x1)
+MACHO-PPC-NEXT:         RefType: UndefinedNonLazy (0x0)
+MACHO-PPC-NEXT:         Flags [ (0x0)
+MACHO-PPC-NEXT:         ]
+MACHO-PPC-NEXT:         Value: 0x0
+MACHO-PPC-NEXT:       }
+MACHO-PPC-NEXT:     ]
+MACHO-PPC-NEXT:     SectionData (
+MACHO-PPC-NEXT:       0000: 7C0802A6 93E1FFFC 429F0005 7FE802A6  ||.......B.......|
+MACHO-PPC-NEXT:       0010: 90010008 9421FFB0 48000029 3C5F0000  |.....!..H..)<_..|
+MACHO-PPC-NEXT:       0020: 38210050 80420058 80010008 83E1FFFC  |8!.P.B.X........|
+MACHO-PPC-NEXT:       0030: 7C0803A6 80620000 4E800020           ||....b..N.. |
+MACHO-PPC-NEXT:     )
+MACHO-PPC-NEXT:   }
+MACHO-PPC-NEXT:   Section {
+MACHO-PPC-NEXT:     Index: 1
+MACHO-PPC-NEXT:     Name: __picsymbolstub1 (5F 5F 70 69 63 73 79 6D 62 6F 6C 73 74 75 62 31)
+MACHO-PPC-NEXT:     Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+MACHO-PPC-NEXT:     Address: 0x40
+MACHO-PPC-NEXT:     Size: 0x20
+MACHO-PPC-NEXT:     Offset: 592
+MACHO-PPC-NEXT:     Alignment: 5
+MACHO-PPC-NEXT:     RelocationOffset: 0x2A4
+MACHO-PPC-NEXT:     RelocationCount: 4
+MACHO-PPC-NEXT:     Type: 0x8
+MACHO-PPC-NEXT:     Attributes [ (0x800004)
+MACHO-PPC-NEXT:       PureInstructions (0x800000)
+MACHO-PPC-NEXT:       SomeInstructions (0x4)
+MACHO-PPC-NEXT:     ]
+MACHO-PPC-NEXT:     Reserved1: 0x0
+MACHO-PPC-NEXT:     Reserved2: 0x20
+MACHO-PPC-NEXT:     Relocations [
+MACHO-PPC-NEXT:       0x14 0 2 n/a PPC_RELOC_LO16_SECTDIFF 1 -
+MACHO-PPC-NEXT:       0x0 0 2 n/a PPC_RELOC_PAIR 1 -
+MACHO-PPC-NEXT:       0xC 0 2 n/a PPC_RELOC_HA16_SECTDIFF 1 -
+MACHO-PPC-NEXT:       0x20 0 2 n/a PPC_RELOC_PAIR 1 -
+MACHO-PPC-NEXT:     ]
+MACHO-PPC-NEXT:     Symbols [
+MACHO-PPC-NEXT:     ]
+MACHO-PPC-NEXT:     SectionData (
+MACHO-PPC-NEXT:       0000: 7C0802A6 429F0005 7D6802A6 3D6B0000  ||...B...}h..=k..|
+MACHO-PPC-NEXT:       0010: 7C0803A6 858B0020 7D8903A6 4E800420  ||...... }...N.. |
+MACHO-PPC-NEXT:     )
+MACHO-PPC-NEXT:   }
+MACHO-PPC-NEXT:   Section {
+MACHO-PPC-NEXT:     Index: 2
+MACHO-PPC-NEXT:     Name: __data (5F 5F 64 61 74 61 00 00 00 00 00 00 00 00 00 00)
+MACHO-PPC-NEXT:     Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+MACHO-PPC-NEXT:     Address: 0x60
+MACHO-PPC-NEXT:     Size: 0x4
+MACHO-PPC-NEXT:     Offset: 624
+MACHO-PPC-NEXT:     Alignment: 2
+MACHO-PPC-NEXT:     RelocationOffset: 0x0
+MACHO-PPC-NEXT:     RelocationCount: 0
+MACHO-PPC-NEXT:     Type: 0x0
+MACHO-PPC-NEXT:     Attributes [ (0x0)
+MACHO-PPC-NEXT:     ]
+MACHO-PPC-NEXT:     Reserved1: 0x0
+MACHO-PPC-NEXT:     Reserved2: 0x0
+MACHO-PPC-NEXT:     Relocations [
+MACHO-PPC-NEXT:     ]
+MACHO-PPC-NEXT:     Symbols [
+MACHO-PPC-NEXT:       Symbol {
+MACHO-PPC-NEXT:         Name: _b (1)
+MACHO-PPC-NEXT:         Type: 0xF
+MACHO-PPC-NEXT:         Section: __data (0x3)
+MACHO-PPC-NEXT:         RefType: UndefinedNonLazy (0x0)
+MACHO-PPC-NEXT:         Flags [ (0x0)
+MACHO-PPC-NEXT:         ]
+MACHO-PPC-NEXT:         Value: 0x60
+MACHO-PPC-NEXT:       }
+MACHO-PPC-NEXT:     ]
+MACHO-PPC-NEXT:     SectionData (
+MACHO-PPC-NEXT:       0000: 0000002A                             |...*|
+MACHO-PPC-NEXT:     )
+MACHO-PPC-NEXT:   }
+MACHO-PPC-NEXT:   Section {
+MACHO-PPC-NEXT:     Index: 3
+MACHO-PPC-NEXT:     Name: __nl_symbol_ptr (5F 5F 6E 6C 5F 73 79 6D 62 6F 6C 5F 70 74 72 00)
+MACHO-PPC-NEXT:     Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+MACHO-PPC-NEXT:     Address: 0x64
+MACHO-PPC-NEXT:     Size: 0x4
+MACHO-PPC-NEXT:     Offset: 628
+MACHO-PPC-NEXT:     Alignment: 2
+MACHO-PPC-NEXT:     RelocationOffset: 0x0
+MACHO-PPC-NEXT:     RelocationCount: 0
+MACHO-PPC-NEXT:     Type: 0x6
+MACHO-PPC-NEXT:     Attributes [ (0x0)
+MACHO-PPC-NEXT:     ]
+MACHO-PPC-NEXT:     Reserved1: 0x1
+MACHO-PPC-NEXT:     Reserved2: 0x0
+MACHO-PPC-NEXT:     Relocations [
+MACHO-PPC-NEXT:     ]
+MACHO-PPC-NEXT:     Symbols [
+MACHO-PPC-NEXT:     ]
+MACHO-PPC-NEXT:     SectionData (
+MACHO-PPC-NEXT:       0000: 00000000                             |....|
+MACHO-PPC-NEXT:     )
+MACHO-PPC-NEXT:   }
+MACHO-PPC-NEXT:   Section {
+MACHO-PPC-NEXT:     Index: 4
+MACHO-PPC-NEXT:     Name: __la_symbol_ptr (5F 5F 6C 61 5F 73 79 6D 62 6F 6C 5F 70 74 72 00)
+MACHO-PPC-NEXT:     Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+MACHO-PPC-NEXT:     Address: 0x68
+MACHO-PPC-NEXT:     Size: 0x4
+MACHO-PPC-NEXT:     Offset: 632
+MACHO-PPC-NEXT:     Alignment: 2
+MACHO-PPC-NEXT:     RelocationOffset: 0x2C4
+MACHO-PPC-NEXT:     RelocationCount: 1
+MACHO-PPC-NEXT:     Type: 0x7
+MACHO-PPC-NEXT:     Attributes [ (0x0)
+MACHO-PPC-NEXT:     ]
+MACHO-PPC-NEXT:     Reserved1: 0x2
+MACHO-PPC-NEXT:     Reserved2: 0x0
+MACHO-PPC-NEXT:     Relocations [
+MACHO-PPC-NEXT:       0x0 0 2 1 PPC_RELOC_VANILLA 0 dyld_stub_binding_helper
+MACHO-PPC-NEXT:     ]
+MACHO-PPC-NEXT:     Symbols [
+MACHO-PPC-NEXT:     ]
+MACHO-PPC-NEXT:     SectionData (
+MACHO-PPC-NEXT:       0000: 00000000                             |....|
+MACHO-PPC-NEXT:     )
+MACHO-PPC-NEXT:   }
+MACHO-PPC-NEXT: ]
+
+
+MACHO-PPC64:  Sections [
+MACHO-PPC64-NEXT:   Section {
+MACHO-PPC64-NEXT:     Index: 0
+MACHO-PPC64-NEXT:     Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+MACHO-PPC64-NEXT:     Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+MACHO-PPC64-NEXT:     Address: 0x0
+MACHO-PPC64-NEXT:     Size: 0x3C
+MACHO-PPC64-NEXT:     Offset: 608
+MACHO-PPC64-NEXT:     Alignment: 2
+MACHO-PPC64-NEXT:     RelocationOffset: 0x2D4
+MACHO-PPC64-NEXT:     RelocationCount: 5
+MACHO-PPC64-NEXT:     Type: 0x0
+MACHO-PPC64-NEXT:     Attributes [ (0x800004)
+MACHO-PPC64-NEXT:       PureInstructions (0x800000)
+MACHO-PPC64-NEXT:       SomeInstructions (0x4)
+MACHO-PPC64-NEXT:     ]
+MACHO-PPC64-NEXT:     Reserved1: 0x0
+MACHO-PPC64-NEXT:     Reserved2: 0x0
+MACHO-PPC64-NEXT:     Relocations [
+MACHO-PPC64-NEXT:       0x24 0 2 n/a 1 -
+MACHO-PPC64-NEXT:       0x0 0 2 n/a 1 -
+MACHO-PPC64-NEXT:       0x1C 0 2 n/a 1 -
+MACHO-PPC64-NEXT:       0x58 0 2 n/a 1 -
+MACHO-PPC64-NEXT:       0x18 1 2 0 0 -
+MACHO-PPC64-NEXT:     ]
+MACHO-PPC64-NEXT:     Symbols [
+MACHO-PPC64-NEXT:       Symbol {
+MACHO-PPC64-NEXT:         Name: _f (4)
+MACHO-PPC64-NEXT:         Type: 0xF
+MACHO-PPC64-NEXT:         Section: __text (0x1)
+MACHO-PPC64-NEXT:         RefType: UndefinedNonLazy (0x0)
+MACHO-PPC64-NEXT:         Flags [ (0x0)
+MACHO-PPC64-NEXT:         ]
+MACHO-PPC64-NEXT:         Value: 0x0
+MACHO-PPC64-NEXT:       }
+MACHO-PPC64-NEXT:     ]
+MACHO-PPC64-NEXT:     SectionData (
+MACHO-PPC64-NEXT:       0000: 7C0802A6 FBE1FFF8 429F0005 7FE802A6  ||.......B.......|
+MACHO-PPC64-NEXT:       0010: F8010010 F821FF81 48000029 3C5F0000  |.....!..H..)<_..|
+MACHO-PPC64-NEXT:       0020: 38210080 E8420058 E8010010 EBE1FFF8  |8!...B.X........|
+MACHO-PPC64-NEXT:       0030: 7C0803A6 E8620002 4E800020           ||....b..N.. |
+MACHO-PPC64-NEXT:     )
+MACHO-PPC64-NEXT:   }
+MACHO-PPC64-NEXT:   Section {
+MACHO-PPC64-NEXT:     Index: 1
+MACHO-PPC64-NEXT:     Name: __picsymbolstub1 (5F 5F 70 69 63 73 79 6D 62 6F 6C 73 74 75 62 31)
+MACHO-PPC64-NEXT:     Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+MACHO-PPC64-NEXT:     Address: 0x40
+MACHO-PPC64-NEXT:     Size: 0x20
+MACHO-PPC64-NEXT:     Offset: 672
+MACHO-PPC64-NEXT:     Alignment: 5
+MACHO-PPC64-NEXT:     RelocationOffset: 0x2FC
+MACHO-PPC64-NEXT:     RelocationCount: 4
+MACHO-PPC64-NEXT:     Type: 0x8
+MACHO-PPC64-NEXT:     Attributes [ (0x800004)
+MACHO-PPC64-NEXT:       PureInstructions (0x800000)
+MACHO-PPC64-NEXT:       SomeInstructions (0x4)
+MACHO-PPC64-NEXT:     ]
+MACHO-PPC64-NEXT:     Reserved1: 0x0
+MACHO-PPC64-NEXT:     Reserved2: 0x20
+MACHO-PPC64-NEXT:     Relocations [
+MACHO-PPC64-NEXT:       0x14 0 2 n/a 1 -
+MACHO-PPC64-NEXT:       0x0 0 2 n/a 1 -
+MACHO-PPC64-NEXT:       0xC 0 2 n/a 1 -
+MACHO-PPC64-NEXT:       0x24 0 2 n/a 1 -
+MACHO-PPC64-NEXT:     ]
+MACHO-PPC64-NEXT:     Symbols [
+MACHO-PPC64-NEXT:     ]
+MACHO-PPC64-NEXT:     SectionData (
+MACHO-PPC64-NEXT:       0000: 7C0802A6 429F0005 7D6802A6 3D6B0000  ||...B...}h..=k..|
+MACHO-PPC64-NEXT:       0010: 7C0803A6 E98B0025 7D8903A6 4E800420  ||......%}...N.. |
+MACHO-PPC64-NEXT:     )
+MACHO-PPC64-NEXT:   }
+MACHO-PPC64-NEXT:   Section {
+MACHO-PPC64-NEXT:     Index: 2
+MACHO-PPC64-NEXT:     Name: __data (5F 5F 64 61 74 61 00 00 00 00 00 00 00 00 00 00)
+MACHO-PPC64-NEXT:     Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+MACHO-PPC64-NEXT:     Address: 0x60
+MACHO-PPC64-NEXT:     Size: 0x4
+MACHO-PPC64-NEXT:     Offset: 704
+MACHO-PPC64-NEXT:     Alignment: 2
+MACHO-PPC64-NEXT:     RelocationOffset: 0x0
+MACHO-PPC64-NEXT:     RelocationCount: 0
+MACHO-PPC64-NEXT:     Type: 0x0
+MACHO-PPC64-NEXT:     Attributes [ (0x0)
+MACHO-PPC64-NEXT:     ]
+MACHO-PPC64-NEXT:     Reserved1: 0x0
+MACHO-PPC64-NEXT:     Reserved2: 0x0
+MACHO-PPC64-NEXT:     Relocations [
+MACHO-PPC64-NEXT:     ]
+MACHO-PPC64-NEXT:     Symbols [
+MACHO-PPC64-NEXT:       Symbol {
+MACHO-PPC64-NEXT:         Name: _b (1)
+MACHO-PPC64-NEXT:         Type: 0xF
+MACHO-PPC64-NEXT:         Section: __data (0x3)
+MACHO-PPC64-NEXT:         RefType: UndefinedNonLazy (0x0)
+MACHO-PPC64-NEXT:         Flags [ (0x0)
+MACHO-PPC64-NEXT:         ]
+MACHO-PPC64-NEXT:         Value: 0x60
+MACHO-PPC64-NEXT:       }
+MACHO-PPC64-NEXT:     ]
+MACHO-PPC64-NEXT:     SectionData (
+MACHO-PPC64-NEXT:       0000: 0000002A                             |...*|
+MACHO-PPC64-NEXT:     )
+MACHO-PPC64-NEXT:   }
+MACHO-PPC64-NEXT:   Section {
+MACHO-PPC64-NEXT:     Index: 3
+MACHO-PPC64-NEXT:     Name: __nl_symbol_ptr (5F 5F 6E 6C 5F 73 79 6D 62 6F 6C 5F 70 74 72 00)
+MACHO-PPC64-NEXT:     Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+MACHO-PPC64-NEXT:     Address: 0x64
+MACHO-PPC64-NEXT:     Size: 0x8
+MACHO-PPC64-NEXT:     Offset: 708
+MACHO-PPC64-NEXT:     Alignment: 2
+MACHO-PPC64-NEXT:     RelocationOffset: 0x0
+MACHO-PPC64-NEXT:     RelocationCount: 0
+MACHO-PPC64-NEXT:     Type: 0x6
+MACHO-PPC64-NEXT:     Attributes [ (0x0)
+MACHO-PPC64-NEXT:     ]
+MACHO-PPC64-NEXT:     Reserved1: 0x1
+MACHO-PPC64-NEXT:     Reserved2: 0x0
+MACHO-PPC64-NEXT:     Relocations [
+MACHO-PPC64-NEXT:     ]
+MACHO-PPC64-NEXT:     Symbols [
+MACHO-PPC64-NEXT:     ]
+MACHO-PPC64-NEXT:     SectionData (
+MACHO-PPC64-NEXT:       0000: 00000000 00000000                    |........|
+MACHO-PPC64-NEXT:     )
+MACHO-PPC64-NEXT:   }
+MACHO-PPC64-NEXT:   Section {
+MACHO-PPC64-NEXT:     Index: 4
+MACHO-PPC64-NEXT:     Name: __la_symbol_ptr (5F 5F 6C 61 5F 73 79 6D 62 6F 6C 5F 70 74 72 00)
+MACHO-PPC64-NEXT:     Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+MACHO-PPC64-NEXT:     Address: 0x6C
+MACHO-PPC64-NEXT:     Size: 0x8
+MACHO-PPC64-NEXT:     Offset: 716
+MACHO-PPC64-NEXT:     Alignment: 2
+MACHO-PPC64-NEXT:     RelocationOffset: 0x31C
+MACHO-PPC64-NEXT:     RelocationCount: 1
+MACHO-PPC64-NEXT:     Type: 0x7
+MACHO-PPC64-NEXT:     Attributes [ (0x0)
+MACHO-PPC64-NEXT:     ]
+MACHO-PPC64-NEXT:     Reserved1: 0x2
+MACHO-PPC64-NEXT:     Reserved2: 0x0
+MACHO-PPC64-NEXT:     Relocations [
+MACHO-PPC64-NEXT:       0x0 0 3 1 0 dyld_stub_binding_helper
+MACHO-PPC64-NEXT:     ]
+MACHO-PPC64-NEXT:     Symbols [
+MACHO-PPC64-NEXT:     ]
+MACHO-PPC64-NEXT:     SectionData (
+MACHO-PPC64-NEXT:       0000: 00000000 00000000                    |........|
+MACHO-PPC64-NEXT:     )
+MACHO-PPC64-NEXT:   }
+MACHO-PPC64-NEXT: ]
+
+MACHO-ARM:      Sections [
+MACHO-ARM-NEXT:   Section {
+MACHO-ARM-NEXT:     Index: 0
+MACHO-ARM-NEXT:     Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+MACHO-ARM-NEXT:     Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+MACHO-ARM-NEXT:     Address: 0x0
+MACHO-ARM-NEXT:     Size: 0x3C
+MACHO-ARM-NEXT:     Offset: 664
+MACHO-ARM-NEXT:     Alignment: 2
+MACHO-ARM-NEXT:     RelocationOffset: 0x2E0
+MACHO-ARM-NEXT:     RelocationCount: 9
+MACHO-ARM-NEXT:     Type: 0x0
+MACHO-ARM-NEXT:     Attributes [ (0x800004)
+MACHO-ARM-NEXT:       PureInstructions (0x800000)
+MACHO-ARM-NEXT:       SomeInstructions (0x4)
+MACHO-ARM-NEXT:     ]
+MACHO-ARM-NEXT:     Reserved1: 0x0
+MACHO-ARM-NEXT:     Reserved2: 0x0
+MACHO-ARM-NEXT:     Relocations [
+MACHO-ARM-NEXT:       Relocation {
+MACHO-ARM-NEXT:         Offset: 0x38
+MACHO-ARM-NEXT:         PCRel: 0
+MACHO-ARM-NEXT:         Length: 2
+MACHO-ARM-NEXT:         Extern: N/A
+MACHO-ARM-NEXT:         Type: ARM_RELOC_SECTDIFF (2)
+MACHO-ARM-NEXT:         Symbol: -
+MACHO-ARM-NEXT:         Scattered: 1
+MACHO-ARM-NEXT:       }
+MACHO-ARM-NEXT:       Relocation {
+MACHO-ARM-NEXT:         Offset: 0x0
+MACHO-ARM-NEXT:         PCRel: 0
+MACHO-ARM-NEXT:         Length: 2
+MACHO-ARM-NEXT:         Extern: N/A
+MACHO-ARM-NEXT:         Type: ARM_RELOC_PAIR (1)
+MACHO-ARM-NEXT:         Symbol: -
+MACHO-ARM-NEXT:         Scattered: 1
+MACHO-ARM-NEXT:       }
+MACHO-ARM-NEXT:       Relocation {
+MACHO-ARM-NEXT:         Offset: 0x20
+MACHO-ARM-NEXT:         PCRel: 1
+MACHO-ARM-NEXT:         Length: 2
+MACHO-ARM-NEXT:         Extern: 1
+MACHO-ARM-NEXT:         Type: ARM_RELOC_BR24 (5)
+MACHO-ARM-NEXT:         Symbol: _g
+MACHO-ARM-NEXT:         Scattered: 0
+MACHO-ARM-NEXT:       }
+MACHO-ARM-NEXT:       Relocation {
+MACHO-ARM-NEXT:         Offset: 0x1C
+MACHO-ARM-NEXT:         PCRel: 0
+MACHO-ARM-NEXT:         Length: 1
+MACHO-ARM-NEXT:         Extern: 1
+MACHO-ARM-NEXT:         Type: ARM_RELOC_HALF (8)
+MACHO-ARM-NEXT:         Symbol: _g
+MACHO-ARM-NEXT:         Scattered: 0
+MACHO-ARM-NEXT:       }
+MACHO-ARM-NEXT:       Relocation {
+MACHO-ARM-NEXT:         Offset: 0x0
+MACHO-ARM-NEXT:         PCRel: 0
+MACHO-ARM-NEXT:         Length: 1
+MACHO-ARM-NEXT:         Extern: 0
+MACHO-ARM-NEXT:         Type: ARM_RELOC_PAIR (1)
+MACHO-ARM-NEXT:         Symbol: -
+MACHO-ARM-NEXT:         Scattered: 0
+MACHO-ARM-NEXT:       }
+MACHO-ARM-NEXT:       Relocation {
+MACHO-ARM-NEXT:         Offset: 0x18
+MACHO-ARM-NEXT:         PCRel: 0
+MACHO-ARM-NEXT:         Length: 0
+MACHO-ARM-NEXT:         Extern: 1
+MACHO-ARM-NEXT:         Type: ARM_RELOC_HALF (8)
+MACHO-ARM-NEXT:         Symbol: _g
+MACHO-ARM-NEXT:         Scattered: 0
+MACHO-ARM-NEXT:       }
+MACHO-ARM-NEXT:       Relocation {
+MACHO-ARM-NEXT:         Offset: 0x0
+MACHO-ARM-NEXT:         PCRel: 0
+MACHO-ARM-NEXT:         Length: 0
+MACHO-ARM-NEXT:         Extern: 0
+MACHO-ARM-NEXT:         Type: ARM_RELOC_PAIR (1)
+MACHO-ARM-NEXT:         Symbol: -
+MACHO-ARM-NEXT:         Scattered: 0
+MACHO-ARM-NEXT:       }
+MACHO-ARM-NEXT:       Relocation {
+MACHO-ARM-NEXT:         Offset: 0xC
+MACHO-ARM-NEXT:         PCRel: 0
+MACHO-ARM-NEXT:         Length: 2
+MACHO-ARM-NEXT:         Extern: N/A
+MACHO-ARM-NEXT:         Type: ARM_RELOC_SECTDIFF (2)
+MACHO-ARM-NEXT:         Symbol: -
+MACHO-ARM-NEXT:         Scattered: 1
+MACHO-ARM-NEXT:       }
+MACHO-ARM-NEXT:       Relocation {
+MACHO-ARM-NEXT:         Offset: 0x0
+MACHO-ARM-NEXT:         PCRel: 0
+MACHO-ARM-NEXT:         Length: 2
+MACHO-ARM-NEXT:         Extern: N/A
+MACHO-ARM-NEXT:         Type: ARM_RELOC_PAIR (1)
+MACHO-ARM-NEXT:         Symbol: -
+MACHO-ARM-NEXT:         Scattered: 1
+MACHO-ARM-NEXT:       }
+MACHO-ARM-NEXT:     ]
+MACHO-ARM-NEXT:     Symbols [
+MACHO-ARM-NEXT:       Symbol {
+MACHO-ARM-NEXT:         Name: _f (4)
+MACHO-ARM-NEXT:         Type: 0xF
+MACHO-ARM-NEXT:         Section: __text (0x1)
+MACHO-ARM-NEXT:         RefType: UndefinedNonLazy (0x0)
+MACHO-ARM-NEXT:         Flags [ (0x0)
+MACHO-ARM-NEXT:         ]
+MACHO-ARM-NEXT:         Value: 0x10
+MACHO-ARM-NEXT:       }
+MACHO-ARM-NEXT:       Symbol {
+MACHO-ARM-NEXT:         Name: _h (1)
+MACHO-ARM-NEXT:         Type: 0xF
+MACHO-ARM-NEXT:         Section: __text (0x1)
+MACHO-ARM-NEXT:         RefType: UndefinedNonLazy (0x0)
+MACHO-ARM-NEXT:         Flags [ (0x0)
+MACHO-ARM-NEXT:         ]
+MACHO-ARM-NEXT:         Value: 0x0
+MACHO-ARM-NEXT:       }
+MACHO-ARM-NEXT:     ]
+MACHO-ARM-NEXT:     SectionData (
+MACHO-ARM-NEXT:       0000: 04009FE5 00009FE7 1EFF2FE1 38000000  |........../.8...|
+MACHO-ARM-NEXT:       0010: 80402DE9 0D70A0E1 000000E3 000040E3  |.@-..p........@.|
+MACHO-ARM-NEXT:       0020: F6FFFFEB 0C009FE5 00009FE7 000090E5  |................|
+MACHO-ARM-NEXT:       0030: 8040BDE8 1EFF2FE1 10000000           |.@..../.....|
+MACHO-ARM-NEXT:     )
+MACHO-ARM-NEXT:   }
+MACHO-ARM-NEXT:   Section {
+MACHO-ARM-NEXT:     Index: 1
+MACHO-ARM-NEXT:     Name: __textcoal_nt (5F 5F 74 65 78 74 63 6F 61 6C 5F 6E 74 00 00 00)
+MACHO-ARM-NEXT:     Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+MACHO-ARM-NEXT:     Address: 0x3C
+MACHO-ARM-NEXT:     Size: 0x0
+MACHO-ARM-NEXT:     Offset: 724
+MACHO-ARM-NEXT:     Alignment: 0
+MACHO-ARM-NEXT:     RelocationOffset: 0x0
+MACHO-ARM-NEXT:     RelocationCount: 0
+MACHO-ARM-NEXT:     Type: 0xB
+MACHO-ARM-NEXT:     Attributes [ (0x800000)
+MACHO-ARM-NEXT:       PureInstructions (0x800000)
+MACHO-ARM-NEXT:     ]
+MACHO-ARM-NEXT:     Reserved1: 0x0
+MACHO-ARM-NEXT:     Reserved2: 0x0
+MACHO-ARM-NEXT:     Relocations [
+MACHO-ARM-NEXT:     ]
+MACHO-ARM-NEXT:     Symbols [
+MACHO-ARM-NEXT:     ]
+MACHO-ARM-NEXT:     SectionData (
+MACHO-ARM-NEXT:     )
+MACHO-ARM-NEXT:   }
+MACHO-ARM-NEXT:   Section {
+MACHO-ARM-NEXT:     Index: 2
+MACHO-ARM-NEXT:     Name: __const_coal (5F 5F 63 6F 6E 73 74 5F 63 6F 61 6C 00 00 00 00)
+MACHO-ARM-NEXT:     Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+MACHO-ARM-NEXT:     Address: 0x3C
+MACHO-ARM-NEXT:     Size: 0x0
+MACHO-ARM-NEXT:     Offset: 724
+MACHO-ARM-NEXT:     Alignment: 0
+MACHO-ARM-NEXT:     RelocationOffset: 0x0
+MACHO-ARM-NEXT:     RelocationCount: 0
+MACHO-ARM-NEXT:     Type: 0xB
+MACHO-ARM-NEXT:     Attributes [ (0x0)
+MACHO-ARM-NEXT:     ]
+MACHO-ARM-NEXT:     Reserved1: 0x0
+MACHO-ARM-NEXT:     Reserved2: 0x0
+MACHO-ARM-NEXT:     Relocations [
+MACHO-ARM-NEXT:     ]
+MACHO-ARM-NEXT:     Symbols [
+MACHO-ARM-NEXT:     ]
+MACHO-ARM-NEXT:     SectionData (
+MACHO-ARM-NEXT:     )
+MACHO-ARM-NEXT:   }
+MACHO-ARM-NEXT:   Section {
+MACHO-ARM-NEXT:     Index: 3
+MACHO-ARM-NEXT:     Name: __picsymbolstub4 (5F 5F 70 69 63 73 79 6D 62 6F 6C 73 74 75 62 34)
+MACHO-ARM-NEXT:     Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+MACHO-ARM-NEXT:     Address: 0x3C
+MACHO-ARM-NEXT:     Size: 0x0
+MACHO-ARM-NEXT:     Offset: 724
+MACHO-ARM-NEXT:     Alignment: 0
+MACHO-ARM-NEXT:     RelocationOffset: 0x0
+MACHO-ARM-NEXT:     RelocationCount: 0
+MACHO-ARM-NEXT:     Type: 0x8
+MACHO-ARM-NEXT:     Attributes [ (0x0)
+MACHO-ARM-NEXT:     ]
+MACHO-ARM-NEXT:     Reserved1: 0x0
+MACHO-ARM-NEXT:     Reserved2: 0x10
+MACHO-ARM-NEXT:     Relocations [
+MACHO-ARM-NEXT:     ]
+MACHO-ARM-NEXT:     Symbols [
+MACHO-ARM-NEXT:     ]
+MACHO-ARM-NEXT:     SectionData (
+MACHO-ARM-NEXT:     )
+MACHO-ARM-NEXT:   }
+MACHO-ARM-NEXT:   Section {
+MACHO-ARM-NEXT:     Index: 4
+MACHO-ARM-NEXT:     Name: __StaticInit (5F 5F 53 74 61 74 69 63 49 6E 69 74 00 00 00 00)
+MACHO-ARM-NEXT:     Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+MACHO-ARM-NEXT:     Address: 0x3C
+MACHO-ARM-NEXT:     Size: 0x0
+MACHO-ARM-NEXT:     Offset: 724
+MACHO-ARM-NEXT:     Alignment: 0
+MACHO-ARM-NEXT:     RelocationOffset: 0x0
+MACHO-ARM-NEXT:     RelocationCount: 0
+MACHO-ARM-NEXT:     Type: 0x0
+MACHO-ARM-NEXT:     Attributes [ (0x800000)
+MACHO-ARM-NEXT:       PureInstructions (0x800000)
+MACHO-ARM-NEXT:     ]
+MACHO-ARM-NEXT:     Reserved1: 0x0
+MACHO-ARM-NEXT:     Reserved2: 0x0
+MACHO-ARM-NEXT:     Relocations [
+MACHO-ARM-NEXT:     ]
+MACHO-ARM-NEXT:     Symbols [
+MACHO-ARM-NEXT:     ]
+MACHO-ARM-NEXT:     SectionData (
+MACHO-ARM-NEXT:     )
+MACHO-ARM-NEXT:   }
+MACHO-ARM-NEXT:   Section {
+MACHO-ARM-NEXT:     Index: 5
+MACHO-ARM-NEXT:     Name: __data (5F 5F 64 61 74 61 00 00 00 00 00 00 00 00 00 00)
+MACHO-ARM-NEXT:     Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+MACHO-ARM-NEXT:     Address: 0x3C
+MACHO-ARM-NEXT:     Size: 0x4
+MACHO-ARM-NEXT:     Offset: 724
+MACHO-ARM-NEXT:     Alignment: 2
+MACHO-ARM-NEXT:     RelocationOffset: 0x0
+MACHO-ARM-NEXT:     RelocationCount: 0
+MACHO-ARM-NEXT:     Type: 0x0
+MACHO-ARM-NEXT:     Attributes [ (0x0)
+MACHO-ARM-NEXT:     ]
+MACHO-ARM-NEXT:     Reserved1: 0x0
+MACHO-ARM-NEXT:     Reserved2: 0x0
+MACHO-ARM-NEXT:     Relocations [
+MACHO-ARM-NEXT:     ]
+MACHO-ARM-NEXT:     Symbols [
+MACHO-ARM-NEXT:       Symbol {
+MACHO-ARM-NEXT:         Name: _b (10)
+MACHO-ARM-NEXT:         Type: 0xF
+MACHO-ARM-NEXT:         Section: __data (0x6)
+MACHO-ARM-NEXT:         RefType: UndefinedNonLazy (0x0)
+MACHO-ARM-NEXT:         Flags [ (0x0)
+MACHO-ARM-NEXT:         ]
+MACHO-ARM-NEXT:         Value: 0x3C
+MACHO-ARM-NEXT:       }
+MACHO-ARM-NEXT:     ]
+MACHO-ARM-NEXT:     SectionData (
+MACHO-ARM-NEXT:       0000: 2A000000                             |*...|
+MACHO-ARM-NEXT:     )
+MACHO-ARM-NEXT:   }
+MACHO-ARM-NEXT:   Section {
+MACHO-ARM-NEXT:     Index: 6
+MACHO-ARM-NEXT:     Name: __nl_symbol_ptr (5F 5F 6E 6C 5F 73 79 6D 62 6F 6C 5F 70 74 72 00)
+MACHO-ARM-NEXT:     Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+MACHO-ARM-NEXT:     Address: 0x40
+MACHO-ARM-NEXT:     Size: 0x8
+MACHO-ARM-NEXT:     Offset: 728
+MACHO-ARM-NEXT:     Alignment: 2
+MACHO-ARM-NEXT:     RelocationOffset: 0x0
+MACHO-ARM-NEXT:     RelocationCount: 0
+MACHO-ARM-NEXT:     Type: 0x6
+MACHO-ARM-NEXT:     Attributes [ (0x0)
+MACHO-ARM-NEXT:     ]
+MACHO-ARM-NEXT:     Reserved1: 0x0
+MACHO-ARM-NEXT:     Reserved2: 0x0
+MACHO-ARM-NEXT:     Relocations [
+MACHO-ARM-NEXT:     ]
+MACHO-ARM-NEXT:     Symbols [
+MACHO-ARM-NEXT:     ]
+MACHO-ARM-NEXT:     SectionData (
+MACHO-ARM-NEXT:       0000: 00000000 00000000                    |........|
+MACHO-ARM-NEXT:     )
+MACHO-ARM-NEXT:   }
+MACHO-ARM-NEXT: ]
diff --git a/test/tools/llvm-readobj/sections.test b/test/tools/llvm-readobj/sections.test
index e5c6c06..16f1131 100644
--- a/test/tools/llvm-readobj/sections.test
+++ b/test/tools/llvm-readobj/sections.test
@@ -3,7 +3,15 @@ RUN:   | FileCheck %s -check-prefix COFF
 RUN: llvm-readobj -s %p/Inputs/trivial.obj.elf-i386 \
 RUN:   | FileCheck %s -check-prefix ELF
 RUN: llvm-readobj -s %p/Inputs/trivial.obj.macho-i386 \
-RUN:   | FileCheck %s -check-prefix MACHO
+RUN:   | FileCheck %s -check-prefix MACHO-I386
+RUN: llvm-readobj -s %p/Inputs/trivial.obj.macho-x86-64 \
+RUN:   | FileCheck %s -check-prefix MACHO-X86-64
+RUN: llvm-readobj -s %p/Inputs/trivial.obj.macho-ppc \
+RUN:   | FileCheck %s -check-prefix MACHO-PPC
+RUN: llvm-readobj -s %p/Inputs/trivial.obj.macho-ppc64 \
+RUN:   | FileCheck %s -check-prefix MACHO-PPC64
+RUN: llvm-readobj -s %p/Inputs/trivial.obj.macho-arm \
+RUN:   | FileCheck %s -check-prefix MACHO-ARM
 
 COFF:      Sections [
 COFF-NEXT:   Section {
@@ -76,38 +84,369 @@ ELF-NEXT:     AddressAlignment: 16
 ELF-NEXT:     EntrySize: 0
 ELF-NEXT:   }
 
-MACHO:      Sections [
-MACHO-NEXT:   Section {
-MACHO-NEXT:     Index: 0
-MACHO-NEXT:     Name: __text (
-MACHO-NEXT:     Segment: 
-MACHO-NEXT:     Address: 0x0
-MACHO-NEXT:     Size: 0x22
-MACHO-NEXT:     Offset: 324
-MACHO-NEXT:     Alignment: 4
-MACHO-NEXT:     RelocationOffset: 0x174
-MACHO-NEXT:     RelocationCount: 4
-MACHO-NEXT:     Type: 0x0
-MACHO-NEXT:     Attributes [ (0x800004)
-MACHO-NEXT:       PureInstructions (0x800000)
-MACHO-NEXT:       SomeInstructions (0x4)
-MACHO-NEXT:     ]
-MACHO-NEXT:     Reserved1: 0x0
-MACHO-NEXT:     Reserved2: 0x0
-MACHO-NEXT:   }
-MACHO-NEXT:   Section {
-MACHO-NEXT:     Index: 1
-MACHO-NEXT:     Name: __cstring (
-MACHO-NEXT:     Segment:
-MACHO-NEXT:     Address: 0x22
-MACHO-NEXT:     Size: 0xD
-MACHO-NEXT:     Offset: 358
-MACHO-NEXT:     Alignment: 0
-MACHO-NEXT:     RelocationOffset: 0x0
-MACHO-NEXT:     RelocationCount: 0
-MACHO-NEXT:     Type: ExtReloc (0x2)
-MACHO-NEXT:     Attributes [ (0x0)
-MACHO-NEXT:     ]
-MACHO-NEXT:     Reserved1: 0x0
-MACHO-NEXT:     Reserved2: 0x0
-MACHO-NEXT:   }
+MACHO-I386:      Sections [
+MACHO-I386-NEXT:   Section {
+MACHO-I386-NEXT:     Index: 0
+MACHO-I386-NEXT:     Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+MACHO-I386-NEXT:     Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+MACHO-I386-NEXT:     Address: 0x0
+MACHO-I386-NEXT:     Size: 0x22
+MACHO-I386-NEXT:     Offset: 324
+MACHO-I386-NEXT:     Alignment: 4
+MACHO-I386-NEXT:     RelocationOffset: 0x174
+MACHO-I386-NEXT:     RelocationCount: 4
+MACHO-I386-NEXT:     Type: 0x0
+MACHO-I386-NEXT:     Attributes [ (0x800004)
+MACHO-I386-NEXT:       PureInstructions (0x800000)
+MACHO-I386-NEXT:       SomeInstructions (0x4)
+MACHO-I386-NEXT:     ]
+MACHO-I386-NEXT:     Reserved1: 0x0
+MACHO-I386-NEXT:     Reserved2: 0x0
+MACHO-I386-NEXT:   }
+MACHO-I386-NEXT:   Section {
+MACHO-I386-NEXT:     Index: 1
+MACHO-I386-NEXT:     Name: __cstring (5F 5F 63 73 74 72 69 6E 67 00 00 00 00 00 00 00)
+MACHO-I386-NEXT:     Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+MACHO-I386-NEXT:     Address: 0x22
+MACHO-I386-NEXT:     Size: 0xD
+MACHO-I386-NEXT:     Offset: 358
+MACHO-I386-NEXT:     Alignment: 0
+MACHO-I386-NEXT:     RelocationOffset: 0x0
+MACHO-I386-NEXT:     RelocationCount: 0
+MACHO-I386-NEXT:     Type: ExtReloc (0x2)
+MACHO-I386-NEXT:     Attributes [ (0x0)
+MACHO-I386-NEXT:     ]
+MACHO-I386-NEXT:     Reserved1: 0x0
+MACHO-I386-NEXT:     Reserved2: 0x0
+MACHO-I386-NEXT:   }
+
+
+MACHO-X86-64:     Sections [
+MACHO-X86-64-NEXT:  Section {
+MACHO-X86-64-NEXT:    Index: 0
+MACHO-X86-64-NEXT:    Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+MACHO-X86-64-NEXT:    Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+MACHO-X86-64-NEXT:    Address: 0x0
+MACHO-X86-64-NEXT:    Size: 0x16
+MACHO-X86-64-NEXT:    Offset: 368
+MACHO-X86-64-NEXT:    Alignment: 4
+MACHO-X86-64-NEXT:    RelocationOffset: 0x194
+MACHO-X86-64-NEXT:    RelocationCount: 3
+MACHO-X86-64-NEXT:    Type: 0x0
+MACHO-X86-64-NEXT:    Attributes [ (0x800004)
+MACHO-X86-64-NEXT:      PureInstructions (0x800000)
+MACHO-X86-64-NEXT:      SomeInstructions (0x4)
+MACHO-X86-64-NEXT:    ]
+MACHO-X86-64-NEXT:    Reserved1: 0x0
+MACHO-X86-64-NEXT:    Reserved2: 0x0
+MACHO-X86-64-NEXT:  }
+MACHO-X86-64-NEXT:  Section {
+MACHO-X86-64-NEXT:    Index: 1
+MACHO-X86-64-NEXT:    Name: __cstring (5F 5F 63 73 74 72 69 6E 67 00 00 00 00 00 00 00)
+MACHO-X86-64-NEXT:    Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+MACHO-X86-64-NEXT:    Address: 0x16
+MACHO-X86-64-NEXT:    Size: 0xD
+MACHO-X86-64-NEXT:    Offset: 390
+MACHO-X86-64-NEXT:    Alignment: 0
+MACHO-X86-64-NEXT:    RelocationOffset: 0x0
+MACHO-X86-64-NEXT:    RelocationCount: 0
+MACHO-X86-64-NEXT:    Type: ExtReloc (0x2)
+MACHO-X86-64-NEXT:    Attributes [ (0x0)
+MACHO-X86-64-NEXT:    ]
+MACHO-X86-64-NEXT:    Reserved1: 0x0
+MACHO-X86-64-NEXT:    Reserved2: 0x0
+MACHO-X86-64-NEXT:  }
+MACHO-X86-64-NEXT:]
+
+MACHO-PPC: Sections [
+MACHO-PPC-NEXT:   Section {
+MACHO-PPC-NEXT:     Index: 0
+MACHO-PPC-NEXT:     Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+MACHO-PPC-NEXT:     Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+MACHO-PPC-NEXT:     Address: 0x0
+MACHO-PPC-NEXT:     Size: 0x3C
+MACHO-PPC-NEXT:     Offset: 528
+MACHO-PPC-NEXT:     Alignment: 2
+MACHO-PPC-NEXT:     RelocationOffset: 0x27C
+MACHO-PPC-NEXT:     RelocationCount: 5
+MACHO-PPC-NEXT:     Type: 0x0
+MACHO-PPC-NEXT:     Attributes [ (0x800004)
+MACHO-PPC-NEXT:       PureInstructions (0x800000)
+MACHO-PPC-NEXT:       SomeInstructions (0x4)
+MACHO-PPC-NEXT:     ]
+MACHO-PPC-NEXT:     Reserved1: 0x0
+MACHO-PPC-NEXT:     Reserved2: 0x0
+MACHO-PPC-NEXT:   }
+MACHO-PPC-NEXT:   Section {
+MACHO-PPC-NEXT:     Index: 1
+MACHO-PPC-NEXT:     Name: __picsymbolstub1 (5F 5F 70 69 63 73 79 6D 62 6F 6C 73 74 75 62 31)
+MACHO-PPC-NEXT:     Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+MACHO-PPC-NEXT:     Address: 0x40
+MACHO-PPC-NEXT:     Size: 0x20
+MACHO-PPC-NEXT:     Offset: 592
+MACHO-PPC-NEXT:     Alignment: 5
+MACHO-PPC-NEXT:     RelocationOffset: 0x2A4
+MACHO-PPC-NEXT:     RelocationCount: 4
+MACHO-PPC-NEXT:     Type: 0x8
+MACHO-PPC-NEXT:     Attributes [ (0x800004)
+MACHO-PPC-NEXT:       PureInstructions (0x800000)
+MACHO-PPC-NEXT:       SomeInstructions (0x4)
+MACHO-PPC-NEXT:     ]
+MACHO-PPC-NEXT:     Reserved1: 0x0
+MACHO-PPC-NEXT:     Reserved2: 0x20
+MACHO-PPC-NEXT:   }
+MACHO-PPC-NEXT:   Section {
+MACHO-PPC-NEXT:     Index: 2
+MACHO-PPC-NEXT:     Name: __data (5F 5F 64 61 74 61 00 00 00 00 00 00 00 00 00 00)
+MACHO-PPC-NEXT:     Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+MACHO-PPC-NEXT:     Address: 0x60
+MACHO-PPC-NEXT:     Size: 0x4
+MACHO-PPC-NEXT:     Offset: 624
+MACHO-PPC-NEXT:     Alignment: 2
+MACHO-PPC-NEXT:     RelocationOffset: 0x0
+MACHO-PPC-NEXT:     RelocationCount: 0
+MACHO-PPC-NEXT:     Type: 0x0
+MACHO-PPC-NEXT:     Attributes [ (0x0)
+MACHO-PPC-NEXT:     ]
+MACHO-PPC-NEXT:     Reserved1: 0x0
+MACHO-PPC-NEXT:     Reserved2: 0x0
+MACHO-PPC-NEXT:   }
+MACHO-PPC-NEXT:   Section {
+MACHO-PPC-NEXT:     Index: 3
+MACHO-PPC-NEXT:     Name: __nl_symbol_ptr (5F 5F 6E 6C 5F 73 79 6D 62 6F 6C 5F 70 74 72 00)
+MACHO-PPC-NEXT:     Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+MACHO-PPC-NEXT:     Address: 0x64
+MACHO-PPC-NEXT:     Size: 0x4
+MACHO-PPC-NEXT:     Offset: 628
+MACHO-PPC-NEXT:     Alignment: 2
+MACHO-PPC-NEXT:     RelocationOffset: 0x0
+MACHO-PPC-NEXT:     RelocationCount: 0
+MACHO-PPC-NEXT:     Type: 0x6
+MACHO-PPC-NEXT:     Attributes [ (0x0)
+MACHO-PPC-NEXT:     ]
+MACHO-PPC-NEXT:     Reserved1: 0x1
+MACHO-PPC-NEXT:     Reserved2: 0x0
+MACHO-PPC-NEXT:   }
+MACHO-PPC-NEXT:   Section {
+MACHO-PPC-NEXT:     Index: 4
+MACHO-PPC-NEXT:     Name: __la_symbol_ptr (5F 5F 6C 61 5F 73 79 6D 62 6F 6C 5F 70 74 72 00)
+MACHO-PPC-NEXT:     Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+MACHO-PPC-NEXT:     Address: 0x68
+MACHO-PPC-NEXT:     Size: 0x4
+MACHO-PPC-NEXT:     Offset: 632
+MACHO-PPC-NEXT:     Alignment: 2
+MACHO-PPC-NEXT:     RelocationOffset: 0x2C4
+MACHO-PPC-NEXT:     RelocationCount: 1
+MACHO-PPC-NEXT:     Type: 0x7
+MACHO-PPC-NEXT:     Attributes [ (0x0)
+MACHO-PPC-NEXT:     ]
+MACHO-PPC-NEXT:     Reserved1: 0x2
+MACHO-PPC-NEXT:     Reserved2: 0x0
+MACHO-PPC-NEXT:   }
+MACHO-PPC-NEXT: ]
+
+MACHO-PPC64: Sections [
+MACHO-PPC64-NEXT:   Section {
+MACHO-PPC64-NEXT:     Index: 0
+MACHO-PPC64-NEXT:     Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+MACHO-PPC64-NEXT:     Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+MACHO-PPC64-NEXT:     Address: 0x0
+MACHO-PPC64-NEXT:     Size: 0x3C
+MACHO-PPC64-NEXT:     Offset: 608
+MACHO-PPC64-NEXT:     Alignment: 2
+MACHO-PPC64-NEXT:     RelocationOffset: 0x2D4
+MACHO-PPC64-NEXT:     RelocationCount: 5
+MACHO-PPC64-NEXT:     Type: 0x0
+MACHO-PPC64-NEXT:     Attributes [ (0x800004)
+MACHO-PPC64-NEXT:       PureInstructions (0x800000)
+MACHO-PPC64-NEXT:       SomeInstructions (0x4)
+MACHO-PPC64-NEXT:     ]
+MACHO-PPC64-NEXT:     Reserved1: 0x0
+MACHO-PPC64-NEXT:     Reserved2: 0x0
+MACHO-PPC64-NEXT:   }
+MACHO-PPC64-NEXT:   Section {
+MACHO-PPC64-NEXT:     Index: 1
+MACHO-PPC64-NEXT:     Name: __picsymbolstub1 (5F 5F 70 69 63 73 79 6D 62 6F 6C 73 74 75 62 31)
+MACHO-PPC64-NEXT:     Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+MACHO-PPC64-NEXT:     Address: 0x40
+MACHO-PPC64-NEXT:     Size: 0x20
+MACHO-PPC64-NEXT:     Offset: 672
+MACHO-PPC64-NEXT:     Alignment: 5
+MACHO-PPC64-NEXT:     RelocationOffset: 0x2FC
+MACHO-PPC64-NEXT:     RelocationCount: 4
+MACHO-PPC64-NEXT:     Type: 0x8
+MACHO-PPC64-NEXT:     Attributes [ (0x800004)
+MACHO-PPC64-NEXT:       PureInstructions (0x800000)
+MACHO-PPC64-NEXT:       SomeInstructions (0x4)
+MACHO-PPC64-NEXT:     ]
+MACHO-PPC64-NEXT:     Reserved1: 0x0
+MACHO-PPC64-NEXT:     Reserved2: 0x20
+MACHO-PPC64-NEXT:   }
+MACHO-PPC64-NEXT:   Section {
+MACHO-PPC64-NEXT:     Index: 2
+MACHO-PPC64-NEXT:     Name: __data (5F 5F 64 61 74 61 00 00 00 00 00 00 00 00 00 00)
+MACHO-PPC64-NEXT:     Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+MACHO-PPC64-NEXT:     Address: 0x60
+MACHO-PPC64-NEXT:     Size: 0x4
+MACHO-PPC64-NEXT:     Offset: 704
+MACHO-PPC64-NEXT:     Alignment: 2
+MACHO-PPC64-NEXT:     RelocationOffset: 0x0
+MACHO-PPC64-NEXT:     RelocationCount: 0
+MACHO-PPC64-NEXT:     Type: 0x0
+MACHO-PPC64-NEXT:     Attributes [ (0x0)
+MACHO-PPC64-NEXT:     ]
+MACHO-PPC64-NEXT:     Reserved1: 0x0
+MACHO-PPC64-NEXT:     Reserved2: 0x0
+MACHO-PPC64-NEXT:   }
+MACHO-PPC64-NEXT:   Section {
+MACHO-PPC64-NEXT:     Index: 3
+MACHO-PPC64-NEXT:     Name: __nl_symbol_ptr (5F 5F 6E 6C 5F 73 79 6D 62 6F 6C 5F 70 74 72 00)
+MACHO-PPC64-NEXT:     Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+MACHO-PPC64-NEXT:     Address: 0x64
+MACHO-PPC64-NEXT:     Size: 0x8
+MACHO-PPC64-NEXT:     Offset: 708
+MACHO-PPC64-NEXT:     Alignment: 2
+MACHO-PPC64-NEXT:     RelocationOffset: 0x0
+MACHO-PPC64-NEXT:     RelocationCount: 0
+MACHO-PPC64-NEXT:     Type: 0x6
+MACHO-PPC64-NEXT:     Attributes [ (0x0)
+MACHO-PPC64-NEXT:     ]
+MACHO-PPC64-NEXT:     Reserved1: 0x1
+MACHO-PPC64-NEXT:     Reserved2: 0x0
+MACHO-PPC64-NEXT:   }
+MACHO-PPC64-NEXT:   Section {
+MACHO-PPC64-NEXT:     Index: 4
+MACHO-PPC64-NEXT:     Name: __la_symbol_ptr (5F 5F 6C 61 5F 73 79 6D 62 6F 6C 5F 70 74 72 00)
+MACHO-PPC64-NEXT:     Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+MACHO-PPC64-NEXT:     Address: 0x6C
+MACHO-PPC64-NEXT:     Size: 0x8
+MACHO-PPC64-NEXT:     Offset: 716
+MACHO-PPC64-NEXT:     Alignment: 2
+MACHO-PPC64-NEXT:     RelocationOffset: 0x31C
+MACHO-PPC64-NEXT:     RelocationCount: 1
+MACHO-PPC64-NEXT:     Type: 0x7
+MACHO-PPC64-NEXT:     Attributes [ (0x0)
+MACHO-PPC64-NEXT:     ]
+MACHO-PPC64-NEXT:     Reserved1: 0x2
+MACHO-PPC64-NEXT:     Reserved2: 0x0
+MACHO-PPC64-NEXT:   }
+MACHO-PPC64-NEXT: ]
+
+MACHO-ARM:      Sections [
+MACHO-ARM-NEXT:   Section {
+MACHO-ARM-NEXT:    Index: 0
+MACHO-ARM-NEXT:    Name: __text (5F 5F 74 65 78 74 00 00 00 00 00 00 00 00 00 00)
+MACHO-ARM-NEXT:    Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+MACHO-ARM-NEXT:    Address: 0x0
+MACHO-ARM-NEXT:    Size: 0x3C
+MACHO-ARM-NEXT:    Offset: 664
+MACHO-ARM-NEXT:    Alignment: 2
+MACHO-ARM-NEXT:    RelocationOffset: 0x2E0
+MACHO-ARM-NEXT:    RelocationCount: 9
+MACHO-ARM-NEXT:    Type: 0x0
+MACHO-ARM-NEXT:    Attributes [ (0x800004)
+MACHO-ARM-NEXT:      PureInstructions (0x800000)
+MACHO-ARM-NEXT:      SomeInstructions (0x4)
+MACHO-ARM-NEXT:    ]
+MACHO-ARM-NEXT:    Reserved1: 0x0
+MACHO-ARM-NEXT:    Reserved2: 0x0
+MACHO-ARM-NEXT:  }
+MACHO-ARM-NEXT:  Section {
+MACHO-ARM-NEXT:    Index: 1
+MACHO-ARM-NEXT:    Name: __textcoal_nt (5F 5F 74 65 78 74 63 6F 61 6C 5F 6E 74 00 00 00)
+MACHO-ARM-NEXT:    Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+MACHO-ARM-NEXT:    Address: 0x3C
+MACHO-ARM-NEXT:    Size: 0x0
+MACHO-ARM-NEXT:    Offset: 724
+MACHO-ARM-NEXT:    Alignment: 0
+MACHO-ARM-NEXT:    RelocationOffset: 0x0
+MACHO-ARM-NEXT:    RelocationCount: 0
+MACHO-ARM-NEXT:    Type: 0xB
+MACHO-ARM-NEXT:    Attributes [ (0x800000)
+MACHO-ARM-NEXT:      PureInstructions (0x800000)
+MACHO-ARM-NEXT:    ]
+MACHO-ARM-NEXT:    Reserved1: 0x0
+MACHO-ARM-NEXT:    Reserved2: 0x0
+MACHO-ARM-NEXT:  }
+MACHO-ARM-NEXT:  Section {
+MACHO-ARM-NEXT:    Index: 2
+MACHO-ARM-NEXT:    Name: __const_coal (5F 5F 63 6F 6E 73 74 5F 63 6F 61 6C 00 00 00 00)
+MACHO-ARM-NEXT:    Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+MACHO-ARM-NEXT:    Address: 0x3C
+MACHO-ARM-NEXT:    Size: 0x0
+MACHO-ARM-NEXT:    Offset: 724
+MACHO-ARM-NEXT:    Alignment: 0
+MACHO-ARM-NEXT:    RelocationOffset: 0x0
+MACHO-ARM-NEXT:    RelocationCount: 0
+MACHO-ARM-NEXT:    Type: 0xB
+MACHO-ARM-NEXT:    Attributes [ (0x0)
+MACHO-ARM-NEXT:    ]
+MACHO-ARM-NEXT:    Reserved1: 0x0
+MACHO-ARM-NEXT:    Reserved2: 0x0
+MACHO-ARM-NEXT:  }
+MACHO-ARM-NEXT:  Section {
+MACHO-ARM-NEXT:    Index: 3
+MACHO-ARM-NEXT:    Name: __picsymbolstub4 (5F 5F 70 69 63 73 79 6D 62 6F 6C 73 74 75 62 34)
+MACHO-ARM-NEXT:    Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+MACHO-ARM-NEXT:    Address: 0x3C
+MACHO-ARM-NEXT:    Size: 0x0
+MACHO-ARM-NEXT:    Offset: 724
+MACHO-ARM-NEXT:    Alignment: 0
+MACHO-ARM-NEXT:    RelocationOffset: 0x0
+MACHO-ARM-NEXT:    RelocationCount: 0
+MACHO-ARM-NEXT:    Type: 0x8
+MACHO-ARM-NEXT:    Attributes [ (0x0)
+MACHO-ARM-NEXT:    ]
+MACHO-ARM-NEXT:    Reserved1: 0x0
+MACHO-ARM-NEXT:    Reserved2: 0x10
+MACHO-ARM-NEXT:  }
+MACHO-ARM-NEXT:  Section {
+MACHO-ARM-NEXT:    Index: 4
+MACHO-ARM-NEXT:    Name: __StaticInit (5F 5F 53 74 61 74 69 63 49 6E 69 74 00 00 00 00)
+MACHO-ARM-NEXT:    Segment: __TEXT (5F 5F 54 45 58 54 00 00 00 00 00 00 00 00 00 00)
+MACHO-ARM-NEXT:    Address: 0x3C
+MACHO-ARM-NEXT:    Size: 0x0
+MACHO-ARM-NEXT:    Offset: 724
+MACHO-ARM-NEXT:    Alignment: 0
+MACHO-ARM-NEXT:    RelocationOffset: 0x0
+MACHO-ARM-NEXT:    RelocationCount: 0
+MACHO-ARM-NEXT:    Type: 0x0
+MACHO-ARM-NEXT:    Attributes [ (0x800000)
+MACHO-ARM-NEXT:      PureInstructions (0x800000)
+MACHO-ARM-NEXT:    ]
+MACHO-ARM-NEXT:    Reserved1: 0x0
+MACHO-ARM-NEXT:    Reserved2: 0x0
+MACHO-ARM-NEXT:  }
+MACHO-ARM-NEXT:  Section {
+MACHO-ARM-NEXT:    Index: 5
+MACHO-ARM-NEXT:    Name: __data (5F 5F 64 61 74 61 00 00 00 00 00 00 00 00 00 00)
+MACHO-ARM-NEXT:    Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+MACHO-ARM-NEXT:    Address: 0x3C
+MACHO-ARM-NEXT:    Size: 0x4
+MACHO-ARM-NEXT:    Offset: 724
+MACHO-ARM-NEXT:    Alignment: 2
+MACHO-ARM-NEXT:    RelocationOffset: 0x0
+MACHO-ARM-NEXT:    RelocationCount: 0
+MACHO-ARM-NEXT:    Type: 0x0
+MACHO-ARM-NEXT:    Attributes [ (0x0)
+MACHO-ARM-NEXT:    ]
+MACHO-ARM-NEXT:    Reserved1: 0x0
+MACHO-ARM-NEXT:    Reserved2: 0x0
+MACHO-ARM-NEXT:  }
+MACHO-ARM-NEXT:  Section {
+MACHO-ARM-NEXT:    Index: 6
+MACHO-ARM-NEXT:    Name: __nl_symbol_ptr (5F 5F 6E 6C 5F 73 79 6D 62 6F 6C 5F 70 74 72 00)
+MACHO-ARM-NEXT:    Segment: __DATA (5F 5F 44 41 54 41 00 00 00 00 00 00 00 00 00 00)
+MACHO-ARM-NEXT:    Address: 0x40
+MACHO-ARM-NEXT:    Size: 0x8
+MACHO-ARM-NEXT:    Offset: 728
+MACHO-ARM-NEXT:    Alignment: 2
+MACHO-ARM-NEXT:    RelocationOffset: 0x0
+MACHO-ARM-NEXT:    RelocationCount: 0
+MACHO-ARM-NEXT:    Type: 0x6
+MACHO-ARM-NEXT:    Attributes [ (0x0)
+MACHO-ARM-NEXT:    ]
+MACHO-ARM-NEXT:    Reserved1: 0x0
+MACHO-ARM-NEXT:    Reserved2: 0x0
+MACHO-ARM-NEXT:  }
+MACHO-ARM-NEXT:]
diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
index 9b80ee5..6b7c884 100644
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@@ -44,6 +44,7 @@ add_subdirectory(llvm-mcmarkup)
 add_subdirectory(llvm-symbolizer)
 
 add_subdirectory(obj2yaml)
+add_subdirectory(yaml2obj)
 
 if( NOT WIN32 )
   add_subdirectory(lto)
diff --git a/tools/Makefile b/tools/Makefile
index b8f21d2..eaf9ed3 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -35,7 +35,7 @@ PARALLEL_DIRS := opt llvm-as llvm-dis \
                  llvm-diff macho-dump llvm-objdump llvm-readobj \
 	         llvm-rtdyld llvm-dwarfdump llvm-cov \
 	         llvm-size llvm-stress llvm-mcmarkup \
-	         llvm-symbolizer obj2yaml
+	         llvm-symbolizer obj2yaml yaml2obj
 
 # If Intel JIT Events support is configured, build an extra tool to test it.
 ifeq ($(USE_INTEL_JITEVENTS), 1)
diff --git a/tools/bugpoint/BugDriver.cpp b/tools/bugpoint/BugDriver.cpp
index e49a96b..937d86a 100644
--- a/tools/bugpoint/BugDriver.cpp
+++ b/tools/bugpoint/BugDriver.cpp
@@ -122,7 +122,7 @@ bool BugDriver::addSources(const std::vector<std::string> &Filenames) {
   outs() << "Read input file      : '" << Filenames[0] << "'\n";
 
   for (unsigned i = 1, e = Filenames.size(); i != e; ++i) {
-    std::auto_ptr<Module> M(ParseInputFile(Filenames[i], Context));
+    OwningPtr<Module> M(ParseInputFile(Filenames[i], Context));
     if (M.get() == 0) return true;
 
     outs() << "Linking in input file: '" << Filenames[i] << "'\n";
diff --git a/tools/llc/llc.cpp b/tools/llc/llc.cpp
index 1dce9d7..8a462c6 100644
--- a/tools/llc/llc.cpp
+++ b/tools/llc/llc.cpp
@@ -200,7 +200,7 @@ int main(int argc, char **argv) {
 static int compileModule(char **argv, LLVMContext &Context) {
   // Load the module to be compiled...
   SMDiagnostic Err;
-  std::auto_ptr<Module> M;
+  OwningPtr<Module> M;
   Module *mod = 0;
   Triple TheTriple;
 
@@ -281,7 +281,7 @@ static int compileModule(char **argv, LLVMContext &Context) {
   Options.UseInitArray = UseInitArray;
   Options.SSPBufferSize = SSPBufferSize;
 
-  std::auto_ptr<TargetMachine>
+  OwningPtr<TargetMachine>
     target(TheTarget->createTargetMachine(TheTriple.getTriple(),
                                           MCPU, FeaturesStr, Options,
                                           RelocModel, CMModel, OLvl));
diff --git a/tools/llvm-as/llvm-as.cpp b/tools/llvm-as/llvm-as.cpp
index 273c427..d6f1919 100644
--- a/tools/llvm-as/llvm-as.cpp
+++ b/tools/llvm-as/llvm-as.cpp
@@ -94,7 +94,7 @@ int main(int argc, char **argv) {
 
   // Parse the file now...
   SMDiagnostic Err;
-  std::auto_ptr<Module> M(ParseAssemblyFile(InputFilename, Err, Context));
+  OwningPtr<Module> M(ParseAssemblyFile(InputFilename, Err, Context));
   if (M.get() == 0) {
     Err.print(argv[0], errs());
     return 1;
diff --git a/tools/llvm-dis/llvm-dis.cpp b/tools/llvm-dis/llvm-dis.cpp
index 2baa91d..067955e 100644
--- a/tools/llvm-dis/llvm-dis.cpp
+++ b/tools/llvm-dis/llvm-dis.cpp
@@ -123,7 +123,7 @@ int main(int argc, char **argv) {
   cl::ParseCommandLineOptions(argc, argv, "llvm .bc -> .ll disassembler\n");
 
   std::string ErrorMessage;
-  std::auto_ptr<Module> M;
+  OwningPtr<Module> M;
 
   // Use the bitcode streaming interface
   DataStreamer *streamer = getDataFileStreamer(InputFilename, &ErrorMessage);
diff --git a/tools/llvm-extract/llvm-extract.cpp b/tools/llvm-extract/llvm-extract.cpp
index fd0a381..2f45b4e 100644
--- a/tools/llvm-extract/llvm-extract.cpp
+++ b/tools/llvm-extract/llvm-extract.cpp
@@ -100,7 +100,7 @@ int main(int argc, char **argv) {
 
   // Use lazy loading, since we only care about selected global values.
   SMDiagnostic Err;
-  std::auto_ptr<Module> M;
+  OwningPtr<Module> M;
   M.reset(getLazyIRFileModule(InputFilename, Err, Context));
 
   if (M.get() == 0) {
diff --git a/tools/llvm-link/llvm-link.cpp b/tools/llvm-link/llvm-link.cpp
index 83665cc..01a61c6 100644
--- a/tools/llvm-link/llvm-link.cpp
+++ b/tools/llvm-link/llvm-link.cpp
@@ -53,13 +53,12 @@ DumpAsm("d", cl::desc("Print assembly as linked"), cl::Hidden);
 // LoadFile - Read the specified bitcode file in and return it.  This routine
 // searches the link path for the specified file to try to find it...
 //
-static inline std::auto_ptr<Module> LoadFile(const char *argv0,
-                                             const std::string &FN, 
-                                             LLVMContext& Context) {
+static inline Module *LoadFile(const char *argv0, const std::string &FN,
+                               LLVMContext& Context) {
   sys::Path Filename;
   if (!Filename.set(FN)) {
     errs() << "Invalid file name: '" << FN << "'\n";
-    return std::auto_ptr<Module>();
+    return NULL;
   }
 
   SMDiagnostic Err;
@@ -68,10 +67,10 @@ static inline std::auto_ptr<Module> LoadFile(const char *argv0,
   
   const std::string &FNStr = Filename.str();
   Result = ParseIRFile(FNStr, Err, Context);
-  if (Result) return std::auto_ptr<Module>(Result);   // Load successful!
+  if (Result) return Result;   // Load successful!
 
   Err.print(argv0, errs());
-  return std::auto_ptr<Module>();
+  return NULL;
 }
 
 int main(int argc, char **argv) {
@@ -86,17 +85,17 @@ int main(int argc, char **argv) {
   unsigned BaseArg = 0;
   std::string ErrorMessage;
 
-  std::auto_ptr<Module> Composite(LoadFile(argv[0],
-                                           InputFilenames[BaseArg], Context));
+  OwningPtr<Module> Composite(LoadFile(argv[0],
+                                       InputFilenames[BaseArg], Context));
   if (Composite.get() == 0) {
     errs() << argv[0] << ": error loading file '"
            << InputFilenames[BaseArg] << "'\n";
     return 1;
   }
 
+  Linker L(Composite.get());
   for (unsigned i = BaseArg+1; i < InputFilenames.size(); ++i) {
-    std::auto_ptr<Module> M(LoadFile(argv[0],
-                                     InputFilenames[i], Context));
+    OwningPtr<Module> M(LoadFile(argv[0], InputFilenames[i], Context));
     if (M.get() == 0) {
       errs() << argv[0] << ": error loading file '" <<InputFilenames[i]<< "'\n";
       return 1;
@@ -104,8 +103,7 @@ int main(int argc, char **argv) {
 
     if (Verbose) errs() << "Linking in '" << InputFilenames[i] << "'\n";
 
-    if (Linker::LinkModules(Composite.get(), M.get(), Linker::DestroySource,
-                            &ErrorMessage)) {
+    if (L.linkInModule(M.get(), &ErrorMessage)) {
       errs() << argv[0] << ": link error in '" << InputFilenames[i]
              << "': " << ErrorMessage << "\n";
       return 1;
diff --git a/tools/llvm-mc/llvm-mc.cpp b/tools/llvm-mc/llvm-mc.cpp
index 243899b..4b01c33 100644
--- a/tools/llvm-mc/llvm-mc.cpp
+++ b/tools/llvm-mc/llvm-mc.cpp
@@ -16,7 +16,6 @@
 #include "llvm/ADT/OwningPtr.h"
 #include "llvm/MC/MCAsmBackend.h"
 #include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCCodeEmitter.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCInstPrinter.h"
 #include "llvm/MC/MCInstrInfo.h"
@@ -27,7 +26,6 @@
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/MCTargetAsmParser.h"
-#include "llvm/MC/SubtargetFeature.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/FileUtilities.h"
 #include "llvm/Support/FormattedStream.h"
@@ -40,7 +38,6 @@
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/TargetSelect.h"
 #include "llvm/Support/ToolOutputFile.h"
-#include "llvm/Support/system_error.h"
 using namespace llvm;
 
 static cl::opt<std::string>
diff --git a/tools/llvm-objdump/MachODump.cpp b/tools/llvm-objdump/MachODump.cpp
index c324ff1..6797e2d 100644
--- a/tools/llvm-objdump/MachODump.cpp
+++ b/tools/llvm-objdump/MachODump.cpp
@@ -27,6 +27,7 @@
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/Object/MachO.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Format.h"
@@ -52,27 +53,11 @@ static cl::opt<bool>
 static cl::opt<std::string>
   DSYMFile("dsym", cl::desc("Use .dSYM file for debug info"));
 
-static const Target *GetTarget(const MachOObject *MachOObj) {
+static const Target *GetTarget(const MachOObjectFile *MachOObj) {
   // Figure out the target triple.
   if (TripleName.empty()) {
     llvm::Triple TT("unknown-unknown-unknown");
-    switch (MachOObj->getHeader().CPUType) {
-    case llvm::MachO::CPUTypeI386:
-      TT.setArch(Triple::ArchType(Triple::x86));
-      break;
-    case llvm::MachO::CPUTypeX86_64:
-      TT.setArch(Triple::ArchType(Triple::x86_64));
-      break;
-    case llvm::MachO::CPUTypeARM:
-      TT.setArch(Triple::ArchType(Triple::arm));
-      break;
-    case llvm::MachO::CPUTypePowerPC:
-      TT.setArch(Triple::ArchType(Triple::ppc));
-      break;
-    case llvm::MachO::CPUTypePowerPC64:
-      TT.setArch(Triple::ArchType(Triple::ppc64));
-      break;
-    }
+    TT.setArch(Triple::ArchType(MachOObj->getArch()));
     TripleName = TT.str();
   }
 
@@ -108,7 +93,7 @@ struct SymbolSorter {
 
 // Print additional information about an address, if available.
 static void DumpAddress(uint64_t Address, ArrayRef<SectionRef> Sections,
-                        MachOObject *MachOObj, raw_ostream &OS) {
+                        const MachOObjectFile *MachOObj, raw_ostream &OS) {
   for (unsigned i = 0; i != Sections.size(); ++i) {
     uint64_t SectAddr = 0, SectSize = 0;
     Sections[i].getAddress(SectAddr);
@@ -199,12 +184,12 @@ static void emitDOTFile(const char *FileName, const MCFunction &f,
   Out << "}\n";
 }
 
-static void getSectionsAndSymbols(const macho::Header &Header,
-                                  MachOObjectFile *MachOObj,
-                             InMemoryStruct<macho::SymtabLoadCommand> *SymtabLC,
-                                  std::vector<SectionRef> &Sections,
-                                  std::vector<SymbolRef> &Symbols,
-                                  SmallVectorImpl<uint64_t> &FoundFns) {
+static void
+getSectionsAndSymbols(const macho::Header Header,
+                      MachOObjectFile *MachOObj,
+                      std::vector<SectionRef> &Sections,
+                      std::vector<SymbolRef> &Symbols,
+                      SmallVectorImpl<uint64_t> &FoundFns) {
   error_code ec;
   for (symbol_iterator SI = MachOObj->begin_symbols(),
        SE = MachOObj->end_symbols(); SI != SE; SI.increment(ec))
@@ -218,20 +203,28 @@ static void getSectionsAndSymbols(const macho::Header &Header,
     Sections.push_back(*SI);
   }
 
-  for (unsigned i = 0; i != Header.NumLoadCommands; ++i) {
-    const MachOObject::LoadCommandInfo &LCI =
-       MachOObj->getObject()->getLoadCommandInfo(i);
-    if (LCI.Command.Type == macho::LCT_FunctionStarts) {
+  // Walk the load-command chain. Bounding the loop by NumLoadCommands means
+  // a header that declares zero commands is never read; an exit test of the
+  // form "i == NumLoadCommands - 1" would wrap around in that case.
+  MachOObjectFile::LoadCommandInfo Command;
+  for (unsigned i = 0; i != Header.NumLoadCommands; ++i) {
+    // The first command is fetched directly, the rest by chaining.
+    Command = i == 0 ? MachOObj->getFirstLoadCommandInfo()
+                     : MachOObj->getNextLoadCommandInfo(Command);
+    if (Command.C.Type == macho::LCT_FunctionStarts) {
       // We found a function starts segment, parse the addresses for later
       // consumption.
-      InMemoryStruct<macho::LinkeditDataLoadCommand> LLC;
-      MachOObj->getObject()->ReadLinkeditDataLoadCommand(LCI, LLC);
+      macho::LinkeditDataLoadCommand LLC =
+        MachOObj->getLinkeditDataLoadCommand(Command);
 
-      MachOObj->getObject()->ReadULEB128s(LLC->DataOffset, FoundFns);
+      MachOObj->ReadULEB128s(LLC.DataOffset, FoundFns);
     }
   }
 }
 
+static void DisassembleInputMachO2(StringRef Filename,
+                                   MachOObjectFile *MachOOF);
+
 void llvm::DisassembleInputMachO(StringRef Filename) {
   OwningPtr<MemoryBuffer> Buff;
 
@@ -242,9 +235,13 @@ void llvm::DisassembleInputMachO(StringRef Filename) {
 
   OwningPtr<MachOObjectFile> MachOOF(static_cast<MachOObjectFile*>(
         ObjectFile::createMachOObjectFile(Buff.take())));
-  MachOObject *MachOObj = MachOOF->getObject();
 
-  const Target *TheTarget = GetTarget(MachOObj);
+  DisassembleInputMachO2(Filename, MachOOF.get());
+}
+
+static void DisassembleInputMachO2(StringRef Filename,
+                                   MachOObjectFile *MachOOF) {
+  const Target *TheTarget = GetTarget(MachOOF);
   if (!TheTarget) {
     // GetTarget prints out stuff.
     return;
@@ -272,31 +269,13 @@ void llvm::DisassembleInputMachO(StringRef Filename) {
 
   outs() << '\n' << Filename << ":\n\n";
 
-  const macho::Header &Header = MachOObj->getHeader();
-
-  const MachOObject::LoadCommandInfo *SymtabLCI = 0;
-  // First, find the symbol table segment.
-  for (unsigned i = 0; i != Header.NumLoadCommands; ++i) {
-    const MachOObject::LoadCommandInfo &LCI = MachOObj->getLoadCommandInfo(i);
-    if (LCI.Command.Type == macho::LCT_Symtab) {
-      SymtabLCI = &LCI;
-      break;
-    }
-  }
-
-  // Read and register the symbol table data.
-  InMemoryStruct<macho::SymtabLoadCommand> SymtabLC;
-  if (SymtabLCI) {
-    MachOObj->ReadSymtabLoadCommand(*SymtabLCI, SymtabLC);
-    MachOObj->RegisterStringTable(*SymtabLC);
-  }
+  macho::Header Header = MachOOF->getHeader();
 
   std::vector<SectionRef> Sections;
   std::vector<SymbolRef> Symbols;
   SmallVector<uint64_t, 8> FoundFns;
 
-  getSectionsAndSymbols(Header, MachOOF.get(), &SymtabLC, Sections, Symbols,
-                        FoundFns);
+  getSectionsAndSymbols(Header, MachOOF, Sections, Symbols, FoundFns);
 
   // Make a copy of the unsorted symbol list. FIXME: duplication
   std::vector<SymbolRef> UnsortedSymbols(Symbols);
@@ -310,7 +289,7 @@ void llvm::DisassembleInputMachO(StringRef Filename) {
 #endif
 
   OwningPtr<DIContext> diContext;
-  ObjectFile *DbgObj = MachOOF.get();
+  ObjectFile *DbgObj = MachOOF;
   // Try to find debug info and set up the DIContext for it.
   if (UseDbg) {
     // A separate DSym file path was specified, parse it as a macho file,
@@ -337,10 +316,9 @@ void llvm::DisassembleInputMachO(StringRef Filename) {
         SectName != "__text")
       continue; // Skip non-text sections
 
-    StringRef SegmentName;
     DataRefImpl DR = Sections[SectIdx].getRawDataRefImpl();
-    if (MachOOF->getSectionFinalSegmentName(DR, SegmentName) ||
-        SegmentName != "__TEXT")
+    StringRef SegmentName = MachOOF->getSectionFinalSegmentName(DR);
+    if (SegmentName != "__TEXT")
       continue;
 
     // Insert the functions from the function starts segment into our map.
@@ -365,7 +343,7 @@ void llvm::DisassembleInputMachO(StringRef Filename) {
     for (relocation_iterator RI = Sections[SectIdx].begin_relocations(),
          RE = Sections[SectIdx].end_relocations(); RI != RE; RI.increment(ec)) {
       uint64_t RelocOffset, SectionAddress;
-      RI->getAddress(RelocOffset);
+      RI->getOffset(RelocOffset);
       Sections[SectIdx].getAddress(SectionAddress);
       RelocOffset -= SectionAddress;
 
@@ -600,7 +578,7 @@ void llvm::DisassembleInputMachO(StringRef Filename) {
                 Relocs[j].second.getName(SymName);
 
                 outs() << "\t# " << SymName << ' ';
-                DumpAddress(Addr, Sections, MachOObj, outs());
+                DumpAddress(Addr, Sections, MachOOF, outs());
               }
 
             // If this instructions contains an address, see if we can evaluate
@@ -609,7 +587,7 @@ void llvm::DisassembleInputMachO(StringRef Filename) {
                                                           Inst.Address,
                                                           Inst.Size);
             if (targ != -1ULL)
-              DumpAddress(targ, Sections, MachOObj, outs());
+              DumpAddress(targ, Sections, MachOOF, outs());
 
             // Print debug info.
             if (diContext) {
diff --git a/tools/llvm-objdump/llvm-objdump.cpp b/tools/llvm-objdump/llvm-objdump.cpp
index 7832cf0..247b90f 100644
--- a/tools/llvm-objdump/llvm-objdump.cpp
+++ b/tools/llvm-objdump/llvm-objdump.cpp
@@ -186,8 +186,8 @@ void llvm::DumpBytes(StringRef bytes) {
 
 bool llvm::RelocAddressLess(RelocationRef a, RelocationRef b) {
   uint64_t a_addr, b_addr;
-  if (error(a.getAddress(a_addr))) return false;
-  if (error(b.getAddress(b_addr))) return false;
+  if (error(a.getOffset(a_addr))) return false;
+  if (error(b.getOffset(b_addr))) return false;
   return a_addr < b_addr;
 }
 
@@ -255,10 +255,10 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
     std::sort(Rels.begin(), Rels.end(), RelocAddressLess);
 
     StringRef SegmentName = "";
-    if (const MachOObjectFile *MachO = dyn_cast<const MachOObjectFile>(Obj)) {
+    if (const MachOObjectFile *MachO =
+        dyn_cast<const MachOObjectFile>(Obj)) {
       DataRefImpl DR = i->getRawDataRefImpl();
-      if (error(MachO->getSectionFinalSegmentName(DR, SegmentName)))
-        break;
+      SegmentName = MachO->getSectionFinalSegmentName(DR);
     }
     StringRef name;
     if (error(i->getName(name))) break;
@@ -378,7 +378,7 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
           if (error(rel_cur->getHidden(hidden))) goto skip_print_rel;
           if (hidden) goto skip_print_rel;
 
-          if (error(rel_cur->getAddress(addr))) goto skip_print_rel;
+          if (error(rel_cur->getOffset(addr))) goto skip_print_rel;
           // Stop when rel_cur's address is past the current instruction.
           if (addr >= Index + Size) break;
           if (error(rel_cur->getTypeName(name))) goto skip_print_rel;
@@ -417,7 +417,7 @@ static void PrintRelocations(const ObjectFile *o) {
       if (error(ri->getHidden(hidden))) continue;
       if (hidden) continue;
       if (error(ri->getTypeName(relocname))) continue;
-      if (error(ri->getAddress(address))) continue;
+      if (error(ri->getOffset(address))) continue;
       if (error(ri->getValueString(valuestr))) continue;
       outs() << address << " " << relocname << " " << valuestr << "\n";
     }
@@ -460,11 +460,19 @@ static void PrintSectionContents(const ObjectFile *o) {
     StringRef Name;
     StringRef Contents;
     uint64_t BaseAddr;
+    bool BSS;
     if (error(si->getName(Name))) continue;
     if (error(si->getContents(Contents))) continue;
     if (error(si->getAddress(BaseAddr))) continue;
+    if (error(si->isBSS(BSS))) continue;
 
     outs() << "Contents of section " << Name << ":\n";
+    if (BSS) {
+      outs() << format("<skipping contents of bss section at [%04" PRIx64
+                       ", %04" PRIx64 ")>\n", BaseAddr,
+                       BaseAddr + Contents.size());
+      continue;
+    }
 
     // Dump out the content as hex and printable ascii characters.
     for (std::size_t addr = 0, end = Contents.size(); addr < end; addr += 16) {
@@ -592,11 +600,10 @@ static void PrintSymbolTable(const ObjectFile *o) {
       else if (Section == o->end_sections())
         outs() << "*UND*";
       else {
-        if (const MachOObjectFile *MachO = dyn_cast<const MachOObjectFile>(o)) {
-          StringRef SegmentName;
+        if (const MachOObjectFile *MachO =
+            dyn_cast<const MachOObjectFile>(o)) {
           DataRefImpl DR = Section->getRawDataRefImpl();
-          if (error(MachO->getSectionFinalSegmentName(DR, SegmentName)))
-            SegmentName = "";
+          StringRef SegmentName = MachO->getSectionFinalSegmentName(DR);
           outs() << SegmentName << ",";
         }
         StringRef SectionName;
diff --git a/tools/llvm-ranlib/llvm-ranlib.cpp b/tools/llvm-ranlib/llvm-ranlib.cpp
index fe9d3e2..e3e3bad 100644
--- a/tools/llvm-ranlib/llvm-ranlib.cpp
+++ b/tools/llvm-ranlib/llvm-ranlib.cpp
@@ -78,7 +78,7 @@ int main(int argc, char **argv) {
   }
 
   std::string err_msg;
-  std::auto_ptr<Archive>
+  OwningPtr<Archive>
     AutoArchive(Archive::OpenAndLoad(ArchivePath, Context, &err_msg));
   Archive* TheArchive = AutoArchive.get();
   if (!TheArchive) {
diff --git a/tools/llvm-readobj/COFFDumper.cpp b/tools/llvm-readobj/COFFDumper.cpp
index be4e76c..94aafa7 100644
--- a/tools/llvm-readobj/COFFDumper.cpp
+++ b/tools/llvm-readobj/COFFDumper.cpp
@@ -680,11 +680,18 @@ void COFFDumper::printRelocation(section_iterator SecI,
   if (error(Symbol.getName(SymbolName))) return;
   if (error(SecI->getContents(Contents))) return;
 
-  raw_ostream& OS = W.startLine();
-  OS << W.hex(Offset)
-     << " " << RelocName
-     << " " << (SymbolName.size() > 0 ? SymbolName : "-")
-     << "\n";
+  if (opts::ExpandRelocs) {
+    DictScope Group(W, "Relocation");
+    W.printHex("Offset", Offset);
+    W.printNumber("Type", RelocName, RelocType);
+    W.printString("Symbol", SymbolName.size() > 0 ? SymbolName : "-");
+  } else {
+    raw_ostream& OS = W.startLine();
+    OS << W.hex(Offset)
+       << " " << RelocName
+       << " " << (SymbolName.size() > 0 ? SymbolName : "-")
+       << "\n";
+  }
 }
 
 void COFFDumper::printSymbols() {
@@ -719,9 +726,9 @@ void COFFDumper::printSymbol(symbol_iterator SymI) {
   if (Obj->getSymbolName(Symbol, SymbolName))
     SymbolName = "";
 
-  StringRef SectionName;
-  if (Section && Obj->getSectionName(Section, SectionName))
-    SectionName = "";
+  StringRef SectionName = "";
+  if (Section)
+    Obj->getSectionName(Section, SectionName);
 
   W.printString("Name", SymbolName);
   W.printNumber("Value", Symbol->Value);
@@ -778,7 +785,12 @@ void COFFDumper::printSymbol(symbol_iterator SymI) {
       if (error(getSymbolAuxData(Obj, Symbol + I, Aux)))
         break;
 
-    } else if (Symbol->StorageClass == COFF::IMAGE_SYM_CLASS_STATIC) {
+      DictScope AS(W, "AuxFileRecord");
+      W.printString("FileName", StringRef(Aux->FileName));
+
+    } else if (Symbol->StorageClass == COFF::IMAGE_SYM_CLASS_STATIC ||
+               (Symbol->StorageClass == COFF::IMAGE_SYM_CLASS_EXTERNAL &&
+                Symbol->SectionNumber != COFF::IMAGE_SYM_UNDEFINED)) {
       const coff_aux_section_definition *Aux;
       if (error(getSymbolAuxData(Obj, Symbol + I, Aux)))
         break;
@@ -792,7 +804,7 @@ void COFFDumper::printSymbol(symbol_iterator SymI) {
       W.printEnum("Selection", Aux->Selection, makeArrayRef(ImageCOMDATSelect));
       W.printBinary("Unused", makeArrayRef(Aux->Unused));
 
-      if (Section->Characteristics & COFF::IMAGE_SCN_LNK_COMDAT
+      if (Section && Section->Characteristics & COFF::IMAGE_SCN_LNK_COMDAT
           && Aux->Selection == COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE) {
         const coff_section *Assoc;
         StringRef AssocName;
diff --git a/tools/llvm-readobj/ELFDumper.cpp b/tools/llvm-readobj/ELFDumper.cpp
index 9e111dd..ea1b83f 100644
--- a/tools/llvm-readobj/ELFDumper.cpp
+++ b/tools/llvm-readobj/ELFDumper.cpp
@@ -50,16 +50,18 @@ public:
 
   virtual void printDynamicTable() LLVM_OVERRIDE;
   virtual void printNeededLibraries() LLVM_OVERRIDE;
+  virtual void printProgramHeaders() LLVM_OVERRIDE;
 
 private:
-  typedef typename ELFObjectFile<ELFT>::Elf_Shdr Elf_Shdr;
-  typedef typename ELFObjectFile<ELFT>::Elf_Sym Elf_Sym;
+  typedef ELFObjectFile<ELFT> ELFO;
+  typedef typename ELFO::Elf_Shdr Elf_Shdr;
+  typedef typename ELFO::Elf_Sym Elf_Sym;
 
   void printSymbol(symbol_iterator SymI, bool IsDynamic = false);
 
   void printRelocation(section_iterator SecI, relocation_iterator RelI);
 
-  const ELFObjectFile<ELFT> *Obj;
+  const ELFO *Obj;
 };
 
 } // namespace
@@ -399,11 +401,37 @@ static const EnumEntry<unsigned> ElfSectionFlags[] = {
   LLVM_READOBJ_ENUM_ENT(ELF, SHF_MIPS_NOSTRIP    )
 };
 
+static const EnumEntry<unsigned> ElfSegmentTypes[] = {
+  LLVM_READOBJ_ENUM_ENT(ELF, PT_NULL   ),
+  LLVM_READOBJ_ENUM_ENT(ELF, PT_LOAD   ),
+  LLVM_READOBJ_ENUM_ENT(ELF, PT_DYNAMIC),
+  LLVM_READOBJ_ENUM_ENT(ELF, PT_INTERP ),
+  LLVM_READOBJ_ENUM_ENT(ELF, PT_NOTE   ),
+  LLVM_READOBJ_ENUM_ENT(ELF, PT_SHLIB  ),
+  LLVM_READOBJ_ENUM_ENT(ELF, PT_PHDR   ),
+  LLVM_READOBJ_ENUM_ENT(ELF, PT_TLS    ),
+
+  LLVM_READOBJ_ENUM_ENT(ELF, PT_GNU_EH_FRAME),
+  LLVM_READOBJ_ENUM_ENT(ELF, PT_SUNW_EH_FRAME),
+  LLVM_READOBJ_ENUM_ENT(ELF, PT_SUNW_UNWIND),
+
+  LLVM_READOBJ_ENUM_ENT(ELF, PT_GNU_STACK),
+  LLVM_READOBJ_ENUM_ENT(ELF, PT_GNU_RELRO),
+
+  LLVM_READOBJ_ENUM_ENT(ELF, PT_ARM_EXIDX),
+  LLVM_READOBJ_ENUM_ENT(ELF, PT_ARM_UNWIND)
+};
+
+static const EnumEntry<unsigned> ElfSegmentFlags[] = {
+  LLVM_READOBJ_ENUM_ENT(ELF, PF_X),
+  LLVM_READOBJ_ENUM_ENT(ELF, PF_W),
+  LLVM_READOBJ_ENUM_ENT(ELF, PF_R)
+};
+
 
 template<class ELFT>
 void ELFDumper<ELFT>::printFileHeaders() {
   error_code EC;
-  typedef ELFObjectFile<ELFT> ELFO;
 
   const typename ELFO::Elf_Ehdr *Header = Obj->getElfHeader();
 
@@ -549,22 +577,36 @@ template<class ELFT>
 void ELFDumper<ELFT>::printRelocation(section_iterator Sec,
                                       relocation_iterator RelI) {
   uint64_t Offset;
+  uint64_t RelocType;
   SmallString<32> RelocName;
   int64_t Info;
   StringRef SymbolName;
   SymbolRef Symbol;
-  if (error(RelI->getOffset(Offset))) return;
+  if (Obj->getElfHeader()->e_type == ELF::ET_REL){
+    if (error(RelI->getOffset(Offset))) return;
+  } else {
+    if (error(RelI->getAddress(Offset))) return;
+  }
+  if (error(RelI->getType(RelocType))) return;
   if (error(RelI->getTypeName(RelocName))) return;
   if (error(RelI->getAdditionalInfo(Info))) return;
   if (error(RelI->getSymbol(Symbol))) return;
   if (error(Symbol.getName(SymbolName))) return;
 
-  raw_ostream& OS = W.startLine();
-  OS << W.hex(Offset)
-     << " " << RelocName
-     << " " << (SymbolName.size() > 0 ? SymbolName : "-")
-     << " " << W.hex(Info)
-     << "\n";
+  if (opts::ExpandRelocs) {
+    DictScope Group(W, "Relocation");
+    W.printHex("Offset", Offset);
+    W.printNumber("Type", RelocName, RelocType);
+    W.printString("Symbol", SymbolName.size() > 0 ? SymbolName : "-");
+    W.printHex("Info", Info);
+  } else {
+    raw_ostream& OS = W.startLine();
+    OS << W.hex(Offset)
+       << " " << RelocName
+       << " " << (SymbolName.size() > 0 ? SymbolName : "-")
+       << " " << W.hex(Info)
+       << "\n";
+  }
 }
 
 template<class ELFT>
@@ -605,9 +647,9 @@ void ELFDumper<ELFT>::printSymbol(symbol_iterator SymI, bool IsDynamic) {
   if (SymI->getName(SymbolName))
     SymbolName = "";
 
-  StringRef SectionName;
-  if (Section && Obj->getSectionName(Section, SectionName))
-    SectionName = "";
+  StringRef SectionName = "";
+  if (Section)
+    Obj->getSectionName(Section, SectionName);
 
   std::string FullSymbolName(SymbolName);
   if (IsDynamic) {
@@ -735,7 +777,6 @@ void ELFDumper<ELFT>::printUnwindInfo() {
 
 template<class ELFT>
 void ELFDumper<ELFT>::printDynamicTable() {
-  typedef ELFObjectFile<ELFT> ELFO;
   typedef typename ELFO::Elf_Dyn_iterator EDI;
   EDI Start = Obj->begin_dynamic_table(),
       End = Obj->end_dynamic_table(true);
@@ -798,3 +839,22 @@ void ELFDumper<ELFT>::printNeededLibraries() {
     outs() << "  " << Path << "\n";
   }
 }
+
+template<class ELFT>
+void ELFDumper<ELFT>::printProgramHeaders() {
+  ListScope L(W, "ProgramHeaders");
+
+  for (typename ELFO::Elf_Phdr_Iter PI = Obj->begin_program_headers(),
+                                    PE = Obj->end_program_headers();
+                                    PI != PE; ++PI) {
+    DictScope P(W, "ProgramHeader");
+    W.printEnum  ("Type", PI->p_type, makeArrayRef(ElfSegmentTypes));
+    W.printHex   ("Offset", PI->p_offset);
+    W.printHex   ("VirtualAddress", PI->p_vaddr);
+    W.printHex   ("PhysicalAddress", PI->p_paddr);
+    W.printNumber("FileSize", PI->p_filesz);
+    W.printNumber("MemSize", PI->p_memsz);
+    W.printFlags ("Flags", PI->p_flags, makeArrayRef(ElfSegmentFlags));
+    W.printNumber("Alignment", PI->p_align);
+  }
+}
diff --git a/tools/llvm-readobj/MachODumper.cpp b/tools/llvm-readobj/MachODumper.cpp
index 798c941..31dc5ce 100644
--- a/tools/llvm-readobj/MachODumper.cpp
+++ b/tools/llvm-readobj/MachODumper.cpp
@@ -27,7 +27,7 @@ namespace {
 
 class MachODumper : public ObjDumper {
 public:
-  MachODumper(const llvm::object::MachOObjectFile *Obj, StreamWriter& Writer)
+  MachODumper(const MachOObjectFile *Obj, StreamWriter& Writer)
     : ObjDumper(Writer)
     , Obj(Obj) { }
 
@@ -43,7 +43,12 @@ private:
 
   void printRelocation(section_iterator SecI, relocation_iterator RelI);
 
-  const llvm::object::MachOObjectFile *Obj;
+  void printRelocation(const MachOObjectFile *Obj,
+                       section_iterator SecI, relocation_iterator RelI);
+
+  void printSections(const MachOObjectFile *Obj);
+
+  const MachOObjectFile *Obj;
 };
 
 } // namespace
@@ -157,97 +162,53 @@ namespace {
   };
 }
 
-static StringRef parseSegmentOrSectionName(ArrayRef<char> P) {
-  if (P[15] == 0)
-    // Null terminated.
-    return StringRef(P.data());
-  // Not null terminated, so this is a 16 char string.
-  return StringRef(P.data(), 16);
-}
-
-static bool is64BitLoadCommand(const MachOObject *MachOObj, DataRefImpl DRI) {
-  LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
-  if (LCI.Command.Type == macho::LCT_Segment64)
-    return true;
-  assert(LCI.Command.Type == macho::LCT_Segment && "Unexpected Type.");
-  return false;
-}
-
-static void getSection(const MachOObject *MachOObj,
-                       DataRefImpl DRI,
+static void getSection(const MachOObjectFile *Obj,
+                       DataRefImpl Sec,
                        MachOSection &Section) {
-  LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
-  if (is64BitLoadCommand(MachOObj, DRI)) {
-    InMemoryStruct<macho::Section64> Sect;
-    MachOObj->ReadSection64(LCI, DRI.d.b, Sect);
-
-    Section.Name        = ArrayRef<char>(Sect->Name);
-    Section.SegmentName = ArrayRef<char>(Sect->SegmentName);
-    Section.Address     = Sect->Address;
-    Section.Size        = Sect->Size;
-    Section.Offset      = Sect->Offset;
-    Section.Alignment   = Sect->Align;
-    Section.RelocationTableOffset = Sect->RelocationTableOffset;
-    Section.NumRelocationTableEntries = Sect->NumRelocationTableEntries;
-    Section.Flags       = Sect->Flags;
-    Section.Reserved1   = Sect->Reserved1;
-    Section.Reserved2   = Sect->Reserved2;
-  } else {
-    InMemoryStruct<macho::Section> Sect;
-    MachOObj->ReadSection(LCI, DRI.d.b, Sect);
-
-    Section.Name        = Sect->Name;
-    Section.SegmentName = Sect->SegmentName;
-    Section.Address     = Sect->Address;
-    Section.Size        = Sect->Size;
-    Section.Offset      = Sect->Offset;
-    Section.Alignment   = Sect->Align;
-    Section.RelocationTableOffset = Sect->RelocationTableOffset;
-    Section.NumRelocationTableEntries = Sect->NumRelocationTableEntries;
-    Section.Flags       = Sect->Flags;
-    Section.Reserved1   = Sect->Reserved1;
-    Section.Reserved2   = Sect->Reserved2;
+  if (!Obj->is64Bit()) {
+    macho::Section Sect = Obj->getSection(Sec);
+    Section.Address     = Sect.Address;
+    Section.Size        = Sect.Size;
+    Section.Offset      = Sect.Offset;
+    Section.Alignment   = Sect.Align;
+    Section.RelocationTableOffset = Sect.RelocationTableOffset;
+    Section.NumRelocationTableEntries = Sect.NumRelocationTableEntries;
+    Section.Flags       = Sect.Flags;
+    Section.Reserved1   = Sect.Reserved1;
+    Section.Reserved2   = Sect.Reserved2;
+    return;
   }
+  macho::Section64 Sect = Obj->getSection64(Sec);
+  Section.Address     = Sect.Address;
+  Section.Size        = Sect.Size;
+  Section.Offset      = Sect.Offset;
+  Section.Alignment   = Sect.Align;
+  Section.RelocationTableOffset = Sect.RelocationTableOffset;
+  Section.NumRelocationTableEntries = Sect.NumRelocationTableEntries;
+  Section.Flags       = Sect.Flags;
+  Section.Reserved1   = Sect.Reserved1;
+  Section.Reserved2   = Sect.Reserved2;
 }
 
-static void getSymbolTableEntry(const MachOObject *MachO,
-                                DataRefImpl DRI,
-                                InMemoryStruct<macho::SymbolTableEntry> &Res) {
-  InMemoryStruct<macho::SymtabLoadCommand> SymtabLoadCmd;
-  LoadCommandInfo LCI = MachO->getLoadCommandInfo(DRI.d.a);
-  MachO->ReadSymtabLoadCommand(LCI, SymtabLoadCmd);
-  MachO->ReadSymbolTableEntry(SymtabLoadCmd->SymbolTableOffset, DRI.d.b, Res);
-}
 
-static void getSymbol64TableEntry(const MachOObject *MachO,
-                                  DataRefImpl DRI,
-                               InMemoryStruct<macho::Symbol64TableEntry> &Res) {
-  InMemoryStruct<macho::SymtabLoadCommand> SymtabLoadCmd;
-  LoadCommandInfo LCI = MachO->getLoadCommandInfo(DRI.d.a);
-  MachO->ReadSymtabLoadCommand(LCI, SymtabLoadCmd);
-  MachO->ReadSymbol64TableEntry(SymtabLoadCmd->SymbolTableOffset, DRI.d.b, Res);
-}
-
-static void getSymbol(const MachOObject *MachOObj,
+static void getSymbol(const MachOObjectFile *Obj,
                       DataRefImpl DRI,
                       MachOSymbol &Symbol) {
-  if (MachOObj->is64Bit()) {
-    InMemoryStruct<macho::Symbol64TableEntry> Entry;
-    getSymbol64TableEntry(MachOObj, DRI, Entry);
-    Symbol.StringIndex  = Entry->StringIndex;
-    Symbol.Type         = Entry->Type;
-    Symbol.SectionIndex = Entry->SectionIndex;
-    Symbol.Flags        = Entry->Flags;
-    Symbol.Value        = Entry->Value;
-  } else {
-    InMemoryStruct<macho::SymbolTableEntry> Entry;
-    getSymbolTableEntry(MachOObj, DRI, Entry);
-    Symbol.StringIndex  = Entry->StringIndex;
-    Symbol.Type         = Entry->Type;
-    Symbol.SectionIndex = Entry->SectionIndex;
-    Symbol.Flags        = Entry->Flags;
-    Symbol.Value        = Entry->Value;
+  if (!Obj->is64Bit()) {
+    macho::SymbolTableEntry Entry = Obj->getSymbolTableEntry(DRI);
+    Symbol.StringIndex  = Entry.StringIndex;
+    Symbol.Type         = Entry.Type;
+    Symbol.SectionIndex = Entry.SectionIndex;
+    Symbol.Flags        = Entry.Flags;
+    Symbol.Value        = Entry.Value;
+    return;
   }
+  macho::Symbol64TableEntry Entry = Obj->getSymbol64TableEntry(DRI);
+  Symbol.StringIndex  = Entry.StringIndex;
+  Symbol.Type         = Entry.Type;
+  Symbol.SectionIndex = Entry.SectionIndex;
+  Symbol.Flags        = Entry.Flags;
+  Symbol.Value        = Entry.Value;
 }
 
 void MachODumper::printFileHeaders() {
@@ -255,6 +216,10 @@ void MachODumper::printFileHeaders() {
 }
 
 void MachODumper::printSections() {
+  return printSections(Obj);
+}
+
+void MachODumper::printSections(const MachOObjectFile *Obj) {
   ListScope Group(W, "Sections");
 
   int SectionIndex = -1;
@@ -266,19 +231,22 @@ void MachODumper::printSections() {
 
     ++SectionIndex;
 
-    const MachOObject *MachO = const_cast<MachOObjectFile*>(Obj)->getObject();
-
     MachOSection Section;
-    getSection(MachO, SecI->getRawDataRefImpl(), Section);
+    // One raw handle serves getSection() and the name lookups below.
+    DataRefImpl DR = SecI->getRawDataRefImpl();
+    getSection(Obj, DR, Section);
     StringRef Name;
     if (error(SecI->getName(Name)))
         Name = "";
 
+    ArrayRef<char> RawName = Obj->getSectionRawName(DR);
+    StringRef SegmentName = Obj->getSectionFinalSegmentName(DR);
+    ArrayRef<char> RawSegmentName = Obj->getSectionRawFinalSegmentName(DR);
+
     DictScope SectionD(W, "Section");
     W.printNumber("Index", SectionIndex);
-    W.printBinary("Name", Name, Section.Name);
-    W.printBinary("Segment", parseSegmentOrSectionName(Section.SegmentName),
-                    Section.SegmentName);
+    W.printBinary("Name", Name, RawName);
+    W.printBinary("Segment", SegmentName, RawSegmentName);
     W.printHex   ("Address", Section.Address);
     W.printHex   ("Size", Section.Size);
     W.printNumber("Offset", Section.Offset);
@@ -364,23 +332,53 @@ void MachODumper::printRelocations() {
 
 void MachODumper::printRelocation(section_iterator SecI,
                                   relocation_iterator RelI) {
+  return printRelocation(Obj, SecI, RelI);
+}
+
+void MachODumper::printRelocation(const MachOObjectFile *Obj,
+                                  section_iterator SecI,
+                                  relocation_iterator RelI) {
   uint64_t Offset;
   SmallString<32> RelocName;
-  int64_t Info;
   StringRef SymbolName;
   SymbolRef Symbol;
   if (error(RelI->getOffset(Offset))) return;
   if (error(RelI->getTypeName(RelocName))) return;
-  if (error(RelI->getAdditionalInfo(Info))) return;
   if (error(RelI->getSymbol(Symbol))) return;
-  if (error(Symbol.getName(SymbolName))) return;
-
-  raw_ostream& OS = W.startLine();
-  OS << W.hex(Offset)
-     << " " << RelocName
-     << " " << (SymbolName.size() > 0 ? SymbolName : "-")
-     << " " << W.hex(Info)
-     << "\n";
+  if (symbol_iterator(Symbol) != Obj->end_symbols() &&
+      error(Symbol.getName(SymbolName)))
+    return;
+
+  DataRefImpl DR = RelI->getRawDataRefImpl();
+  macho::RelocationEntry RE = Obj->getRelocation(DR);
+  bool IsScattered = Obj->isRelocationScattered(RE);
+
+  if (opts::ExpandRelocs) {
+    DictScope Group(W, "Relocation");
+    W.printHex("Offset", Offset);
+    W.printNumber("PCRel", Obj->getAnyRelocationPCRel(RE));
+    W.printNumber("Length", Obj->getAnyRelocationLength(RE));
+    if (IsScattered)
+      W.printString("Extern", StringRef("N/A"));
+    else
+      W.printNumber("Extern", Obj->getPlainRelocationExternal(RE));
+    W.printNumber("Type", RelocName, Obj->getAnyRelocationType(RE));
+    W.printString("Symbol", SymbolName.size() > 0 ? SymbolName : "-");
+    W.printNumber("Scattered", IsScattered);
+  } else {
+    raw_ostream& OS = W.startLine();
+    OS << W.hex(Offset)
+       << " " << Obj->getAnyRelocationPCRel(RE)
+       << " " << Obj->getAnyRelocationLength(RE);
+    if (IsScattered)
+      OS << " n/a";
+    else
+      OS << " " << Obj->getPlainRelocationExternal(RE);
+    OS << " " << RelocName
+       << " " << IsScattered
+       << " " << (SymbolName.size() > 0 ? SymbolName : "-")
+       << "\n";
+  }
 }
 
 void MachODumper::printSymbols() {
@@ -407,16 +405,14 @@ void MachODumper::printSymbol(symbol_iterator SymI) {
   if (SymI->getName(SymbolName))
     SymbolName = "";
 
-  const MachOObject *MachO = const_cast<MachOObjectFile*>(Obj)->getObject();
-
   MachOSymbol Symbol;
-  getSymbol(MachO, SymI->getRawDataRefImpl(), Symbol);
+  getSymbol(Obj, SymI->getRawDataRefImpl(), Symbol);
 
-  StringRef SectionName;
+  StringRef SectionName = "";
   section_iterator SecI(Obj->end_sections());
-  if (error(SymI->getSection(SecI)) ||
-      error(SecI->getName(SectionName)))
-    SectionName = "";
+  if (!error(SymI->getSection(SecI)) &&
+      SecI != Obj->end_sections())
+      error(SecI->getName(SectionName));
 
   DictScope D(W, "Symbol");
   W.printNumber("Name", SymbolName, Symbol.StringIndex);
diff --git a/tools/llvm-readobj/ObjDumper.h b/tools/llvm-readobj/ObjDumper.h
index 8d191cb..6918e28 100644
--- a/tools/llvm-readobj/ObjDumper.h
+++ b/tools/llvm-readobj/ObjDumper.h
@@ -38,6 +38,7 @@ public:
   // Only implemented for ELF at this time.
   virtual void printDynamicTable() { }
   virtual void printNeededLibraries() { }
+  virtual void printProgramHeaders() { }
 
 protected:
   StreamWriter& W;
diff --git a/tools/llvm-readobj/llvm-readobj.cpp b/tools/llvm-readobj/llvm-readobj.cpp
index 67c9a98..2e95b6b 100644
--- a/tools/llvm-readobj/llvm-readobj.cpp
+++ b/tools/llvm-readobj/llvm-readobj.cpp
@@ -120,6 +120,14 @@ namespace opts {
   // -needed-libs
   cl::opt<bool> NeededLibraries("needed-libs",
     cl::desc("Display the needed libraries"));
+
+  // -program-headers
+  cl::opt<bool> ProgramHeaders("program-headers",
+    cl::desc("Display ELF program headers"));
+
+  // -expand-relocs
+  cl::opt<bool> ExpandRelocs("expand-relocs",
+    cl::desc("Expand each shown relocation to multiple lines"));
 } // namespace opts
 
 namespace llvm {
@@ -135,8 +143,8 @@ bool error(error_code EC) {
 
 bool relocAddressLess(RelocationRef a, RelocationRef b) {
   uint64_t a_addr, b_addr;
-  if (error(a.getAddress(a_addr))) return false;
-  if (error(b.getAddress(b_addr))) return false;
+  if (error(a.getOffset(a_addr))) return false;
+  if (error(b.getOffset(b_addr))) return false;
   return a_addr < b_addr;
 }
 
@@ -211,6 +219,8 @@ static void dumpObject(const ObjectFile *Obj) {
     Dumper->printDynamicTable();
   if (opts::NeededLibraries)
     Dumper->printNeededLibraries();
+  if (opts::ProgramHeaders)
+    Dumper->printProgramHeaders();
 }
 
 
diff --git a/tools/llvm-readobj/llvm-readobj.h b/tools/llvm-readobj/llvm-readobj.h
index be18268..3f75610 100644
--- a/tools/llvm-readobj/llvm-readobj.h
+++ b/tools/llvm-readobj/llvm-readobj.h
@@ -37,6 +37,7 @@ namespace opts {
   extern llvm::cl::opt<bool> Symbols;
   extern llvm::cl::opt<bool> DynamicSymbols;
   extern llvm::cl::opt<bool> UnwindInfo;
+  extern llvm::cl::opt<bool> ExpandRelocs;
 } // namespace opts
 
 #define LLVM_READOBJ_ENUM_ENT(ns, enum) \
diff --git a/tools/llvm-rtdyld/llvm-rtdyld.cpp b/tools/llvm-rtdyld/llvm-rtdyld.cpp
index 4d8d345..ead541a 100644
--- a/tools/llvm-rtdyld/llvm-rtdyld.cpp
+++ b/tools/llvm-rtdyld/llvm-rtdyld.cpp
@@ -17,7 +17,7 @@
 #include "llvm/ExecutionEngine/ObjectBuffer.h"
 #include "llvm/ExecutionEngine/ObjectImage.h"
 #include "llvm/ExecutionEngine/RuntimeDyld.h"
-#include "llvm/Object/MachOObject.h"
+#include "llvm/Object/MachO.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/Memory.h"
diff --git a/tools/lto/LTOCodeGenerator.cpp b/tools/lto/LTOCodeGenerator.cpp
index e7c83f9..57e7a2d 100644
--- a/tools/lto/LTOCodeGenerator.cpp
+++ b/tools/lto/LTOCodeGenerator.cpp
@@ -69,7 +69,7 @@ const char* LTOCodeGenerator::getVersionString() {
 
 LTOCodeGenerator::LTOCodeGenerator()
   : _context(getGlobalContext()),
-    _linker("LinkTimeOptimizer", "ld-temp.o", _context), _target(NULL),
+    _linker(new Module("ld-temp.o", _context)), _target(NULL),
     _emitDwarfDebugInfo(false), _scopeRestrictionsDone(false),
     _codeModel(LTO_CODEGEN_PIC_MODEL_DYNAMIC),
     _nativeObjectFile(NULL) {
@@ -81,6 +81,7 @@ LTOCodeGenerator::LTOCodeGenerator()
 LTOCodeGenerator::~LTOCodeGenerator() {
   delete _target;
   delete _nativeObjectFile;
+  delete _linker.getModule();
 
   for (std::vector<char*>::iterator I = _codegenOptions.begin(),
          E = _codegenOptions.end(); I != E; ++I)
@@ -88,7 +89,7 @@ LTOCodeGenerator::~LTOCodeGenerator() {
 }
 
 bool LTOCodeGenerator::addModule(LTOModule* mod, std::string& errMsg) {
-  bool ret = _linker.LinkInModule(mod->getLLVVMModule(), &errMsg);
+  bool ret = _linker.linkInModule(mod->getLLVVMModule(), &errMsg);
 
   const std::vector<const char*> &undefs = mod->getAsmUndefinedRefs();
   for (int i = 0, e = undefs.size(); i != e; ++i)
@@ -287,9 +288,7 @@ static void findUsedValues(GlobalVariable *LLVMUsed,
                            SmallPtrSet<GlobalValue*, 8> &UsedValues) {
   if (LLVMUsed == 0) return;
 
-  ConstantArray *Inits = dyn_cast<ConstantArray>(LLVMUsed->getInitializer());
-  if (Inits == 0) return;
-
+  ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer());
   for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i)
     if (GlobalValue *GV =
         dyn_cast<GlobalValue>(Inits->getOperand(i)->stripPointerCasts()))
@@ -326,24 +325,26 @@ void LTOCodeGenerator::applyScopeRestrictions() {
   if (LLVMCompilerUsed)
     LLVMCompilerUsed->eraseFromParent();
 
-  llvm::Type *i8PTy = llvm::Type::getInt8PtrTy(_context);
-  std::vector<Constant*> asmUsed2;
-  for (SmallPtrSet<GlobalValue*, 16>::const_iterator i = asmUsed.begin(),
-         e = asmUsed.end(); i !=e; ++i) {
-    GlobalValue *GV = *i;
-    Constant *c = ConstantExpr::getBitCast(GV, i8PTy);
-    asmUsed2.push_back(c);
+  if (!asmUsed.empty()) {
+    llvm::Type *i8PTy = llvm::Type::getInt8PtrTy(_context);
+    std::vector<Constant*> asmUsed2;
+    for (SmallPtrSet<GlobalValue*, 16>::const_iterator i = asmUsed.begin(),
+           e = asmUsed.end(); i !=e; ++i) {
+      GlobalValue *GV = *i;
+      Constant *c = ConstantExpr::getBitCast(GV, i8PTy);
+      asmUsed2.push_back(c);
+    }
+
+    llvm::ArrayType *ATy = llvm::ArrayType::get(i8PTy, asmUsed2.size());
+    LLVMCompilerUsed =
+      new llvm::GlobalVariable(*mergedModule, ATy, false,
+                               llvm::GlobalValue::AppendingLinkage,
+                               llvm::ConstantArray::get(ATy, asmUsed2),
+                               "llvm.compiler.used");
+
+    LLVMCompilerUsed->setSection("llvm.metadata");
   }
 
-  llvm::ArrayType *ATy = llvm::ArrayType::get(i8PTy, asmUsed2.size());
-  LLVMCompilerUsed =
-    new llvm::GlobalVariable(*mergedModule, ATy, false,
-                             llvm::GlobalValue::AppendingLinkage,
-                             llvm::ConstantArray::get(ATy, asmUsed2),
-                             "llvm.compiler.used");
-
-  LLVMCompilerUsed->setSection("llvm.metadata");
-
   passes.add(createInternalizePass(mustPreserveList));
 
   // apply scope restrictions
diff --git a/tools/lto/LTOCodeGenerator.h b/tools/lto/LTOCodeGenerator.h
index 601dbfa..a4ade9f 100644
--- a/tools/lto/LTOCodeGenerator.h
+++ b/tools/lto/LTOCodeGenerator.h
@@ -19,6 +19,7 @@
 #include "llvm/ADT/StringMap.h"
 #include "llvm/Linker.h"
 #include <string>
+#include <vector>
 
 namespace llvm {
   class LLVMContext;
diff --git a/tools/lto/LTOModule.cpp b/tools/lto/LTOModule.cpp
index ff67769..d805f49 100644
--- a/tools/lto/LTOModule.cpp
+++ b/tools/lto/LTOModule.cpp
@@ -743,7 +743,7 @@ namespace {
           AddValueSymbols(Inst.getOperand(i).getExpr());
     }
     virtual void EmitLabel(MCSymbol *Symbol) {
-      Symbol->setSection(*getCurrentSection());
+      Symbol->setSection(*getCurrentSection().first);
       markDefined(*Symbol);
     }
     virtual void EmitDebugLabel(MCSymbol *Symbol) {
@@ -771,7 +771,8 @@ namespace {
     virtual void EmitBundleUnlock() {}
 
     // Noop calls.
-    virtual void ChangeSection(const MCSection *Section) {}
+    virtual void ChangeSection(const MCSection *Section,
+                               const MCExpr *Subsection) {}
     virtual void InitToTextSection() {}
     virtual void InitSections() {}
     virtual void EmitAssemblerFlag(MCAssemblerFlag Flag) {}
diff --git a/tools/macho-dump/macho-dump.cpp b/tools/macho-dump/macho-dump.cpp
index 3bd3ecc..88fd452 100644
--- a/tools/macho-dump/macho-dump.cpp
+++ b/tools/macho-dump/macho-dump.cpp
@@ -11,9 +11,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Object/MachOObject.h"
+#include "llvm/Object/MachO.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/ManagedStatic.h"
@@ -66,7 +67,8 @@ static void DumpSegmentCommandData(StringRef Name,
   outs() << "  ('flags', " << Flags << ")\n";
 }
 
-static int DumpSectionData(MachOObject &Obj, unsigned Index, StringRef Name,
+static int DumpSectionData(const MachOObjectFile &Obj, unsigned Index,
+                           StringRef Name,
                            StringRef SegmentName, uint64_t Address,
                            uint64_t Size, uint32_t Offset,
                            uint32_t Align, uint32_t RelocationTableOffset,
@@ -92,26 +94,22 @@ static int DumpSectionData(MachOObject &Obj, unsigned Index, StringRef Name,
   outs() << "   ),\n";
 
   // Dump the relocation entries.
-  int Res = 0;
   outs() << "  ('_relocations', [\n";
-  for (unsigned i = 0; i != NumRelocationTableEntries; ++i) {
-    InMemoryStruct<macho::RelocationEntry> RE;
-    Obj.ReadRelocationEntry(RelocationTableOffset, i, RE);
-    if (!RE) {
-      Res = Error("unable to read relocation table entry '" + Twine(i) + "'");
-      break;
-    }
-    
-    outs() << "    # Relocation " << i << "\n";
-    outs() << "    (('word-0', " << format("0x%x", RE->Word0) << "),\n";
-    outs() << "     ('word-1', " << format("0x%x", RE->Word1) << ")),\n";
+  unsigned RelNum = 0;
+  error_code EC;
+  for (relocation_iterator I = Obj.getSectionRelBegin(Index),
+         E = Obj.getSectionRelEnd(Index); I != E; I.increment(EC), ++RelNum) {
+    macho::RelocationEntry RE = Obj.getRelocation(I->getRawDataRefImpl());
+    outs() << "    # Relocation " << RelNum << "\n";
+    outs() << "    (('word-0', " << format("0x%x", RE.Word0) << "),\n";
+    outs() << "     ('word-1', " << format("0x%x", RE.Word1) << ")),\n";
   }
   outs() << "  ])\n";
 
   // Dump the section data, if requested.
   if (ShowSectionData) {
     outs() << "  ('_section_data', '";
-    StringRef Data = Obj.getData(Offset, Size);
+    StringRef Data = Obj.getData().substr(Offset, Size);
     for (unsigned i = 0; i != Data.size(); ++i) {
       if (i && (i % 4) == 0)
         outs() << ' ';
@@ -121,208 +119,162 @@ static int DumpSectionData(MachOObject &Obj, unsigned Index, StringRef Name,
     outs() << "')\n";
   }
 
-  return Res;
+  return 0;
 }
 
-static int DumpSegmentCommand(MachOObject &Obj,
-                               const MachOObject::LoadCommandInfo &LCI) {
-  InMemoryStruct<macho::SegmentLoadCommand> SLC;
-  Obj.ReadSegmentLoadCommand(LCI, SLC);
-  if (!SLC)
-    return Error("unable to read segment load command");
+static int DumpSegmentCommand(const MachOObjectFile &Obj,
+                              const MachOObjectFile::LoadCommandInfo &LCI) {
+  macho::SegmentLoadCommand SLC = Obj.getSegmentLoadCommand(LCI);
 
-  DumpSegmentCommandData(StringRef(SLC->Name, 16), SLC->VMAddress,
-                         SLC->VMSize, SLC->FileOffset, SLC->FileSize,
-                         SLC->MaxVMProtection, SLC->InitialVMProtection,
-                         SLC->NumSections, SLC->Flags);
+  DumpSegmentCommandData(StringRef(SLC.Name, 16), SLC.VMAddress,
+                         SLC.VMSize, SLC.FileOffset, SLC.FileSize,
+                         SLC.MaxVMProtection, SLC.InitialVMProtection,
+                         SLC.NumSections, SLC.Flags);
 
   // Dump the sections.
-  int Res = 0;
   outs() << "  ('sections', [\n";
-  for (unsigned i = 0; i != SLC->NumSections; ++i) {
-    InMemoryStruct<macho::Section> Sect;
-    Obj.ReadSection(LCI, i, Sect);
-    if (!SLC) {
-      Res = Error("unable to read section '" + Twine(i) + "'");
-      break;
-    }
-
-    if ((Res = DumpSectionData(Obj, i, StringRef(Sect->Name, 16),
-                               StringRef(Sect->SegmentName, 16), Sect->Address,
-                               Sect->Size, Sect->Offset, Sect->Align,
-                               Sect->RelocationTableOffset,
-                               Sect->NumRelocationTableEntries, Sect->Flags,
-                               Sect->Reserved1, Sect->Reserved2)))
-      break;
+  for (unsigned i = 0; i != SLC.NumSections; ++i) {
+    macho::Section Sect = Obj.getSection(LCI, i);
+    DumpSectionData(Obj, i, StringRef(Sect.Name, 16),
+                    StringRef(Sect.SegmentName, 16), Sect.Address,
+                    Sect.Size, Sect.Offset, Sect.Align,
+                    Sect.RelocationTableOffset,
+                    Sect.NumRelocationTableEntries, Sect.Flags,
+                    Sect.Reserved1, Sect.Reserved2);
   }
   outs() << "  ])\n";
 
-  return Res;
+  return 0;
 }
 
-static int DumpSegment64Command(MachOObject &Obj,
-                               const MachOObject::LoadCommandInfo &LCI) {
-  InMemoryStruct<macho::Segment64LoadCommand> SLC;
-  Obj.ReadSegment64LoadCommand(LCI, SLC);
-  if (!SLC)
-    return Error("unable to read segment load command");
-
-  DumpSegmentCommandData(StringRef(SLC->Name, 16), SLC->VMAddress,
-                         SLC->VMSize, SLC->FileOffset, SLC->FileSize,
-                         SLC->MaxVMProtection, SLC->InitialVMProtection,
-                         SLC->NumSections, SLC->Flags);
+static int DumpSegment64Command(const MachOObjectFile &Obj,
+                                const MachOObjectFile::LoadCommandInfo &LCI) {
+  macho::Segment64LoadCommand SLC = Obj.getSegment64LoadCommand(LCI);
+  DumpSegmentCommandData(StringRef(SLC.Name, 16), SLC.VMAddress,
+                          SLC.VMSize, SLC.FileOffset, SLC.FileSize,
+                          SLC.MaxVMProtection, SLC.InitialVMProtection,
+                          SLC.NumSections, SLC.Flags);
 
   // Dump the sections.
-  int Res = 0;
   outs() << "  ('sections', [\n";
-  for (unsigned i = 0; i != SLC->NumSections; ++i) {
-    InMemoryStruct<macho::Section64> Sect;
-    Obj.ReadSection64(LCI, i, Sect);
-    if (!SLC) {
-      Res = Error("unable to read section '" + Twine(i) + "'");
-      break;
-    }
-
-    if ((Res = DumpSectionData(Obj, i, StringRef(Sect->Name, 16),
-                               StringRef(Sect->SegmentName, 16), Sect->Address,
-                               Sect->Size, Sect->Offset, Sect->Align,
-                               Sect->RelocationTableOffset,
-                               Sect->NumRelocationTableEntries, Sect->Flags,
-                               Sect->Reserved1, Sect->Reserved2,
-                               Sect->Reserved3)))
-      break;
+  for (unsigned i = 0; i != SLC.NumSections; ++i) {
+    macho::Section64 Sect = Obj.getSection64(LCI, i);
+
+    DumpSectionData(Obj, i, StringRef(Sect.Name, 16),
+                    StringRef(Sect.SegmentName, 16), Sect.Address,
+                    Sect.Size, Sect.Offset, Sect.Align,
+                    Sect.RelocationTableOffset,
+                    Sect.NumRelocationTableEntries, Sect.Flags,
+                    Sect.Reserved1, Sect.Reserved2,
+                    Sect.Reserved3);
   }
   outs() << "  ])\n";
 
-  return Res;
+  return 0;
 }
 
-static void DumpSymbolTableEntryData(MachOObject &Obj,
+static void DumpSymbolTableEntryData(const MachOObjectFile &Obj,
                                      unsigned Index, uint32_t StringIndex,
                                      uint8_t Type, uint8_t SectionIndex,
-                                     uint16_t Flags, uint64_t Value) {
+                                     uint16_t Flags, uint64_t Value,
+                                     StringRef StringTable) {
+  const char *Name = &StringTable.data()[StringIndex];
   outs() << "    # Symbol " << Index << "\n";
   outs() << "   (('n_strx', " << StringIndex << ")\n";
   outs() << "    ('n_type', " << format("0x%x", Type) << ")\n";
   outs() << "    ('n_sect', " << uint32_t(SectionIndex) << ")\n";
   outs() << "    ('n_desc', " << Flags << ")\n";
   outs() << "    ('n_value', " << Value << ")\n";
-  outs() << "    ('_string', '" << Obj.getStringAtIndex(StringIndex) << "')\n";
+  outs() << "    ('_string', '" << Name << "')\n";
   outs() << "   ),\n";
 }
 
-static int DumpSymtabCommand(MachOObject &Obj,
-                             const MachOObject::LoadCommandInfo &LCI) {
-  InMemoryStruct<macho::SymtabLoadCommand> SLC;
-  Obj.ReadSymtabLoadCommand(LCI, SLC);
-  if (!SLC)
-    return Error("unable to read segment load command");
-
-  outs() << "  ('symoff', " << SLC->SymbolTableOffset << ")\n";
-  outs() << "  ('nsyms', " << SLC->NumSymbolTableEntries << ")\n";
-  outs() << "  ('stroff', " << SLC->StringTableOffset << ")\n";
-  outs() << "  ('strsize', " << SLC->StringTableSize << ")\n";
+static int DumpSymtabCommand(const MachOObjectFile &Obj) {
+  macho::SymtabLoadCommand SLC = Obj.getSymtabLoadCommand();
 
-  // Cache the string table data.
-  Obj.RegisterStringTable(*SLC);
+  outs() << "  ('symoff', " << SLC.SymbolTableOffset << ")\n";
+  outs() << "  ('nsyms', " << SLC.NumSymbolTableEntries << ")\n";
+  outs() << "  ('stroff', " << SLC.StringTableOffset << ")\n";
+  outs() << "  ('strsize', " << SLC.StringTableSize << ")\n";
 
   // Dump the string data.
   outs() << "  ('_string_data', '";
-  outs().write_escaped(Obj.getStringTableData(),
+  StringRef StringTable = Obj.getStringTableData();
+  outs().write_escaped(StringTable,
                        /*UseHexEscapes=*/true) << "')\n";
 
   // Dump the symbol table.
-  int Res = 0;
   outs() << "  ('_symbols', [\n";
-  for (unsigned i = 0; i != SLC->NumSymbolTableEntries; ++i) {
+  error_code EC;
+  unsigned SymNum = 0;
+  for (symbol_iterator I = Obj.begin_symbols(), E = Obj.end_symbols(); I != E;
+       I.increment(EC), ++SymNum) {
+    DataRefImpl DRI = I->getRawDataRefImpl();
     if (Obj.is64Bit()) {
-      InMemoryStruct<macho::Symbol64TableEntry> STE;
-      Obj.ReadSymbol64TableEntry(SLC->SymbolTableOffset, i, STE);
-      if (!STE) {
-        Res = Error("unable to read symbol: '" + Twine(i) + "'");
-        break;
-      }
-
-      DumpSymbolTableEntryData(Obj, i, STE->StringIndex, STE->Type,
-                               STE->SectionIndex, STE->Flags, STE->Value);
+      macho::Symbol64TableEntry STE = Obj.getSymbol64TableEntry(DRI);
+      DumpSymbolTableEntryData(Obj, SymNum, STE.StringIndex, STE.Type,
+                               STE.SectionIndex, STE.Flags, STE.Value,
+                               StringTable);
     } else {
-      InMemoryStruct<macho::SymbolTableEntry> STE;
-      Obj.ReadSymbolTableEntry(SLC->SymbolTableOffset, i, STE);
-      if (!SLC) {
-        Res = Error("unable to read symbol: '" + Twine(i) + "'");
-        break;
-      }
-
-      DumpSymbolTableEntryData(Obj, i, STE->StringIndex, STE->Type,
-                               STE->SectionIndex, STE->Flags, STE->Value);
+      macho::SymbolTableEntry STE = Obj.getSymbolTableEntry(DRI);
+      DumpSymbolTableEntryData(Obj, SymNum, STE.StringIndex, STE.Type,
+                               STE.SectionIndex, STE.Flags, STE.Value,
+                               StringTable);
     }
   }
   outs() << "  ])\n";
 
-  return Res;
+  return 0;
 }
 
-static int DumpDysymtabCommand(MachOObject &Obj,
-                             const MachOObject::LoadCommandInfo &LCI) {
-  InMemoryStruct<macho::DysymtabLoadCommand> DLC;
-  Obj.ReadDysymtabLoadCommand(LCI, DLC);
-  if (!DLC)
-    return Error("unable to read segment load command");
-
-  outs() << "  ('ilocalsym', " << DLC->LocalSymbolsIndex << ")\n";
-  outs() << "  ('nlocalsym', " << DLC->NumLocalSymbols << ")\n";
-  outs() << "  ('iextdefsym', " << DLC->ExternalSymbolsIndex << ")\n";
-  outs() << "  ('nextdefsym', " << DLC->NumExternalSymbols << ")\n";
-  outs() << "  ('iundefsym', " << DLC->UndefinedSymbolsIndex << ")\n";
-  outs() << "  ('nundefsym', " << DLC->NumUndefinedSymbols << ")\n";
-  outs() << "  ('tocoff', " << DLC->TOCOffset << ")\n";
-  outs() << "  ('ntoc', " << DLC->NumTOCEntries << ")\n";
-  outs() << "  ('modtaboff', " << DLC->ModuleTableOffset << ")\n";
-  outs() << "  ('nmodtab', " << DLC->NumModuleTableEntries << ")\n";
-  outs() << "  ('extrefsymoff', " << DLC->ReferenceSymbolTableOffset << ")\n";
+static int DumpDysymtabCommand(const MachOObjectFile &Obj) {
+  macho::DysymtabLoadCommand DLC = Obj.getDysymtabLoadCommand();
+
+  outs() << "  ('ilocalsym', " << DLC.LocalSymbolsIndex << ")\n";
+  outs() << "  ('nlocalsym', " << DLC.NumLocalSymbols << ")\n";
+  outs() << "  ('iextdefsym', " << DLC.ExternalSymbolsIndex << ")\n";
+  outs() << "  ('nextdefsym', " << DLC.NumExternalSymbols << ")\n";
+  outs() << "  ('iundefsym', " << DLC.UndefinedSymbolsIndex << ")\n";
+  outs() << "  ('nundefsym', " << DLC.NumUndefinedSymbols << ")\n";
+  outs() << "  ('tocoff', " << DLC.TOCOffset << ")\n";
+  outs() << "  ('ntoc', " << DLC.NumTOCEntries << ")\n";
+  outs() << "  ('modtaboff', " << DLC.ModuleTableOffset << ")\n";
+  outs() << "  ('nmodtab', " << DLC.NumModuleTableEntries << ")\n";
+  outs() << "  ('extrefsymoff', " << DLC.ReferenceSymbolTableOffset << ")\n";
   outs() << "  ('nextrefsyms', "
-         << DLC->NumReferencedSymbolTableEntries << ")\n";
-  outs() << "  ('indirectsymoff', " << DLC->IndirectSymbolTableOffset << ")\n";
+         << DLC.NumReferencedSymbolTableEntries << ")\n";
+  outs() << "  ('indirectsymoff', " << DLC.IndirectSymbolTableOffset << ")\n";
   outs() << "  ('nindirectsyms', "
-         << DLC->NumIndirectSymbolTableEntries << ")\n";
-  outs() << "  ('extreloff', " << DLC->ExternalRelocationTableOffset << ")\n";
-  outs() << "  ('nextrel', " << DLC->NumExternalRelocationTableEntries << ")\n";
-  outs() << "  ('locreloff', " << DLC->LocalRelocationTableOffset << ")\n";
-  outs() << "  ('nlocrel', " << DLC->NumLocalRelocationTableEntries << ")\n";
+         << DLC.NumIndirectSymbolTableEntries << ")\n";
+  outs() << "  ('extreloff', " << DLC.ExternalRelocationTableOffset << ")\n";
+  outs() << "  ('nextrel', " << DLC.NumExternalRelocationTableEntries << ")\n";
+  outs() << "  ('locreloff', " << DLC.LocalRelocationTableOffset << ")\n";
+  outs() << "  ('nlocrel', " << DLC.NumLocalRelocationTableEntries << ")\n";
 
   // Dump the indirect symbol table.
-  int Res = 0;
   outs() << "  ('_indirect_symbols', [\n";
-  for (unsigned i = 0; i != DLC->NumIndirectSymbolTableEntries; ++i) {
-    InMemoryStruct<macho::IndirectSymbolTableEntry> ISTE;
-    Obj.ReadIndirectSymbolTableEntry(*DLC, i, ISTE);
-    if (!ISTE) {
-      Res = Error("unable to read segment load command");
-      break;
-    }
-
+  for (unsigned i = 0; i != DLC.NumIndirectSymbolTableEntries; ++i) {
+    macho::IndirectSymbolTableEntry ISTE =
+      Obj.getIndirectSymbolTableEntry(DLC, i);
     outs() << "    # Indirect Symbol " << i << "\n";
     outs() << "    (('symbol_index', "
-           << format("0x%x", ISTE->Index) << "),),\n";
+           << format("0x%x", ISTE.Index) << "),),\n";
   }
   outs() << "  ])\n";
 
-  return Res;
+  return 0;
 }
 
-static int DumpLinkeditDataCommand(MachOObject &Obj,
-                                   const MachOObject::LoadCommandInfo &LCI) {
-  InMemoryStruct<macho::LinkeditDataLoadCommand> LLC;
-  Obj.ReadLinkeditDataLoadCommand(LCI, LLC);
-  if (!LLC)
-    return Error("unable to read segment load command");
-
-  outs() << "  ('dataoff', " << LLC->DataOffset << ")\n"
-         << "  ('datasize', " << LLC->DataSize << ")\n"
+static int
+DumpLinkeditDataCommand(const MachOObjectFile &Obj,
+                        const MachOObjectFile::LoadCommandInfo &LCI) {
+  macho::LinkeditDataLoadCommand LLC = Obj.getLinkeditDataLoadCommand(LCI);
+  outs() << "  ('dataoff', " << LLC.DataOffset << ")\n"
+         << "  ('datasize', " << LLC.DataSize << ")\n"
          << "  ('_addresses', [\n";
 
   SmallVector<uint64_t, 8> Addresses;
-  Obj.ReadULEB128s(LLC->DataOffset, Addresses);
+  Obj.ReadULEB128s(LLC.DataOffset, Addresses);
   for (unsigned i = 0, e = Addresses.size(); i != e; ++i)
     outs() << "    # Address " << i << '\n'
            << "    ('address', " << format("0x%x", Addresses[i]) << "),\n";
@@ -332,28 +284,22 @@ static int DumpLinkeditDataCommand(MachOObject &Obj,
   return 0;
 }
 
-static int DumpDataInCodeDataCommand(MachOObject &Obj,
-                                     const MachOObject::LoadCommandInfo &LCI) {
-  InMemoryStruct<macho::LinkeditDataLoadCommand> LLC;
-  Obj.ReadLinkeditDataLoadCommand(LCI, LLC);
-  if (!LLC)
-    return Error("unable to read data-in-code load command");
-
-  outs() << "  ('dataoff', " << LLC->DataOffset << ")\n"
-         << "  ('datasize', " << LLC->DataSize << ")\n"
+static int
+DumpDataInCodeDataCommand(const MachOObjectFile &Obj,
+                          const MachOObjectFile::LoadCommandInfo &LCI) {
+  macho::LinkeditDataLoadCommand LLC = Obj.getLinkeditDataLoadCommand(LCI);
+  outs() << "  ('dataoff', " << LLC.DataOffset << ")\n"
+         << "  ('datasize', " << LLC.DataSize << ")\n"
          << "  ('_data_regions', [\n";
 
-
-  unsigned NumRegions = LLC->DataSize / 8;
+  unsigned NumRegions = LLC.DataSize / 8;
   for (unsigned i = 0; i < NumRegions; ++i) {
-    InMemoryStruct<macho::DataInCodeTableEntry> DICE;
-    Obj.ReadDataInCodeTableEntry(LLC->DataOffset, i, DICE);
-    if (!DICE)
-      return Error("unable to read DataInCodeTableEntry");
+    macho::DataInCodeTableEntry DICE =
+      Obj.getDataInCodeTableEntry(LLC.DataOffset, i);
     outs() << "    # DICE " << i << "\n"
-           << "    ('offset', " << DICE->Offset << ")\n"
-           << "    ('length', " << DICE->Length << ")\n"
-           << "    ('kind', " << DICE->Kind << ")\n";
+           << "    ('offset', " << DICE.Offset << ")\n"
+           << "    ('length', " << DICE.Length << ")\n"
+           << "    ('kind', " << DICE.Kind << ")\n";
   }
 
   outs() <<"  ])\n";
@@ -361,99 +307,111 @@ static int DumpDataInCodeDataCommand(MachOObject &Obj,
   return 0;
 }
 
-static int DumpLinkerOptionsCommand(MachOObject &Obj,
-                                    const MachOObject::LoadCommandInfo &LCI) {
-  InMemoryStruct<macho::LinkerOptionsLoadCommand> LOLC;
-  Obj.ReadLinkerOptionsLoadCommand(LCI, LOLC);
-  if (!LOLC)
-    return Error("unable to read linker options load command");
-
-  outs() << "  ('count', " << LOLC->Count << ")\n"
-         << "  ('_strings', [\n";
-
-  uint64_t DataSize = LOLC->Size - sizeof(macho::LinkerOptionsLoadCommand);
-  StringRef Data = Obj.getData(
-    LCI.Offset + sizeof(macho::LinkerOptionsLoadCommand), DataSize);
-  for (unsigned i = 0; i != LOLC->Count; ++i) {
-    std::pair<StringRef,StringRef> Split = Data.split('\0');
-    outs() << "\t\"";
-    outs().write_escaped(Split.first);
-    outs() << "\",\n";
-    Data = Split.second;
-  }
-  outs() <<"  ])\n";
+static int
+DumpLinkerOptionsCommand(const MachOObjectFile &Obj,
+                         const MachOObjectFile::LoadCommandInfo &LCI) {
+  macho::LinkerOptionsLoadCommand LOLC = Obj.getLinkerOptionsLoadCommand(LCI);
+   outs() << "  ('count', " << LOLC.Count << ")\n"
+          << "  ('_strings', [\n";
+
+   uint64_t DataSize = LOLC.Size - sizeof(macho::LinkerOptionsLoadCommand);
+   const char *P = LCI.Ptr + sizeof(macho::LinkerOptionsLoadCommand);
+   StringRef Data(P, DataSize);
+   for (unsigned i = 0; i != LOLC.Count; ++i) {
+     std::pair<StringRef,StringRef> Split = Data.split('\0');
+     outs() << "\t\"";
+     outs().write_escaped(Split.first);
+     outs() << "\",\n";
+     Data = Split.second;
+   }
+   outs() <<"  ])\n";
 
   return 0;
 }
 
-
-static int DumpLoadCommand(MachOObject &Obj, unsigned Index) {
-  const MachOObject::LoadCommandInfo &LCI = Obj.getLoadCommandInfo(Index);
-  int Res = 0;
-
-  outs() << "  # Load Command " << Index << "\n"
-         << " (('command', " << LCI.Command.Type << ")\n"
-         << "  ('size', " << LCI.Command.Size << ")\n";
-  switch (LCI.Command.Type) {
+static int DumpLoadCommand(const MachOObjectFile &Obj,
+                           MachOObjectFile::LoadCommandInfo &LCI) {
+  switch (LCI.C.Type) {
   case macho::LCT_Segment:
-    Res = DumpSegmentCommand(Obj, LCI);
-    break;
+    return DumpSegmentCommand(Obj, LCI);
   case macho::LCT_Segment64:
-    Res = DumpSegment64Command(Obj, LCI);
-    break;
+    return DumpSegment64Command(Obj, LCI);
   case macho::LCT_Symtab:
-    Res = DumpSymtabCommand(Obj, LCI);
-    break;
+    return DumpSymtabCommand(Obj);
   case macho::LCT_Dysymtab:
-    Res = DumpDysymtabCommand(Obj, LCI);
-    break;
+    return DumpDysymtabCommand(Obj);
   case macho::LCT_CodeSignature:
   case macho::LCT_SegmentSplitInfo:
   case macho::LCT_FunctionStarts:
-    Res = DumpLinkeditDataCommand(Obj, LCI);
-    break;
+    return DumpLinkeditDataCommand(Obj, LCI);
   case macho::LCT_DataInCode:
-    Res = DumpDataInCodeDataCommand(Obj, LCI);
-    break;
+    return DumpDataInCodeDataCommand(Obj, LCI);
   case macho::LCT_LinkerOptions:
-    Res = DumpLinkerOptionsCommand(Obj, LCI);
-    break;
+    return DumpLinkerOptionsCommand(Obj, LCI);
   default:
-    Warning("unknown load command: " + Twine(LCI.Command.Type));
-    break;
+    Warning("unknown load command: " + Twine(LCI.C.Type));
+    return 0;
   }
-  outs() << " ),\n";
+}
 
+
+static int DumpLoadCommand(const MachOObjectFile &Obj, unsigned Index,
+                           MachOObjectFile::LoadCommandInfo &LCI) {
+  outs() << "  # Load Command " << Index << "\n"
+         << " (('command', " << LCI.C.Type << ")\n"
+         << "  ('size', " << LCI.C.Size << ")\n";
+  int Res = DumpLoadCommand(Obj, LCI);
+  outs() << " ),\n";
   return Res;
 }
 
+static void printHeader(const MachOObjectFile *Obj,
+                        const macho::Header &Header) {
+  outs() << "('cputype', " << Header.CPUType << ")\n";
+  outs() << "('cpusubtype', " << Header.CPUSubtype << ")\n";
+  outs() << "('filetype', " << Header.FileType << ")\n";
+  outs() << "('num_load_commands', " << Header.NumLoadCommands << ")\n";
+  outs() << "('load_commands_size', " << Header.SizeOfLoadCommands << ")\n";
+  outs() << "('flag', " << Header.Flags << ")\n";
+
+  // Print extended header if 64-bit.
+  if (Obj->is64Bit()) {
+    macho::Header64Ext Header64Ext = Obj->getHeader64Ext();
+    outs() << "('reserved', " << Header64Ext.Reserved << ")\n";
+  }
+}
+
 int main(int argc, char **argv) {
   ProgramName = argv[0];
   llvm_shutdown_obj Y;  // Call llvm_shutdown() on exit.
 
   cl::ParseCommandLineOptions(argc, argv, "llvm Mach-O dumping tool\n");
 
-  // Load the input file.
-  std::string ErrorStr;
-  OwningPtr<MemoryBuffer> InputBuffer;
-  if (error_code ec = MemoryBuffer::getFileOrSTDIN(InputFile, InputBuffer))
-    return Error("unable to read input: '" + ec.message() + "'");
+  OwningPtr<Binary> Binary;
+  if (error_code EC = createBinary(InputFile, Binary))
+    return Error("unable to read input: '" + EC.message() + "'");
 
-  // Construct the Mach-O wrapper object.
-  OwningPtr<MachOObject> InputObject(
-    MachOObject::LoadFromBuffer(InputBuffer.take(), &ErrorStr));
+  const MachOObjectFile *InputObject = dyn_cast<MachOObjectFile>(Binary.get());
   if (!InputObject)
-    return Error("unable to load object: '" + ErrorStr + "'");
+    return Error("Not a MachO object");
 
   // Print the header
-  InputObject->printHeader(outs());
+  macho::Header Header = InputObject->getHeader();
+  printHeader(InputObject, Header);
 
   // Print the load commands.
   int Res = 0;
+  MachOObjectFile::LoadCommandInfo Command =
+    InputObject->getFirstLoadCommandInfo();
   outs() << "('load_commands', [\n";
-  for (unsigned i = 0; i != InputObject->getHeader().NumLoadCommands; ++i)
-    if ((Res = DumpLoadCommand(*InputObject, i)))
+  for (unsigned i = 0; ; ++i) {
+    if (DumpLoadCommand(*InputObject, i, Command))
       break;
+
+    if (i == Header.NumLoadCommands - 1)
+      break;
+    Command = InputObject->getNextLoadCommandInfo(Command);
+  }
   outs() << "])\n";
 
   return Res;
diff --git a/tools/obj2yaml/coff2yaml.cpp b/tools/obj2yaml/coff2yaml.cpp
index f0241d9..5106a4a 100644
--- a/tools/obj2yaml/coff2yaml.cpp
+++ b/tools/obj2yaml/coff2yaml.cpp
@@ -10,6 +10,7 @@
 #include "obj2yaml.h"
 #include "llvm/Object/COFF.h"
 
+using namespace llvm;
 
 template <typename One, typename Two>
 struct pod_pair { // I'd much rather use std::pair, but it's not a POD
@@ -17,8 +18,8 @@ struct pod_pair { // I'd much rather use std::pair, but it's not a POD
   Two second;
 };
 
-#define STRING_PAIR(x)  {llvm::COFF::x, #x}
-static const pod_pair<llvm::COFF::MachineTypes, const char *> 
+#define STRING_PAIR(x)  {COFF::x, #x}
+static const pod_pair<COFF::MachineTypes, const char *>
 MachineTypePairs [] = {
   STRING_PAIR(IMAGE_FILE_MACHINE_UNKNOWN),
   STRING_PAIR(IMAGE_FILE_MACHINE_AM33),
@@ -43,7 +44,7 @@ MachineTypePairs [] = {
   STRING_PAIR(IMAGE_FILE_MACHINE_WCEMIPSV2)
 };
 
-static const pod_pair<llvm::COFF::SectionCharacteristics, const char *> 
+static const pod_pair<COFF::SectionCharacteristics, const char *>
 SectionCharacteristicsPairs1 [] = {
   STRING_PAIR(IMAGE_SCN_TYPE_NO_PAD),
   STRING_PAIR(IMAGE_SCN_CNT_CODE),
@@ -60,7 +61,7 @@ SectionCharacteristicsPairs1 [] = {
   STRING_PAIR(IMAGE_SCN_MEM_PRELOAD)
 };
 
-static const pod_pair<llvm::COFF::SectionCharacteristics, const char *> 
+static const pod_pair<COFF::SectionCharacteristics, const char *>
 SectionCharacteristicsPairsAlignment [] = {
   STRING_PAIR(IMAGE_SCN_ALIGN_1BYTES),
   STRING_PAIR(IMAGE_SCN_ALIGN_2BYTES),
@@ -78,7 +79,7 @@ SectionCharacteristicsPairsAlignment [] = {
   STRING_PAIR(IMAGE_SCN_ALIGN_8192BYTES)
 };
 
-static const pod_pair<llvm::COFF::SectionCharacteristics, const char *> 
+static const pod_pair<COFF::SectionCharacteristics, const char *>
 SectionCharacteristicsPairs2 [] = {
   STRING_PAIR(IMAGE_SCN_LNK_NRELOC_OVFL),
   STRING_PAIR(IMAGE_SCN_MEM_DISCARDABLE),
@@ -89,8 +90,8 @@ SectionCharacteristicsPairs2 [] = {
   STRING_PAIR(IMAGE_SCN_MEM_READ),
   STRING_PAIR(IMAGE_SCN_MEM_WRITE)
 };
-  
-static const pod_pair<llvm::COFF::SymbolBaseType, const char *> 
+
+static const pod_pair<COFF::SymbolBaseType, const char *>
 SymbolBaseTypePairs [] = {
   STRING_PAIR(IMAGE_SYM_TYPE_NULL),
   STRING_PAIR(IMAGE_SYM_TYPE_VOID),
@@ -110,15 +111,15 @@ SymbolBaseTypePairs [] = {
   STRING_PAIR(IMAGE_SYM_TYPE_DWORD)
 };
 
-static const pod_pair<llvm::COFF::SymbolComplexType, const char *> 
+static const pod_pair<COFF::SymbolComplexType, const char *>
 SymbolComplexTypePairs [] = {
   STRING_PAIR(IMAGE_SYM_DTYPE_NULL),
   STRING_PAIR(IMAGE_SYM_DTYPE_POINTER),
   STRING_PAIR(IMAGE_SYM_DTYPE_FUNCTION),
   STRING_PAIR(IMAGE_SYM_DTYPE_ARRAY),
 };
-  
-static const pod_pair<llvm::COFF::SymbolStorageClass, const char *> 
+
+static const pod_pair<COFF::SymbolStorageClass, const char *>
 SymbolStorageClassPairs [] = {
   STRING_PAIR(IMAGE_SYM_CLASS_END_OF_FUNCTION),
   STRING_PAIR(IMAGE_SYM_CLASS_NULL),
@@ -149,7 +150,7 @@ SymbolStorageClassPairs [] = {
   STRING_PAIR(IMAGE_SYM_CLASS_CLR_TOKEN),
 };
 
-static const pod_pair<llvm::COFF::RelocationTypeX86, const char *> 
+static const pod_pair<COFF::RelocationTypeX86, const char *>
 RelocationTypeX86Pairs [] = {
   STRING_PAIR(IMAGE_REL_I386_ABSOLUTE),
   STRING_PAIR(IMAGE_REL_I386_DIR16),
@@ -181,7 +182,7 @@ RelocationTypeX86Pairs [] = {
   STRING_PAIR(IMAGE_REL_AMD64_SSPAN32)
 };
 
-static const pod_pair<llvm::COFF::RelocationTypesARM, const char *> 
+static const pod_pair<COFF::RelocationTypesARM, const char *>
 RelocationTypesARMPairs [] = {
   STRING_PAIR(IMAGE_REL_ARM_ABSOLUTE),
   STRING_PAIR(IMAGE_REL_ARM_ADDR32),
@@ -201,13 +202,8 @@ RelocationTypesARMPairs [] = {
 };
 #undef STRING_PAIR
 
-
-static const char endl = '\n';
-
-namespace yaml {  // COFF-specific yaml-writing specific routines
-
-static llvm::raw_ostream &writeName(llvm::raw_ostream &Out, 
-                             const char *Name, std::size_t NameSize) {
+static raw_ostream &writeName(raw_ostream &Out,
+                              const char *Name, std::size_t NameSize) {
   for (std::size_t i = 0; i < NameSize; ++i) {
     if (!Name[i]) break;
     Out << Name[i];
@@ -217,20 +213,19 @@ static llvm::raw_ostream &writeName(llvm::raw_ostream &Out,
 
 // Given an array of pod_pair<enum, const char *>, write all enums that match
 template <typename T, std::size_t N>
-static llvm::raw_ostream &writeBitMask(llvm::raw_ostream &Out, 
-              const pod_pair<T, const char *> (&Arr)[N], unsigned long Val) {
+static raw_ostream &writeBitMask(raw_ostream &Out,
+                                 const pod_pair<T, const char *> (&Arr)[N],
+                                 unsigned long Val) {
   for (std::size_t i = 0; i < N; ++i)
     if (Val & Arr[i].first)
       Out << Arr[i].second << ", ";
   return Out;
 }
 
-} // end of yaml namespace
-
 // Given an array of pod_pair<enum, const char *>, look up a value
 template <typename T, std::size_t N>
-const char *nameLookup(const pod_pair<T, const char *> (&Arr)[N], 
-                           unsigned long Val, const char *NotFound = NULL) {
+const char *nameLookup(const pod_pair<T, const char *> (&Arr)[N],
+                       unsigned long Val, const char *NotFound = NULL) {
   T n = static_cast<T>(Val);
   for (std::size_t i = 0; i < N; ++i)
     if (n == Arr[i].first)
@@ -238,124 +233,122 @@ const char *nameLookup(const pod_pair<T, const char *> (&Arr)[N],
   return NotFound;
 }
 
-
-static llvm::raw_ostream &yamlCOFFHeader(
-          const llvm::object::coff_file_header *Header,llvm::raw_ostream &Out) {
-
-  Out << "header: !Header" << endl;
+static void yamlCOFFHeader(const object::coff_file_header *Header,
+                           raw_ostream &Out) {
+  Out << "header: !Header\n";
   Out << "  Machine: ";
   Out << nameLookup(MachineTypePairs, Header->Machine, "# Unknown_MachineTypes")
       << " # (";
-  return yaml::writeHexNumber(Out, Header->Machine) << ")" << endl << endl;
+  objyaml::writeHexNumber(Out, Header->Machine) << ")\n\n";
 }
 
 
-static llvm::raw_ostream &yamlCOFFSections(llvm::object::COFFObjectFile &Obj, 
-                            std::size_t NumSections, llvm::raw_ostream &Out) {
-  llvm::error_code ec;
-  Out << "sections:" << endl;
-  for (llvm::object::section_iterator iter = Obj.begin_sections(); 
-                             iter != Obj.end_sections(); iter.increment(ec)) {
-    const llvm::object::coff_section *sect = Obj.getCOFFSection(iter);
-  
-    Out << "  - !Section" << endl;
+static void yamlCOFFSections(object::COFFObjectFile &Obj,
+                             std::size_t NumSections, raw_ostream &Out) {
+  error_code ec;
+  Out << "sections:\n";
+  for (object::section_iterator iter = Obj.begin_sections();
+       iter != Obj.end_sections(); iter.increment(ec)) {
+    const object::coff_section *sect = Obj.getCOFFSection(iter);
+
+    Out << "  - !Section\n";
     Out << "    Name: ";
-    yaml::writeName(Out, sect->Name, sizeof(sect->Name)) << endl;
+    writeName(Out, sect->Name, sizeof(sect->Name)) << '\n';
 
     Out << "    Characteristics: [";
-    yaml::writeBitMask(Out, SectionCharacteristicsPairs1, sect->Characteristics);
-    Out << nameLookup(SectionCharacteristicsPairsAlignment, 
-        sect->Characteristics & 0x00F00000, "# Unrecognized_IMAGE_SCN_ALIGN") 
+    writeBitMask(Out, SectionCharacteristicsPairs1, sect->Characteristics);
+    Out << nameLookup(SectionCharacteristicsPairsAlignment,
+        sect->Characteristics & 0x00F00000, "# Unrecognized_IMAGE_SCN_ALIGN")
         << ", ";
-    yaml::writeBitMask(Out, SectionCharacteristicsPairs2, sect->Characteristics);
+    writeBitMask(Out, SectionCharacteristicsPairs2, sect->Characteristics);
     Out << "] # ";
-    yaml::writeHexNumber(Out, sect->Characteristics) << endl;
+    objyaml::writeHexNumber(Out, sect->Characteristics) << '\n';
 
-    llvm::ArrayRef<uint8_t> sectionData;
-    Obj.getSectionContents(sect, sectionData);    
+    ArrayRef<uint8_t> sectionData;
+    Obj.getSectionContents(sect, sectionData);
     Out << "    SectionData: ";
-    yaml::writeHexStream(Out, sectionData) << endl;
+    objyaml::writeHexStream(Out, sectionData) << '\n';
     if (iter->begin_relocations() != iter->end_relocations())
       Out << "    Relocations:\n";
-    for (llvm::object::relocation_iterator rIter = iter->begin_relocations();
+    for (object::relocation_iterator rIter = iter->begin_relocations();
                        rIter != iter->end_relocations(); rIter.increment(ec)) {
-      const llvm::object::coff_relocation *reloc = Obj.getCOFFRelocation(rIter);
+      const object::coff_relocation *reloc = Obj.getCOFFRelocation(rIter);
 
-        Out << "      - !Relocation" << endl;
+        Out << "      - !Relocation\n";
         Out << "        VirtualAddress: " ;
-        yaml::writeHexNumber(Out, reloc->VirtualAddress) << endl;
-        Out << "        SymbolTableIndex: " << reloc->SymbolTableIndex << endl;
-        Out << "        Type: " 
-            << nameLookup(RelocationTypeX86Pairs, reloc->Type) << endl;
+        objyaml::writeHexNumber(Out, reloc->VirtualAddress) << '\n';
+        Out << "        SymbolTableIndex: " << reloc->SymbolTableIndex << '\n';
+        Out << "        Type: "
+            << nameLookup(RelocationTypeX86Pairs, reloc->Type) << '\n';
     // TODO: Use the correct reloc type for the machine.
-        Out << endl;
+        Out << '\n';
       }
 
-  } 
-  return Out;
+  }
 }
 
-static llvm::raw_ostream& yamlCOFFSymbols(llvm::object::COFFObjectFile &Obj, 
-                              std::size_t NumSymbols, llvm::raw_ostream &Out) {
-  llvm::error_code ec;
-  Out << "symbols:" << endl;
-  for (llvm::object::symbol_iterator iter = Obj.begin_symbols(); 
-                             iter != Obj.end_symbols(); iter.increment(ec)) {
+static void yamlCOFFSymbols(object::COFFObjectFile &Obj, std::size_t NumSymbols,
+                            raw_ostream &Out) {
+  error_code ec;
+  Out << "symbols:\n";
+  for (object::symbol_iterator iter = Obj.begin_symbols();
+       iter != Obj.end_symbols(); iter.increment(ec)) {
  // Gather all the info that we need
-    llvm::StringRef str;
-    const llvm::object::coff_symbol *symbol = Obj.getCOFFSymbol(iter);
+    StringRef str;
+    const object::coff_symbol *symbol = Obj.getCOFFSymbol(iter);
     Obj.getSymbolName(symbol, str);
     std::size_t  simpleType  = symbol->getBaseType();
     std::size_t complexType  = symbol->getComplexType();
     std::size_t storageClass = symbol->StorageClass;
-    
-    Out << "  - !Symbol" << endl;
-    Out << "    Name: " << str << endl; 
-
-    Out << "    Value: "         << symbol->Value << endl;
-    Out << "    SectionNumber: " << symbol->SectionNumber << endl;
-
-    Out << "    SimpleType: " 
-        << nameLookup(SymbolBaseTypePairs, simpleType, 
-            "# Unknown_SymbolBaseType") 
-        << " # (" << simpleType << ")" << endl;
-    
-    Out << "    ComplexType: " 
-        << nameLookup(SymbolComplexTypePairs, complexType, 
-                "# Unknown_SymbolComplexType") 
-        << " # (" << complexType << ")" << endl;
-    
-    Out << "    StorageClass: " 
+
+    Out << "  - !Symbol\n";
+    Out << "    Name: " << str << '\n';
+
+    Out << "    Value: "         << symbol->Value << '\n';
+    Out << "    SectionNumber: " << symbol->SectionNumber << '\n';
+
+    Out << "    SimpleType: "
+        << nameLookup(SymbolBaseTypePairs, simpleType,
+            "# Unknown_SymbolBaseType")
+        << " # (" << simpleType << ")\n";
+
+    Out << "    ComplexType: "
+        << nameLookup(SymbolComplexTypePairs, complexType,
+                "# Unknown_SymbolComplexType")
+        << " # (" << complexType << ")\n";
+
+    Out << "    StorageClass: "
         << nameLookup(SymbolStorageClassPairs, storageClass,
-              "# Unknown_StorageClass") 
-        << " # (" << (int) storageClass << ")" << endl;
+              "# Unknown_StorageClass")
+        << " # (" << (int) storageClass << ")\n";
 
     if (symbol->NumberOfAuxSymbols > 0) {
-      llvm::ArrayRef<uint8_t> aux = Obj.getSymbolAuxData(symbol);
-      Out << "    NumberOfAuxSymbols: " 
-          << (int) symbol->NumberOfAuxSymbols << endl;
+      ArrayRef<uint8_t> aux = Obj.getSymbolAuxData(symbol);
+      Out << "    NumberOfAuxSymbols: "
+          << (int) symbol->NumberOfAuxSymbols << '\n';
       Out << "    AuxillaryData: ";
-      yaml::writeHexStream(Out, aux);
+      objyaml::writeHexStream(Out, aux);
     }
-      
-    Out << endl;
-  }
 
-  return Out;
+    Out << '\n';
+  }
 }
 
 
-llvm::error_code coff2yaml(llvm::raw_ostream &Out, llvm::MemoryBuffer *TheObj) {
-  llvm::error_code ec;
-  llvm::object::COFFObjectFile obj(TheObj, ec);
-  if (!ec) {
-    const llvm::object::coff_file_header *hd;
-    ec = obj.getHeader(hd);
-    if (!ec) {
-      yamlCOFFHeader(hd, Out);
-      yamlCOFFSections(obj, hd->NumberOfSections, Out);
-      yamlCOFFSymbols(obj, hd->NumberOfSymbols, Out);
-    }
-  }
+error_code coff2yaml(raw_ostream &Out, MemoryBuffer *TheObj) {
+  error_code ec;
+  object::COFFObjectFile obj(TheObj, ec);
+  if (ec)
+    return ec;
+
+  const object::coff_file_header *hd;
+  ec = obj.getHeader(hd);
+  if (ec)
+    return ec;
+
+  yamlCOFFHeader(hd, Out);
+  yamlCOFFSections(obj, hd->NumberOfSections, Out);
+  yamlCOFFSymbols(obj, hd->NumberOfSymbols, Out);
+
   return ec;
 }
diff --git a/tools/obj2yaml/obj2yaml.cpp b/tools/obj2yaml/obj2yaml.cpp
index bdc461a..821c9ac 100644
--- a/tools/obj2yaml/obj2yaml.cpp
+++ b/tools/obj2yaml/obj2yaml.cpp
@@ -16,20 +16,19 @@
 #include "llvm/Support/PrettyStackTrace.h"
 #include "llvm/Support/Signals.h"
 
-const char endl = '\n';
+using namespace llvm;
 
-namespace yaml {  // generic yaml-writing specific routines
+namespace objyaml {  // generic YAML-writing routines
 
 unsigned char printable(unsigned char Ch) {
   return Ch >= ' ' && Ch <= '~' ? Ch : '.';
 }
-  
-llvm::raw_ostream &writeHexStream(llvm::raw_ostream &Out, 
-                                     const llvm::ArrayRef<uint8_t> arr) {
+
+raw_ostream &writeHexStream(raw_ostream &Out, const ArrayRef<uint8_t> arr) {
   const char *hex = "0123456789ABCDEF";
   Out << " !hex \"";
 
-  typedef llvm::ArrayRef<uint8_t>::const_iterator iter_t;
+  typedef ArrayRef<uint8_t>::const_iterator iter_t;
   const iter_t end = arr.end();
   for (iter_t iter = arr.begin(); iter != end; ++iter)
     Out << hex[(*iter >> 4) & 0x0F] << hex[(*iter & 0x0F)];
@@ -37,49 +36,50 @@ llvm::raw_ostream &writeHexStream(llvm::raw_ostream &Out,
   Out << "\" # |";
   for (iter_t iter = arr.begin(); iter != end; ++iter)
     Out << printable(*iter);
-  Out << "|" << endl;
+  Out << "|\n";
 
   return Out;
-  }
+}
 
-llvm::raw_ostream &writeHexNumber(llvm::raw_ostream &Out, unsigned long long N) {
+raw_ostream &writeHexNumber(raw_ostream &Out, unsigned long long N) {
   if (N >= 10)
     Out << "0x";
   Out.write_hex(N);
   return Out;
 }
 
+} // end namespace objyaml
+
+namespace {
+enum ObjectFileType {
+  coff
+};
 }
 
+cl::opt<ObjectFileType> InputFormat(
+    cl::desc("Choose input format"),
+    cl::values(clEnumVal(coff, "process COFF object files"), clEnumValEnd));
 
-using namespace llvm;
-enum ObjectFileType { coff };
+cl::opt<std::string> InputFilename(cl::Positional, cl::desc("<input file>"),
+                                   cl::init("-"));
 
-cl::opt<ObjectFileType> InputFormat(
-  cl::desc("Choose input format"),
-    cl::values(
-      clEnumVal(coff, "process COFF object files"),
-    clEnumValEnd));
-    
-cl::opt<std::string> InputFilename(cl::Positional, cl::desc("<input file>"), cl::init("-"));
-
-int main(int argc, char * argv[]) {
+int main(int argc, char *argv[]) {
   cl::ParseCommandLineOptions(argc, argv);
   sys::PrintStackTraceOnErrorSignal();
   PrettyStackTraceProgram X(argc, argv);
-  llvm_shutdown_obj Y;  // Call llvm_shutdown() on exit.
+  llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
 
-// Process the input file  
+  // Process the input file
   OwningPtr<MemoryBuffer> buf;
 
-// TODO: If this is an archive, then burst it and dump each entry
-  if (error_code ec = MemoryBuffer::getFileOrSTDIN(InputFilename, buf))
-    llvm::errs() << "Error: '" << ec.message() << "' opening file '" 
-              << InputFilename << "'" << endl;
-  else {
-    ec = coff2yaml(llvm::outs(), buf.take());
+  // TODO: If this is an archive, then burst it and dump each entry
+  if (error_code ec = MemoryBuffer::getFileOrSTDIN(InputFilename, buf)) {
+    errs() << "Error: '" << ec.message() << "' opening file '" << InputFilename
+           << "'\n";
+  } else {
+    ec = coff2yaml(outs(), buf.take());
     if (ec)
-      llvm::errs() << "Error: " << ec.message() << " dumping COFF file" << endl;
+      errs() << "Error: " << ec.message() << " dumping COFF file\n";
   }
 
   return 0;
diff --git a/tools/obj2yaml/obj2yaml.h b/tools/obj2yaml/obj2yaml.h
index 0bc376a..7d52a2d 100644
--- a/tools/obj2yaml/obj2yaml.h
+++ b/tools/obj2yaml/obj2yaml.h
@@ -10,15 +10,15 @@
 // source file, implement it.
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_UTILS_OBJ2YAML_H
-#define LLVM_UTILS_OBJ2YAML_H
+#ifndef LLVM_TOOLS_OBJ2YAML_H
+#define LLVM_TOOLS_OBJ2YAML_H
 
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/system_error.h"
 
-namespace yaml {  // routines for writing YAML
+namespace objyaml {  // routines for writing YAML
 // Write a hex stream:
 //    <Prefix> !hex: "<hex digits>" #|<ASCII chars>\n
   llvm::raw_ostream &writeHexStream
diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp
index ba82bde..e385d7f5 100644
--- a/tools/opt/opt.cpp
+++ b/tools/opt/opt.cpp
@@ -589,7 +589,7 @@ int main(int argc, char **argv) {
   SMDiagnostic Err;
 
   // Load the input module...
-  std::auto_ptr<Module> M;
+  OwningPtr<Module> M;
   M.reset(ParseIRFile(InputFilename, Err, Context));
 
   if (M.get() == 0) {
@@ -656,7 +656,7 @@ int main(int argc, char **argv) {
   TargetMachine *Machine = 0;
   if (ModuleTriple.getArch())
     Machine = GetTargetMachine(Triple(ModuleTriple));
-  std::auto_ptr<TargetMachine> TM(Machine);
+  OwningPtr<TargetMachine> TM(Machine);
 
   // Add internal analysis passes from the target machine.
   if (TM.get())
diff --git a/utils/yaml2obj/CMakeLists.txt b/tools/yaml2obj/CMakeLists.txt
index f8b1197..f8b1197 100644
--- a/utils/yaml2obj/CMakeLists.txt
+++ b/tools/yaml2obj/CMakeLists.txt
diff --git a/utils/yaml2obj/Makefile b/tools/yaml2obj/Makefile
index e746d85..cb6f477 100644
--- a/utils/yaml2obj/Makefile
+++ b/tools/yaml2obj/Makefile
@@ -9,7 +9,7 @@
 
 LEVEL = ../..
 TOOLNAME = yaml2obj
-USEDLIBS = LLVMSupport.a
+LINK_COMPONENTS := support
 
 # This tool has no plugins, optimize startup time.
 TOOL_NO_EXPORTS = 1
diff --git a/utils/yaml2obj/yaml2obj.cpp b/tools/yaml2obj/yaml2obj.cpp
index 17b65ae..707b6b4 100644
--- a/utils/yaml2obj/yaml2obj.cpp
+++ b/tools/yaml2obj/yaml2obj.cpp
@@ -37,116 +37,38 @@ using namespace llvm;
 static cl::opt<std::string>
   Input(cl::Positional, cl::desc("<input>"), cl::init("-"));
 
-template<class T>
-typename llvm::enable_if_c<std::numeric_limits<T>::is_integer, bool>::type
-getAs(const llvm::yaml::ScalarNode *SN, T &Result) {
-  SmallString<4> Storage;
-  StringRef Value = SN->getValue(Storage);
-  if (Value.getAsInteger(0, Result))
-    return false;
-  return true;
-}
-
-// Given a container with begin and end with ::value_type of a character type.
-// Iterate through pairs of characters in the the set of [a-fA-F0-9] ignoring
-// all other characters.
-struct hex_pair_iterator {
-  StringRef::const_iterator Current, End;
-  typedef SmallVector<char, 2> value_type;
-  value_type Pair;
-  bool IsDone;
-
-  hex_pair_iterator(StringRef C)
-    : Current(C.begin()), End(C.end()), IsDone(false) {
-    // Initalize Pair.
-    ++*this;
-  }
-
-  // End iterator.
-  hex_pair_iterator() : Current(), End(), IsDone(true) {}
-
-  value_type operator *() const {
-    return Pair;
-  }
-
-  hex_pair_iterator operator ++() {
-    // We're at the end of the input.
-    if (Current == End) {
-      IsDone = true;
-      return *this;
-    }
-    Pair = value_type();
-    for (; Current != End && Pair.size() != 2; ++Current) {
-      // Is a valid hex digit.
-      if ((*Current >= '0' && *Current <= '9') ||
-          (*Current >= 'a' && *Current <= 'f') ||
-          (*Current >= 'A' && *Current <= 'F'))
-        Pair.push_back(*Current);
-    }
-    // Hit the end without getting 2 hex digits. Pair is invalid.
-    if (Pair.size() != 2)
-      IsDone = true;
-    return *this;
-  }
-
-  bool operator ==(const hex_pair_iterator Other) {
-    return (IsDone == Other.IsDone) ||
-           (Current == Other.Current && End == Other.End);
-  }
-
-  bool operator !=(const hex_pair_iterator Other) {
-    return !(*this == Other);
-  }
-};
-
-template <class ContainerOut>
-static bool hexStringToByteArray(StringRef Str, ContainerOut &Out) {
-  for (hex_pair_iterator I(Str), E; I != E; ++I) {
-    typename hex_pair_iterator::value_type Pair = *I;
-    typename ContainerOut::value_type Byte;
-    if (StringRef(Pair.data(), 2).getAsInteger(16, Byte))
-      return false;
-    Out.push_back(Byte);
-  }
-  return true;
-}
-
 // The structure of the yaml files is not an exact 1:1 match to COFF. In order
 // to use yaml::IO, we use these structures which are closer to the source.
 namespace COFFYAML {
-  struct Relocation {
-    uint32_t VirtualAddress;
-    uint32_t SymbolTableIndex;
-    COFF::RelocationTypeX86 Type;
-  };
-
   struct Section {
-    std::vector<COFF::SectionCharacteristics> Characteristics;
+    COFF::section Header;
+    unsigned Alignment;
     StringRef SectionData;
-    std::vector<Relocation> Relocations;
+    std::vector<COFF::relocation> Relocations;
     StringRef Name;
-  };
-
-  struct Header {
-    COFF::MachineTypes Machine;
-    std::vector<COFF::Characteristics> Characteristics;
+    Section() : Alignment(0) { // don't leave Alignment indeterminate
+      memset(&Header, 0, sizeof(COFF::section)); // zero the raw header
+    }
   };
 
   struct Symbol {
+    COFF::symbol Header;
     COFF::SymbolBaseType SimpleType;
-    uint8_t NumberOfAuxSymbols;
-    StringRef Name;
-    COFF::SymbolStorageClass StorageClass;
-    StringRef AuxillaryData;
     COFF::SymbolComplexType ComplexType;
-    uint32_t Value;
-    uint16_t SectionNumber;
+    StringRef AuxiliaryData;
+    StringRef Name;
+    Symbol() {
+      memset(&Header, 0, sizeof(COFF::symbol));
+    }
   };
 
   struct Object {
-    Header HeaderData;
+    COFF::header Header;
     std::vector<Section> Sections;
     std::vector<Symbol> Symbols;
+    Object() {
+      memset(&Header, 0, sizeof(COFF::header));
+    }
   };
 }
 
@@ -154,35 +76,20 @@ namespace COFFYAML {
 /// See docs/yaml2obj for the yaml scheema.
 struct COFFParser {
   COFFParser(COFFYAML::Object &Obj) : Obj(Obj) {
-    std::memset(&Header, 0, sizeof(Header));
     // A COFF string table always starts with a 4 byte size field. Offsets into
     // it include this size, so allocate it now.
     StringTable.append(4, 0);
   }
 
-  void parseHeader() {
-    Header.Machine = Obj.HeaderData.Machine;
-
-    const std::vector<COFF::Characteristics> &Characteristics =
-      Obj.HeaderData.Characteristics;
-    for (std::vector<COFF::Characteristics>::const_iterator I =
-           Characteristics.begin(), E = Characteristics.end(); I != E; ++I) {
-      uint16_t Characteristic = *I;
-      Header.Characteristics |= Characteristic;
-    }
-  }
-
   bool parseSections() {
     for (std::vector<COFFYAML::Section>::iterator i = Obj.Sections.begin(),
            e = Obj.Sections.end(); i != e; ++i) {
-      const COFFYAML::Section &YamlSection = *i;
-      Section Sec;
-      std::memset(&Sec.Header, 0, sizeof(Sec.Header));
+      COFFYAML::Section &Sec = *i;
 
       // If the name is less than 8 bytes, store it in place, otherwise
       // store it in the string table.
-      StringRef Name = YamlSection.Name;
-      std::fill_n(Sec.Header.Name, unsigned(COFF::NameSize), 0);
+      StringRef Name = Sec.Name;
+
       if (Name.size() <= COFF::NameSize) {
         std::copy(Name.begin(), Name.end(), Sec.Header.Name);
       } else {
@@ -197,20 +104,7 @@ struct COFFParser {
         std::copy(str.begin(), str.end(), Sec.Header.Name + 1);
       }
 
-      for (std::vector<COFF::SectionCharacteristics>::const_iterator i =
-             YamlSection.Characteristics.begin(),
-             e = YamlSection.Characteristics.end();
-           i != e; ++i) {
-        uint32_t Characteristic = *i;
-        Sec.Header.Characteristics |= Characteristic;
-      }
-
-      StringRef Data = YamlSection.SectionData;
-      if (!hexStringToByteArray(Data, Sec.Data)) {
-        errs() << "SectionData must be a collection of pairs of hex bytes";
-        return false;
-      }
-      Sections.push_back(Sec);
+      Sec.Header.Characteristics |= (Log2_32(Sec.Alignment) + 1) << 20;
     }
     return true;
   }
@@ -218,14 +112,11 @@ struct COFFParser {
   bool parseSymbols() {
     for (std::vector<COFFYAML::Symbol>::iterator i = Obj.Symbols.begin(),
            e = Obj.Symbols.end(); i != e; ++i) {
-      COFFYAML::Symbol YamlSymbol = *i;
-      Symbol Sym;
-      std::memset(&Sym.Header, 0, sizeof(Sym.Header));
+      COFFYAML::Symbol &Sym = *i;
 
       // If the name is less than 8 bytes, store it in place, otherwise
       // store it in the string table.
-      StringRef Name = YamlSymbol.Name;
-      std::fill_n(Sym.Header.Name, unsigned(COFF::NameSize), 0);
+      StringRef Name = Sym.Name;
       if (Name.size() <= COFF::NameSize) {
         std::copy(Name.begin(), Name.end(), Sym.Header.Name);
       } else {
@@ -235,24 +126,13 @@ struct COFFParser {
             Sym.Header.Name + 4) = Index;
       }
 
-      Sym.Header.Value = YamlSymbol.Value;
-      Sym.Header.Type |= YamlSymbol.SimpleType;
-      Sym.Header.Type |= YamlSymbol.ComplexType << COFF::SCT_COMPLEX_TYPE_SHIFT;
-      Sym.Header.StorageClass = YamlSymbol.StorageClass;
-      Sym.Header.SectionNumber = YamlSymbol.SectionNumber;
-
-      StringRef Data = YamlSymbol.AuxillaryData;
-      if (!hexStringToByteArray(Data, Sym.AuxSymbols)) {
-        errs() << "AuxillaryData must be a collection of pairs of hex bytes";
-        return false;
-      }
-      Symbols.push_back(Sym);
+      Sym.Header.Type = Sym.SimpleType;
+      Sym.Header.Type |= Sym.ComplexType << COFF::SCT_COMPLEX_TYPE_SHIFT;
     }
     return true;
   }
 
   bool parse() {
-    parseHeader();
     if (!parseSections())
       return false;
     if (!parseSymbols())
@@ -273,21 +153,7 @@ struct COFFParser {
   }
 
   COFFYAML::Object &Obj;
-  COFF::header Header;
-
-  struct Section {
-    COFF::section Header;
-    std::vector<uint8_t> Data;
-    std::vector<COFF::relocation> Relocations;
-  };
 
-  struct Symbol {
-    COFF::symbol Header;
-    std::vector<uint8_t> AuxSymbols;
-  };
-
-  std::vector<Section> Sections;
-  std::vector<Symbol> Symbols;
   StringMap<unsigned> StringTableMap;
   std::string StringTable;
 };
@@ -300,19 +166,25 @@ static bool layoutCOFF(COFFParser &CP) {
 
   // The section table starts immediately after the header, including the
   // optional header.
-  SectionTableStart = sizeof(COFF::header) + CP.Header.SizeOfOptionalHeader;
-  SectionTableSize = sizeof(COFF::section) * CP.Sections.size();
+  SectionTableStart = sizeof(COFF::header) + CP.Obj.Header.SizeOfOptionalHeader;
+  SectionTableSize = sizeof(COFF::section) * CP.Obj.Sections.size();
 
   uint32_t CurrentSectionDataOffset = SectionTableStart + SectionTableSize;
 
   // Assign each section data address consecutively.
-  for (std::vector<COFFParser::Section>::iterator i = CP.Sections.begin(),
-                                                  e = CP.Sections.end();
-                                                  i != e; ++i) {
-    if (!i->Data.empty()) {
-      i->Header.SizeOfRawData = i->Data.size();
+  for (std::vector<COFFYAML::Section>::iterator i = CP.Obj.Sections.begin(),
+                                                e = CP.Obj.Sections.end();
+                                                i != e; ++i) {
+    if (!i->SectionData.empty()) {
+      i->Header.SizeOfRawData = i->SectionData.size()/2;
       i->Header.PointerToRawData = CurrentSectionDataOffset;
       CurrentSectionDataOffset += i->Header.SizeOfRawData;
+      if (!i->Relocations.empty()) {
+        i->Header.PointerToRelocations = CurrentSectionDataOffset;
+        i->Header.NumberOfRelocations = i->Relocations.size();
+        CurrentSectionDataOffset += i->Header.NumberOfRelocations *
+          COFF::RelocationSize;
+      }
       // TODO: Handle alignment.
     } else {
       i->Header.SizeOfRawData = 0;
@@ -324,21 +196,22 @@ static bool layoutCOFF(COFFParser &CP) {
 
   // Calculate number of symbols.
   uint32_t NumberOfSymbols = 0;
-  for (std::vector<COFFParser::Symbol>::iterator i = CP.Symbols.begin(),
-                                                 e = CP.Symbols.end();
-                                                 i != e; ++i) {
-    if (i->AuxSymbols.size() % COFF::SymbolSize != 0) {
-      errs() << "AuxillaryData size not a multiple of symbol size!\n";
+  for (std::vector<COFFYAML::Symbol>::iterator i = CP.Obj.Symbols.begin(),
+                                               e = CP.Obj.Symbols.end();
+                                               i != e; ++i) {
+    unsigned AuxBytes = i->AuxiliaryData.size() / 2;
+    if (AuxBytes % COFF::SymbolSize != 0) {
+      errs() << "AuxiliaryData size not a multiple of symbol size!\n";
       return false;
     }
-    i->Header.NumberOfAuxSymbols = i->AuxSymbols.size() / COFF::SymbolSize;
+    i->Header.NumberOfAuxSymbols = AuxBytes / COFF::SymbolSize;
     NumberOfSymbols += 1 + i->Header.NumberOfAuxSymbols;
   }
 
   // Store all the allocated start addresses in the header.
-  CP.Header.NumberOfSections = CP.Sections.size();
-  CP.Header.NumberOfSymbols = NumberOfSymbols;
-  CP.Header.PointerToSymbolTable = SymbolTableStart;
+  CP.Obj.Header.NumberOfSections = CP.Obj.Sections.size();
+  CP.Obj.Header.NumberOfSymbols = NumberOfSymbols;
+  CP.Obj.Header.PointerToSymbolTable = SymbolTableStart;
 
   *reinterpret_cast<support::ulittle32_t *>(&CP.StringTable[0])
     = CP.StringTable.size();
@@ -367,19 +240,34 @@ binary_le_impl<value_type> binary_le(value_type V) {
   return binary_le_impl<value_type>(V);
 }
 
-void writeCOFF(COFFParser &CP, raw_ostream &OS) {
-  OS << binary_le(CP.Header.Machine)
-     << binary_le(CP.Header.NumberOfSections)
-     << binary_le(CP.Header.TimeDateStamp)
-     << binary_le(CP.Header.PointerToSymbolTable)
-     << binary_le(CP.Header.NumberOfSymbols)
-     << binary_le(CP.Header.SizeOfOptionalHeader)
-     << binary_le(CP.Header.Characteristics);
+static bool writeHexData(StringRef Data, raw_ostream &OS) {
+  unsigned Size = Data.size();
+  if (Size % 2)
+    return false;
+
+  for (unsigned I = 0; I != Size; I += 2) {
+    uint8_t Byte;
+    if (Data.substr(I,  2).getAsInteger(16, Byte))
+      return false;
+    OS.write(Byte);
+  }
+
+  return true;
+}
+
+bool writeCOFF(COFFParser &CP, raw_ostream &OS) {
+  OS << binary_le(CP.Obj.Header.Machine)
+     << binary_le(CP.Obj.Header.NumberOfSections)
+     << binary_le(CP.Obj.Header.TimeDateStamp)
+     << binary_le(CP.Obj.Header.PointerToSymbolTable)
+     << binary_le(CP.Obj.Header.NumberOfSymbols)
+     << binary_le(CP.Obj.Header.SizeOfOptionalHeader)
+     << binary_le(CP.Obj.Header.Characteristics);
 
   // Output section table.
-  for (std::vector<COFFParser::Section>::const_iterator i = CP.Sections.begin(),
-                                                        e = CP.Sections.end();
-                                                        i != e; ++i) {
+  for (std::vector<COFFYAML::Section>::iterator i = CP.Obj.Sections.begin(),
+                                                e = CP.Obj.Sections.end();
+                                                i != e; ++i) {
     OS.write(i->Header.Name, COFF::NameSize);
     OS << binary_le(i->Header.VirtualSize)
        << binary_le(i->Header.VirtualAddress)
@@ -393,41 +281,119 @@ void writeCOFF(COFFParser &CP, raw_ostream &OS) {
   }
 
   // Output section data.
-  for (std::vector<COFFParser::Section>::const_iterator i = CP.Sections.begin(),
-                                                        e = CP.Sections.end();
-                                                        i != e; ++i) {
-    if (!i->Data.empty())
-      OS.write(reinterpret_cast<const char*>(&i->Data[0]), i->Data.size());
+  for (std::vector<COFFYAML::Section>::iterator i = CP.Obj.Sections.begin(),
+                                                e = CP.Obj.Sections.end();
+                                                i != e; ++i) {
+    if (!i->SectionData.empty()) {
+      if (!writeHexData(i->SectionData, OS)) {
+        errs() << "SectionData must be a collection of pairs of hex bytes";
+        return false;
+      }
+    }
+    for (unsigned I2 = 0, E2 = i->Relocations.size(); I2 != E2; ++I2) {
+      const COFF::relocation &R = i->Relocations[I2];
+      OS << binary_le(R.VirtualAddress)
+         << binary_le(R.SymbolTableIndex)
+         << binary_le(R.Type);
+    }
   }
 
   // Output symbol table.
 
-  for (std::vector<COFFParser::Symbol>::const_iterator i = CP.Symbols.begin(),
-                                                       e = CP.Symbols.end();
-                                                       i != e; ++i) {
+  for (std::vector<COFFYAML::Symbol>::const_iterator i = CP.Obj.Symbols.begin(),
+                                                     e = CP.Obj.Symbols.end();
+                                                     i != e; ++i) {
     OS.write(i->Header.Name, COFF::NameSize);
     OS << binary_le(i->Header.Value)
        << binary_le(i->Header.SectionNumber)
        << binary_le(i->Header.Type)
        << binary_le(i->Header.StorageClass)
        << binary_le(i->Header.NumberOfAuxSymbols);
-    if (!i->AuxSymbols.empty())
-      OS.write( reinterpret_cast<const char*>(&i->AuxSymbols[0])
-              , i->AuxSymbols.size());
+    if (!i->AuxiliaryData.empty()) {
+      if (!writeHexData(i->AuxiliaryData, OS)) {
+        errs() << "AuxiliaryData must be a collection of pairs of hex bytes";
+        return false;
+      }
+    }
   }
 
   // Output string table.
   OS.write(&CP.StringTable[0], CP.StringTable.size());
+  return true;
 }
 
-LLVM_YAML_IS_SEQUENCE_VECTOR(COFFYAML::Relocation)
-LLVM_YAML_IS_SEQUENCE_VECTOR(COFF::SectionCharacteristics)
-LLVM_YAML_IS_SEQUENCE_VECTOR(COFF::Characteristics)
+LLVM_YAML_IS_SEQUENCE_VECTOR(COFF::relocation)
 LLVM_YAML_IS_SEQUENCE_VECTOR(COFFYAML::Section)
 LLVM_YAML_IS_SEQUENCE_VECTOR(COFFYAML::Symbol)
 
 namespace llvm {
+
+namespace COFF {
+  Characteristics operator|(Characteristics a, Characteristics b) {
+    uint32_t Ret = static_cast<uint32_t>(a) | static_cast<uint32_t>(b);
+    return static_cast<Characteristics>(Ret);
+  }
+
+  SectionCharacteristics
+  operator|(SectionCharacteristics a, SectionCharacteristics b) {
+    uint32_t Ret = static_cast<uint32_t>(a) | static_cast<uint32_t>(b);
+    return static_cast<SectionCharacteristics>(Ret);
+  }
+}
+
 namespace yaml {
+
+#define BCase(X) IO.bitSetCase(Value, #X, COFF::X);
+
+template <>
+struct ScalarBitSetTraits<COFF::SectionCharacteristics> {
+  static void bitset(IO &IO, COFF::SectionCharacteristics &Value) {
+    BCase(IMAGE_SCN_TYPE_NO_PAD);
+    BCase(IMAGE_SCN_CNT_CODE);
+    BCase(IMAGE_SCN_CNT_INITIALIZED_DATA);
+    BCase(IMAGE_SCN_CNT_UNINITIALIZED_DATA);
+    BCase(IMAGE_SCN_LNK_OTHER);
+    BCase(IMAGE_SCN_LNK_INFO);
+    BCase(IMAGE_SCN_LNK_REMOVE);
+    BCase(IMAGE_SCN_LNK_COMDAT);
+    BCase(IMAGE_SCN_GPREL);
+    BCase(IMAGE_SCN_MEM_PURGEABLE);
+    BCase(IMAGE_SCN_MEM_16BIT);
+    BCase(IMAGE_SCN_MEM_LOCKED);
+    BCase(IMAGE_SCN_MEM_PRELOAD);
+    BCase(IMAGE_SCN_LNK_NRELOC_OVFL);
+    BCase(IMAGE_SCN_MEM_DISCARDABLE);
+    BCase(IMAGE_SCN_MEM_NOT_CACHED);
+    BCase(IMAGE_SCN_MEM_NOT_PAGED);
+    BCase(IMAGE_SCN_MEM_SHARED);
+    BCase(IMAGE_SCN_MEM_EXECUTE);
+    BCase(IMAGE_SCN_MEM_READ);
+    BCase(IMAGE_SCN_MEM_WRITE);
+  }
+};
+
+template <>
+struct ScalarBitSetTraits<COFF::Characteristics> {
+  static void bitset(IO &IO, COFF::Characteristics &Value) {
+    BCase(IMAGE_FILE_RELOCS_STRIPPED);
+    BCase(IMAGE_FILE_EXECUTABLE_IMAGE);
+    BCase(IMAGE_FILE_LINE_NUMS_STRIPPED);
+    BCase(IMAGE_FILE_LOCAL_SYMS_STRIPPED);
+    BCase(IMAGE_FILE_AGGRESSIVE_WS_TRIM);
+    BCase(IMAGE_FILE_LARGE_ADDRESS_AWARE);
+    BCase(IMAGE_FILE_BYTES_REVERSED_LO);
+    BCase(IMAGE_FILE_32BIT_MACHINE);
+    BCase(IMAGE_FILE_DEBUG_STRIPPED);
+    BCase(IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP);
+    BCase(IMAGE_FILE_NET_RUN_FROM_SWAP);
+    BCase(IMAGE_FILE_SYSTEM);
+    BCase(IMAGE_FILE_DLL);
+    BCase(IMAGE_FILE_UP_SYSTEM_ONLY);
+    BCase(IMAGE_FILE_BYTES_REVERSED_HI);
+  }
+};
+#undef BCase
+
 #define ECase(X) IO.enumCase(Value, #X, COFF::X);
 
 template <>
@@ -440,8 +406,6 @@ struct ScalarEnumerationTraits<COFF::SymbolComplexType> {
   }
 };
 
-// FIXME: We cannot use ScalarBitSetTraits because of
-// IMAGE_SYM_CLASS_END_OF_FUNCTION which is -1.
 template <>
 struct ScalarEnumerationTraits<COFF::SymbolStorageClass> {
   static void enumeration(IO &IO, COFF::SymbolStorageClass &Value) {
@@ -525,68 +489,6 @@ struct ScalarEnumerationTraits<COFF::MachineTypes> {
 };
 
 template <>
-struct ScalarEnumerationTraits<COFF::Characteristics> {
-  static void enumeration(IO &IO, COFF::Characteristics &Value) {
-    ECase(IMAGE_FILE_RELOCS_STRIPPED);
-    ECase(IMAGE_FILE_EXECUTABLE_IMAGE);
-    ECase(IMAGE_FILE_LINE_NUMS_STRIPPED);
-    ECase(IMAGE_FILE_LOCAL_SYMS_STRIPPED);
-    ECase(IMAGE_FILE_AGGRESSIVE_WS_TRIM);
-    ECase(IMAGE_FILE_LARGE_ADDRESS_AWARE);
-    ECase(IMAGE_FILE_BYTES_REVERSED_LO);
-    ECase(IMAGE_FILE_32BIT_MACHINE);
-    ECase(IMAGE_FILE_DEBUG_STRIPPED);
-    ECase(IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP);
-    ECase(IMAGE_FILE_NET_RUN_FROM_SWAP);
-    ECase(IMAGE_FILE_SYSTEM);
-    ECase(IMAGE_FILE_DLL);
-    ECase(IMAGE_FILE_UP_SYSTEM_ONLY);
-    ECase(IMAGE_FILE_BYTES_REVERSED_HI);
-  }
-};
-
-template <>
-struct ScalarEnumerationTraits<COFF::SectionCharacteristics> {
-  static void enumeration(IO &IO, COFF::SectionCharacteristics &Value) {
-    ECase(IMAGE_SCN_TYPE_NO_PAD);
-    ECase(IMAGE_SCN_CNT_CODE);
-    ECase(IMAGE_SCN_CNT_INITIALIZED_DATA);
-    ECase(IMAGE_SCN_CNT_UNINITIALIZED_DATA);
-    ECase(IMAGE_SCN_LNK_OTHER);
-    ECase(IMAGE_SCN_LNK_INFO);
-    ECase(IMAGE_SCN_LNK_REMOVE);
-    ECase(IMAGE_SCN_LNK_COMDAT);
-    ECase(IMAGE_SCN_GPREL);
-    ECase(IMAGE_SCN_MEM_PURGEABLE);
-    ECase(IMAGE_SCN_MEM_16BIT);
-    ECase(IMAGE_SCN_MEM_LOCKED);
-    ECase(IMAGE_SCN_MEM_PRELOAD);
-    ECase(IMAGE_SCN_ALIGN_1BYTES);
-    ECase(IMAGE_SCN_ALIGN_2BYTES);
-    ECase(IMAGE_SCN_ALIGN_4BYTES);
-    ECase(IMAGE_SCN_ALIGN_8BYTES);
-    ECase(IMAGE_SCN_ALIGN_16BYTES);
-    ECase(IMAGE_SCN_ALIGN_32BYTES);
-    ECase(IMAGE_SCN_ALIGN_64BYTES);
-    ECase(IMAGE_SCN_ALIGN_128BYTES);
-    ECase(IMAGE_SCN_ALIGN_256BYTES);
-    ECase(IMAGE_SCN_ALIGN_512BYTES);
-    ECase(IMAGE_SCN_ALIGN_1024BYTES);
-    ECase(IMAGE_SCN_ALIGN_2048BYTES);
-    ECase(IMAGE_SCN_ALIGN_4096BYTES);
-    ECase(IMAGE_SCN_ALIGN_8192BYTES);
-    ECase(IMAGE_SCN_LNK_NRELOC_OVFL);
-    ECase(IMAGE_SCN_MEM_DISCARDABLE);
-    ECase(IMAGE_SCN_MEM_NOT_CACHED);
-    ECase(IMAGE_SCN_MEM_NOT_PAGED);
-    ECase(IMAGE_SCN_MEM_SHARED);
-    ECase(IMAGE_SCN_MEM_EXECUTE);
-    ECase(IMAGE_SCN_MEM_READ);
-    ECase(IMAGE_SCN_MEM_WRITE);
-  }
-};
-
-template <>
 struct ScalarEnumerationTraits<COFF::RelocationTypeX86> {
   static void enumeration(IO &IO, COFF::RelocationTypeX86 &Value) {
     ECase(IMAGE_REL_I386_ABSOLUTE);
@@ -624,30 +526,84 @@ struct ScalarEnumerationTraits<COFF::RelocationTypeX86> {
 
 template <>
 struct MappingTraits<COFFYAML::Symbol> {
+  struct NStorageClass {
+    NStorageClass(IO&) : StorageClass(COFF::SymbolStorageClass(0)) {
+    }
+    NStorageClass(IO&, uint8_t S) : StorageClass(COFF::SymbolStorageClass(S)) {
+    }
+    uint8_t denormalize(IO &) {
+      return StorageClass;
+    }
+
+    COFF::SymbolStorageClass StorageClass;
+  };
+
   static void mapping(IO &IO, COFFYAML::Symbol &S) {
+    MappingNormalization<NStorageClass, uint8_t> NS(IO, S.Header.StorageClass);
+
     IO.mapRequired("SimpleType", S.SimpleType);
-    IO.mapOptional("NumberOfAuxSymbols", S.NumberOfAuxSymbols);
+    IO.mapOptional("NumberOfAuxSymbols", S.Header.NumberOfAuxSymbols);
     IO.mapRequired("Name", S.Name);
-    IO.mapRequired("StorageClass", S.StorageClass);
-    IO.mapOptional("AuxillaryData", S.AuxillaryData); // FIXME: typo
+    IO.mapRequired("StorageClass", NS->StorageClass);
+    IO.mapOptional("AuxiliaryData", S.AuxiliaryData);
     IO.mapRequired("ComplexType", S.ComplexType);
-    IO.mapRequired("Value", S.Value);
-    IO.mapRequired("SectionNumber", S.SectionNumber);
+    IO.mapRequired("Value", S.Header.Value);
+    IO.mapRequired("SectionNumber", S.Header.SectionNumber);
   }
 };
 
 template <>
-struct MappingTraits<COFFYAML::Header> {
-  static void mapping(IO &IO, COFFYAML::Header &H) {
-    IO.mapRequired("Machine", H.Machine);
-    IO.mapOptional("Characteristics", H.Characteristics);
+struct MappingTraits<COFF::header> {
+  struct NMachine {
+    NMachine(IO&) : Machine(COFF::MachineTypes(0)) {
+    }
+    NMachine(IO&, uint16_t M) : Machine(COFF::MachineTypes(M)) {
+    }
+    uint16_t denormalize(IO &) {
+      return Machine;
+    }
+    COFF::MachineTypes Machine;
+  };
+
+  struct NCharacteristics {
+    NCharacteristics(IO&) : Characteristics(COFF::Characteristics(0)) {
+    }
+    NCharacteristics(IO&, uint16_t C) :
+      Characteristics(COFF::Characteristics(C)) {
+    }
+    uint16_t denormalize(IO &) {
+      return Characteristics;
+    }
+
+    COFF::Characteristics Characteristics;
+  };
+
+  static void mapping(IO &IO, COFF::header &H) {
+    MappingNormalization<NMachine, uint16_t> NM(IO, H.Machine);
+    MappingNormalization<NCharacteristics, uint16_t> NC(IO, H.Characteristics);
+
+    IO.mapRequired("Machine", NM->Machine);
+    IO.mapOptional("Characteristics", NC->Characteristics);
   }
 };
 
 template <>
-struct MappingTraits<COFFYAML::Relocation> {
-  static void mapping(IO &IO, COFFYAML::Relocation &Rel) {
-    IO.mapRequired("Type", Rel.Type);
+struct MappingTraits<COFF::relocation> {
+  struct NType {
+    NType(IO &) : Type(COFF::RelocationTypeX86(0)) {
+    }
+    NType(IO &, uint16_t T) : Type(COFF::RelocationTypeX86(T)) {
+    }
+    uint16_t denormalize(IO &) {
+      return Type;
+    }
+    COFF::RelocationTypeX86 Type;
+  };
+
+  static void mapping(IO &IO, COFF::relocation &Rel) {
+    MappingNormalization<NType, uint16_t> NT(IO, Rel.Type);
+
+    IO.mapRequired("Type", NT->Type);
     IO.mapRequired("VirtualAddress", Rel.VirtualAddress);
     IO.mapRequired("SymbolTableIndex", Rel.SymbolTableIndex);
   }
@@ -655,11 +611,26 @@ struct MappingTraits<COFFYAML::Relocation> {
 
 template <>
 struct MappingTraits<COFFYAML::Section> {
+  struct NCharacteristics {
+    NCharacteristics(IO &) : Characteristics(COFF::SectionCharacteristics(0)) {
+    }
+    NCharacteristics(IO &, uint32_t C) :
+      Characteristics(COFF::SectionCharacteristics(C)) {
+    }
+    uint32_t denormalize(IO &) {
+      return Characteristics;
+    }
+    COFF::SectionCharacteristics Characteristics;
+  };
+
   static void mapping(IO &IO, COFFYAML::Section &Sec) {
+    MappingNormalization<NCharacteristics, uint32_t> NC(IO,
+                                                    Sec.Header.Characteristics);
     IO.mapOptional("Relocations", Sec.Relocations);
     IO.mapRequired("SectionData", Sec.SectionData);
-    IO.mapRequired("Characteristics", Sec.Characteristics);
+    IO.mapRequired("Characteristics", NC->Characteristics);
     IO.mapRequired("Name", Sec.Name);
+    IO.mapOptional("Alignment", Sec.Alignment);
   }
 };
 
@@ -667,7 +638,7 @@ template <>
 struct MappingTraits<COFFYAML::Object> {
   static void mapping(IO &IO, COFFYAML::Object &Obj) {
     IO.mapRequired("sections", Obj.Sections);
-    IO.mapRequired("header", Obj.HeaderData);
+    IO.mapRequired("header", Obj.Header);
     IO.mapRequired("symbols", Obj.Symbols);
   }
 };
@@ -702,5 +673,8 @@ int main(int argc, char **argv) {
     errs() << "yaml2obj: Failed to layout COFF file!\n";
     return 1;
   }
-  writeCOFF(CP, outs());
+  if (!writeCOFF(CP, outs())) {
+    errs() << "yaml2obj: Failed to write COFF file!\n";
+    return 1;
+  }
 }
diff --git a/unittests/CMakeLists.txt b/unittests/CMakeLists.txt
index a3f8bf3..4b7e418 100644
--- a/unittests/CMakeLists.txt
+++ b/unittests/CMakeLists.txt
@@ -13,3 +13,4 @@ add_subdirectory(Option)
 add_subdirectory(Support)
 add_subdirectory(Transforms)
 add_subdirectory(IR)
+add_subdirectory(DebugInfo)
diff --git a/unittests/DebugInfo/CMakeLists.txt b/unittests/DebugInfo/CMakeLists.txt
new file mode 100644
index 0000000..ec580b7
--- /dev/null
+++ b/unittests/DebugInfo/CMakeLists.txt
@@ -0,0 +1,13 @@
+set(LLVM_LINK_COMPONENTS
+  debuginfo
+  object
+  support
+  )
+
+set(DebugInfoSources
+  DWARFFormValueTest.cpp
+  )
+
+add_llvm_unittest(DebugInfoTests
+  ${DebugInfoSources}
+  )
diff --git a/unittests/DebugInfo/DWARFFormValueTest.cpp b/unittests/DebugInfo/DWARFFormValueTest.cpp
new file mode 100644
index 0000000..04b859b
--- /dev/null
+++ b/unittests/DebugInfo/DWARFFormValueTest.cpp
@@ -0,0 +1,31 @@
+//===- llvm/unittest/DebugInfo/DWARFFormValueTest.cpp ---------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/DWARFFormValue.h"
+#include "llvm/Support/Dwarf.h"
+#include "gtest/gtest.h"
+using namespace llvm;
+using namespace dwarf;
+
+namespace {
+
+TEST(DWARFFormValue, FixedFormSizes) {
+  // Size of DW_FORM_addr and DW_FORM_ref_addr are equal in DWARF2,
+  // DW_FORM_ref_addr is always 4 bytes in DWARF32 starting from DWARF3.
+  const uint8_t *sizes = DWARFFormValue::getFixedFormSizes(4, 2);
+  EXPECT_EQ(sizes[DW_FORM_addr], sizes[DW_FORM_ref_addr]);
+  sizes = DWARFFormValue::getFixedFormSizes(8, 2);
+  EXPECT_EQ(sizes[DW_FORM_addr], sizes[DW_FORM_ref_addr]);
+  sizes = DWARFFormValue::getFixedFormSizes(8, 3);
+  EXPECT_EQ(4, sizes[DW_FORM_ref_addr]);
+  // Check that we don't have fixed form sizes for weird address sizes.
+  EXPECT_EQ(0, DWARFFormValue::getFixedFormSizes(16, 2));
+}
+
+} // end anonymous namespace
diff --git a/unittests/DebugInfo/Makefile b/unittests/DebugInfo/Makefile
new file mode 100644
index 0000000..999ded9
--- /dev/null
+++ b/unittests/DebugInfo/Makefile
@@ -0,0 +1,16 @@
+##===- unittests/DebugInfo/Makefile ------------------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+TESTNAME = DebugInfo
+LINK_COMPONENTS := debuginfo object support
+
+include $(LEVEL)/Makefile.config
+
+include $(LLVM_SRC_ROOT)/unittests/Makefile.unittest
diff --git a/unittests/ExecutionEngine/JIT/JITEventListenerTest.cpp b/unittests/ExecutionEngine/JIT/JITEventListenerTest.cpp
index 6ba8bc4..87f4824 100644
--- a/unittests/ExecutionEngine/JIT/JITEventListenerTest.cpp
+++ b/unittests/ExecutionEngine/JIT/JITEventListenerTest.cpp
@@ -74,6 +74,8 @@ class JITEventListenerTest : public testing::Test {
   const OwningPtr<ExecutionEngine> EE;
 };
 
+// Tests on SystemZ disabled as we're running the old JIT
+#if !defined(__s390__)
 Function *buildFunction(Module *M) {
   Function *Result = Function::Create(
       TypeBuilder<int32_t(int32_t), false>::get(getGlobalContext()),
@@ -224,6 +226,7 @@ TEST_F(JITEventListenerTest, MatchesMachineCodeInfo) {
   EXPECT_EQ(1U, Listener.FreedEvents[0].Index);
   EXPECT_EQ(F_addr, Listener.FreedEvents[0].Code);
 }
+#endif
 
 class JITEnvironment : public testing::Environment {
   virtual void SetUp() {
diff --git a/unittests/ExecutionEngine/JIT/JITTest.cpp b/unittests/ExecutionEngine/JIT/JITTest.cpp
index 30dadc9..e6f4cb9 100644
--- a/unittests/ExecutionEngine/JIT/JITTest.cpp
+++ b/unittests/ExecutionEngine/JIT/JITTest.cpp
@@ -35,6 +35,9 @@ using namespace llvm;
 
 namespace {
 
+// Tests on ARM, PowerPC and SystemZ disabled as we're running the old jit
+#if !defined(__arm__) && !defined(__powerpc__) && !defined(__s390__)
+
 Function *makeReturnGlobal(std::string Name, GlobalVariable *G, Module *M) {
   std::vector<Type*> params;
   FunctionType *FTy = FunctionType::get(G->getType()->getElementType(),
@@ -231,9 +234,6 @@ class JITTest : public testing::Test {
   OwningPtr<ExecutionEngine> TheJIT;
 };
 
-// Tests on ARM and PowerPC disabled as we're running the old jit
-#if !defined(__arm__) && !defined(__powerpc__)
-
 // Regression test for a bug.  The JIT used to allocate globals inside the same
 // memory block used for the function, and when the function code was freed,
 // the global was left in the same place.  This test allocates a function
@@ -302,8 +302,6 @@ TEST(JIT, GlobalInFunction) {
   EXPECT_EQ(3, *GPtr);
 }
 
-#endif // !defined(__arm__) && !defined(__powerpc__)
-
 // Regression test for a bug.  The JITEmitter wasn't checking to verify that
 // it hadn't run out of space while generating the DWARF exception information
 // for an emitted function.
@@ -348,8 +346,6 @@ int PlusOne(int arg) {
   return arg + 1;
 }
 
-// ARM and PowerPC tests disabled pending fix for PR10783.
-#if !defined(__arm__) && !defined(__powerpc__)
 TEST_F(JITTest, FarCallToKnownFunction) {
   // x86-64 can only make direct calls to functions within 32 bits of
   // the current PC.  To call anything farther away, we have to load
@@ -527,7 +523,6 @@ TEST_F(JITTest, ModuleDeletion) {
   EXPECT_EQ(RJMM->startExceptionTableCalls.size(),
             NumTablesDeallocated);
 }
-#endif // !defined(__arm__) && !defined(__powerpc__)
 
 // ARM, MIPS and PPC still emit stubs for calls since the target may be
 // too far away to call directly.  This #if can probably be removed when
@@ -573,9 +568,6 @@ TEST_F(JITTest, NoStubs) {
 }
 #endif  // !ARM && !PPC
 
-// Tests on ARM and PowerPC disabled as we're running the old jit
-#if !defined(__arm__) && !defined(__powerpc__)
-
 TEST_F(JITTest, FunctionPointersOutliveTheirCreator) {
   TheJIT->DisableLazyCompilation(true);
   LoadAssembly("define i8()* @get_foo_addr() { "
@@ -610,13 +602,9 @@ TEST_F(JITTest, FunctionPointersOutliveTheirCreator) {
 #endif
 }
 
-#endif //!defined(__arm__) && !defined(__powerpc__)
-
-// Tests on ARM and PowerPC disabled as we're running the old jit
-// In addition, ARM does not have an implementation
-// of replaceMachineCodeForFunction(), so recompileAndRelinkFunction
-// doesn't work.
-#if !defined(__arm__) && !defined(__powerpc__)
+// ARM does not have an implementation of replaceMachineCodeForFunction(),
+// so recompileAndRelinkFunction doesn't work.
+#if !defined(__arm__)
 TEST_F(JITTest, FunctionIsRecompiledAndRelinked) {
   Function *F = Function::Create(TypeBuilder<int(void), false>::get(Context),
                                  GlobalValue::ExternalLinkage, "test", M);
@@ -647,7 +635,7 @@ TEST_F(JITTest, FunctionIsRecompiledAndRelinked) {
   EXPECT_EQ(2, OrigFPtr())
     << "The old pointer's target should now jump to the new version";
 }
-#endif  // !defined(__arm__) && !defined(__powerpc__)
+#endif  // !defined(__arm__)
 
 }  // anonymous namespace
 // This variable is intentionally defined differently in the statically-compiled
@@ -657,9 +645,6 @@ extern "C" int32_t JITTest_AvailableExternallyGlobal;
 int32_t JITTest_AvailableExternallyGlobal LLVM_ATTRIBUTE_USED = 42;
 namespace {
 
-// Tests on ARM and PowerPC disabled as we're running the old jit
-#if !defined(__arm__) && !defined(__powerpc__)
-
 TEST_F(JITTest, AvailableExternallyGlobalIsntEmitted) {
   TheJIT->DisableLazyCompilation(true);
   LoadAssembly("@JITTest_AvailableExternallyGlobal = "
@@ -676,7 +661,6 @@ TEST_F(JITTest, AvailableExternallyGlobalIsntEmitted) {
   EXPECT_EQ(42, loader()) << "func should return 42 from the external global,"
                           << " not 7 from the IR version.";
 }
-#endif //!defined(__arm__) && !defined(__powerpc__)
 }  // anonymous namespace
 // This function is intentionally defined differently in the statically-compiled
 // program from the IR input to the JIT to assert that the JIT doesn't use its
@@ -687,8 +671,6 @@ extern "C" int32_t JITTest_AvailableExternallyFunction() {
 }
 namespace {
 
-// ARM and PowerPC tests disabled pending fix for PR10783.
-#if !defined(__arm__) && !defined(__powerpc__)
 TEST_F(JITTest, AvailableExternallyFunctionIsntCompiled) {
   TheJIT->DisableLazyCompilation(true);
   LoadAssembly("define available_externally i32 "
@@ -844,7 +826,7 @@ TEST(LazyLoadedJITTest, EagerCompiledRecursionThroughGhost) {
     (intptr_t)TheJIT->getPointerToFunction(recur1IR));
   EXPECT_EQ(3, recur1(4));
 }
-#endif // !defined(__arm__) && !defined(__powerpc__)
+#endif // !defined(__arm__) && !defined(__powerpc__) && !defined(__s390__)
 
 // This code is copied from JITEventListenerTest, but it only runs once for all
 // the tests in this directory.  Everything seems fine, but that's strange
diff --git a/unittests/ExecutionEngine/JIT/MultiJITTest.cpp b/unittests/ExecutionEngine/JIT/MultiJITTest.cpp
index 5301467..4018cd5 100644
--- a/unittests/ExecutionEngine/JIT/MultiJITTest.cpp
+++ b/unittests/ExecutionEngine/JIT/MultiJITTest.cpp
@@ -20,6 +20,9 @@ using namespace llvm;
 
 namespace {
 
+// ARM, PowerPC and SystemZ tests disabled pending fix for PR10783.
+#if !defined(__arm__) && !defined(__powerpc__) && !defined(__s390__)
+
 bool LoadAssemblyInto(Module *M, const char *assembly) {
   SMDiagnostic Error;
   bool success =
@@ -65,9 +68,6 @@ void createModule2(LLVMContext &Context2, Module *&M2, Function *&FooF2) {
   FooF2 = M2->getFunction("foo2");
 }
 
-// ARM and PowerPC tests disabled pending fix for PR10783.
-#if !defined(__arm__) && !defined(__powerpc__)
-
 TEST(MultiJitTest, EagerMode) {
   LLVMContext Context1;
   Module *M1 = 0;
@@ -176,6 +176,6 @@ TEST(MultiJitTest, JitPool) {
 #endif
   EXPECT_TRUE(sa == fa);
 }
-#endif  // !defined(__arm__) && !defined(__powerpc__)
+#endif  // !defined(__arm__) && !defined(__powerpc__) && !defined(__s390__)
 
 }  // anonymous namespace
diff --git a/unittests/ExecutionEngine/MCJIT/CMakeLists.txt b/unittests/ExecutionEngine/MCJIT/CMakeLists.txt
index c6b1f77..922cb7e 100644
--- a/unittests/ExecutionEngine/MCJIT/CMakeLists.txt
+++ b/unittests/ExecutionEngine/MCJIT/CMakeLists.txt
@@ -9,7 +9,9 @@ set(LLVM_LINK_COMPONENTS
 
 set(MCJITTestsSources
   MCJITTest.cpp
+  MCJITCAPITest.cpp
   MCJITMemoryManagerTest.cpp
+  MCJITObjectCacheTest.cpp
   )
 
 if(MSVC)
diff --git a/unittests/ExecutionEngine/MCJIT/MCJITCAPITest.cpp b/unittests/ExecutionEngine/MCJIT/MCJITCAPITest.cpp
new file mode 100644
index 0000000..07ea1af
--- /dev/null
+++ b/unittests/ExecutionEngine/MCJIT/MCJITCAPITest.cpp
@@ -0,0 +1,97 @@
+//===- MCJITCAPITest.cpp - Unit tests for the MCJIT C API -----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This test suite verifies basic MCJIT functionality when invoked from the C
+// API.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm-c/Analysis.h"
+#include "llvm-c/Core.h"
+#include "llvm-c/ExecutionEngine.h"
+#include "llvm-c/Target.h"
+#include "llvm-c/Transforms/Scalar.h"
+#include "llvm/Support/Host.h"
+#include "MCJITTestAPICommon.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+class MCJITCAPITest : public testing::Test, public MCJITTestAPICommon {
+protected:
+  MCJITCAPITest() {
+    // The architectures below are known to be compatible with MCJIT as they
+    // are copied from test/ExecutionEngine/MCJIT/lit.local.cfg and should be
+    // kept in sync.
+    SupportedArchs.push_back(Triple::arm);
+    SupportedArchs.push_back(Triple::mips);
+    SupportedArchs.push_back(Triple::x86);
+    SupportedArchs.push_back(Triple::x86_64);
+
+    // The operating systems below are known to be sufficiently incompatible
+    // that they will fail the MCJIT C API tests.
+    UnsupportedOSs.push_back(Triple::Cygwin);
+  }
+};
+
+TEST_F(MCJITCAPITest, simple_function) {
+  SKIP_UNSUPPORTED_PLATFORM;
+  
+  char *error = 0;
+  
+  // Creates a function that returns 42, compiles it, and runs it.
+  
+  LLVMModuleRef module = LLVMModuleCreateWithName("simple_module");
+
+  LLVMSetTarget(module, HostTriple.c_str());
+  
+  LLVMValueRef function = LLVMAddFunction(
+    module, "simple_function", LLVMFunctionType(LLVMInt32Type(), 0, 0, 0));
+  LLVMSetFunctionCallConv(function, LLVMCCallConv);
+  
+  LLVMBasicBlockRef entry = LLVMAppendBasicBlock(function, "entry");
+  LLVMBuilderRef builder = LLVMCreateBuilder();
+  LLVMPositionBuilderAtEnd(builder, entry);
+  LLVMBuildRet(builder, LLVMConstInt(LLVMInt32Type(), 42, 0));
+  
+  LLVMVerifyModule(module, LLVMAbortProcessAction, &error);
+  LLVMDisposeMessage(error);
+  
+  LLVMDisposeBuilder(builder);
+  
+  LLVMMCJITCompilerOptions options;
+  LLVMInitializeMCJITCompilerOptions(&options, sizeof(options));
+  options.OptLevel = 2;
+  
+  // Just ensure that this field still exists.
+  options.NoFramePointerElim = false;
+  
+  LLVMExecutionEngineRef engine;
+  ASSERT_EQ(
+    0, LLVMCreateMCJITCompilerForModule(&engine, module, &options,
+                                        sizeof(options), &error));
+  
+  LLVMPassManagerRef pass = LLVMCreatePassManager();
+  LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass);
+  LLVMAddConstantPropagationPass(pass);
+  LLVMAddInstructionCombiningPass(pass);
+  LLVMRunPassManager(pass, module);
+  LLVMDisposePassManager(pass);
+  
+  union {
+    void *raw;
+    int (*usable)();
+  } functionPointer;
+  functionPointer.raw = LLVMGetPointerToGlobal(engine, function);
+  
+  EXPECT_EQ(42, functionPointer.usable());
+  
+  LLVMDisposeExecutionEngine(engine);
+}
+
diff --git a/unittests/ExecutionEngine/MCJIT/MCJITObjectCacheTest.cpp b/unittests/ExecutionEngine/MCJIT/MCJITObjectCacheTest.cpp
new file mode 100644
index 0000000..0061e30
--- /dev/null
+++ b/unittests/ExecutionEngine/MCJIT/MCJITObjectCacheTest.cpp
@@ -0,0 +1,240 @@
+//===- MCJITObjectCacheTest.cpp - Unit tests for MCJIT object caching -----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/ExecutionEngine/JIT.h"
+#include "llvm/ExecutionEngine/MCJIT.h"
+#include "llvm/ExecutionEngine/ObjectCache.h"
+#include "llvm/ExecutionEngine/SectionMemoryManager.h"
+#include "MCJITTestBase.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+namespace {
+
+class TestObjectCache : public ObjectCache {
+public:
+  TestObjectCache() : DuplicateInserted(false) { }
+
+  virtual ~TestObjectCache() {
+    // Free any buffers we've allocated.
+    SmallVector<MemoryBuffer *, 2>::iterator it, end;
+    end = AllocatedBuffers.end();
+    for (it = AllocatedBuffers.begin(); it != end; ++it) {
+      delete *it;
+    }
+    AllocatedBuffers.clear();
+  }
+
+  virtual void notifyObjectCompiled(const Module *M, const MemoryBuffer *Obj) {
+    // If we've seen this module before, note that.
+    const std::string ModuleID = M->getModuleIdentifier();
+    if (ObjMap.find(ModuleID) != ObjMap.end())
+      DuplicateInserted = true;
+    // Store a copy of the buffer in our map.
+    ObjMap[ModuleID] = copyBuffer(Obj);
+  }
+
+  // Test-harness-specific functions
+  bool wereDuplicatesInserted() { return DuplicateInserted; }
+
+  bool wasModuleLookedUp(const Module *M) {
+    return ModulesLookedUp.find(M->getModuleIdentifier())
+                                      != ModulesLookedUp.end();
+  }
+
+  const MemoryBuffer* getObjectInternal(const Module* M) {
+    // Look for the module in our map.
+    const std::string ModuleID = M->getModuleIdentifier();
+    StringMap<const MemoryBuffer *>::iterator it = ObjMap.find(ModuleID);
+    if (it == ObjMap.end())
+      return 0;
+    return it->second;
+  }
+
+protected:
+  virtual const MemoryBuffer* getObject(const Module* M) {
+    const MemoryBuffer* BufferFound = getObjectInternal(M);
+    ModulesLookedUp.insert(M->getModuleIdentifier());
+    return BufferFound;
+  }
+
+private:
+  MemoryBuffer *copyBuffer(const MemoryBuffer *Buf) {
+    // Create a local copy of the buffer.
+    MemoryBuffer *NewBuffer = MemoryBuffer::getMemBufferCopy(Buf->getBuffer());
+    AllocatedBuffers.push_back(NewBuffer);
+    return NewBuffer;
+  }
+
+  StringMap<const MemoryBuffer *> ObjMap;
+  StringSet<>                     ModulesLookedUp;
+  SmallVector<MemoryBuffer *, 2>  AllocatedBuffers;
+  bool                            DuplicateInserted;
+};
+
+class MCJITObjectCacheTest : public testing::Test, public MCJITTestBase {
+protected:
+  // Distinct main() return codes, so a test can tell which module ran.
+  enum {
+    OriginalRC = 6,
+    ReplacementRC = 7
+  };
+
+  virtual void SetUp() {
+    M.reset(createEmptyModule("<main>"));
+    Main = insertMainFunction(M.get(), OriginalRC);
+  }
+
+  void compileAndRun(int ExpectedRC = OriginalRC) {
+    // This function shouldn't be called until after SetUp.
+    ASSERT_TRUE(0 != TheJIT);
+    ASSERT_TRUE(0 != Main);
+
+    TheJIT->finalizeObject();
+    void *vPtr = TheJIT->getPointerToFunction(Main);
+
+    static_cast<SectionMemoryManager*>(MM)->invalidateInstructionCache();
+    // Fatal on failure: the call through FuncPtr below must not see null.
+    ASSERT_TRUE(0 != vPtr)
+      << "Unable to get pointer to main() from JIT";
+
+    int (*FuncPtr)(void) = (int(*)(void))(intptr_t)vPtr;
+    int returnCode = FuncPtr();
+    EXPECT_EQ(ExpectedRC, returnCode);  // gtest order: (expected, actual)
+  }
+
+  Function *Main;
+};
+
+TEST_F(MCJITObjectCacheTest, SetNullObjectCache) {
+  SKIP_UNSUPPORTED_PLATFORM;
+
+  createJIT(M.take());
+
+  TheJIT->setObjectCache(NULL);
+
+  compileAndRun();
+}
+
+
+TEST_F(MCJITObjectCacheTest, VerifyBasicObjectCaching) {
+  SKIP_UNSUPPORTED_PLATFORM;
+
+  OwningPtr<TestObjectCache>  Cache(new TestObjectCache);
+
+  // Save a copy of the module pointer before handing it off to MCJIT.
+  const Module * SavedModulePointer = M.get();
+
+  createJIT(M.take());
+
+  TheJIT->setObjectCache(Cache.get());
+
+  // Verify that our object cache does not contain the module yet.
+  const MemoryBuffer *ObjBuffer = Cache->getObjectInternal(SavedModulePointer);
+  EXPECT_EQ(0, ObjBuffer);
+
+  compileAndRun();
+
+  // Verify that MCJIT tried to look-up this module in the cache.
+  EXPECT_TRUE(Cache->wasModuleLookedUp(SavedModulePointer));
+
+  // Verify that our object cache now contains the module.
+  ObjBuffer = Cache->getObjectInternal(SavedModulePointer);
+  EXPECT_TRUE(0 != ObjBuffer);
+
+  // Verify that the cache was only notified once.
+  EXPECT_FALSE(Cache->wereDuplicatesInserted());
+}
+
+TEST_F(MCJITObjectCacheTest, VerifyLoadFromCache) {
+  SKIP_UNSUPPORTED_PLATFORM;
+
+  OwningPtr<TestObjectCache>  Cache(new TestObjectCache);
+
+  // Compile this module with an MCJIT engine
+  createJIT(M.take());
+  TheJIT->setObjectCache(Cache.get());
+  TheJIT->finalizeObject();
+
+  // Destroy the MCJIT engine we just used
+  TheJIT.reset();
+
+  // Create a new memory manager.
+  MM = new SectionMemoryManager;
+
+  // Create a new module and save it. Use a different return code so we can
+  // tell if MCJIT compiled this module or used the cache.
+  M.reset(createEmptyModule("<main>"));
+  Main = insertMainFunction(M.get(), ReplacementRC);
+  const Module * SecondModulePointer = M.get();
+
+  // Create a new MCJIT instance to load this module then execute it.
+  createJIT(M.take());
+  TheJIT->setObjectCache(Cache.get());
+  compileAndRun();
+
+  // Verify that MCJIT tried to look-up this module in the cache.
+  EXPECT_TRUE(Cache->wasModuleLookedUp(SecondModulePointer));
+
+  // Verify that MCJIT didn't try to cache this again.
+  EXPECT_FALSE(Cache->wereDuplicatesInserted());
+}
+
+TEST_F(MCJITObjectCacheTest, VerifyNonLoadFromCache) {
+  SKIP_UNSUPPORTED_PLATFORM;
+
+  OwningPtr<TestObjectCache>  Cache(new TestObjectCache);
+
+  // Compile this module with an MCJIT engine
+  createJIT(M.take());
+  TheJIT->setObjectCache(Cache.get());
+  TheJIT->finalizeObject();
+
+  // Destroy the MCJIT engine we just used
+  TheJIT.reset();
+
+  // Create a new memory manager.
+  MM = new SectionMemoryManager;
+
+  // Create a new module and save it. Use a different return code so we can
+  // tell if MCJIT compiled this module or used the cache. Note that we use
+  // a new module name here so the module shouldn't be found in the cache.
+  M.reset(createEmptyModule("<not-main>"));
+  Main = insertMainFunction(M.get(), ReplacementRC);
+  const Module * SecondModulePointer = M.get();
+
+  // Create a new MCJIT instance to load this module then execute it.
+  createJIT(M.take());
+  TheJIT->setObjectCache(Cache.get());
+
+  // Verify that our object cache does not contain the module yet.
+  const MemoryBuffer *ObjBuffer = Cache->getObjectInternal(SecondModulePointer);
+  EXPECT_EQ(0, ObjBuffer);
+
+  // Run the function and look for the replacement return code.
+  compileAndRun(ReplacementRC);
+
+  // Verify that MCJIT tried to look-up this module in the cache.
+  EXPECT_TRUE(Cache->wasModuleLookedUp(SecondModulePointer));
+
+  // Verify that our object cache now contains the module.
+  ObjBuffer = Cache->getObjectInternal(SecondModulePointer);
+  EXPECT_TRUE(0 != ObjBuffer);
+
+  // Verify that MCJIT didn't try to cache this again.
+  EXPECT_FALSE(Cache->wereDuplicatesInserted());
+}
+
+} // Namespace
+
diff --git a/unittests/ExecutionEngine/MCJIT/MCJITTestAPICommon.h b/unittests/ExecutionEngine/MCJIT/MCJITTestAPICommon.h
new file mode 100644
index 0000000..8160a18
--- /dev/null
+++ b/unittests/ExecutionEngine/MCJIT/MCJITTestAPICommon.h
@@ -0,0 +1,77 @@
+//===- MCJITTestAPICommon.h - Common base class for MCJIT Unit tests ------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class implements functionality shared by both MCJIT C API tests, and
+// the C++ API tests.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MCJIT_TEST_API_COMMON_H
+#define MCJIT_TEST_API_COMMON_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/TargetSelect.h"
+
+// Used to skip tests on unsupported architectures and operating systems.
+// To skip a test, add this macro at the top of a test-case in a suite that
+// inherits from MCJITTestAPICommon. See MCJITTest.cpp for examples.
+#define SKIP_UNSUPPORTED_PLATFORM \
+  do \
+    if (!ArchSupportsMCJIT() || !OSSupportsMCJIT()) \
+      return; \
+  while(0)
+
+namespace llvm {
+
+class MCJITTestAPICommon {
+protected:
+  MCJITTestAPICommon()
+    : HostTriple(sys::getProcessTriple())
+  {
+    InitializeNativeTarget();
+    InitializeNativeTargetAsmPrinter();
+
+#ifdef LLVM_ON_WIN32
+    // On Windows, generate ELF objects by specifying "-elf" in triple
+    HostTriple += "-elf";
+#endif // LLVM_ON_WIN32
+    HostTriple = Triple::normalize(HostTriple);
+  }
+
+  /// Returns true if the host architecture is known to support MCJIT
+  bool ArchSupportsMCJIT() {
+    Triple Host(HostTriple);
+    if (std::find(SupportedArchs.begin(), SupportedArchs.end(), Host.getArch())
+        == SupportedArchs.end()) {
+      return false;
+    }
+    return true;
+  }
+
+  /// Returns true if the host OS is known to support MCJIT
+  bool OSSupportsMCJIT() {
+    Triple Host(HostTriple);
+    if (std::find(UnsupportedOSs.begin(), UnsupportedOSs.end(), Host.getOS())
+        == UnsupportedOSs.end()) {
+      return true;
+    }
+    return false;
+  }
+
+  std::string HostTriple;
+  SmallVector<Triple::ArchType, 4> SupportedArchs;
+  SmallVector<Triple::OSType, 4> UnsupportedOSs;
+};
+
+} // namespace llvm
+
+#endif // MCJIT_TEST_API_COMMON_H
+
diff --git a/unittests/ExecutionEngine/MCJIT/MCJITTestBase.h b/unittests/ExecutionEngine/MCJIT/MCJITTestBase.h
index fc774ab..b0e98a8 100644
--- a/unittests/ExecutionEngine/MCJIT/MCJITTestBase.h
+++ b/unittests/ExecutionEngine/MCJIT/MCJITTestBase.h
@@ -17,8 +17,6 @@
 #ifndef MCJIT_TEST_BASE_H
 #define MCJIT_TEST_BASE_H
 
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Triple.h"
 #include "llvm/Config/config.h"
 #include "llvm/ExecutionEngine/ExecutionEngine.h"
 #include "llvm/ExecutionEngine/SectionMemoryManager.h"
@@ -28,21 +26,11 @@
 #include "llvm/IR/Module.h"
 #include "llvm/IR/TypeBuilder.h"
 #include "llvm/Support/CodeGen.h"
-#include "llvm/Support/Host.h"
-#include "llvm/Support/TargetSelect.h"
-
-// Used to skip tests on unsupported architectures and operating systems.
-// To skip a test, add this macro at the top of a test-case in a suite that
-// inherits from MCJITTestBase. See MCJITTest.cpp for examples.
-#define SKIP_UNSUPPORTED_PLATFORM \
-  do \
-    if (!ArchSupportsMCJIT() || !OSSupportsMCJIT()) \
-      return; \
-  while(0);
+#include "MCJITTestAPICommon.h"
 
 namespace llvm {
 
-class MCJITTestBase {
+class MCJITTestBase : public MCJITTestAPICommon {
 protected:
 
   MCJITTestBase()
@@ -52,17 +40,7 @@ protected:
     , MArch("")
     , Builder(Context)
     , MM(new SectionMemoryManager)
-    , HostTriple(sys::getProcessTriple())
   {
-    InitializeNativeTarget();
-    InitializeNativeTargetAsmPrinter();
-
-#ifdef LLVM_ON_WIN32
-    // On Windows, generate ELF objects by specifying "-elf" in triple
-    HostTriple += "-elf";
-#endif // LLVM_ON_WIN32
-    HostTriple = Triple::normalize(HostTriple);
-
     // The architectures below are known to be compatible with MCJIT as they
     // are copied from test/ExecutionEngine/MCJIT/lit.local.cfg and should be
     // kept in sync.
@@ -78,26 +56,6 @@ protected:
     UnsupportedOSs.push_back(Triple::Darwin);
   }
 
-  /// Returns true if the host architecture is known to support MCJIT
-  bool ArchSupportsMCJIT() {
-    Triple Host(HostTriple);
-    if (std::find(SupportedArchs.begin(), SupportedArchs.end(), Host.getArch())
-        == SupportedArchs.end()) {
-      return false;
-    }
-    return true;
-  }
-
-  /// Returns true if the host OS is known to support MCJIT
-  bool OSSupportsMCJIT() {
-    Triple Host(HostTriple);
-    if (std::find(UnsupportedOSs.begin(), UnsupportedOSs.end(), Host.getOS())
-        == UnsupportedOSs.end()) {
-      return true;
-    }
-    return false;
-  }
-
   Module *createEmptyModule(StringRef Name) {
     Module * M = new Module(Name, Context);
     M->setTargetTriple(Triple::normalize(HostTriple));
@@ -232,10 +190,6 @@ protected:
   IRBuilder<> Builder;
   JITMemoryManager *MM;
 
-  std::string HostTriple;
-  SmallVector<Triple::ArchType, 4> SupportedArchs;
-  SmallVector<Triple::OSType, 4> UnsupportedOSs;
-
   OwningPtr<Module> M;
 };
 
diff --git a/unittests/ExecutionEngine/Makefile b/unittests/ExecutionEngine/Makefile
index c779a6a..38e667f 100644
--- a/unittests/ExecutionEngine/Makefile
+++ b/unittests/ExecutionEngine/Makefile
@@ -11,9 +11,10 @@ LEVEL = ../..
 TESTNAME = ExecutionEngine
 LINK_COMPONENTS :=interpreter
 
+include $(LEVEL)/Makefile.config
+
 ifeq ($(TARGET_HAS_JIT),1)
-	PARALLEL_DIRS = JIT MCJIT
+  PARALLEL_DIRS = JIT MCJIT
 endif
 
-include $(LEVEL)/Makefile.config
 include $(LLVM_SRC_ROOT)/unittests/Makefile.unittest
diff --git a/unittests/IR/CMakeLists.txt b/unittests/IR/CMakeLists.txt
index aed4597..c53043e 100644
--- a/unittests/IR/CMakeLists.txt
+++ b/unittests/IR/CMakeLists.txt
@@ -13,9 +13,11 @@ set(IRSources
   MDBuilderTest.cpp
   MetadataTest.cpp
   PassManagerTest.cpp
+  PatternMatch.cpp
   TypeBuilderTest.cpp
   TypesTest.cpp
   ValueMapTest.cpp
+  ValueTest.cpp
   VerifierTest.cpp
   WaymarkTest.cpp
   )
diff --git a/unittests/IR/PatternMatch.cpp b/unittests/IR/PatternMatch.cpp
new file mode 100644
index 0000000..7c6d8ce
--- /dev/null
+++ b/unittests/IR/PatternMatch.cpp
@@ -0,0 +1,265 @@
+//===---- llvm/unittest/IR/PatternMatch.cpp - PatternMatch unit tests ----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Support/NoFolder.h"
+#include "llvm/Support/PatternMatch.h"
+#include "gtest/gtest.h"
+
+using namespace llvm::PatternMatch;
+
+namespace llvm {
+namespace {
+
+/// Ordered floating point minimum/maximum tests.
+
+static void m_OrdFMin_expect_match_and_delete(Value *Cmp, Value *Select,
+                                              Value *L, Value *R) {
+  Value *MatchL = 0, *MatchR = 0; // Stay well-defined if the match fails.
+  EXPECT_TRUE(m_OrdFMin(m_Value(MatchL), m_Value(MatchR)).match(Select));
+  EXPECT_EQ(L, MatchL); // The pattern must bind its operands as (L, R).
+  EXPECT_EQ(R, MatchR);
+  delete Select; // Free the select before the compare passed alongside it.
+  delete Cmp;
+}
+
+static void m_OrdFMin_expect_nomatch_and_delete(Value *Cmp, Value *Select,
+                                                Value *L, Value *R) {
+  Value *LHS, *RHS; // Capture slots for m_Value only; L and R are unused.
+  EXPECT_FALSE(m_OrdFMin(m_Value(LHS), m_Value(RHS)).match(Select));
+  delete Select; // Free the select before the compare passed alongside it.
+  delete Cmp;
+}
+
+static void m_OrdFMax_expect_match_and_delete(Value *Cmp, Value *Select,
+                                              Value *L, Value *R) {
+  Value *MatchL = 0, *MatchR = 0; // Stay well-defined if the match fails.
+  EXPECT_TRUE(m_OrdFMax(m_Value(MatchL), m_Value(MatchR)).match(Select));
+  EXPECT_EQ(L, MatchL); // The pattern must bind its operands as (L, R).
+  EXPECT_EQ(R, MatchR);
+  delete Select; // Free the select before the compare passed alongside it.
+  delete Cmp;
+}
+
+static void m_OrdFMax_expect_nomatch_and_delete(Value *Cmp, Value *Select,
+                                                Value *L, Value *R) {
+  Value *LHS, *RHS; // Capture slots for m_Value only; L and R are unused.
+  EXPECT_FALSE(m_OrdFMax(m_Value(LHS), m_Value(RHS)).match(Select));
+  delete Select; // Free the select before the compare passed alongside it.
+  delete Cmp;
+}
+
+
+
+// m_OrdFMin: accepts OLT/OLE selects and OGE/OGT selects with swapped arms.
+TEST(PatternMatchTest, FloatingPointOrderedMin) {
+  IRBuilder<true, NoFolder> Builder(getGlobalContext());
+
+  Type *FloatTy = Builder.getFloatTy();
+  Value *L = ConstantFP::get(FloatTy, 1.0);
+  Value *R = ConstantFP::get(FloatTy, 2.0);
+
+  // fcmp olt with select(L, R): expected to match.
+  Value *Cond = Builder.CreateFCmp(FCmpInst::FCMP_OLT, L, R);
+  Value *Sel = Builder.CreateSelect(Cond, L, R);
+  m_OrdFMin_expect_match_and_delete(Cond, Sel, L, R);
+
+  // fcmp ole with select(L, R): expected to match.
+  Cond = Builder.CreateFCmp(FCmpInst::FCMP_OLE, L, R);
+  Sel = Builder.CreateSelect(Cond, L, R);
+  m_OrdFMin_expect_match_and_delete(Cond, Sel, L, R);
+
+  // fcmp oge with select(L, R): must not match.
+  Cond = Builder.CreateFCmp(FCmpInst::FCMP_OGE, L, R);
+  Sel = Builder.CreateSelect(Cond, L, R);
+  m_OrdFMin_expect_nomatch_and_delete(Cond, Sel, L, R);
+
+  // fcmp ogt with select(L, R): must not match.
+  Cond = Builder.CreateFCmp(FCmpInst::FCMP_OGT, L, R);
+  Sel = Builder.CreateSelect(Cond, L, R);
+  m_OrdFMin_expect_nomatch_and_delete(Cond, Sel, L, R);
+
+  // fcmp oge with the swapped select(R, L): expected to match.
+  Cond = Builder.CreateFCmp(FCmpInst::FCMP_OGE, L, R);
+  Sel = Builder.CreateSelect(Cond, R, L);
+  m_OrdFMin_expect_match_and_delete(Cond, Sel, L, R);
+
+  // fcmp ogt with the swapped select(R, L): expected to match.
+  Cond = Builder.CreateFCmp(FCmpInst::FCMP_OGT, L, R);
+  Sel = Builder.CreateSelect(Cond, R, L);
+  m_OrdFMin_expect_match_and_delete(Cond, Sel, L, R);
+}
+
+// m_OrdFMax: accepts OGT/OGE selects and OLE/OLT selects with swapped arms.
+TEST(PatternMatchTest, FloatingPointOrderedMax) {
+  IRBuilder<true, NoFolder> Builder(getGlobalContext());
+
+  Type *FloatTy = Builder.getFloatTy();
+  Value *L = ConstantFP::get(FloatTy, 1.0);
+  Value *R = ConstantFP::get(FloatTy, 2.0);
+
+  // fcmp ogt with select(L, R): expected to match.
+  Value *Cond = Builder.CreateFCmp(FCmpInst::FCMP_OGT, L, R);
+  Value *Sel = Builder.CreateSelect(Cond, L, R);
+  m_OrdFMax_expect_match_and_delete(Cond, Sel, L, R);
+
+  // fcmp oge with select(L, R): expected to match.
+  Cond = Builder.CreateFCmp(FCmpInst::FCMP_OGE, L, R);
+  Sel = Builder.CreateSelect(Cond, L, R);
+  m_OrdFMax_expect_match_and_delete(Cond, Sel, L, R);
+
+  // fcmp ole with select(L, R): must not match.
+  Cond = Builder.CreateFCmp(FCmpInst::FCMP_OLE, L, R);
+  Sel = Builder.CreateSelect(Cond, L, R);
+  m_OrdFMax_expect_nomatch_and_delete(Cond, Sel, L, R);
+
+  // fcmp olt with select(L, R): must not match.
+  Cond = Builder.CreateFCmp(FCmpInst::FCMP_OLT, L, R);
+  Sel = Builder.CreateSelect(Cond, L, R);
+  m_OrdFMax_expect_nomatch_and_delete(Cond, Sel, L, R);
+
+  // fcmp ole with the swapped select(R, L): expected to match.
+  Cond = Builder.CreateFCmp(FCmpInst::FCMP_OLE, L, R);
+  Sel = Builder.CreateSelect(Cond, R, L);
+  m_OrdFMax_expect_match_and_delete(Cond, Sel, L, R);
+
+  // fcmp olt with the swapped select(R, L): expected to match.
+  Cond = Builder.CreateFCmp(FCmpInst::FCMP_OLT, L, R);
+  Sel = Builder.CreateSelect(Cond, R, L);
+  m_OrdFMax_expect_match_and_delete(Cond, Sel, L, R);
+}
+
+/// Unordered floating point minimum/maximum tests.
+
+static void m_UnordFMin_expect_match_and_delete(Value *Cmp, Value *Select,
+                                                Value *L, Value *R) {
+  Value *MatchL = 0, *MatchR = 0; // Stay well-defined if the match fails.
+  EXPECT_TRUE(m_UnordFMin(m_Value(MatchL), m_Value(MatchR)).match(Select));
+  EXPECT_EQ(L, MatchL); // The pattern must bind its operands as (L, R).
+  EXPECT_EQ(R, MatchR);
+  delete Select; // Free the select before the compare passed alongside it.
+  delete Cmp;
+}
+
+static void m_UnordFMin_expect_nomatch_and_delete(Value *Cmp, Value *Select,
+                                                  Value *L, Value *R) {
+  Value *LHS, *RHS; // Capture slots for m_Value only; L and R are unused.
+  EXPECT_FALSE(m_UnordFMin(m_Value(LHS), m_Value(RHS)).match(Select));
+  delete Select; // Free the select before the compare passed alongside it.
+  delete Cmp;
+}
+
+static void m_UnordFMax_expect_match_and_delete(Value *Cmp, Value *Select,
+                                                Value *L, Value *R) {
+  Value *MatchL = 0, *MatchR = 0; // Stay well-defined if the match fails.
+  EXPECT_TRUE(m_UnordFMax(m_Value(MatchL), m_Value(MatchR)).match(Select));
+  EXPECT_EQ(L, MatchL); // The pattern must bind its operands as (L, R).
+  EXPECT_EQ(R, MatchR);
+  delete Select; // Free the select before the compare passed alongside it.
+  delete Cmp;
+}
+
+static void m_UnordFMax_expect_nomatch_and_delete(Value *Cmp, Value *Select,
+                                                  Value *L, Value *R) {
+  Value *LHS, *RHS; // Capture slots for m_Value only; L and R are unused.
+  EXPECT_FALSE(m_UnordFMax(m_Value(LHS), m_Value(RHS)).match(Select));
+  delete Select; // Free the select before the compare passed alongside it.
+  delete Cmp;
+}
+
+// m_UnordFMin: accepts ULT/ULE selects and UGE/UGT selects with swapped arms.
+TEST(PatternMatchTest, FloatingPointUnorderedMin) {
+  IRBuilder<true, NoFolder> Builder(getGlobalContext());
+
+  Type *FloatTy = Builder.getFloatTy();
+  Value *L = ConstantFP::get(FloatTy, 1.0);
+  Value *R = ConstantFP::get(FloatTy, 2.0);
+
+  // fcmp ult with select(L, R): expected to match.
+  Value *Cond = Builder.CreateFCmp(FCmpInst::FCMP_ULT, L, R);
+  Value *Sel = Builder.CreateSelect(Cond, L, R);
+  m_UnordFMin_expect_match_and_delete(Cond, Sel, L, R);
+
+  // fcmp ule with select(L, R): expected to match.
+  Cond = Builder.CreateFCmp(FCmpInst::FCMP_ULE, L, R);
+  Sel = Builder.CreateSelect(Cond, L, R);
+  m_UnordFMin_expect_match_and_delete(Cond, Sel, L, R);
+
+  // fcmp uge with select(L, R): must not match.
+  Cond = Builder.CreateFCmp(FCmpInst::FCMP_UGE, L, R);
+  Sel = Builder.CreateSelect(Cond, L, R);
+  m_UnordFMin_expect_nomatch_and_delete(Cond, Sel, L, R);
+
+  // fcmp ugt with select(L, R): must not match.
+  Cond = Builder.CreateFCmp(FCmpInst::FCMP_UGT, L, R);
+  Sel = Builder.CreateSelect(Cond, L, R);
+  m_UnordFMin_expect_nomatch_and_delete(Cond, Sel, L, R);
+
+  // fcmp uge with the swapped select(R, L): expected to match.
+  Cond = Builder.CreateFCmp(FCmpInst::FCMP_UGE, L, R);
+  Sel = Builder.CreateSelect(Cond, R, L);
+  m_UnordFMin_expect_match_and_delete(Cond, Sel, L, R);
+
+  // fcmp ugt with the swapped select(R, L): expected to match.
+  Cond = Builder.CreateFCmp(FCmpInst::FCMP_UGT, L, R);
+  Sel = Builder.CreateSelect(Cond, R, L);
+  m_UnordFMin_expect_match_and_delete(Cond, Sel, L, R);
+}
+
+// m_UnordFMax: accepts UGT/UGE selects and ULE/ULT selects with swapped arms.
+TEST(PatternMatchTest, FloatingPointUnorderedMax) {
+  IRBuilder<true, NoFolder> Builder(getGlobalContext());
+
+  Type *FloatTy = Builder.getFloatTy();
+  Value *L = ConstantFP::get(FloatTy, 1.0);
+  Value *R = ConstantFP::get(FloatTy, 2.0);
+
+  // fcmp ugt with select(L, R): expected to match.
+  Value *Cond = Builder.CreateFCmp(FCmpInst::FCMP_UGT, L, R);
+  Value *Sel = Builder.CreateSelect(Cond, L, R);
+  m_UnordFMax_expect_match_and_delete(Cond, Sel, L, R);
+
+  // fcmp uge with select(L, R): expected to match.
+  Cond = Builder.CreateFCmp(FCmpInst::FCMP_UGE, L, R);
+  Sel = Builder.CreateSelect(Cond, L, R);
+  m_UnordFMax_expect_match_and_delete(Cond, Sel, L, R);
+
+  // fcmp ule with select(L, R): must not match.
+  Cond = Builder.CreateFCmp(FCmpInst::FCMP_ULE, L, R);
+  Sel = Builder.CreateSelect(Cond, L, R);
+  m_UnordFMax_expect_nomatch_and_delete(Cond, Sel, L, R);
+
+  // fcmp ult with select(L, R): must not match.
+  Cond = Builder.CreateFCmp(FCmpInst::FCMP_ULT, L, R);
+  Sel = Builder.CreateSelect(Cond, L, R);
+  m_UnordFMax_expect_nomatch_and_delete(Cond, Sel, L, R);
+
+  // fcmp ule with the swapped select(R, L): expected to match.
+  Cond = Builder.CreateFCmp(FCmpInst::FCMP_ULE, L, R);
+  Sel = Builder.CreateSelect(Cond, R, L);
+  m_UnordFMax_expect_match_and_delete(Cond, Sel, L, R);
+
+  // fcmp ult with the swapped select(R, L): expected to match.
+  Cond = Builder.CreateFCmp(FCmpInst::FCMP_ULT, L, R);
+  Sel = Builder.CreateSelect(Cond, R, L);
+  m_UnordFMax_expect_match_and_delete(Cond, Sel, L, R);
+}
+
+} // anonymous namespace.
+} // llvm namespace.
diff --git a/unittests/IR/ValueTest.cpp b/unittests/IR/ValueTest.cpp
new file mode 100644
index 0000000..52efb1a
--- /dev/null
+++ b/unittests/IR/ValueTest.cpp
@@ -0,0 +1,46 @@
+//===- llvm/unittest/IR/ValueTest.cpp - Value unit tests ------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Assembly/Parser.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/SourceMgr.h"
+#include "gtest/gtest.h"
+using namespace llvm;
+
+namespace {
+
+TEST(ValueTest, UsedInBasicBlock) {
+  LLVMContext C;
+  // In bb0, %y feeds seven adds and %x feeds one; @f itself has no users.
+  const char *ModuleString = "define void @f(i32 %x, i32 %y) {\n"
+                             "bb0:\n"
+                             "  %y1 = add i32 %y, 1\n"
+                             "  %y2 = add i32 %y, 1\n"
+                             "  %y3 = add i32 %y, 1\n"
+                             "  %y4 = add i32 %y, 1\n"
+                             "  %y5 = add i32 %y, 1\n"
+                             "  %y6 = add i32 %y, 1\n"
+                             "  %y7 = add i32 %y, 1\n"
+                             "  %y8 = add i32 %x, 1\n"
+                             "  ret void\n"
+                             "}\n";
+  SMDiagnostic Err;
+  Module *M = ParseAssemblyString(ModuleString, NULL, Err, C);
+  ASSERT_TRUE(M != NULL) << "failed to parse the test module";
+  Function *F = M->getFunction("f");
+  ASSERT_TRUE(F != NULL) << "function @f not found in the parsed module";
+  EXPECT_FALSE(F->isUsedInBasicBlock(F->begin()));
+  EXPECT_TRUE((++F->arg_begin())->isUsedInBasicBlock(F->begin()));
+  EXPECT_TRUE(F->arg_begin()->isUsedInBasicBlock(F->begin()));
+}
+
+} // end anonymous namespace
diff --git a/unittests/Makefile b/unittests/Makefile
index 926459a..61d6061 100644
--- a/unittests/Makefile
+++ b/unittests/Makefile
@@ -9,7 +9,8 @@
 
 LEVEL = ..
 
-PARALLEL_DIRS = ADT ExecutionEngine Support Transforms IR Analysis Bitcode
+PARALLEL_DIRS = ADT ExecutionEngine Support Transforms IR Analysis Bitcode \
+								DebugInfo
 
 include $(LEVEL)/Makefile.common
 
diff --git a/unittests/Support/CMakeLists.txt b/unittests/Support/CMakeLists.txt
index b4b982f..e6cafbc 100644
--- a/unittests/Support/CMakeLists.txt
+++ b/unittests/Support/CMakeLists.txt
@@ -10,6 +10,7 @@ add_llvm_unittest(SupportTests
   BlockFrequencyTest.cpp
   Casting.cpp
   CommandLineTest.cpp
+  CompressionTest.cpp
   ConstantRangeTest.cpp
   DataExtractorTest.cpp
   EndianTest.cpp
@@ -23,6 +24,7 @@ add_llvm_unittest(SupportTests
   MemoryTest.cpp
   Path.cpp
   ProcessTest.cpp
+  ProgramTest.cpp
   RegexTest.cpp
   SwapByteOrderTest.cpp
   TimeValue.cpp
diff --git a/unittests/Support/CommandLineTest.cpp b/unittests/Support/CommandLineTest.cpp
index 43c8cbd..cd235d2 100644
--- a/unittests/Support/CommandLineTest.cpp
+++ b/unittests/Support/CommandLineTest.cpp
@@ -41,6 +41,45 @@ class TempEnvVar {
   const char *const name;
 };
 
+cl::OptionCategory TestCategory("Test Options", "Description");
+cl::opt<int> TestOption("test-option", cl::desc("old description"));
+TEST(CommandLineTest, ModifyExisitingOption) {
+  // Static: the global TestOption keeps these pointers past this test.
+  static const char Description[] = "New description";
+  static const char ArgString[] = "new-test-option";
+  static const char ValueString[] = "Integer";
+
+  StringMap<cl::Option*> Map;
+  cl::getRegisteredOptions(Map);
+
+  ASSERT_TRUE(Map.count("test-option") == 1) <<
+    "Could not find option in map.";
+  cl::Option *Retrieved = Map["test-option"];
+  ASSERT_EQ(&TestOption, Retrieved) << "Retrieved wrong option.";
+
+  ASSERT_EQ(&cl::GeneralCategory,Retrieved->Category) <<
+    "Incorrect default option category.";
+
+  Retrieved->setCategory(TestCategory);
+  ASSERT_EQ(&TestCategory,Retrieved->Category) <<
+    "Failed to modify option's option category.";
+
+  Retrieved->setDescription(Description);
+  ASSERT_STREQ(Retrieved->HelpStr, Description) <<
+    "Changing option description failed.";
+
+  Retrieved->setArgStr(ArgString);
+  ASSERT_STREQ(ArgString, Retrieved->ArgStr) <<
+    "Failed to modify option's Argument string.";
+
+  Retrieved->setValueStr(ValueString);
+  ASSERT_STREQ(Retrieved->ValueStr, ValueString) <<
+    "Failed to modify option's Value string.";
+
+  Retrieved->setHiddenFlag(cl::Hidden);
+  ASSERT_EQ(cl::Hidden, TestOption.getOptionHiddenFlag()) <<
+    "Failed to modify option's hidden flag.";
+}
 #ifndef SKIP_ENVIRONMENT_TESTS
 
 const char test_env_var[] = "LLVM_TEST_COMMAND_LINE_FLAGS";
@@ -55,6 +94,12 @@ TEST(CommandLineTest, ParseEnvironment) {
 
 // This test used to make valgrind complain
 // ("Conditional jump or move depends on uninitialised value(s)")
+//
+// Warning: Do not run any tests after this one that try to gain access to
+// registered command line options because this will likely result in a
+// SEGFAULT. This can occur because the cl::opt in the test below is declared
+// on the stack which will be destroyed after the test completes but the
+// command line system will still hold a pointer to a deallocated cl::Option.
 TEST(CommandLineTest, ParseEnvironmentToLocalVar) {
   // Put cl::opt on stack to check for proper initialization of fields.
   cl::opt<std::string> EnvironmentTestOptionLocal("env-test-opt-local");
@@ -66,4 +111,11 @@ TEST(CommandLineTest, ParseEnvironmentToLocalVar) {
 
 #endif  // SKIP_ENVIRONMENT_TESTS
 
+// An option built with cl::cat(X) must report X as its Category.
+TEST(CommandLineTest, UseOptionCategory) {
+  cl::opt<int> TestOption2("test-option", cl::cat(TestCategory));
+  ASSERT_EQ(&TestCategory, TestOption2.Category)
+      << "Failed to assign Option Category.";
+}
+
 }  // anonymous namespace
diff --git a/unittests/Support/CompressionTest.cpp b/unittests/Support/CompressionTest.cpp
new file mode 100644
index 0000000..c8e2cd9
--- /dev/null
+++ b/unittests/Support/CompressionTest.cpp
@@ -0,0 +1,68 @@
+//===- llvm/unittest/Support/CompressionTest.cpp - Compression tests ------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements unit tests for the Compression functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Compression.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Config/config.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+namespace {
+
+#if LLVM_ENABLE_ZLIB == 1 && HAVE_LIBZ
+
+void TestZlibCompression(StringRef Input, zlib::CompressionLevel Level) {
+  OwningPtr<MemoryBuffer> Compressed;
+  OwningPtr<MemoryBuffer> Uncompressed;
+  EXPECT_EQ(zlib::StatusOK, zlib::compress(Input, Compressed, Level));
+  // Check that uncompressed buffer is the same as original.
+  EXPECT_EQ(zlib::StatusOK, zlib::uncompress(Compressed->getBuffer(),
+                                             Uncompressed, Input.size()));
+  EXPECT_EQ(Input.size(), Uncompressed->getBufferSize());
+  EXPECT_EQ(0,
+            memcmp(Input.data(), Uncompressed->getBufferStart(), Input.size()));
+  if (Input.size() > 0) {
+    // Uncompression fails if expected length is too short.
+    EXPECT_EQ(zlib::StatusBufferTooShort,
+              zlib::uncompress(Compressed->getBuffer(), Uncompressed,
+                               Input.size() - 1));
+  }
+}
+
+// Round-trips empty, short-text and 1 KiB binary inputs through zlib.
+TEST(CompressionTest, Zlib) {
+  const zlib::CompressionLevel Levels[] = {
+    zlib::NoCompression, zlib::BestSizeCompression,
+    zlib::BestSpeedCompression, zlib::DefaultCompression
+  };
+  const size_t NumLevels = sizeof(Levels) / sizeof(Levels[0]);
+
+  TestZlibCompression("", zlib::DefaultCompression);
+  for (size_t I = 0; I != NumLevels; ++I)
+    TestZlibCompression("hello, world!", Levels[I]);
+
+  // Fill the buffer with the byte values 0..255, repeated.
+  char BinaryData[1024];
+  for (size_t I = 0; I != sizeof(BinaryData); ++I)
+    BinaryData[I] = I & 255;
+  StringRef BinaryDataStr(BinaryData, sizeof(BinaryData));
+  for (size_t I = 0; I != NumLevels; ++I)
+    TestZlibCompression(BinaryDataStr, Levels[I]);
+}
+
+#endif
+
+}
diff --git a/unittests/Support/Path.cpp b/unittests/Support/Path.cpp
index 4511259..eec8c62 100644
--- a/unittests/Support/Path.cpp
+++ b/unittests/Support/Path.cpp
@@ -298,12 +298,19 @@ TEST_F(FileSystemTest, DirectoryIteration) {
   ASSERT_LT(z0, za1);
 }
 
+const char elf[] = {0x7f, 'E', 'L', 'F', 1, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
+
 TEST_F(FileSystemTest, Magic) {
   struct type {
     const char *filename;
     const char *magic_str;
-    size_t      magic_str_len;
-  } types [] = {{"magic.archive", "!<arch>\x0A", 8}};
+    size_t magic_str_len;
+    fs::file_magic magic;
+  } types [] = {
+    {"magic.archive", "!<arch>\x0A", 8, fs::file_magic::archive},
+    {"magic.elf", elf, sizeof(elf),
+     fs::file_magic::elf_relocatable}
+  };
 
   // Create some files filled with magic.
   for (type *i = types, *e = types + (sizeof(types) / sizeof(type)); i != e;
@@ -320,6 +327,7 @@ TEST_F(FileSystemTest, Magic) {
     bool res = false;
     ASSERT_NO_ERROR(fs::has_magic(file_pathname.c_str(), magic, res));
     EXPECT_TRUE(res);
+    EXPECT_EQ(i->magic, fs::identify_magic(magic));
   }
 }
 
diff --git a/unittests/Support/ProgramTest.cpp b/unittests/Support/ProgramTest.cpp
new file mode 100644
index 0000000..6cbb054
--- /dev/null
+++ b/unittests/Support/ProgramTest.cpp
@@ -0,0 +1,89 @@
+//===- unittest/Support/ProgramTest.cpp -----------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Program.h"
+#include "gtest/gtest.h"
+
+#include <stdlib.h>
+#if defined(__APPLE__)
+# include <crt_externs.h>
+#elif !defined(_MSC_VER)
+// Forward declare environ in case it's not provided by stdlib.h.
+extern char **environ;
+#endif
+
+// From TestMain.cpp.
+extern const char *TestMainArgv0;
+
+namespace {
+
+using namespace llvm;
+using namespace sys;
+
+static cl::opt<std::string>
+ProgramTestStringArg1("program-test-string-arg1");
+static cl::opt<std::string>
+ProgramTestStringArg2("program-test-string-arg2");
+
+// Appends each entry of the process environment to out (no terminator).
+static void CopyEnvironment(std::vector<const char *> &out) {
+#ifdef __APPLE__
+  char **env = *_NSGetEnviron();
+#else
+  // environ seems to work for Windows and most other Unices.
+  char **env = environ;
+#endif
+  // Walk the block up to its terminating null entry.
+  for (char **entry = env; *entry; ++entry)
+    out.push_back(*entry);
+}
+
+TEST(ProgramTest, CreateProcessTrailingSlash) {
+  if (getenv("LLVM_PROGRAM_TEST_CHILD")) {  // Re-executed child process.
+    if (ProgramTestStringArg1 == "has\\\\ trailing\\" &&
+        ProgramTestStringArg2 == "has\\\\ trailing\\")
+      exit(0);  // Success!  The arguments were passed and parsed.
+    exit(1);
+  }
+
+  Path my_exe = Path::GetMainExecutable(TestMainArgv0, &ProgramTestStringArg1);
+  const char *argv[] = {
+    my_exe.c_str(),
+    // Must name this test exactly, or the child runs no test and exits 0.
+    "--gtest_filter=ProgramTest.CreateProcessTrailingSlash",
+    "-program-test-string-arg1", "has\\\\ trailing\\",
+    "-program-test-string-arg2", "has\\\\ trailing\\",
+    0
+  };
+
+  // Add LLVM_PROGRAM_TEST_CHILD to the environment of the child.
+  std::vector<const char *> envp;
+  CopyEnvironment(envp);
+  envp.push_back("LLVM_PROGRAM_TEST_CHILD=1");
+  envp.push_back(0);
+
+  std::string error;
+  bool ExecutionFailed;
+  // Redirect stdout and stdin to NUL, but let stderr through.
+#ifdef LLVM_ON_WIN32
+  Path nul("NUL");
+#else
+  Path nul("/dev/null");
+#endif
+  const Path *redirects[] = { &nul, &nul, 0 };
+  int rc = Program::ExecuteAndWait(my_exe, argv, &envp[0], redirects,
+                                   /*secondsToWait=*/10, /*memoryLimit=*/0,
+                                   &error, &ExecutionFailed);
+  EXPECT_FALSE(ExecutionFailed) << error;
+  EXPECT_EQ(0, rc);
+}
+
+} // end anonymous namespace
diff --git a/unittests/Transforms/Utils/Cloning.cpp b/unittests/Transforms/Utils/Cloning.cpp
index cd304e7..e19ae5b 100644
--- a/unittests/Transforms/Utils/Cloning.cpp
+++ b/unittests/Transforms/Utils/Cloning.cpp
@@ -7,12 +7,15 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/IR/Instructions.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/IR/Argument.h"
 #include "llvm/IR/Constant.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/LLVMContext.h"
+#include "llvm/Transforms/Utils/Cloning.h"
 #include "gtest/gtest.h"
 
 using namespace llvm;
@@ -143,4 +146,31 @@ TEST_F(CloneInstruction, Exact) {
   EXPECT_TRUE(this->clone(SDiv)->isExact());
 }
 
+// Cloning with the source argument mapped to undef must not give the
+// destination's argument the source argument's nocapture attribute.
+TEST_F(CloneInstruction, Attributes) {
+  Type *ParamTys[] = { Type::getInt32PtrTy(context) };
+  FunctionType *FnTy =
+      FunctionType::get(Type::getVoidTy(context), ParamTys, false);
+
+  Function *Src = Function::Create(FnTy, Function::ExternalLinkage);
+  IRBuilder<> Builder(BasicBlock::Create(context, "", Src));
+  Builder.CreateRetVoid();
+  Function *Dst = Function::Create(FnTy, Function::ExternalLinkage);
+
+  Attribute::AttrKind Kinds[] = { Attribute::NoCapture };
+  Argument *SrcArg = Src->arg_begin();
+  SrcArg->addAttr(AttributeSet::get(context, 0, Kinds));
+
+  SmallVector<ReturnInst*, 4> Returns;
+  ValueToValueMapTy VMap;
+  VMap[SrcArg] = UndefValue::get(SrcArg->getType());
+  CloneFunctionInto(Dst, Src, VMap, false, Returns);
+
+  EXPECT_FALSE(Dst->arg_begin()->hasNoCaptureAttr());
+
+  delete Src;
+  delete Dst;
+}
+
 }
diff --git a/utils/FileCheck/FileCheck.cpp b/utils/FileCheck/FileCheck.cpp
index b0ef67a..cd7bb5a 100644
--- a/utils/FileCheck/FileCheck.cpp
+++ b/utils/FileCheck/FileCheck.cpp
@@ -85,6 +85,9 @@ public:
 
   Pattern(bool matchEOF = false) : MatchEOF(matchEOF) { }
 
+  /// getLoc - Return the location in source code.
+  SMLoc getLoc() const { return PatternLoc; }
+
   /// ParsePattern - Parse the given string into the Pattern.  SM provides the
   /// SourceMgr used for error reports, and LineNumber is the line number in
   /// the input file from which the pattern string was read.
@@ -581,7 +584,7 @@ struct CheckString {
   /// NotStrings - These are all of the strings that are disallowed from
   /// occurring between this match string and the previous one (or start of
   /// file).
-  std::vector<std::pair<SMLoc, Pattern> > NotStrings;
+  std::vector<Pattern> NotStrings;
 
   CheckString(const Pattern &P, SMLoc L, bool isCheckNext)
     : Pat(P), Loc(L), IsCheckNext(isCheckNext) {}
@@ -604,7 +607,7 @@ static MemoryBuffer *CanonicalizeInputFile(MemoryBuffer *MB,
       continue;
     }
 
-    // If current char is not a horizontal whitespace or if horizontal 
+    // If current char is not a horizontal whitespace or if horizontal
     // whitespace canonicalization is disabled, dump it to output as is.
     if (PreserveHorizontal || (*Ptr != ' ' && *Ptr != '\t')) {
       NewFile.push_back(*Ptr);
@@ -649,7 +652,7 @@ static bool ReadCheckFile(SourceMgr &SM,
 
   // Find all instances of CheckPrefix followed by : in the file.
   StringRef Buffer = F->getBuffer();
-  std::vector<std::pair<SMLoc, Pattern> > NotMatches;
+  std::vector<Pattern> NotMatches;
 
   // LineNumber keeps track of the line on which CheckPrefix instances are
   // found.
@@ -716,8 +719,7 @@ static bool ReadCheckFile(SourceMgr &SM,
 
     // Handle CHECK-NOT.
     if (IsCheckNot) {
-      NotMatches.push_back(std::make_pair(SMLoc::getFromPointer(Buffer.data()),
-                                          P));
+      NotMatches.push_back(P);
       continue;
     }
 
@@ -877,14 +879,13 @@ int main(int argc, char **argv) {
     for (unsigned ChunkNo = 0, e = CheckStr.NotStrings.size();
          ChunkNo != e; ++ChunkNo) {
       size_t MatchLen = 0;
-      size_t Pos = CheckStr.NotStrings[ChunkNo].second.Match(SkippedRegion,
-                                                             MatchLen,
-                                                             VariableTable);
+      size_t Pos = CheckStr.NotStrings[ChunkNo].Match(SkippedRegion, MatchLen,
+                                                      VariableTable);
       if (Pos == StringRef::npos) continue;
 
       SM.PrintMessage(SMLoc::getFromPointer(LastMatch+Pos), SourceMgr::DK_Error,
                       CheckPrefix+"-NOT: string occurred!");
-      SM.PrintMessage(CheckStr.NotStrings[ChunkNo].first, SourceMgr::DK_Note,
+      SM.PrintMessage(CheckStr.NotStrings[ChunkNo].getLoc(), SourceMgr::DK_Note,
                       CheckPrefix+"-NOT: pattern specified here");
       return 1;
     }
diff --git a/utils/Makefile b/utils/Makefile
index 7a3c17d..ecb30be 100644
--- a/utils/Makefile
+++ b/utils/Makefile
@@ -9,7 +9,7 @@
 
 LEVEL = ..
 PARALLEL_DIRS := FileCheck FileUpdate TableGen PerfectShuffle \
-	      count fpcmp llvm-lit not unittest yaml2obj
+	      count fpcmp llvm-lit not unittest
 
 EXTRA_DIST := check-each-file codegen-diff countloc.sh \
               DSAclean.py DSAextract.py emacs findsym.pl GenLibDeps.pl \
diff --git a/utils/TableGen/AsmMatcherEmitter.cpp b/utils/TableGen/AsmMatcherEmitter.cpp
index 6faf819..218af21 100644
--- a/utils/TableGen/AsmMatcherEmitter.cpp
+++ b/utils/TableGen/AsmMatcherEmitter.cpp
@@ -1322,27 +1322,6 @@ void AsmMatcherInfo::buildInfo() {
       if (CGI.TheDef->getValueAsBit("isCodeGenOnly"))
         continue;
 
-      // Validate the operand list to ensure we can handle this instruction.
-      for (unsigned i = 0, e = CGI.Operands.size(); i != e; ++i) {
-        const CGIOperandList::OperandInfo &OI = CGI.Operands[i];
-
-        // Validate tied operands.
-        if (OI.getTiedRegister() != -1) {
-          // If we have a tied operand that consists of multiple MCOperands,
-          // reject it.  We reject aliases and ignore instructions for now.
-          if (OI.MINumOperands != 1) {
-            // FIXME: Should reject these.  The ARM backend hits this with $lane
-            // in a bunch of instructions. The right answer is unclear.
-            DEBUG({
-                errs() << "warning: '" << CGI.TheDef->getName() << "': "
-                     << "ignoring instruction with multi-operand tied operand '"
-                     << OI.Name << "'\n";
-              });
-            continue;
-          }
-        }
-      }
-
       OwningPtr<MatchableInfo> II(new MatchableInfo(CGI));
 
       II->initialize(*this, SingletonRegisters, AsmVariantNo, RegisterPrefix);
@@ -1529,7 +1508,9 @@ buildInstructionOperandReference(MatchableInfo *II,
   // we want to canonicalize to:
   //   "inc $dst"
   // so that we know how to provide the $dst operand when filling in the result.
-  int OITied = Operands[Idx].getTiedRegister();
+  int OITied = -1;
+  if (Operands[Idx].MINumOperands == 1)
+    OITied = Operands[Idx].getTiedRegister();
   if (OITied != -1) {
     // The tied operand index is an MIOperand index, find the operand that
     // contains it.
@@ -1578,7 +1559,9 @@ void MatchableInfo::buildInstructionResultOperands() {
     const CGIOperandList::OperandInfo &OpInfo = ResultInst->Operands[i];
 
     // If this is a tied operand, just copy from the previously handled operand.
-    int TiedOp = OpInfo.getTiedRegister();
+    int TiedOp = -1;
+    if (OpInfo.MINumOperands == 1)
+      TiedOp = OpInfo.getTiedRegister();
     if (TiedOp != -1) {
       ResOperands.push_back(ResOperand::getTiedOp(TiedOp));
       continue;
@@ -1586,10 +1569,15 @@ void MatchableInfo::buildInstructionResultOperands() {
 
     // Find out what operand from the asmparser this MCInst operand comes from.
     int SrcOperand = findAsmOperandNamed(OpInfo.Name);
-    if (OpInfo.Name.empty() || SrcOperand == -1)
-      PrintFatalError(TheDef->getLoc(), "Instruction '" +
-                    TheDef->getName() + "' has operand '" + OpInfo.Name +
-                    "' that doesn't appear in asm string!");
+    if (OpInfo.Name.empty() || SrcOperand == -1) {
+      // This may happen for operands that are tied to a suboperand of a
+      // complex operand.  Simply use a dummy value here; nobody should
+      // use this operand slot.
+      // FIXME: The long term goal is for the MCOperand list to not contain
+      // tied operands at all.
+      ResOperands.push_back(ResOperand::getImmOp(0));
+      continue;
+    }
 
     // Check if the one AsmOperand populates the entire operand.
     unsigned NumOperands = OpInfo.MINumOperands;
@@ -1620,7 +1608,9 @@ void MatchableInfo::buildAliasResultOperands() {
     const CGIOperandList::OperandInfo *OpInfo = &ResultInst->Operands[i];
 
     // If this is a tied operand, just copy from the previously handled operand.
-    int TiedOp = OpInfo->getTiedRegister();
+    int TiedOp = -1;
+    if (OpInfo->MINumOperands == 1)
+      TiedOp = OpInfo->getTiedRegister();
     if (TiedOp != -1) {
       ResOperands.push_back(ResOperand::getTiedOp(TiedOp));
       continue;
@@ -1843,13 +1833,12 @@ static void emitConvertFuncs(CodeGenTarget &Target, StringRef ClassName,
       case MatchableInfo::ResOperand::TiedOperand: {
         // If this operand is tied to a previous one, just copy the MCInst
         // operand from the earlier one.We can only tie single MCOperand values.
-        //assert(OpInfo.MINumOperands == 1 && "Not a singular MCOperand");
+        assert(OpInfo.MINumOperands == 1 && "Not a singular MCOperand");
         unsigned TiedOp = OpInfo.TiedOperandNum;
         assert(i > TiedOp && "Tied operand precedes its target!");
         Signature += "__Tie" + utostr(TiedOp);
         ConversionRow.push_back(CVT_Tied);
         ConversionRow.push_back(TiedOp);
-        // FIXME: Handle the operand number lookup for tied operands.
         break;
       }
       case MatchableInfo::ResOperand::ImmOperand: {
@@ -2296,29 +2285,25 @@ static std::string GetAliasRequiredFeatures(Record *R,
   return Result;
 }
 
-/// emitMnemonicAliases - If the target has any MnemonicAlias<> definitions,
-/// emit a function for them and return true, otherwise return false.
-static bool emitMnemonicAliases(raw_ostream &OS, const AsmMatcherInfo &Info) {
-  // Ignore aliases when match-prefix is set.
-  if (!MatchPrefix.empty())
-    return false;
-
-  std::vector<Record*> Aliases =
-    Info.getRecords().getAllDerivedDefinitions("MnemonicAlias");
-  if (Aliases.empty()) return false;
-
-  OS << "static void applyMnemonicAliases(StringRef &Mnemonic, "
-        "unsigned Features) {\n";
-
+static void emitMnemonicAliasVariant(raw_ostream &OS,const AsmMatcherInfo &Info,
+                                     std::vector<Record*> &Aliases,
+                                     unsigned Indent = 0,
+                                  StringRef AsmParserVariantName = StringRef()){
   // Keep track of all the aliases from a mnemonic.  Use an std::map so that the
   // iteration order of the map is stable.
   std::map<std::string, std::vector<Record*> > AliasesFromMnemonic;
 
   for (unsigned i = 0, e = Aliases.size(); i != e; ++i) {
     Record *R = Aliases[i];
+    // FIXME: Allow AsmVariantName to be a comma-separated list.
+    std::string AsmVariantName = R->getValueAsString("AsmVariantName");
+    if (AsmVariantName != AsmParserVariantName)
+      continue;
     AliasesFromMnemonic[R->getValueAsString("FromMnemonic")].push_back(R);
   }
-
+  if (AliasesFromMnemonic.empty())
+    return;
+    
   // Process each alias a "from" mnemonic at a time, building the code executed
   // by the string remapper.
   std::vector<StringMatcher::StringPair> Cases;
@@ -2370,8 +2355,39 @@ static bool emitMnemonicAliases(raw_ostream &OS, const AsmMatcherInfo &Info) {
 
     Cases.push_back(std::make_pair(I->first, MatchCode));
   }
+  StringMatcher("Mnemonic", Cases, OS).Emit(Indent);
+}
+
+/// emitMnemonicAliases - If the target has any MnemonicAlias<> definitions,
+/// emit a function for them and return true, otherwise return false.
+static bool emitMnemonicAliases(raw_ostream &OS, const AsmMatcherInfo &Info,
+                                CodeGenTarget &Target) {
+  // Ignore aliases when match-prefix is set.
+  if (!MatchPrefix.empty())
+    return false;
+
+  std::vector<Record*> Aliases =
+    Info.getRecords().getAllDerivedDefinitions("MnemonicAlias");
+  if (Aliases.empty()) return false;
+
+  OS << "static void applyMnemonicAliases(StringRef &Mnemonic, "
+    "unsigned Features, unsigned VariantID) {\n";
+  OS << "  switch (VariantID) {\n";
+  unsigned VariantCount = Target.getAsmParserVariantCount();
+  for (unsigned VC = 0; VC != VariantCount; ++VC) {
+    Record *AsmVariant = Target.getAsmParserVariant(VC);
+    int AsmParserVariantNo = AsmVariant->getValueAsInt("Variant");
+    std::string AsmParserVariantName = AsmVariant->getValueAsString("Name");
+    OS << "    case " << AsmParserVariantNo << ":\n";
+    emitMnemonicAliasVariant(OS, Info, Aliases, /*Indent=*/2,
+                             AsmParserVariantName);
+    OS << "    break;\n";
+  }
+  OS << "  }\n";
+
+  // Emit aliases that apply to all variants.
+  emitMnemonicAliasVariant(OS, Info, Aliases);
 
-  StringMatcher("Mnemonic", Cases, OS).Emit();
   OS << "}\n\n";
 
   return true;
@@ -2674,7 +2690,7 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
   OS << "#undef GET_MATCHER_IMPLEMENTATION\n\n";
 
   // Generate the function that remaps for mnemonic aliases.
-  bool HasMnemonicAliases = emitMnemonicAliases(OS, Info);
+  bool HasMnemonicAliases = emitMnemonicAliases(OS, Info, Target);
 
   // Generate the convertToMCInst function to convert operands into an MCInst.
   // Also, generate the convertToMapAndConstraints function for MS-style inline
@@ -2832,9 +2848,7 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
 
   if (HasMnemonicAliases) {
     OS << "  // Process all MnemonicAliases to remap the mnemonic.\n";
-    OS << "  // FIXME : Add an entry in AsmParserVariant to check this.\n";
-    OS << "  if (!VariantID)\n";
-    OS << "    applyMnemonicAliases(Mnemonic, AvailableFeatures);\n\n";
+    OS << "  applyMnemonicAliases(Mnemonic, AvailableFeatures, VariantID);\n\n";
   }
 
   // Emit code to compute the class list for this operand vector.
diff --git a/utils/TableGen/CodeGenSchedule.cpp b/utils/TableGen/CodeGenSchedule.cpp
index c02f084..112ff65 100644
--- a/utils/TableGen/CodeGenSchedule.cpp
+++ b/utils/TableGen/CodeGenSchedule.cpp
@@ -1367,6 +1367,56 @@ void CodeGenSchedModels::inferFromRW(const IdxVec &OperWrites,
   inferFromTransitions(LastTransitions, FromClassIdx, *this);
 }
 
+// Return true if some ProcResGroup in PM.ProcResourceDefs lists every record
+// of SubUnits in its "Resources" field; otherwise return false.
+bool CodeGenSchedModels::hasSuperGroup(RecVec &SubUnits, CodeGenProcModel &PM) {
+  for (unsigned i = 0, e = PM.ProcResourceDefs.size(); i < e; ++i) {
+    if (!PM.ProcResourceDefs[i]->isSubClassOf("ProcResGroup"))
+      continue;
+    RecVec SuperUnits =
+      PM.ProcResourceDefs[i]->getValueAsListOfDefs("Resources");
+    RecIter RI = SubUnits.begin(), RE = SubUnits.end();
+    for ( ; RI != RE; ++RI) {
+      if (std::find(SuperUnits.begin(), SuperUnits.end(), *RI)
+          == SuperUnits.end()) {
+        break;
+      }
+    }
+    if (RI == RE)
+      return true;
+  }
+  return false;
+}
+
+// Fatal error if two overlapping ProcResGroups of PM lack a common supergroup.
+void CodeGenSchedModels::verifyProcResourceGroups(CodeGenProcModel &PM) {
+  for (unsigned i = 0, e = PM.ProcResourceDefs.size(); i < e; ++i) {
+    if (!PM.ProcResourceDefs[i]->isSubClassOf("ProcResGroup"))
+      continue;
+    RecVec CheckUnits =
+      PM.ProcResourceDefs[i]->getValueAsListOfDefs("Resources");
+    for (unsigned j = i+1; j < e; ++j) {
+      if (!PM.ProcResourceDefs[j]->isSubClassOf("ProcResGroup"))
+        continue;
+      RecVec OtherUnits =
+        PM.ProcResourceDefs[j]->getValueAsListOfDefs("Resources");
+      if (std::find_first_of(CheckUnits.begin(), CheckUnits.end(),
+                             OtherUnits.begin(), OtherUnits.end())
+          != CheckUnits.end()) {
+        // The groups share a unit: look for one group holding their union.
+        OtherUnits.insert(OtherUnits.end(), CheckUnits.begin(),
+                          CheckUnits.end());
+        if (!hasSuperGroup(OtherUnits, PM)) {
+          PrintFatalError((PM.ProcResourceDefs[i])->getLoc(),
+                          "proc resource group overlaps with "
+                          + PM.ProcResourceDefs[j]->getName()
+                          + " but no supergroup contains both.");
+        }
+      }
+    }
+  }
+}
+
 // Collect and sort WriteRes, ReadAdvance, and ProcResources.
 void CodeGenSchedModels::collectProcResources() {
   // Add any subtarget-specific SchedReadWrites that are directly associated
@@ -1437,6 +1487,7 @@ void CodeGenSchedModels::collectProcResources() {
         dbgs() << (*RI)->getName() << " ";
       }
       dbgs() << '\n');
+    verifyProcResourceGroups(PM);
   }
 }
 
diff --git a/utils/TableGen/CodeGenSchedule.h b/utils/TableGen/CodeGenSchedule.h
index e5b9118..2e0a149 100644
--- a/utils/TableGen/CodeGenSchedule.h
+++ b/utils/TableGen/CodeGenSchedule.h
@@ -380,6 +380,9 @@ private:
   void inferFromItinClass(Record *ItinClassDef, unsigned FromClassIdx);
   void inferFromInstRWs(unsigned SCIdx);
 
+  bool hasSuperGroup(RecVec &SubUnits, CodeGenProcModel &PM);
+  void verifyProcResourceGroups(CodeGenProcModel &PM);
+
   void collectProcResources();
 
   void collectItinProcResources(Record *ItinClassDef);
diff --git a/utils/TableGen/SubtargetEmitter.cpp b/utils/TableGen/SubtargetEmitter.cpp
index 98892e1..4918b1b 100644
--- a/utils/TableGen/SubtargetEmitter.cpp
+++ b/utils/TableGen/SubtargetEmitter.cpp
@@ -782,41 +782,46 @@ Record *SubtargetEmitter::FindReadAdvance(const CodeGenSchedRW &SchedRead,
 }
 
 // Expand an explicit list of processor resources into a full list of implied
-// resource groups that cover them.
-//
-// FIXME: Effectively consider a super-resource a group that include all of its
-// subresources to allow mixing and matching super-resources and groups.
-//
-// FIXME: Warn if two overlapping groups don't have a common supergroup.
+// resource groups and super resources that cover them.
 void SubtargetEmitter::ExpandProcResources(RecVec &PRVec,
                                            std::vector<int64_t> &Cycles,
-                                           const CodeGenProcModel &ProcModel) {
+                                           const CodeGenProcModel &PM) {
   // Default to 1 resource cycle.
   Cycles.resize(PRVec.size(), 1);
   for (unsigned i = 0, e = PRVec.size(); i != e; ++i) {
+    Record *PRDef = PRVec[i];
     RecVec SubResources;
-    if (PRVec[i]->isSubClassOf("ProcResGroup")) {
-      SubResources = PRVec[i]->getValueAsListOfDefs("Resources");
-      std::sort(SubResources.begin(), SubResources.end(), LessRecord());
-    }
+    if (PRDef->isSubClassOf("ProcResGroup"))
+      SubResources = PRDef->getValueAsListOfDefs("Resources");
     else {
-      SubResources.push_back(PRVec[i]);
+      SubResources.push_back(PRDef);
+      PRDef = SchedModels.findProcResUnits(PRVec[i], PM);
+      for (Record *SubDef = PRDef;
+           SubDef->getValueInit("Super")->isComplete();) {
+        if (SubDef->isSubClassOf("ProcResGroup")) {
+          // Disallow this for simplicity.
+          PrintFatalError(SubDef->getLoc(), "Processor resource group "
+                          " cannot be a super resources.");
+        }
+        Record *SuperDef =
+          SchedModels.findProcResUnits(SubDef->getValueAsDef("Super"), PM);
+        PRVec.push_back(SuperDef);
+        Cycles.push_back(Cycles[i]);
+        SubDef = SuperDef;
+      }
     }
-    for (RecIter PRI = ProcModel.ProcResourceDefs.begin(),
-           PRE = ProcModel.ProcResourceDefs.end();
+    for (RecIter PRI = PM.ProcResourceDefs.begin(),
+           PRE = PM.ProcResourceDefs.end();
          PRI != PRE; ++PRI) {
-      if (*PRI == PRVec[i] || !(*PRI)->isSubClassOf("ProcResGroup"))
+      if (*PRI == PRDef || !(*PRI)->isSubClassOf("ProcResGroup"))
         continue;
       RecVec SuperResources = (*PRI)->getValueAsListOfDefs("Resources");
-      std::sort(SuperResources.begin(), SuperResources.end(), LessRecord());
       RecIter SubI = SubResources.begin(), SubE = SubResources.end();
-      RecIter SuperI = SuperResources.begin(), SuperE = SuperResources.end();
-      for ( ; SubI != SubE && SuperI != SuperE; ++SuperI) {
-        if (*SubI < *SuperI)
+      for( ; SubI != SubE; ++SubI) {
+        if (std::find(SuperResources.begin(), SuperResources.end(), *SubI)
+            == SuperResources.end()) {
           break;
-        else if (*SuperI < *SubI)
-          continue;
-        ++SubI;
+        }
       }
       if (SubI == SubE) {
         PRVec.push_back(*PRI);
diff --git a/utils/TableGen/X86RecognizableInstr.cpp b/utils/TableGen/X86RecognizableInstr.cpp
index 61b9813..46f2052 100644
--- a/utils/TableGen/X86RecognizableInstr.cpp
+++ b/utils/TableGen/X86RecognizableInstr.cpp
@@ -29,23 +29,25 @@ using namespace llvm;
   MAP(C4, 36)           \
   MAP(C8, 37)           \
   MAP(C9, 38)           \
-  MAP(E8, 39)           \
-  MAP(F0, 40)           \
-  MAP(F8, 41)           \
-  MAP(F9, 42)           \
-  MAP(D0, 45)           \
-  MAP(D1, 46)           \
-  MAP(D4, 47)           \
-  MAP(D5, 48)           \
-  MAP(D6, 49)           \
-  MAP(D8, 50)           \
-  MAP(D9, 51)           \
-  MAP(DA, 52)           \
-  MAP(DB, 53)           \
-  MAP(DC, 54)           \
-  MAP(DD, 55)           \
-  MAP(DE, 56)           \
-  MAP(DF, 57)
+  MAP(CA, 39)           \
+  MAP(CB, 40)           \
+  MAP(E8, 41)           \
+  MAP(F0, 42)           \
+  MAP(F8, 45)           \
+  MAP(F9, 46)           \
+  MAP(D0, 47)           \
+  MAP(D1, 48)           \
+  MAP(D4, 49)           \
+  MAP(D5, 50)           \
+  MAP(D6, 51)           \
+  MAP(D8, 52)           \
+  MAP(D9, 53)           \
+  MAP(DA, 54)           \
+  MAP(DB, 55)           \
+  MAP(DC, 56)           \
+  MAP(DD, 57)           \
+  MAP(DE, 58)           \
+  MAP(DF, 59)
 
 // A clone of X86 since we can't depend on something that is generated.
 namespace X86Local {
diff --git a/utils/git-svn/git-svnrevert b/utils/git-svn/git-svnrevert
new file mode 100755
index 0000000..06a9c44
--- /dev/null
+++ b/utils/git-svn/git-svnrevert
@@ -0,0 +1,52 @@
+#!/bin/bash
+# Revert one commit in a git-svn checkout: stage the revert, then open an editor
+# on a 'Revert "<subject>"' message that cites the commit's svn revision.
+if [ $# -ne 1 ]; then
+    echo "Invalid arguments!"
+    echo "$0 <commit to revert>"
+    exit 1
+fi
+
+if [ -n "$(git status -uno -s --porcelain)" ]; then
+    echo "You have unstashed changes. Please stash and then revert."
+    git status -uno
+    exit 1
+fi
+
+COMMIT=$1
+# Treat empty output as failure too; it would leave a bare "r" in the message.
+SVN_REVISION=$(git svn find-rev "$COMMIT")
+if [ $? -ne 0 ] || [ -z "$SVN_REVISION" ]; then
+    echo "Error! Could not find an svn revision for commit $COMMIT!"
+    exit 1
+fi
+
+# Grab the one line message for our revert commit message.
+ONE_LINE_MSG=$(git log -1 --format=%s "$COMMIT")
+
+# Revert the commit.
+git revert --no-commit "$COMMIT" 2>/dev/null
+if [ $? -ne 0 ]; then
+    echo "Error! Failed to revert commit $COMMIT. Resetting to head."
+    git reset --hard HEAD
+    exit 1
+fi
+
+# Create a template in our .git directory.
+TEMPLATE="$(git rev-parse --git-dir)/git-svn-revert-template"
+cat > "$TEMPLATE" <<EOF
+Revert "$ONE_LINE_MSG"
+
+This reverts commit r$SVN_REVISION.
+EOF
+
+# Begin the commit but give our user an opportunity to edit it.
+git commit --file="$TEMPLATE" --edit
+if [ $? -ne 0 ]; then
+    echo "Error! Failed to commit reverting commit for commit $COMMIT. Reverting to head."
+    git reset --hard HEAD
+    rm -f "$TEMPLATE"
+    exit 1
+fi
+
+rm -f "$TEMPLATE"
diff --git a/utils/git-svn/git-svnup b/utils/git-svn/git-svnup
new file mode 100755
index 0000000..3321f6b
--- /dev/null
+++ b/utils/git-svn/git-svnup
@@ -0,0 +1,15 @@
+#!/bin/bash
+# From a clean tree: git fetch, run a local-only "git svn rebase -l" on master,
+# then switch back to whatever was checked out when the script started.
+if [ -n "$(git status -uno -s --porcelain)" ]; then
+    echo "You have unstashed changes. Can not update repository..."
+    git status -uno
+    exit 1
+fi
+
+git fetch
+OLD_BRANCH=$(git rev-parse --abbrev-ref HEAD); [ "$OLD_BRANCH" = "HEAD" ] && OLD_BRANCH=$(git rev-parse HEAD)
+git checkout master 2> /dev/null || { echo "Error! Could not check out master."; exit 1; }
+git svn rebase -l
+git checkout "$OLD_BRANCH" 2> /dev/null
+exit 0
diff --git a/utils/lit/lit/discovery.py b/utils/lit/lit/discovery.py
index c869a67..64a9510 100644
--- a/utils/lit/lit/discovery.py
+++ b/utils/lit/lit/discovery.py
@@ -137,7 +137,7 @@ def getTestsInSuite(ts, path_in_suite, litConfig,
     # Search subdirectories.
     for filename in os.listdir(source_path):
         # FIXME: This doesn't belong here?
-        if filename in ('Output', '.svn') or filename in lc.excludes:
+        if filename in ('Output', '.svn', '.git') or filename in lc.excludes:
             continue
 
         # Ignore non-directories.
@@ -147,20 +147,31 @@ def getTestsInSuite(ts, path_in_suite, litConfig,
 
         # Check for nested test suites, first in the execpath in case there is a
         # site configuration and then in the source path.
-        file_execpath = ts.getExecPath(path_in_suite + (filename,))
+        subpath = path_in_suite + (filename,)
+        file_execpath = ts.getExecPath(subpath)
         if dirContainsTestSuite(file_execpath, litConfig):
-            sub_ts, subiter = getTests(file_execpath, litConfig,
-                                       testSuiteCache, localConfigCache)
+            sub_ts, subpath_in_suite = getTestSuite(file_execpath, litConfig,
+                                                    testSuiteCache)
         elif dirContainsTestSuite(file_sourcepath, litConfig):
-            sub_ts, subiter = getTests(file_sourcepath, litConfig,
-                                       testSuiteCache, localConfigCache)
+            sub_ts, subpath_in_suite = getTestSuite(file_sourcepath, litConfig,
+                                                    testSuiteCache)
         else:
-            # Otherwise, continue loading from inside this test suite.
-            subiter = getTestsInSuite(ts, path_in_suite + (filename,),
-                                      litConfig, testSuiteCache,
-                                      localConfigCache)
             sub_ts = None
 
+        # If this directory recursively maps back to the current test suite,
+        # disregard it (this can happen if the exec root is located inside the
+        # current test suite, for example).
+        if sub_ts is ts:
+            continue
+
+        # Otherwise, load from the nested test suite, if present.
+        if sub_ts is not None:
+            subiter = getTestsInSuite(sub_ts, subpath_in_suite, litConfig,
+                                      testSuiteCache, localConfigCache)
+        else:
+            subiter = getTestsInSuite(ts, subpath, litConfig, testSuiteCache,
+                                      localConfigCache)
+
         N = 0
         for res in subiter:
             N += 1
diff --git a/utils/lit/lit/main.py b/utils/lit/lit/main.py
index da961ee..de97a8e 100755
--- a/utils/lit/lit/main.py
+++ b/utils/lit/lit/main.py
@@ -219,7 +219,7 @@ def main(builtinParameters = {}):
     group.add_option("", "--shuffle", dest="shuffle",
                      help="Run tests in random order",
                      action="store_true", default=False)
-    group.add_option("", "--filter", dest="filter", metavar="EXPRESSION",
+    group.add_option("", "--filter", dest="filter", metavar="REGEX",
                      help=("Only run tests with paths matching the given "
                            "regular expression"),
                      action="store", default=None)
diff --git a/utils/lit/tests/Inputs/discovery/lit.cfg b/utils/lit/tests/Inputs/discovery/lit.cfg
index 3513bff..4049ab1 100644
--- a/utils/lit/tests/Inputs/discovery/lit.cfg
+++ b/utils/lit/tests/Inputs/discovery/lit.cfg
@@ -1,5 +1,10 @@
 config.name = 'top-level-suite'
 config.suffixes = ['.txt']
 config.test_format = lit.formats.ShTest()
-config.test_source_root = None
-config.test_exec_root = None
+
+# We intentionally don't set the source root or exec root directories here,
+# because this suite gets reused for testing the exec root behavior (in
+# ../exec-discovery).
+#
+#config.test_source_root = None
+#config.test_exec_root = None
diff --git a/utils/lit/tests/Inputs/exec-discovery-in-tree/lit.cfg b/utils/lit/tests/Inputs/exec-discovery-in-tree/lit.cfg
new file mode 100644
index 0000000..342b2fd
--- /dev/null
+++ b/utils/lit/tests/Inputs/exec-discovery-in-tree/lit.cfg
@@ -0,0 +1,7 @@
+# Verify that the site configuration was loaded.
+if config.test_source_root is None or config.test_exec_root is None:
+    lit.fatal("No site specific configuration")
+
+config.name = 'exec-discovery-in-tree-suite'
+config.suffixes = ['.txt']
+config.test_format = lit.formats.ShTest()
diff --git a/utils/lit/tests/Inputs/exec-discovery-in-tree/obj/lit.site.cfg b/utils/lit/tests/Inputs/exec-discovery-in-tree/obj/lit.site.cfg
new file mode 100644
index 0000000..de9a3d0c
--- /dev/null
+++ b/utils/lit/tests/Inputs/exec-discovery-in-tree/obj/lit.site.cfg
@@ -0,0 +1,4 @@
+import os
+config.test_exec_root = os.path.dirname(__file__)
+config.test_source_root = os.path.dirname(config.test_exec_root)
+lit.load_config(config, os.path.join(config.test_source_root, "lit.cfg"))
+\ No newline at end of file
diff --git a/utils/lit/tests/Inputs/exec-discovery-in-tree/test-one.txt b/utils/lit/tests/Inputs/exec-discovery-in-tree/test-one.txt
new file mode 100644
index 0000000..b80b60b
--- /dev/null
+++ b/utils/lit/tests/Inputs/exec-discovery-in-tree/test-one.txt
@@ -0,0 +1 @@
+# RUN: true
diff --git a/utils/lit/tests/Inputs/exec-discovery/lit.site.cfg b/utils/lit/tests/Inputs/exec-discovery/lit.site.cfg
new file mode 100644
index 0000000..796569a
--- /dev/null
+++ b/utils/lit/tests/Inputs/exec-discovery/lit.site.cfg
@@ -0,0 +1,5 @@
+# Load the discovery suite, but with a separate exec root.
+import os
+config.test_exec_root = os.path.dirname(__file__)
+config.test_source_root = os.path.join(os.path.dirname(config.test_exec_root), "discovery")
+lit.load_config(config, os.path.join(config.test_source_root, "lit.cfg"))
diff --git a/utils/lit/tests/Inputs/progress-bar/lit.cfg b/utils/lit/tests/Inputs/progress-bar/lit.cfg
new file mode 100644
index 0000000..4878b65
--- /dev/null
+++ b/utils/lit/tests/Inputs/progress-bar/lit.cfg
@@ -0,0 +1,5 @@
+config.name = 'shtest-shell'
+config.suffixes = ['.txt']
+config.test_format = lit.formats.ShTest()
+config.test_source_root = None
+config.test_exec_root = None
diff --git a/utils/lit/tests/Inputs/progress-bar/test-1.txt b/utils/lit/tests/Inputs/progress-bar/test-1.txt
new file mode 100644
index 0000000..49932c3
--- /dev/null
+++ b/utils/lit/tests/Inputs/progress-bar/test-1.txt
@@ -0,0 +1 @@
+# RUN: false
diff --git a/utils/lit/tests/Inputs/progress-bar/test-2.txt b/utils/lit/tests/Inputs/progress-bar/test-2.txt
new file mode 100644
index 0000000..49932c3
--- /dev/null
+++ b/utils/lit/tests/Inputs/progress-bar/test-2.txt
@@ -0,0 +1 @@
+# RUN: false
diff --git a/utils/lit/tests/Inputs/progress-bar/test-3.txt b/utils/lit/tests/Inputs/progress-bar/test-3.txt
new file mode 100644
index 0000000..49932c3
--- /dev/null
+++ b/utils/lit/tests/Inputs/progress-bar/test-3.txt
@@ -0,0 +1 @@
+# RUN: false
diff --git a/utils/lit/tests/Inputs/progress-bar/test-4.txt b/utils/lit/tests/Inputs/progress-bar/test-4.txt
new file mode 100644
index 0000000..49932c3
--- /dev/null
+++ b/utils/lit/tests/Inputs/progress-bar/test-4.txt
@@ -0,0 +1 @@
+# RUN: false
diff --git a/utils/lit/tests/discovery.py b/utils/lit/tests/discovery.py
index 54b99d3..56d9dd0 100644
--- a/utils/lit/tests/discovery.py
+++ b/utils/lit/tests/discovery.py
@@ -5,17 +5,17 @@
 # RUN: FileCheck --check-prefix=CHECK-BASIC-OUT < %t.out %s
 # RUN: FileCheck --check-prefix=CHECK-BASIC-ERR < %t.err %s
 #
-# CHECK-BASIC-ERR: loading suite config '{{.*}}/tests/Inputs/discovery/lit.cfg'
-# CHECK-BASIC-ERR: loading local config '{{.*}}/tests/Inputs/discovery/subdir/lit.local.cfg'
-# CHECK-BASIC-ERR: loading suite config '{{.*}}/tests/Inputs/discovery/subsuite/lit.cfg'
+# CHECK-BASIC-ERR: loading suite config '{{.*}}/discovery/lit.cfg'
+# CHECK-BASIC-ERR: loading local config '{{.*}}/discovery/subdir/lit.local.cfg'
+# CHECK-BASIC-ERR: loading suite config '{{.*}}/discovery/subsuite/lit.cfg'
 #
 # CHECK-BASIC-OUT: -- Test Suites --
 # CHECK-BASIC-OUT:   sub-suite - 2 tests
-# CHECK-BASIC-OUT:     Source Root:
-# CHECK-BASIC-OUT:     Exec Root  :
+# CHECK-BASIC-OUT:     Source Root: {{.*/discovery/subsuite$}}
+# CHECK-BASIC-OUT:     Exec Root  : {{.*/discovery/subsuite$}}
 # CHECK-BASIC-OUT:   top-level-suite - 3 tests
-# CHECK-BASIC-OUT:     Source Root:
-# CHECK-BASIC-OUT:     Exec Root  :
+# CHECK-BASIC-OUT:     Source Root: {{.*/discovery$}}
+# CHECK-BASIC-OUT:     Exec Root  : {{.*/discovery$}}
 #
 # CHECK-BASIC-OUT: -- Testing: 5 tests, 1 threads --
 # CHECK-BASIC-OUT: PASS: sub-suite :: test-one
@@ -23,3 +23,74 @@
 # CHECK-BASIC-OUT: PASS: top-level-suite :: subdir/test-three
 # CHECK-BASIC-OUT: PASS: top-level-suite :: test-one
 # CHECK-BASIC-OUT: PASS: top-level-suite :: test-two
+
+
+# Check discovery when exact test names are given.
+#
+# RUN: %{lit} \
+# RUN:     %{inputs}/discovery/subdir/test-three.py \
+# RUN:     %{inputs}/discovery/subsuite/test-one.txt \
+# RUN:   -j 1 --no-execute --show-suites -v > %t.out
+# RUN: FileCheck --check-prefix=CHECK-EXACT-TEST < %t.out %s
+#
+# CHECK-EXACT-TEST: -- Testing: 2 tests, 1 threads --
+# CHECK-EXACT-TEST: PASS: sub-suite :: test-one
+# CHECK-EXACT-TEST: PASS: top-level-suite :: subdir/test-three
+
+
+# Check discovery when using an exec path.
+#
+# RUN: %{lit} %{inputs}/exec-discovery \
+# RUN:   -j 1 --debug --no-execute --show-suites -v > %t.out 2> %t.err
+# RUN: FileCheck --check-prefix=CHECK-ASEXEC-OUT < %t.out %s
+# RUN: FileCheck --check-prefix=CHECK-ASEXEC-ERR < %t.err %s
+#
+# CHECK-ASEXEC-ERR: loading suite config '{{.*}}/exec-discovery/lit.site.cfg'
+# CHECK-ASEXEC-ERR: load_config from '{{.*}}/discovery/lit.cfg'
+# CHECK-ASEXEC-ERR: loaded config '{{.*}}/discovery/lit.cfg'
+# CHECK-ASEXEC-ERR: loaded config '{{.*}}/exec-discovery/lit.site.cfg'
+# CHECK-ASEXEC-ERR: loading local config '{{.*}}/discovery/subdir/lit.local.cfg'
+# CHECK-ASEXEC-ERR: loading suite config '{{.*}}/discovery/subsuite/lit.cfg'
+#
+# CHECK-ASEXEC-OUT: -- Test Suites --
+# CHECK-ASEXEC-OUT:   sub-suite - 2 tests
+# CHECK-ASEXEC-OUT:     Source Root: {{.*/discovery/subsuite$}}
+# CHECK-ASEXEC-OUT:     Exec Root  : {{.*/discovery/subsuite$}}
+# CHECK-ASEXEC-OUT:   top-level-suite - 3 tests
+# CHECK-ASEXEC-OUT:     Source Root: {{.*/discovery$}}
+# CHECK-ASEXEC-OUT:     Exec Root  : {{.*/exec-discovery$}}
+#
+# CHECK-ASEXEC-OUT: -- Testing: 5 tests, 1 threads --
+# CHECK-ASEXEC-OUT: PASS: sub-suite :: test-one
+# CHECK-ASEXEC-OUT: PASS: sub-suite :: test-two
+# CHECK-ASEXEC-OUT: PASS: top-level-suite :: subdir/test-three
+# CHECK-ASEXEC-OUT: PASS: top-level-suite :: test-one
+# CHECK-ASEXEC-OUT: PASS: top-level-suite :: test-two
+
+
+# Check discovery when exact test names are given.
+#
+# FIXME: Note that using a path into a subsuite doesn't work correctly here.
+#
+# RUN: %{lit} \
+# RUN:     %{inputs}/exec-discovery/subdir/test-three.py \
+# RUN:   -j 1 --no-execute --show-suites -v > %t.out
+# RUN: FileCheck --check-prefix=CHECK-ASEXEC-EXACT-TEST < %t.out %s
+#
+# CHECK-ASEXEC-EXACT-TEST: -- Testing: 1 tests, 1 threads --
+# CHECK-ASEXEC-EXACT-TEST: PASS: top-level-suite :: subdir/test-three
+
+
+# Check that we don't recurse infinitely when loading a site-specific test
+# suite located inside the test source root.
+#
+# RUN: %{lit} \
+# RUN:     %{inputs}/exec-discovery-in-tree/obj/ \
+# RUN:   -j 1 --no-execute --show-suites -v > %t.out
+# RUN: FileCheck --check-prefix=CHECK-ASEXEC-INTREE < %t.out %s
+#
+#      CHECK-ASEXEC-INTREE:   exec-discovery-in-tree-suite - 1 tests
+# CHECK-ASEXEC-INTREE-NEXT:     Source Root: {{.*/exec-discovery-in-tree$}}
+# CHECK-ASEXEC-INTREE-NEXT:     Exec Root  : {{.*/exec-discovery-in-tree/obj$}}
+# CHECK-ASEXEC-INTREE-NEXT: -- Testing: 1 tests, 1 threads --
+# CHECK-ASEXEC-INTREE-NEXT: PASS: exec-discovery-in-tree-suite :: test-one
diff --git a/utils/lit/tests/progress-bar.py b/utils/lit/tests/progress-bar.py
new file mode 100644
index 0000000..d046748
--- /dev/null
+++ b/utils/lit/tests/progress-bar.py
@@ -0,0 +1,13 @@
+# Check the simple progress bar.
+#
+# RUN: not %{lit} -j 1 -s %{inputs}/progress-bar > %t.out
+# RUN: FileCheck < %t.out %s
+#
+# CHECK: Testing: 0 .. 10.. 20
+# CHECK: FAIL: shtest-shell :: test-1.txt (1 of 4)
+# CHECK: Testing: 0 .. 10.. 20.. 30.. 40.. 
+# CHECK: FAIL: shtest-shell :: test-2.txt (2 of 4)
+# CHECK: Testing: 0 .. 10.. 20.. 30.. 40.. 50.. 60.. 70
+# CHECK: FAIL: shtest-shell :: test-3.txt (3 of 4)
+# CHECK: Testing: 0 .. 10.. 20.. 30.. 40.. 50.. 60.. 70.. 80.. 90.. 
+# CHECK: FAIL: shtest-shell :: test-4.txt (4 of 4)
diff --git a/utils/release/tag.sh b/utils/release/tag.sh
index 399d5c5..c327174 100755
--- a/utils/release/tag.sh
+++ b/utils/release/tag.sh
@@ -32,7 +32,7 @@ function usage() {
 
 function tag_version() {
     set -x
-    for proj in llvm cfe dragonegg test-suite compiler-rt ; do
+    for proj in llvm cfe dragonegg test-suite compiler-rt libcxx clang-tools-extra ; do
         if svn ls $base_url/$proj/branches/release_$release > /dev/null 2>&1 ; then
             if [ $rebranch = "no" ]; then
                 continue
@@ -49,7 +49,7 @@ function tag_version() {
 
 function tag_release_candidate() {
     set -x
-    for proj in llvm cfe dragonegg test-suite compiler-rt ; do
+    for proj in llvm cfe dragonegg test-suite compiler-rt libcxx clang-tools-extra ; do
         if ! svn ls $base_url/$proj/tags/RELEASE_$release > /dev/null 2>&1 ; then
             svn mkdir -m "Creating release directory for release_$release." $base_url/$proj/tags/RELEASE_$release
         fi
diff --git a/utils/release/test-release.sh b/utils/release/test-release.sh
index a62e829..86200fd 100755
--- a/utils/release/test-release.sh
+++ b/utils/release/test-release.sh
@@ -35,7 +35,7 @@ do_objc="yes"
 do_64bit="yes"
 do_debug="no"
 do_asserts="no"
-do_compare="yes"
+do_compare="no"
 BuildDir="`pwd`"
 
 function usage() {
diff --git a/utils/unittest/UnitTestMain/TestMain.cpp b/utils/unittest/UnitTestMain/TestMain.cpp
index ce32b73..5387512 100644
--- a/utils/unittest/UnitTestMain/TestMain.cpp
+++ b/utils/unittest/UnitTestMain/TestMain.cpp
@@ -20,11 +20,16 @@
 # endif
 #endif
 
+const char *TestMainArgv0;
+
 int main(int argc, char **argv) {
   llvm::sys::PrintStackTraceOnErrorSignal();
   testing::InitGoogleTest(&argc, argv);
   llvm::cl::ParseCommandLineOptions(argc, argv);
 
+  // Make it easy for a test to re-execute itself by saving argv[0].
+  TestMainArgv0 = argv[0];
+
 # if defined(LLVM_ON_WIN32)
   // Disable all of the possible ways Windows conspires to make automated
   // testing impossible.